Mirror of https://github.com/optim-enterprises-bv/kubernetes.git (synced 2025-11-03 19:58:17 +00:00)

	Merge pull request #35612 from gmarek/scheduler2
Automatic merge from submit-queue

split scheduler priorities into separate files

In the current state it's really hard to find the thing one is looking for, if one doesn't already know where to look.

cc @davidopp
plugin/pkg/scheduler/algorithm/priorities/BUILD

@@ -13,16 +13,23 @@ load(
 go_library(
     name = "go_default_library",
     srcs = [
+        "balanced_resource_allocation.go",
+        "image_locality.go",
         "interpod_affinity.go",
+        "least_requested.go",
         "metadata.go",
+        "most_requested.go",
         "node_affinity.go",
-        "priorities.go",
+        "node_label.go",
+        "node_prefer_avoid_pods.go",
         "selector_spreading.go",
         "taint_toleration.go",
+        "test_util.go",
     ],
     tags = ["automanaged"],
     deps = [
         "//pkg/api:go_default_library",
+        "//pkg/api/resource:go_default_library",
         "//pkg/api/unversioned:go_default_library",
         "//pkg/labels:go_default_library",
         "//pkg/util/node:go_default_library",
@@ -39,8 +46,14 @@ go_library(
 go_test(
     name = "go_default_test",
     srcs = [
+        "balanced_resource_allocation_test.go",
+        "image_locality_test.go",
         "interpod_affinity_test.go",
+        "least_requested_test.go",
+        "most_requested_test.go",
         "node_affinity_test.go",
+        "node_label_test.go",
+        "node_prefer_avoid_pods_test.go",
         "priorities_test.go",
         "selector_spreading_test.go",
         "taint_toleration_test.go",
plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation.go (new file)

@@ -0,0 +1,116 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"fmt"
	"math"

	"k8s.io/kubernetes/pkg/api"
	priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"

	"github.com/golang/glog"
)

// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range.
const (
	mb         int64 = 1024 * 1024
	minImgSize int64 = 23 * mb
	maxImgSize int64 = 1000 * mb
)

// Also used in most/least_requested and metadata.
// TODO: despaghettify it
func getNonZeroRequests(pod *api.Pod) *schedulercache.Resource {
	result := &schedulercache.Resource{}
	for i := range pod.Spec.Containers {
		container := &pod.Spec.Containers[i]
		cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
		result.MilliCPU += cpu
		result.Memory += memory
	}
	return result
}

func calculateBalancedResourceAllocation(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()
	totalResources := *podRequests
	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
	totalResources.Memory += nodeInfo.NonZeroRequest().Memory

	cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU)
	memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory)
	score := int(0)
	if cpuFraction >= 1 || memoryFraction >= 1 {
		// if requested >= capacity, the corresponding host should never be preferred.
		score = 0
	} else {
		// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
		// respectively. Multiplying the absolute value of the difference by 10 scales the value to
		// 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
		// 10 leads to the score which also scales from 0 to 10, with 10 representing well balanced.
		diff := math.Abs(cpuFraction - memoryFraction)
		score = int(10 - diff*10)
	}
	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.V(10).Infof(
			"%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			totalResources.MilliCPU, totalResources.Memory,
			score,
		)
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: score,
	}, nil
}

func fractionOfCapacity(requested, capacity int64) float64 {
	if capacity == 0 {
		return 1
	}
	return float64(requested) / float64(capacity)
}

// BalancedResourceAllocation favors nodes with balanced resource usage rate.
// BalancedResourceAllocation should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
// It calculates the difference between the cpu and memory fraction of capacity, and prioritizes the host based on how
// close the two metrics are to each other.
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
func BalancedResourceAllocationMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	var nonZeroRequest *schedulercache.Resource
	if priorityMeta, ok := meta.(*priorityMetadata); ok {
		nonZeroRequest = priorityMeta.nonZeroRequest
	} else {
		// We couldn't parse metadata - fallback to computing it.
		nonZeroRequest = getNonZeroRequests(pod)
	}
	return calculateBalancedResourceAllocation(pod, nonZeroRequest, nodeInfo)
}
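For reference, the scoring rule above reduces to score = 10 - |cpuFraction - memoryFraction| * 10, clamped to 0 when either fraction reaches capacity. A minimal standalone Go sketch (independent of the scheduler packages, for illustration only) reproducing that arithmetic with the same numbers the test file below uses:

package main

import (
	"fmt"
	"math"
)

// balancedScore mirrors the arithmetic in calculateBalancedResourceAllocation:
// 0 if either resource is at or over capacity, otherwise 10 - |cpuFraction - memoryFraction| * 10.
func balancedScore(cpuFraction, memoryFraction float64) int {
	if cpuFraction >= 1 || memoryFraction >= 1 {
		return 0
	}
	return int(10 - math.Abs(cpuFraction-memoryFraction)*10)
}

func main() {
	// Matches the "differently sized machines" case below: CPU 3000/4000 = 0.75, memory 5000/10000 = 0.5 -> 7.
	fmt.Println(balancedScore(0.75, 0.5)) // 7
	// Perfectly balanced usage scores 10.
	fmt.Println(balancedScore(0.5, 0.5)) // 10
	// Over capacity always scores 0.
	fmt.Println(balancedScore(1.5, 0.2)) // 0
}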
plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation_test.go (new file)

@@ -0,0 +1,263 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"reflect"
	"testing"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/resource"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func TestBalancedResourceAllocation(t *testing.T) {
	labels1 := map[string]string{
		"foo": "bar",
		"baz": "blah",
	}
	labels2 := map[string]string{
		"bar": "foo",
		"baz": "blah",
	}
	machine1Spec := api.PodSpec{
		NodeName: "machine1",
	}
	machine2Spec := api.PodSpec{
		NodeName: "machine2",
	}
	noResources := api.PodSpec{
		Containers: []api.Container{},
	}
	cpuOnly := api.PodSpec{
		NodeName: "machine1",
		Containers: []api.Container{
			{
				Resources: api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu":    resource.MustParse("1000m"),
						"memory": resource.MustParse("0"),
					},
				},
			},
			{
				Resources: api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu":    resource.MustParse("2000m"),
						"memory": resource.MustParse("0"),
					},
				},
			},
		},
	}
	cpuOnly2 := cpuOnly
	cpuOnly2.NodeName = "machine2"
	cpuAndMemory := api.PodSpec{
		NodeName: "machine2",
		Containers: []api.Container{
			{
				Resources: api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu":    resource.MustParse("1000m"),
						"memory": resource.MustParse("2000"),
					},
				},
			},
			{
				Resources: api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu":    resource.MustParse("2000m"),
						"memory": resource.MustParse("3000"),
					},
				},
			},
		},
	}
	tests := []struct {
		pod          *api.Pod
		pods         []*api.Pod
		nodes        []*api.Node
		expectedList schedulerapi.HostPriorityList
		test         string
	}{
		{
			/*
				Node1 scores (remaining resources) on 0-10 scale
				CPU Fraction: 0 / 4000 = 0%
				Memory Fraction: 0 / 10000 = 0%
				Node1 Score: 10 - (0-0)*10 = 10

				Node2 scores (remaining resources) on 0-10 scale
				CPU Fraction: 0 / 4000 = 0%
				Memory Fraction: 0 / 10000 = 0%
				Node2 Score: 10 - (0-0)*10 = 10
			*/
			pod:          &api.Pod{Spec: noResources},
			nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
			test:         "nothing scheduled, nothing requested",
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Fraction: 3000 / 4000 = 75%
				Memory Fraction: 5000 / 10000 = 50%
				Node1 Score: 10 - (0.75-0.5)*10 = 7

				Node2 scores on 0-10 scale
				CPU Fraction: 3000 / 6000 = 50%
				Memory Fraction: 5000 / 10000 = 50%
				Node2 Score: 10 - (0.5-0.5)*10 = 10
			*/
			pod:          &api.Pod{Spec: cpuAndMemory},
			nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 10}},
			test:         "nothing scheduled, resources requested, differently sized machines",
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Fraction: 0 / 4000 = 0%
				Memory Fraction: 0 / 10000 = 0%
				Node1 Score: 10 - (0-0)*10 = 10

				Node2 scores on 0-10 scale
				CPU Fraction: 0 / 4000 = 0%
				Memory Fraction: 0 / 10000 = 0%
				Node2 Score: 10 - (0-0)*10 = 10
			*/
			pod:          &api.Pod{Spec: noResources},
			nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
			test:         "no resources requested, pods scheduled",
			pods: []*api.Pod{
				{Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}},
				{Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
				{Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
				{Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
			},
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Fraction: 6000 / 10000 = 60%
				Memory Fraction: 0 / 20000 = 0%
				Node1 Score: 10 - (0.6-0)*10 = 4

				Node2 scores on 0-10 scale
				CPU Fraction: 6000 / 10000 = 60%
				Memory Fraction: 5000 / 20000 = 25%
				Node2 Score: 10 - (0.6-0.25)*10 = 6
			*/
			pod:          &api.Pod{Spec: noResources},
			nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 6}},
			test:         "no resources requested, pods scheduled with resources",
			pods: []*api.Pod{
				{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}},
				{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}},
				{Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}},
				{Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}},
			},
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Fraction: 6000 / 10000 = 60%
				Memory Fraction: 5000 / 20000 = 25%
				Node1 Score: 10 - (0.6-0.25)*10 = 6

				Node2 scores on 0-10 scale
				CPU Fraction: 6000 / 10000 = 60%
				Memory Fraction: 10000 / 20000 = 50%
				Node2 Score: 10 - (0.6-0.5)*10 = 9
			*/
			pod:          &api.Pod{Spec: cpuAndMemory},
			nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 9}},
			test:         "resources requested, pods scheduled with resources",
			pods: []*api.Pod{
				{Spec: cpuOnly},
				{Spec: cpuAndMemory},
			},
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Fraction: 6000 / 10000 = 60%
				Memory Fraction: 5000 / 20000 = 25%
				Node1 Score: 10 - (0.6-0.25)*10 = 6

				Node2 scores on 0-10 scale
				CPU Fraction: 6000 / 10000 = 60%
				Memory Fraction: 10000 / 50000 = 20%
				Node2 Score: 10 - (0.6-0.2)*10 = 6
			*/
			pod:          &api.Pod{Spec: cpuAndMemory},
			nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 6}},
			test:         "resources requested, pods scheduled with resources, differently sized machines",
			pods: []*api.Pod{
				{Spec: cpuOnly},
				{Spec: cpuAndMemory},
			},
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
				Memory Fraction: 0 / 10000 = 0%
				Node1 Score: 0

				Node2 scores on 0-10 scale
				CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
				Memory Fraction: 5000 / 10000 = 50%
				Node2 Score: 0
			*/
			pod:          &api.Pod{Spec: cpuOnly},
			nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
			test:         "requested resources exceed node capacity",
			pods: []*api.Pod{
				{Spec: cpuOnly},
				{Spec: cpuAndMemory},
			},
		},
		{
			pod:          &api.Pod{Spec: noResources},
			nodes:        []*api.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
			test:         "zero node resources, pods scheduled with resources",
			pods: []*api.Pod{
				{Spec: cpuOnly},
				{Spec: cpuAndMemory},
			},
		},
	}

	for _, test := range tests {
		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
		list, err := priorityFunction(BalancedResourceAllocationMap, nil)(test.pod, nodeNameToInfo, test.nodes)
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
		if !reflect.DeepEqual(test.expectedList, list) {
			t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
		}
	}
}
							
								
								
									
plugin/pkg/scheduler/algorithm/priorities/image_locality.go (new file, 79 lines)

@@ -0,0 +1,79 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

// ImageLocalityPriority is a priority function that favors nodes that already have requested pod container's images.
// It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10
// based on the total size of those images.
// - If none of the images are present, this node will be given the lowest priority.
// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority.
func ImageLocalityPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	var sumSize int64
	for i := range pod.Spec.Containers {
		sumSize += checkContainerImageOnNode(node, &pod.Spec.Containers[i])
	}
	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: calculateScoreFromSize(sumSize),
	}, nil
}

// calculateScoreFromSize calculates the priority of a node. sumSize is sum size of requested images on this node.
// 1. Split image size range into 10 buckets.
// 2. Decide the priority of a given sumSize based on which bucket it belongs to.
func calculateScoreFromSize(sumSize int64) int {
	var score int
	switch {
	case sumSize == 0 || sumSize < minImgSize:
		// score == 0 means none of the images required by this pod are present on this
		// node or the total size of the images present is too small to be taken into further consideration.
		score = 0
	// If existing images' total size is larger than max, just make it highest priority.
	case sumSize >= maxImgSize:
		score = 10
	default:
		score = int((10 * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1)
	}
	// Return which bucket the given size belongs to
	return score
}

// checkContainerImageOnNode checks if a container image is present on a node and returns its size.
func checkContainerImageOnNode(node *api.Node, container *api.Container) int64 {
	for _, image := range node.Status.Images {
		for _, name := range image.Names {
			if container.Image == name {
				// Should return immediately.
				return image.SizeBytes
			}
		}
	}
	return 0
}
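For reference, the bucketing above maps a total image size between minImgSize (23 MB) and maxImgSize (1000 MB) onto scores 1 through 10, with 0 below the minimum and 10 above the maximum. A small standalone Go sketch (for illustration only, reusing the constants from the file above) that reproduces the values the tests below expect:

package main

import "fmt"

// Constants copied from image_locality.go / balanced_resource_allocation.go above.
const (
	mb         int64 = 1024 * 1024
	minImgSize int64 = 23 * mb
	maxImgSize int64 = 1000 * mb
)

// scoreFromSize mirrors calculateScoreFromSize: 0 below the minimum, 10 at or above the
// maximum, otherwise a linear bucket index between 1 and 10 (integer arithmetic).
func scoreFromSize(sumSize int64) int {
	switch {
	case sumSize < minImgSize:
		return 0
	case sumSize >= maxImgSize:
		return 10
	default:
		return int((10*(sumSize-minImgSize))/(maxImgSize-minImgSize)) + 1
	}
}

func main() {
	fmt.Println(scoreFromSize(250 * mb))  // 3  (the gcr.io/250 test case)
	fmt.Println(scoreFromSize(180 * mb))  // 2  (40MB + 140MB already present on the node)
	fmt.Println(scoreFromSize(10 * mb))   // 0  (below minImgSize)
	fmt.Println(scoreFromSize(2000 * mb)) // 10 (above maxImgSize)
}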
							
								
								
									
plugin/pkg/scheduler/algorithm/priorities/image_locality_test.go (new file, 182 lines)

@@ -0,0 +1,182 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"reflect"
	"sort"
	"testing"

	"k8s.io/kubernetes/pkg/api"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func TestImageLocalityPriority(t *testing.T) {
	test_40_250 := api.PodSpec{
		Containers: []api.Container{
			{
				Image: "gcr.io/40",
			},
			{
				Image: "gcr.io/250",
			},
		},
	}

	test_40_140 := api.PodSpec{
		Containers: []api.Container{
			{
				Image: "gcr.io/40",
			},
			{
				Image: "gcr.io/140",
			},
		},
	}

	test_min_max := api.PodSpec{
		Containers: []api.Container{
			{
				Image: "gcr.io/10",
			},
			{
				Image: "gcr.io/2000",
			},
		},
	}

	node_40_140_2000 := api.NodeStatus{
		Images: []api.ContainerImage{
			{
				Names: []string{
					"gcr.io/40",
					"gcr.io/40:v1",
					"gcr.io/40:v1",
				},
				SizeBytes: int64(40 * mb),
			},
			{
				Names: []string{
					"gcr.io/140",
					"gcr.io/140:v1",
				},
				SizeBytes: int64(140 * mb),
			},
			{
				Names: []string{
					"gcr.io/2000",
				},
				SizeBytes: int64(2000 * mb),
			},
		},
	}

	node_250_10 := api.NodeStatus{
		Images: []api.ContainerImage{
			{
				Names: []string{
					"gcr.io/250",
				},
				SizeBytes: int64(250 * mb),
			},
			{
				Names: []string{
					"gcr.io/10",
					"gcr.io/10:v1",
				},
				SizeBytes: int64(10 * mb),
			},
		},
	}

	tests := []struct {
		pod          *api.Pod
		pods         []*api.Pod
		nodes        []*api.Node
		expectedList schedulerapi.HostPriorityList
		test         string
	}{
		{
			// Pod: gcr.io/40 gcr.io/250

			// Node1
			// Image: gcr.io/40 40MB
			// Score: (40M-23M)/97.7M + 1 = 1

			// Node2
			// Image: gcr.io/250 250MB
			// Score: (250M-23M)/97.7M + 1 = 3
			pod:          &api.Pod{Spec: test_40_250},
			nodes:        []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 3}},
			test:         "two images spread on two nodes, prefer the larger image one",
		},
		{
			// Pod: gcr.io/40 gcr.io/140

			// Node1
			// Image: gcr.io/40 40MB, gcr.io/140 140MB
			// Score: (40M+140M-23M)/97.7M + 1 = 2

			// Node2
			// Image: not present
			// Score: 0
			pod:          &api.Pod{Spec: test_40_140},
			nodes:        []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: 0}},
			test:         "two images on one node, prefer this node",
		},
		{
			// Pod: gcr.io/2000 gcr.io/10

			// Node1
			// Image: gcr.io/2000 2000MB
			// Score: 2000 > max score = 10

			// Node2
			// Image: gcr.io/10 10MB
			// Score: 10 < min score = 0
			pod:          &api.Pod{Spec: test_min_max},
			nodes:        []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}},
			test:         "if exceed limit, use limit",
		},
	}

	for _, test := range tests {
		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
		list, err := priorityFunction(ImageLocalityPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}

		sort.Sort(test.expectedList)
		sort.Sort(list)

		if !reflect.DeepEqual(test.expectedList, list) {
			t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
		}
	}
}

func makeImageNode(node string, status api.NodeStatus) *api.Node {
	return &api.Node{
		ObjectMeta: api.ObjectMeta{Name: node},
		Status:     status,
	}
}
							
								
								
									
plugin/pkg/scheduler/algorithm/priorities/least_requested.go (new file, 91 lines)

@@ -0,0 +1,91 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"

	"github.com/golang/glog"
)

// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the minimum of the average of the fraction of requested to capacity.
// Details: (cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity)) / 2
func LeastRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	var nonZeroRequest *schedulercache.Resource
	if priorityMeta, ok := meta.(*priorityMetadata); ok {
		nonZeroRequest = priorityMeta.nonZeroRequest
	} else {
		// We couldn't parse metadata - fallback to computing it.
		nonZeroRequest = getNonZeroRequests(pod)
	}
	return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo)
}

// The unused capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more unused resources the higher the score is.
func calculateUnusedScore(requested int64, capacity int64, node string) int64 {
	if capacity == 0 {
		return 0
	}
	if requested > capacity {
		glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
			requested, capacity, node)
		return 0
	}
	return ((capacity - requested) * 10) / capacity
}

// Calculates host priority based on the amount of unused resources.
// 'node' has information about the resources on the node.
// 'pods' is a list of pods currently scheduled on the node.
func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()
	totalResources := *podRequests
	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
	totalResources.Memory += nodeInfo.NonZeroRequest().Memory

	cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
	memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.V(10).Infof(
			"%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			totalResources.MilliCPU, totalResources.Memory,
			cpuScore, memoryScore,
		)
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: int((cpuScore + memoryScore) / 2),
	}, nil
}
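For reference, calculateUnusedScore above is plain integer arithmetic: ((capacity - requested) * 10) / capacity, clamped to 0 for zero capacity or over-commitment, and the final node score is the integer mean of the CPU and memory scores. A small standalone Go sketch (for illustration only) reproducing the numbers used in the test file that follows:

package main

import "fmt"

// unusedScore mirrors calculateUnusedScore: the unused fraction of capacity scaled to 0-10
// with integer division; zero capacity or requested > capacity scores 0.
func unusedScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return ((capacity - requested) * 10) / capacity
}

func main() {
	// "Differently sized machines" case below. machine1: CPU (4000-3000)*10/4000 truncates to 2
	// (the 2.5 in the test comment), memory (10000-5000)*10/10000 = 5, final (2+5)/2 = 3.
	cpu, memory := unusedScore(3000, 4000), unusedScore(5000, 10000)
	fmt.Println(cpu, memory, (cpu+memory)/2) // 2 5 3
	// machine2: CPU (6000-3000)*10/6000 = 5, memory 5, final (5+5)/2 = 5.
	cpu2 := unusedScore(3000, 6000)
	fmt.Println(cpu2, memory, (cpu2+memory)/2) // 5 5 5
}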
@@ -0,0 +1,263 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					Copyright 2016 The Kubernetes Authors.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package priorities
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import (
 | 
				
			||||||
 | 
						"reflect"
 | 
				
			||||||
 | 
						"testing"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						"k8s.io/kubernetes/pkg/api"
 | 
				
			||||||
 | 
						"k8s.io/kubernetes/pkg/api/resource"
 | 
				
			||||||
 | 
						schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
 | 
				
			||||||
 | 
						"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func TestLeastRequested(t *testing.T) {
 | 
				
			||||||
 | 
						labels1 := map[string]string{
 | 
				
			||||||
 | 
							"foo": "bar",
 | 
				
			||||||
 | 
							"baz": "blah",
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						labels2 := map[string]string{
 | 
				
			||||||
 | 
							"bar": "foo",
 | 
				
			||||||
 | 
							"baz": "blah",
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						machine1Spec := api.PodSpec{
 | 
				
			||||||
 | 
							NodeName: "machine1",
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						machine2Spec := api.PodSpec{
 | 
				
			||||||
 | 
							NodeName: "machine2",
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						noResources := api.PodSpec{
 | 
				
			||||||
 | 
							Containers: []api.Container{},
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						cpuOnly := api.PodSpec{
 | 
				
			||||||
 | 
							NodeName: "machine1",
 | 
				
			||||||
 | 
							Containers: []api.Container{
 | 
				
			||||||
 | 
								{
 | 
				
			||||||
 | 
									Resources: api.ResourceRequirements{
 | 
				
			||||||
 | 
										Requests: api.ResourceList{
 | 
				
			||||||
 | 
											"cpu":    resource.MustParse("1000m"),
 | 
				
			||||||
 | 
											"memory": resource.MustParse("0"),
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
								{
 | 
				
			||||||
 | 
									Resources: api.ResourceRequirements{
 | 
				
			||||||
 | 
										Requests: api.ResourceList{
 | 
				
			||||||
 | 
											"cpu":    resource.MustParse("2000m"),
 | 
				
			||||||
 | 
											"memory": resource.MustParse("0"),
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						cpuOnly2 := cpuOnly
 | 
				
			||||||
 | 
						cpuOnly2.NodeName = "machine2"
 | 
				
			||||||
 | 
						cpuAndMemory := api.PodSpec{
 | 
				
			||||||
 | 
							NodeName: "machine2",
 | 
				
			||||||
 | 
							Containers: []api.Container{
 | 
				
			||||||
 | 
								{
 | 
				
			||||||
 | 
									Resources: api.ResourceRequirements{
 | 
				
			||||||
 | 
										Requests: api.ResourceList{
 | 
				
						"cpu":    resource.MustParse("1000m"),
						"memory": resource.MustParse("2000"),
					},
				},
			},
			{
				Resources: api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu":    resource.MustParse("2000m"),
						"memory": resource.MustParse("3000"),
					},
				},
			},
		},
	}
	tests := []struct {
		pod          *api.Pod
		pods         []*api.Pod
		nodes        []*api.Node
		expectedList schedulerapi.HostPriorityList
		test         string
	}{
		{
			/*
				Node1 scores (remaining resources) on 0-10 scale
				CPU Score: ((4000 - 0) *10) / 4000 = 10
				Memory Score: ((10000 - 0) *10) / 10000 = 10
				Node1 Score: (10 + 10) / 2 = 10

				Node2 scores (remaining resources) on 0-10 scale
				CPU Score: ((4000 - 0) *10) / 4000 = 10
				Memory Score: ((10000 - 0) *10) / 10000 = 10
				Node2 Score: (10 + 10) / 2 = 10
			*/
			pod:          &api.Pod{Spec: noResources},
			nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
			test:         "nothing scheduled, nothing requested",
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Score: ((4000 - 3000) *10) / 4000 = 2.5
				Memory Score: ((10000 - 5000) *10) / 10000 = 5
				Node1 Score: (2.5 + 5) / 2 = 3

				Node2 scores on 0-10 scale
				CPU Score: ((6000 - 3000) *10) / 6000 = 5
				Memory Score: ((10000 - 5000) *10) / 10000 = 5
				Node2 Score: (5 + 5) / 2 = 5
			*/
			pod:          &api.Pod{Spec: cpuAndMemory},
			nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 5}},
			test:         "nothing scheduled, resources requested, differently sized machines",
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Score: ((4000 - 0) *10) / 4000 = 10
				Memory Score: ((10000 - 0) *10) / 10000 = 10
				Node1 Score: (10 + 10) / 2 = 10

				Node2 scores on 0-10 scale
				CPU Score: ((4000 - 0) *10) / 4000 = 10
				Memory Score: ((10000 - 0) *10) / 10000 = 10
				Node2 Score: (10 + 10) / 2 = 10
			*/
			pod:          &api.Pod{Spec: noResources},
			nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
			test:         "no resources requested, pods scheduled",
			pods: []*api.Pod{
				{Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}},
				{Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
				{Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
				{Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
			},
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Score: ((10000 - 6000) *10) / 10000 = 4
				Memory Score: ((20000 - 0) *10) / 20000 = 10
				Node1 Score: (4 + 10) / 2 = 7

				Node2 scores on 0-10 scale
				CPU Score: ((10000 - 6000) *10) / 10000 = 4
				Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
				Node2 Score: (4 + 7.5) / 2 = 5
			*/
			pod:          &api.Pod{Spec: noResources},
			nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 5}},
			test:         "no resources requested, pods scheduled with resources",
			pods: []*api.Pod{
				{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}},
				{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}},
				{Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}},
				{Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}},
			},
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Score: ((10000 - 6000) *10) / 10000 = 4
				Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
				Node1 Score: (4 + 7.5) / 2 = 5

				Node2 scores on 0-10 scale
				CPU Score: ((10000 - 6000) *10) / 10000 = 4
				Memory Score: ((20000 - 10000) *10) / 20000 = 5
				Node2 Score: (4 + 5) / 2 = 4
			*/
			pod:          &api.Pod{Spec: cpuAndMemory},
			nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 4}},
			test:         "resources requested, pods scheduled with resources",
			pods: []*api.Pod{
				{Spec: cpuOnly},
				{Spec: cpuAndMemory},
			},
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Score: ((10000 - 6000) *10) / 10000 = 4
				Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
				Node1 Score: (4 + 7.5) / 2 = 5

				Node2 scores on 0-10 scale
				CPU Score: ((10000 - 6000) *10) / 10000 = 4
				Memory Score: ((50000 - 10000) *10) / 50000 = 8
				Node2 Score: (4 + 8) / 2 = 6
			*/
			pod:          &api.Pod{Spec: cpuAndMemory},
			nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 6}},
			test:         "resources requested, pods scheduled with resources, differently sized machines",
			pods: []*api.Pod{
				{Spec: cpuOnly},
				{Spec: cpuAndMemory},
			},
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Score: ((4000 - 6000) *10) / 4000 = 0
				Memory Score: ((10000 - 0) *10) / 10000 = 10
				Node1 Score: (0 + 10) / 2 = 5

				Node2 scores on 0-10 scale
				CPU Score: ((4000 - 6000) *10) / 4000 = 0
				Memory Score: ((10000 - 5000) *10) / 10000 = 5
				Node2 Score: (0 + 5) / 2 = 2
			*/
			pod:          &api.Pod{Spec: cpuOnly},
			nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 2}},
			test:         "requested resources exceed node capacity",
			pods: []*api.Pod{
				{Spec: cpuOnly},
				{Spec: cpuAndMemory},
			},
		},
		{
			pod:          &api.Pod{Spec: noResources},
			nodes:        []*api.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
			test:         "zero node resources, pods scheduled with resources",
			pods: []*api.Pod{
				{Spec: cpuOnly},
				{Spec: cpuAndMemory},
			},
		},
	}

	for _, test := range tests {
		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
		list, err := priorityFunction(LeastRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
		if !reflect.DeepEqual(test.expectedList, list) {
			t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
		}
	}
}
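The expected scores in the table above follow directly from the least-requested formula, ((capacity - requested) * 10) / capacity, averaged over CPU and memory with integer division. A minimal standalone sketch (the helper name unusedScore and the plain-integer setup are illustrative, not the scheduler's actual types) reproduces the "differently sized machines" case:

package main

import "fmt"

// unusedScore mirrors the 0-10 least-requested scoring described in the
// test comments above: the larger the unused fraction of capacity, the
// higher the score; integer division does the rounding.
func unusedScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return ((capacity - requested) * 10) / capacity
}

func main() {
	// The pod requests 3000 millicores and 5000 memory units.
	node1 := (unusedScore(3000, 4000) + unusedScore(5000, 10000)) / 2 // (2 + 5) / 2 = 3
	node2 := (unusedScore(3000, 6000) + unusedScore(5000, 10000)) / 2 // (5 + 5) / 2 = 5
	fmt.Println(node1, node2) // 3 5, matching the expectedList above
}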
							
								
								
									
94  plugin/pkg/scheduler/algorithm/priorities/most_requested.go  Normal file
@@ -0,0 +1,94 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"

	"github.com/golang/glog"
)

// MostRequestedPriority is a priority function that favors nodes with most requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the maximum of the average of the fraction of requested to capacity.
// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
func MostRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	var nonZeroRequest *schedulercache.Resource
	if priorityMeta, ok := meta.(*priorityMetadata); ok {
		nonZeroRequest = priorityMeta.nonZeroRequest
	} else {
		// We couldn't parse metadata - fall back to computing it.
		nonZeroRequest = getNonZeroRequests(pod)
	}
	return calculateUsedPriority(pod, nonZeroRequest, nodeInfo)
}

// The used capacity is calculated on a scale of 0-10,
// 0 being the lowest priority and 10 being the highest.
// The more resources are used the higher the score is. This function
// is almost a reversed version of least_requested_priority.calculateUnusedScore
// (10 - calculateUnusedScore). The main difference is in rounding. It was added to
// keep the final formula clean and not to modify the widely used (by users
// in their default scheduling policies) calculateUnusedScore.
func calculateUsedScore(requested int64, capacity int64, node string) int64 {
	if capacity == 0 {
		return 0
	}
	if requested > capacity {
		glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
			requested, capacity, node)
		return 0
	}
	return (requested * 10) / capacity
}

// Calculate the resource used on a node.  'node' has information about the resources on the node.
// 'pods' is a list of pods currently scheduled on the node.
func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()
	totalResources := *podRequests
	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
	totalResources.Memory += nodeInfo.NonZeroRequest().Memory

	cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
	memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.V(10).Infof(
			"%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			totalResources.MilliCPU, totalResources.Memory,
			cpuScore, memoryScore,
		)
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: int((cpuScore + memoryScore) / 2),
	}, nil
}
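The "Details" formula above reduces to plain integer arithmetic per resource. A small self-contained sketch (the helper name usedScore and the example numbers are illustrative, not the scheduler's types) shows how one node's MostRequested score comes out:

package main

import "fmt"

// usedScore mirrors calculateUsedScore above: the requested fraction of
// capacity mapped onto a 0-10 scale, with integer division doing the rounding.
func usedScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return (requested * 10) / capacity
}

func main() {
	// Hypothetical node: 6000 of 10000 millicores and 5000 of 20000 memory
	// bytes are requested once the incoming pod is counted.
	cpu := usedScore(6000, 10000) // 6
	mem := usedScore(5000, 20000) // 2 (2.5 truncated)
	fmt.Println((cpu + mem) / 2)  // 4 -- the node's final score
}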
							
								
								
									
182  plugin/pkg/scheduler/algorithm/priorities/most_requested_test.go  Normal file
@@ -0,0 +1,182 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"reflect"
	"testing"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/resource"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func TestMostRequested(t *testing.T) {
	labels1 := map[string]string{
		"foo": "bar",
		"baz": "blah",
	}
	labels2 := map[string]string{
		"bar": "foo",
		"baz": "blah",
	}
	noResources := api.PodSpec{
		Containers: []api.Container{},
	}
	cpuOnly := api.PodSpec{
		NodeName: "machine1",
		Containers: []api.Container{
			{
				Resources: api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu":    resource.MustParse("1000m"),
						"memory": resource.MustParse("0"),
					},
				},
			},
			{
				Resources: api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu":    resource.MustParse("2000m"),
						"memory": resource.MustParse("0"),
					},
				},
			},
		},
	}
	cpuOnly2 := cpuOnly
	cpuOnly2.NodeName = "machine2"
	cpuAndMemory := api.PodSpec{
		NodeName: "machine2",
		Containers: []api.Container{
			{
				Resources: api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu":    resource.MustParse("1000m"),
						"memory": resource.MustParse("2000"),
					},
				},
			},
			{
				Resources: api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu":    resource.MustParse("2000m"),
						"memory": resource.MustParse("3000"),
					},
				},
			},
		},
	}
	tests := []struct {
		pod          *api.Pod
		pods         []*api.Pod
		nodes        []*api.Node
		expectedList schedulerapi.HostPriorityList
		test         string
	}{
		{
			/*
				Node1 scores (used resources) on 0-10 scale
				CPU Score: (0 * 10) / 4000 = 0
				Memory Score: (0 * 10) / 10000 = 0
				Node1 Score: (0 + 0) / 2 = 0

				Node2 scores (used resources) on 0-10 scale
				CPU Score: (0 * 10) / 4000 = 0
				Memory Score: (0 * 10) / 10000 = 0
				Node2 Score: (0 + 0) / 2 = 0
			*/
			pod:          &api.Pod{Spec: noResources},
			nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
			test:         "nothing scheduled, nothing requested",
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Score: (3000 * 10) / 4000 = 7.5
				Memory Score: (5000 * 10) / 10000 = 5
				Node1 Score: (7.5 + 5) / 2 = 6

				Node2 scores on 0-10 scale
				CPU Score: (3000 * 10) / 6000 = 5
				Memory Score: (5000 * 10) / 10000 = 5
				Node2 Score: (5 + 5) / 2 = 5
			*/
			pod:          &api.Pod{Spec: cpuAndMemory},
			nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 5}},
			test:         "nothing scheduled, resources requested, differently sized machines",
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Score: (6000 * 10) / 10000 = 6
				Memory Score: (0 * 10) / 20000 = 0
				Node1 Score: (6 + 0) / 2 = 3

				Node2 scores on 0-10 scale
				CPU Score: (6000 * 10) / 10000 = 6
				Memory Score: (5000 * 10) / 20000 = 2.5
				Node2 Score: (6 + 2.5) / 2 = 4
			*/
			pod:          &api.Pod{Spec: noResources},
			nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 4}},
			test:         "no resources requested, pods scheduled with resources",
			pods: []*api.Pod{
				{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}},
				{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}},
				{Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}},
				{Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}},
			},
		},
		{
			/*
				Node1 scores on 0-10 scale
				CPU Score: (6000 * 10) / 10000 = 6
				Memory Score: (5000 * 10) / 20000 = 2.5
				Node1 Score: (6 + 2.5) / 2 = 4

				Node2 scores on 0-10 scale
				CPU Score: (6000 * 10) / 10000 = 6
				Memory Score: (10000 * 10) / 20000 = 5
				Node2 Score: (6 + 5) / 2 = 5
			*/
			pod:          &api.Pod{Spec: cpuAndMemory},
			nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 5}},
			test:         "resources requested, pods scheduled with resources",
			pods: []*api.Pod{
				{Spec: cpuOnly},
				{Spec: cpuAndMemory},
			},
		},
	}

	for _, test := range tests {
		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
		list, err := priorityFunction(MostRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
		if !reflect.DeepEqual(test.expectedList, list) {
			t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
		}
	}
}
							
								
								
									
60  plugin/pkg/scheduler/algorithm/priorities/node_label.go  Normal file
@@ -0,0 +1,60 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/labels"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

type NodeLabelPrioritizer struct {
	label    string
	presence bool
}

func NewNodeLabelPriority(label string, presence bool) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
	labelPrioritizer := &NodeLabelPrioritizer{
		label:    label,
		presence: presence,
	}
	return labelPrioritizer.CalculateNodeLabelPriorityMap, nil
}

// CalculateNodeLabelPriorityMap checks whether a particular label exists on a node or not, regardless of its value.
// If presence is true, prioritizes nodes that have the specified label, regardless of value.
// If presence is false, prioritizes nodes that do not have the specified label.
func (n *NodeLabelPrioritizer) CalculateNodeLabelPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	exists := labels.Set(node.Labels).Has(n.label)
	score := 0
	if (exists && n.presence) || (!exists && !n.presence) {
		score = 10
	}
	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: score,
	}, nil
}
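The map function above boils down to an all-or-nothing presence check against the configured flag. A tiny standalone sketch (using a plain map lookup instead of the labels.Set helper; the name labelScore is illustrative) shows the resulting 0/10 scoring:

package main

import "fmt"

// labelScore mirrors CalculateNodeLabelPriorityMap above: a node scores 10
// when the label's presence matches the desired presence flag, else 0.
func labelScore(nodeLabels map[string]string, label string, presence bool) int {
	_, exists := nodeLabels[label]
	if exists == presence {
		return 10
	}
	return 0
}

func main() {
	nodeLabels := map[string]string{"foo": "bar"}
	fmt.Println(labelScore(nodeLabels, "foo", true))  // 10 -- label present, presence wanted
	fmt.Println(labelScore(nodeLabels, "baz", false)) // 10 -- label absent, absence wanted
	fmt.Println(labelScore(nodeLabels, "baz", true))  // 0
}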
							
								
								
									
121  plugin/pkg/scheduler/algorithm/priorities/node_label_test.go  Normal file
@@ -0,0 +1,121 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"reflect"
	"sort"
	"testing"

	"k8s.io/kubernetes/pkg/api"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func TestNewNodeLabelPriority(t *testing.T) {
	label1 := map[string]string{"foo": "bar"}
	label2 := map[string]string{"bar": "foo"}
	label3 := map[string]string{"bar": "baz"}
	tests := []struct {
		nodes        []*api.Node
		label        string
		presence     bool
		expectedList schedulerapi.HostPriorityList
		test         string
	}{
		{
			nodes: []*api.Node{
				{ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
				{ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
				{ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
			},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
			label:        "baz",
			presence:     true,
			test:         "no match found, presence true",
		},
		{
			nodes: []*api.Node{
				{ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
				{ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
				{ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
			},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
			label:        "baz",
			presence:     false,
			test:         "no match found, presence false",
		},
		{
			nodes: []*api.Node{
				{ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
				{ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
				{ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
			},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
			label:        "foo",
			presence:     true,
			test:         "one match found, presence true",
		},
		{
			nodes: []*api.Node{
				{ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
				{ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
				{ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
			},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
			label:        "foo",
			presence:     false,
			test:         "one match found, presence false",
		},
		{
			nodes: []*api.Node{
				{ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
				{ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
				{ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
			},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
			label:        "bar",
			presence:     true,
			test:         "two matches found, presence true",
		},
		{
			nodes: []*api.Node{
				{ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
				{ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
				{ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
			},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
			label:        "bar",
			presence:     false,
			test:         "two matches found, presence false",
		},
	}

	for _, test := range tests {
		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
		list, err := priorityFunction(NewNodeLabelPriority(test.label, test.presence))(nil, nodeNameToInfo, test.nodes)
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
		// sort the two lists to avoid failures on account of different ordering
		sort.Sort(test.expectedList)
		sort.Sort(list)
		if !reflect.DeepEqual(test.expectedList, list) {
			t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
		}
	}
}
60  plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods.go  Normal file
@@ -0,0 +1,60 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api"
	priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func CalculateNodePreferAvoidPodsPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	controllerRef := priorityutil.GetControllerRef(pod)
	if controllerRef != nil {
		// Ignore pods that are owned by a controller other than a
		// ReplicationController or ReplicaSet.
		if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" {
			controllerRef = nil
		}
	}
	if controllerRef == nil {
		return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
	}

	avoids, err := api.GetAvoidPodsFromNodeAnnotations(node.Annotations)
	if err != nil {
		// If we cannot get the annotation, assume the pod is schedulable there.
		return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
	}
	for i := range avoids.PreferAvoidPods {
		avoid := &avoids.PreferAvoidPods[i]
		if controllerRef != nil {
			if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID {
				return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil
			}
		}
	}
	return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
}
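The decision above reduces to matching the pod's controlling owner (ReplicationController or ReplicaSet only) against the kind/UID pairs listed in the node's preferAvoidPods annotation. A reduced sketch (the local ref type and avoidScore helper are illustrative stand-ins for the API's OwnerReference and annotation types):

package main

import "fmt"

// ref identifies a controlling object by kind and UID, the two fields the
// priority above compares against a node's preferAvoidPods entries.
type ref struct{ Kind, UID string }

// avoidScore mirrors the scoring in CalculateNodePreferAvoidPodsPriorityMap:
// a node that prefers to avoid the pod's controller scores 0, otherwise 10.
func avoidScore(avoid []ref, controller *ref) int {
	if controller == nil {
		return 10 // pods without an RC/RS controller are never avoided
	}
	for _, a := range avoid {
		if a.Kind == controller.Kind && a.UID == controller.UID {
			return 0
		}
	}
	return 10
}

func main() {
	avoid := []ref{{Kind: "ReplicationController", UID: "abcdef123456"}}
	fmt.Println(avoidScore(avoid, &ref{"ReplicationController", "abcdef123456"})) // 0
	fmt.Println(avoidScore(avoid, &ref{"ReplicaSet", "qwert12345"}))              // 10
}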
155  plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods_test.go  Normal file
@@ -0,0 +1,155 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"reflect"
	"sort"
	"testing"

	"k8s.io/kubernetes/pkg/api"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func TestNodePreferAvoidPriority(t *testing.T) {
	annotations1 := map[string]string{
		api.PreferAvoidPodsAnnotationKey: `
						{
						    "preferAvoidPods": [
						        {
						            "podSignature": {
						                "podController": {
						                    "apiVersion": "v1",
						                    "kind": "ReplicationController",
						                    "name": "foo",
						                    "uid": "abcdef123456",
						                    "controller": true
						                }
						            },
						            "reason": "some reason",
						            "message": "some message"
						        }
						    ]
						}`,
	}
	annotations2 := map[string]string{
		api.PreferAvoidPodsAnnotationKey: `
						{
						    "preferAvoidPods": [
						        {
						            "podSignature": {
						                "podController": {
						                    "apiVersion": "v1",
						                    "kind": "ReplicaSet",
						                    "name": "foo",
						                    "uid": "qwert12345",
						                    "controller": true
						                }
						            },
						            "reason": "some reason",
						            "message": "some message"
						        }
						    ]
						}`,
	}
	testNodes := []*api.Node{
		{
			ObjectMeta: api.ObjectMeta{Name: "machine1", Annotations: annotations1},
		},
		{
			ObjectMeta: api.ObjectMeta{Name: "machine2", Annotations: annotations2},
		},
		{
			ObjectMeta: api.ObjectMeta{Name: "machine3"},
		},
	}
	trueVar := true
	tests := []struct {
		pod          *api.Pod
		nodes        []*api.Node
		expectedList schedulerapi.HostPriorityList
		test         string
	}{
		{
			pod: &api.Pod{
				ObjectMeta: api.ObjectMeta{
					Namespace: "default",
					OwnerReferences: []api.OwnerReference{
						{Kind: "ReplicationController", Name: "foo", UID: "abcdef123456", Controller: &trueVar},
					},
				},
			},
			nodes:        testNodes,
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
			test:         "pod managed by ReplicationController should avoid a node, this node get lowest priority score",
		},
		{
			pod: &api.Pod{
				ObjectMeta: api.ObjectMeta{
					Namespace: "default",
					OwnerReferences: []api.OwnerReference{
						{Kind: "RandomController", Name: "foo", UID: "abcdef123456", Controller: &trueVar},
					},
				},
			},
			nodes:        testNodes,
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
			test:         "ownership by random controller should be ignored",
		},
		{
			pod: &api.Pod{
				ObjectMeta: api.ObjectMeta{
					Namespace: "default",
					OwnerReferences: []api.OwnerReference{
						{Kind: "ReplicationController", Name: "foo", UID: "abcdef123456"},
					},
				},
			},
			nodes:        testNodes,
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
			test:         "owner without Controller field set should be ignored",
		},
		{
			pod: &api.Pod{
				ObjectMeta: api.ObjectMeta{
					Namespace: "default",
					OwnerReferences: []api.OwnerReference{
						{Kind: "ReplicaSet", Name: "foo", UID: "qwert12345", Controller: &trueVar},
					},
				},
			},
			nodes:        testNodes,
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 10}},
			test:         "pod managed by ReplicaSet should avoid a node, this node get lowest priority score",
		},
	}

	for _, test := range tests {
		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
		list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
		// sort the two lists to avoid failures on account of different ordering
		sort.Sort(test.expectedList)
		sort.Sort(list)
		if !reflect.DeepEqual(test.expectedList, list) {
			t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
		}
	}
}
367  plugin/pkg/scheduler/algorithm/priorities/priorities.go  (deleted)
@@ -1,367 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
Copyright 2014 The Kubernetes Authors.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Licensed under the Apache License, Version 2.0 (the "License");
 | 
					 | 
				
			||||||
you may not use this file except in compliance with the License.
 | 
					 | 
				
			||||||
You may obtain a copy of the License at
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    http://www.apache.org/licenses/LICENSE-2.0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Unless required by applicable law or agreed to in writing, software
 | 
					 | 
				
			||||||
distributed under the License is distributed on an "AS IS" BASIS,
 | 
					 | 
				
			||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
					 | 
				
			||||||
See the License for the specific language governing permissions and
 | 
					 | 
				
			||||||
limitations under the License.
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
package priorities
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import (
 | 
					 | 
				
			||||||
	"fmt"
 | 
					 | 
				
			||||||
	"math"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	"github.com/golang/glog"
 | 
					 | 
				
			||||||
	"k8s.io/kubernetes/pkg/api"
 | 
					 | 
				
			||||||
	"k8s.io/kubernetes/pkg/labels"
 | 
					 | 
				
			||||||
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
 | 
					 | 
				
			||||||
	priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
 | 
					 | 
				
			||||||
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
 | 
					 | 
				
			||||||
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
 | 
					 | 
				
			||||||
)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
func getNonZeroRequests(pod *api.Pod) *schedulercache.Resource {
 | 
					 | 
				
			||||||
	result := &schedulercache.Resource{}
 | 
					 | 
				
			||||||
	for i := range pod.Spec.Containers {
 | 
					 | 
				
			||||||
		container := &pod.Spec.Containers[i]
 | 
					 | 
				
			||||||
		cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
 | 
					 | 
				
			||||||
		result.MilliCPU += cpu
 | 
					 | 
				
			||||||
		result.Memory += memory
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return result
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// The unused capacity is calculated on a scale of 0-10
 | 
					 | 
				
			||||||
// 0 being the lowest priority and 10 being the highest.
 | 
					 | 
				
			||||||
// The more unused resources the higher the score is.
 | 
					 | 
				
			||||||
func calculateUnusedScore(requested int64, capacity int64, node string) int64 {
 | 
					 | 
				
			||||||
	if capacity == 0 {
 | 
					 | 
				
			||||||
		return 0
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	if requested > capacity {
 | 
					 | 
				
			||||||
		glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
 | 
					 | 
				
			||||||
			requested, capacity, node)
 | 
					 | 
				
			||||||
		return 0
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return ((capacity - requested) * 10) / capacity
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// The used capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more resources are used the higher the score is. This function
// is almost a reversed version of calculateUnusedScore (10 - calculateUnusedScore).
// The main difference is in rounding. It was added to keep the
// final formula clean and not to modify the widely used (by users
// in their default scheduling policies) calculateUnusedScore.
func calculateUsedScore(requested int64, capacity int64, node string) int64 {
	if capacity == 0 {
		return 0
	}
	if requested > capacity {
		glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
			requested, capacity, node)
		return 0
	}
	return (requested * 10) / capacity
}

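// Illustrative example (added commentary, not part of the original commit):
// for the same node, requested = 1500 of capacity = 4000 millicores gives
// calculateUsedScore = (1500*10)/4000 = 3, roughly mirroring the unused
// score of 6 above; the scores do not sum to exactly 10 because of integer
// rounding.
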
// Calculates host priority based on the amount of unused resources.
// 'pod' is the pod being scheduled, 'podRequests' holds its non-zero resource
// requests, and 'nodeInfo' has information about the node's allocatable
// resources and the pods already scheduled on it.
func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()
	totalResources := *podRequests
	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
	totalResources.Memory += nodeInfo.NonZeroRequest().Memory

	cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
	memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.V(10).Infof(
			"%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			totalResources.MilliCPU, totalResources.Memory,
			cpuScore, memoryScore,
		)
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: int((cpuScore + memoryScore) / 2),
	}, nil
}

// Calculates host priority based on the amount of used resources.
// 'pod' is the pod being scheduled, 'podRequests' holds its non-zero resource
// requests, and 'nodeInfo' has information about the node's allocatable
// resources and the pods already scheduled on it.
func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()
	totalResources := *podRequests
	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
	totalResources.Memory += nodeInfo.NonZeroRequest().Memory

	cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
	memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.V(10).Infof(
			"%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			totalResources.MilliCPU, totalResources.Memory,
			cpuScore, memoryScore,
		)
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: int((cpuScore + memoryScore) / 2),
	}, nil
}

// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the average of the fraction of requested to capacity.
// Details: (cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity)) / 2
func LeastRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	var nonZeroRequest *schedulercache.Resource
	if priorityMeta, ok := meta.(*priorityMetadata); ok {
		nonZeroRequest = priorityMeta.nonZeroRequest
	} else {
		// We couldn't parse metadata - fallback to computing it.
		nonZeroRequest = getNonZeroRequests(pod)
	}
	return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo)
}

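// Illustrative example (added commentary, not part of the original commit):
// on a node with 4000 millicores and 8GiB allocatable, where existing pods
// plus the incoming pod request 1500 millicores and 2GiB, the scores are
// cpu: ((4000-1500)*10)/4000 = 6 and memory: ((8-2)*10)/8 = 7, so the final
// LeastRequestedPriority score is (6+7)/2 = 6.
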
// MostRequestedPriority is a priority function that favors nodes with most requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the average of the fraction of requested to capacity.
// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
func MostRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	var nonZeroRequest *schedulercache.Resource
	if priorityMeta, ok := meta.(*priorityMetadata); ok {
		nonZeroRequest = priorityMeta.nonZeroRequest
	} else {
		// We couldn't parse metadata - fallback to computing it.
		nonZeroRequest = getNonZeroRequests(pod)
	}
	return calculateUsedPriority(pod, nonZeroRequest, nodeInfo)
}

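// Illustrative example (added commentary, not part of the original commit):
// with the same 1500/4000 millicore and 2GiB/8GiB request-to-capacity ratios,
// MostRequestedPriority scores cpu: (1500*10)/4000 = 3 and memory:
// (2*10)/8 = 2, giving (3+2)/2 = 2; under this policy the fuller nodes
// score higher instead.
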
type NodeLabelPrioritizer struct {
	label    string
	presence bool
}

func NewNodeLabelPriority(label string, presence bool) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
	labelPrioritizer := &NodeLabelPrioritizer{
		label:    label,
		presence: presence,
	}
	return labelPrioritizer.CalculateNodeLabelPriorityMap, nil
}

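// Illustrative usage sketch (added commentary, not part of the original
// commit; the label key below is arbitrary): a scheduling policy that prefers
// nodes carrying a given label would construct the map function as
//
//	mapFn, _ := NewNodeLabelPriority("example.com/dedicated", true)
//
// and register mapFn with the scheduler; nodes with the label then score 10
// and all other nodes score 0. The reduce function is nil because no
// normalization across nodes is needed.
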
// CalculateNodeLabelPriorityMap checks whether a particular label exists on a node or not, regardless of its value.
// If presence is true, prioritizes nodes that have the specified label, regardless of value.
// If presence is false, prioritizes nodes that do not have the specified label.
func (n *NodeLabelPrioritizer) CalculateNodeLabelPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	exists := labels.Set(node.Labels).Has(n.label)
	score := 0
	if (exists && n.presence) || (!exists && !n.presence) {
		score = 10
	}
	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: score,
	}, nil
}

// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range.
const (
	mb         int64 = 1024 * 1024
	minImgSize int64 = 23 * mb
	maxImgSize int64 = 1000 * mb
)

// ImageLocalityPriority is a priority function that favors nodes that already have requested pod container's images.
// It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10
// based on the total size of those images.
// - If none of the images are present, this node will be given the lowest priority.
// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority.
func ImageLocalityPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	var sumSize int64
	for i := range pod.Spec.Containers {
		sumSize += checkContainerImageOnNode(node, &pod.Spec.Containers[i])
	}
	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: calculateScoreFromSize(sumSize),
	}, nil
}

// checkContainerImageOnNode checks if a container image is present on a node and returns its size.
func checkContainerImageOnNode(node *api.Node, container *api.Container) int64 {
	for _, image := range node.Status.Images {
		for _, name := range image.Names {
			if container.Image == name {
				// Should return immediately.
				return image.SizeBytes
			}
		}
	}
	return 0
}

// calculateScoreFromSize calculates the priority of a node. sumSize is sum size of requested images on this node.
// 1. Split image size range into 10 buckets.
// 2. Decide the priority of a given sumSize based on which bucket it belongs to.
func calculateScoreFromSize(sumSize int64) int {
	var score int
	switch {
	case sumSize == 0 || sumSize < minImgSize:
		// score == 0 means none of the images required by this pod are present on this
		// node or the total size of the images present is too small to be taken into further consideration.
		score = 0
	// If existing images' total size is larger than max, just make it highest priority.
	case sumSize >= maxImgSize:
		score = 10
	default:
		score = int((10 * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1)
	}
	// Return which bucket the given size belongs to
	return score
}

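// Illustrative example (added commentary, not part of the original commit):
// a pod whose images already present on the node sum to 500*mb falls in the
// default case: int((10*(500-23))/(1000-23)) + 1 = 4 + 1 = 5, while anything
// below 23*mb scores 0 and anything at or above 1000*mb scores 10.
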
// BalancedResourceAllocation favors nodes with balanced resource usage rate.
// BalancedResourceAllocation should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
// It calculates the difference between the cpu and memory fraction of capacity, and prioritizes the host based on how
// close the two metrics are to each other.
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
func BalancedResourceAllocationMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	var nonZeroRequest *schedulercache.Resource
	if priorityMeta, ok := meta.(*priorityMetadata); ok {
		nonZeroRequest = priorityMeta.nonZeroRequest
	} else {
		// We couldn't parse metadata - fallback to computing it.
		nonZeroRequest = getNonZeroRequests(pod)
	}
	return calculateBalancedResourceAllocation(pod, nonZeroRequest, nodeInfo)
}

func calculateBalancedResourceAllocation(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()
	totalResources := *podRequests
	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
	totalResources.Memory += nodeInfo.NonZeroRequest().Memory

	cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU)
	memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory)
	score := int(0)
	if cpuFraction >= 1 || memoryFraction >= 1 {
		// if requested >= capacity, the corresponding host should never be preferred.
		score = 0
	} else {
		// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
		// respectively. Multiplying the absolute value of the difference by 10 scales the value to
		// 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
		// 10 leads to the score which also scales from 0 to 10, with 10 representing well balanced.
		diff := math.Abs(cpuFraction - memoryFraction)
		score = int(10 - diff*10)
	}
	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.V(10).Infof(
			"%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			totalResources.MilliCPU, totalResources.Memory,
			score,
		)
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: score,
	}, nil
}

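// Illustrative example (added commentary, not part of the original commit):
// if the pod would bring the node to cpuFraction = 0.5 and memoryFraction = 0.7,
// the difference is 0.2 and the score is int(10 - 0.2*10) = 8; a perfectly
// balanced 0.6/0.6 pair would score 10, and any fraction >= 1 scores 0.
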
func fractionOfCapacity(requested, capacity int64) float64 {
	if capacity == 0 {
		return 1
	}
	return float64(requested) / float64(capacity)
}

func CalculateNodePreferAvoidPodsPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	controllerRef := priorityutil.GetControllerRef(pod)
	if controllerRef != nil {
		// Ignore pods that are owned by a controller other than ReplicationController
		// or ReplicaSet.
		if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" {
			controllerRef = nil
		}
	}
	if controllerRef == nil {
		return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
	}

	avoids, err := api.GetAvoidPodsFromNodeAnnotations(node.Annotations)
	if err != nil {
		// If we cannot get annotation, assume it's schedulable there.
		return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
	}
	for i := range avoids.PreferAvoidPods {
		avoid := &avoids.PreferAvoidPods[i]
		if controllerRef != nil {
			if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID {
				return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil
			}
		}
	}
	return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
}
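// Illustrative example (added commentary, not part of the original commit):
// if the node's avoid-pods annotation lists a controller signature whose Kind
// and UID match the ReplicationController or ReplicaSet owning the incoming
// pod, that node scores 0 for the pod; in every other case (no owner, an
// unrelated owner kind, or no matching signature) the node scores 10.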
										
											
File diff suppressed because it is too large

60	plugin/pkg/scheduler/algorithm/priorities/test_util.go	Normal file

@@ -0,0 +1,60 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/resource"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func makeNode(node string, milliCPU, memory int64) *api.Node {
	return &api.Node{
		ObjectMeta: api.ObjectMeta{Name: node},
		Status: api.NodeStatus{
			Capacity: api.ResourceList{
				"cpu":    *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
				"memory": *resource.NewQuantity(memory, resource.BinarySI),
			},
			Allocatable: api.ResourceList{
				"cpu":    *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
				"memory": *resource.NewQuantity(memory, resource.BinarySI),
			},
		},
	}
}

func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction) algorithm.PriorityFunction {
	return func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) {
		result := make(schedulerapi.HostPriorityList, 0, len(nodes))
		for i := range nodes {
			hostResult, err := mapFn(pod, nil, nodeNameToInfo[nodes[i].Name])
			if err != nil {
				return nil, err
			}
			result = append(result, hostResult)
		}
		if reduceFn != nil {
			if err := reduceFn(pod, result); err != nil {
				return nil, err
			}
		}
		return result, nil
	}
}
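// Illustrative usage sketch (added commentary, not part of the original
// commit): tests can wrap a map/reduce pair into an old-style
// PriorityFunction, for example
//
//	prioritizer := priorityFunction(LeastRequestedPriorityMap, nil)
//	list, err := prioritizer(pod, nodeNameToInfo, nodes)
//
// where pod, nodeNameToInfo, and nodes are test fixtures, with the nodes
// typically built via makeNode.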