Merge pull request #19083 from resouer/allocatable

Use Allocatable to replace Capacity
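
The change is mechanical but the distinction matters: Capacity is everything the machine has, while Allocatable is what the node reports as available for pods after system reservations, so scheduling against Capacity can overcommit a node. Below is a minimal standalone sketch of the idea, using simplified hypothetical types rather than the real api package:

```go
// Minimal standalone sketch (hypothetical types, not the scheduler's real ones)
// of the idea behind this PR: fit decisions compare a pod's requests against the
// node's Allocatable rather than its raw Capacity, since Allocatable excludes
// resources reserved for system daemons.
package main

import "fmt"

// nodeResources is a simplified stand-in for the relevant parts of api.NodeStatus.
type nodeResources struct {
	CapacityMilliCPU    int64
	AllocatableMilliCPU int64
}

// fitsCPU reports whether a new request fits on the node once already requested
// milliCPU is accounted for, using Allocatable as the budget.
func fitsCPU(n nodeResources, requestedMilliCPU, alreadyRequestedMilliCPU int64) bool {
	return alreadyRequestedMilliCPU+requestedMilliCPU <= n.AllocatableMilliCPU
}

func main() {
	n := nodeResources{CapacityMilliCPU: 4000, AllocatableMilliCPU: 3600}
	// A 3.8-core request would fit Capacity but not Allocatable, so it is rejected.
	fmt.Println(fitsCPU(n, 3800, 0)) // false
}
```

The diffs that follow apply exactly this substitution in the fit predicate, the resource priorities, and their tests, and add an integration test covering nodes that do and do not report Allocatable.
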
@@ -254,9 +254,9 @@ func getResourceRequest(pod *api.Pod) resourceRequest {
 	return result
 }
 
-func CheckPodsExceedingFreeResources(pods []*api.Pod, capacity api.ResourceList) (fitting []*api.Pod, notFittingCPU, notFittingMemory []*api.Pod) {
-	totalMilliCPU := capacity.Cpu().MilliValue()
-	totalMemory := capacity.Memory().Value()
+func CheckPodsExceedingFreeResources(pods []*api.Pod, allocatable api.ResourceList) (fitting []*api.Pod, notFittingCPU, notFittingMemory []*api.Pod) {
+	totalMilliCPU := allocatable.Cpu().MilliValue()
+	totalMemory := allocatable.Memory().Value()
 	milliCPURequested := int64(0)
 	memoryRequested := int64(0)
 	for _, pod := range pods {
@@ -292,8 +292,9 @@ func (r *ResourceFit) PodFitsResources(pod *api.Pod, existingPods []*api.Pod, no
 		return false, err
 	}
 
-	if int64(len(existingPods))+1 > info.Status.Capacity.Pods().Value() {
-		glog.V(10).Infof("Cannot schedule Pod %+v, because Node %+v is full, running %v out of %v Pods.", podName(pod), node, len(existingPods), info.Status.Capacity.Pods().Value())
+	allocatable := info.Status.Allocatable
+	if int64(len(existingPods))+1 > allocatable.Pods().Value() {
+		glog.V(10).Infof("Cannot schedule Pod %+v, because Node %+v is full, running %v out of %v Pods.", podName(pod), node, len(existingPods), allocatable.Pods().Value())
 		return false, ErrExceededMaxPodNumber
 	}
 
@@ -303,7 +304,7 @@ func (r *ResourceFit) PodFitsResources(pod *api.Pod, existingPods []*api.Pod, no
 	}
 
 	pods := append(existingPods, pod)
-	_, exceedingCPU, exceedingMemory := CheckPodsExceedingFreeResources(pods, info.Status.Capacity)
+	_, exceedingCPU, exceedingMemory := CheckPodsExceedingFreeResources(pods, allocatable)
 	if len(exceedingCPU) > 0 {
 		glog.V(10).Infof("Cannot schedule Pod %+v, because Node %v does not have sufficient CPU", podName(pod), node)
 		return false, ErrInsufficientFreeCPU
@@ -312,7 +313,7 @@ func (r *ResourceFit) PodFitsResources(pod *api.Pod, existingPods []*api.Pod, no
 		glog.V(10).Infof("Cannot schedule Pod %+v, because Node %v does not have sufficient Memory", podName(pod), node)
 		return false, ErrInsufficientFreeMemory
 	}
-	glog.V(10).Infof("Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods.", podName(pod), node, len(pods)-1, info.Status.Capacity.Pods().Value())
+	glog.V(10).Infof("Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods.", podName(pod), node, len(pods)-1, allocatable.Pods().Value())
 	return true, nil
 }
 
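
For orientation, the partition performed by CheckPodsExceedingFreeResources is unchanged by this PR; only the budget it receives switches from Capacity to Allocatable. A rough standalone sketch of that kind of partition, with hypothetical types in place of api.Pod and api.ResourceList:

```go
// Rough sketch of the kind of check CheckPodsExceedingFreeResources performs,
// using hypothetical pod/request types instead of the real api package.
package main

import "fmt"

type podRequest struct {
	Name     string
	MilliCPU int64
	Memory   int64
}

// splitByFit walks the pods in order and partitions them into those that still
// fit the allocatable budget and those that exceed CPU or memory once the
// running totals pass it.
func splitByFit(pods []podRequest, allocMilliCPU, allocMemory int64) (fitting, exceedingCPU, exceedingMemory []podRequest) {
	var usedCPU, usedMemory int64
	for _, p := range pods {
		if usedCPU+p.MilliCPU > allocMilliCPU {
			exceedingCPU = append(exceedingCPU, p)
			continue
		}
		if usedMemory+p.Memory > allocMemory {
			exceedingMemory = append(exceedingMemory, p)
			continue
		}
		usedCPU += p.MilliCPU
		usedMemory += p.Memory
		fitting = append(fitting, p)
	}
	return fitting, exceedingCPU, exceedingMemory
}

func main() {
	pods := []podRequest{{"a", 2000, 1 << 30}, {"b", 2000, 1 << 30}}
	fit, exCPU, exMem := splitByFit(pods, 3000, 4<<30)
	fmt.Println(len(fit), len(exCPU), len(exMem)) // 1 1 0
}
```
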
@@ -54,6 +54,14 @@ func makeResources(milliCPU int64, memory int64, pods int64) api.NodeResources {
 	}
 }
 
+func makeAllocatableResources(milliCPU int64, memory int64, pods int64) api.ResourceList {
+	return api.ResourceList{
+		api.ResourceCPU:    *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
+		api.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
+		api.ResourcePods:   *resource.NewQuantity(pods, resource.DecimalSI),
+	}
+}
+
 func newResourcePod(usage ...resourceRequest) *api.Pod {
 	containers := []api.Container{}
 	for _, req := range usage {
@@ -130,7 +138,7 @@ func TestPodFitsResources(t *testing.T) {
 	}
 
 	for _, test := range enoughPodsTests {
-		node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity}}
+		node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 32)}}
 
 		fit := ResourceFit{FakeNodeInfo(node)}
 		fits, err := fit.PodFitsResources(test.pod, test.existingPods, "machine")
@@ -178,7 +186,7 @@ func TestPodFitsResources(t *testing.T) {
 		},
 	}
 	for _, test := range notEnoughPodsTests {
-		node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1).Capacity}}
+		node := api.Node{Status: api.NodeStatus{Capacity: api.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 1)}}
 
 		fit := ResourceFit{FakeNodeInfo(node)}
 		fits, err := fit.PodFitsResources(test.pod, test.existingPods, "machine")
@@ -76,8 +76,8 @@ func getNonzeroRequests(requests *api.ResourceList) (int64, int64) {
 func calculateResourceOccupancy(pod *api.Pod, node api.Node, pods []*api.Pod) schedulerapi.HostPriority {
 	totalMilliCPU := int64(0)
 	totalMemory := int64(0)
-	capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
-	capacityMemory := node.Status.Capacity.Memory().Value()
+	capacityMilliCPU := node.Status.Allocatable.Cpu().MilliValue()
+	capacityMemory := node.Status.Allocatable.Memory().Value()
 
 	for _, existingPod := range pods {
 		for _, container := range existingPod.Spec.Containers {
@@ -208,8 +208,8 @@ func calculateBalancedResourceAllocation(pod *api.Pod, node api.Node, pods []*ap
 		totalMemory += memory
 	}
 
-	capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
-	capacityMemory := node.Status.Capacity.Memory().Value()
+	capacityMilliCPU := node.Status.Allocatable.Cpu().MilliValue()
+	capacityMemory := node.Status.Allocatable.Memory().Value()
 
 	cpuFraction := fractionOfCapacity(totalMilliCPU, capacityMilliCPU)
 	memoryFraction := fractionOfCapacity(totalMemory, capacityMemory)
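
The priority functions above keep their scoring math and merely read the denominator from Allocatable, so a node with heavy system reservations now looks proportionally fuller. A rough standalone sketch of fraction-of-budget scoring (the helper names are hypothetical, not the scheduler's fractionOfCapacity or its exact formula):

```go
// Rough sketch of fraction-of-budget scoring as used by the resource priorities;
// helper names and the exact rounding are illustrative, not the scheduler's.
package main

import "fmt"

// fractionOfBudget returns requested/budget, guarding against a zero budget.
func fractionOfBudget(requested, budget int64) float64 {
	if budget == 0 {
		return 1
	}
	return float64(requested) / float64(budget)
}

// leastRequestedScore gives higher scores (0-10) to nodes with more free budget,
// mirroring the shape of a least-requested style priority.
func leastRequestedScore(requested, budget int64) int {
	f := fractionOfBudget(requested, budget)
	if f > 1 {
		f = 1
	}
	return int((1 - f) * 10)
}

func main() {
	// With Capacity (4000m) as the budget, 2000m of requests scores 5; with
	// Allocatable (3600m) the same requests score 4, i.e. the node looks fuller.
	fmt.Println(leastRequestedScore(2000, 4000)) // 5
	fmt.Println(leastRequestedScore(2000, 3600)) // 4
}
```
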
@@ -38,6 +38,10 @@ func makeNode(node string, milliCPU, memory int64) api.Node {
 				"cpu":    *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
 				"memory": *resource.NewQuantity(memory, resource.BinarySI),
 			},
+			Allocatable: api.ResourceList{
+				"cpu":    *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
+				"memory": *resource.NewQuantity(memory, resource.BinarySI),
+			},
 		},
 	}
 }
@@ -449,3 +449,119 @@ func createPod(name string, annotation map[string]string) *api.Pod {
 		},
 	}
 }
+
+// This test will verify scheduler can work well regardless of whether kubelet is allocatable aware or not.
+func TestAllocatable(t *testing.T) {
+	framework.DeleteAllEtcdKeys()
+
+	var m *master.Master
+	s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
+		m.Handler.ServeHTTP(w, req)
+	}))
+	defer s.Close()
+
+	masterConfig := framework.NewIntegrationTestMasterConfig()
+	m = master.New(masterConfig)
+
+	// 1. create and start default-scheduler
+	restClient := client.NewOrDie(&client.Config{Host: s.URL, GroupVersion: testapi.Default.GroupVersion()})
+
+	schedulerConfigFactory := factory.NewConfigFactory(restClient, api.DefaultSchedulerName)
+	schedulerConfig, err := schedulerConfigFactory.Create()
+	if err != nil {
+		t.Fatalf("Couldn't create scheduler config: %v", err)
+	}
+	eventBroadcaster := record.NewBroadcaster()
+	schedulerConfig.Recorder = eventBroadcaster.NewRecorder(api.EventSource{Component: api.DefaultSchedulerName})
+	eventBroadcaster.StartRecordingToSink(restClient.Events(""))
+	scheduler.New(schedulerConfig).Run()
+	// default-scheduler will be stopped later
+	defer close(schedulerConfig.StopEverything)
+
+	// 2. create a node without allocatable awareness
+	node := &api.Node{
+		ObjectMeta: api.ObjectMeta{Name: "node-allocatable-scheduler-test-node"},
+		Spec:       api.NodeSpec{Unschedulable: false},
+		Status: api.NodeStatus{
+			Capacity: api.ResourceList{
+				api.ResourcePods:   *resource.NewQuantity(32, resource.DecimalSI),
+				api.ResourceCPU:    *resource.NewMilliQuantity(30, resource.DecimalSI),
+				api.ResourceMemory: *resource.NewQuantity(30, resource.BinarySI),
+			},
+		},
+	}
+
+	allocNode, err := restClient.Nodes().Create(node)
+	if err != nil {
+		t.Fatalf("Failed to create node: %v", err)
+	}
+
+	// 3. create resource pod which requires less than Capacity
+	podResource := &api.Pod{
+		ObjectMeta: api.ObjectMeta{Name: "pod-test-allocatable"},
+		Spec: api.PodSpec{
+			Containers: []api.Container{
+				{
+					Name:  "container",
+					Image: "kubernetes/pause:go",
+					Resources: api.ResourceRequirements{
+						Requests: api.ResourceList{
+							api.ResourceCPU:    *resource.NewMilliQuantity(20, resource.DecimalSI),
+							api.ResourceMemory: *resource.NewQuantity(20, resource.BinarySI),
+						},
+					},
+				},
+			},
+		},
+	}
+
+	testAllocPod, err := restClient.Pods(api.NamespaceDefault).Create(podResource)
+	if err != nil {
+		t.Fatalf("Test allocatable unawareness failed to create pod: %v", err)
+	}
+
+	// 4. Test: this test pod should be scheduled since api-server will use Capacity as Allocatable
+	err = wait.Poll(time.Second, time.Second*5, podScheduled(restClient, testAllocPod.Namespace, testAllocPod.Name))
+	if err != nil {
+		t.Errorf("Test allocatable unawareness: %s Pod not scheduled: %v", testAllocPod.Name, err)
+	} else {
+		t.Logf("Test allocatable unawareness: %s Pod scheduled", testAllocPod.Name)
+	}
+
+	// 5. Change the node status to allocatable aware, note that Allocatable is less than Pod's requirement
+	allocNode.Status = api.NodeStatus{
+		Capacity: api.ResourceList{
+			api.ResourcePods:   *resource.NewQuantity(32, resource.DecimalSI),
+			api.ResourceCPU:    *resource.NewMilliQuantity(30, resource.DecimalSI),
+			api.ResourceMemory: *resource.NewQuantity(30, resource.BinarySI),
+		},
+		Allocatable: api.ResourceList{
+			api.ResourcePods:   *resource.NewQuantity(32, resource.DecimalSI),
+			api.ResourceCPU:    *resource.NewMilliQuantity(10, resource.DecimalSI),
+			api.ResourceMemory: *resource.NewQuantity(10, resource.BinarySI),
+		},
+	}
+
+	if _, err := restClient.Nodes().UpdateStatus(allocNode); err != nil {
+		t.Fatalf("Failed to update node with Status.Allocatable: %v", err)
+	}
+
+	if err := restClient.Pods(api.NamespaceDefault).Delete(podResource.Name, &api.DeleteOptions{}); err != nil {
+		t.Fatalf("Failed to remove first resource pod: %v", err)
+	}
+
+	// 6. Make another pod with different name, same resource request
+	podResource.ObjectMeta.Name = "pod-test-allocatable2"
+	testAllocPod2, err := restClient.Pods(api.NamespaceDefault).Create(podResource)
+	if err != nil {
+		t.Fatalf("Test allocatable awareness failed to create pod: %v", err)
+	}
+
+	// 7. Test: this test pod should not be scheduled since it request more than Allocatable
+	err = wait.Poll(time.Second, time.Second*5, podScheduled(restClient, testAllocPod2.Namespace, testAllocPod2.Name))
+	if err == nil {
+		t.Errorf("Test allocatable awareness: %s Pod got scheduled unexpectly, %v", testAllocPod2.Name, err)
+	} else {
+		t.Logf("Test allocatable awareness: %s Pod not scheduled as expected", testAllocPod2.Name)
+	}
+}
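
Step 4 of the test above relies on the behavior called out in its comment: when a node does not report Allocatable, the api-server treats Capacity as the allocatable budget, so scheduling keeps working with older, allocatable-unaware kubelets. A minimal sketch of that fallback, with a hypothetical helper and stand-in types (not the api-server's actual defaulting code):

```go
// Minimal sketch of the fallback the integration test's step 4 relies on:
// if a node reports no Allocatable, fall back to Capacity as the budget.
// Hypothetical helper and types, not the api-server's defaulting code.
package main

import "fmt"

type resourceList map[string]int64 // stand-in for api.ResourceList

func allocatableOrCapacity(allocatable, capacity resourceList) resourceList {
	if len(allocatable) == 0 {
		return capacity
	}
	return allocatable
}

func main() {
	capacity := resourceList{"cpu": 30, "memory": 30, "pods": 32}
	// Before step 5 the node reports no Allocatable, so the budget falls back
	// to Capacity and the pod requesting 20/20 fits.
	fmt.Println(allocatableOrCapacity(nil, capacity))
	// After step 5 Allocatable is 10/10, so the same request no longer fits.
	fmt.Println(allocatableOrCapacity(resourceList{"cpu": 10, "memory": 10, "pods": 32}, capacity))
}
```
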