/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduler

import (
	"context"
	"fmt"
	"testing"
	"time"

	v1 "k8s.io/api/core/v1"
	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
	apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/apimachinery/pkg/util/wait"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/client-go/dynamic"
	"k8s.io/client-go/kubernetes"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	"k8s.io/klog/v2"
	configv1 "k8s.io/kube-scheduler/config/v1"
	apiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing"
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/scheduler"
	configtesting "k8s.io/kubernetes/pkg/scheduler/apis/config/testing"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder"
	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
	frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
	st "k8s.io/kubernetes/pkg/scheduler/testing"
	testfwk "k8s.io/kubernetes/test/integration/framework"
	testutils "k8s.io/kubernetes/test/integration/util"
	imageutils "k8s.io/kubernetes/test/utils/image"
	"k8s.io/utils/pointer"
	"k8s.io/utils/ptr"
)

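// TestSchedulingGates verifies that Pods carrying a non-empty
// .spec.schedulingGates stay pending in the scheduling queue and become
// schedulable only after all of their gates are removed.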
func TestSchedulingGates(t *testing.T) {
	tests := []struct {
		name     string
		pods     []*v1.Pod
		schedule []string
		delete   []string
		rmGates  []string
	}{
		{
			name: "regular pods",
			pods: []*v1.Pod{
				st.MakePod().Name("p1").Container("pause").Obj(),
				st.MakePod().Name("p2").Container("pause").Obj(),
			},
			schedule: []string{"p1", "p2"},
		},
		{
			name: "one pod carrying scheduling gates",
			pods: []*v1.Pod{
				st.MakePod().Name("p1").SchedulingGates([]string{"foo"}).Container("pause").Obj(),
				st.MakePod().Name("p2").Container("pause").Obj(),
			},
			schedule: []string{"p2"},
		},
		{
			name: "two pods carrying scheduling gates, and remove the gates of one pod",
			pods: []*v1.Pod{
				st.MakePod().Name("p1").SchedulingGates([]string{"foo"}).Container("pause").Obj(),
				st.MakePod().Name("p2").SchedulingGates([]string{"bar"}).Container("pause").Obj(),
				st.MakePod().Name("p3").Container("pause").Obj(),
			},
			schedule: []string{"p3"},
			rmGates:  []string{"p2"},
		},
		{
			name: "gated pod schedulable after deleting the scheduled pod and removing gate",
			pods: []*v1.Pod{
				st.MakePod().Name("p1").SchedulingGates([]string{"foo"}).Container("pause").Obj(),
				st.MakePod().Name("p2").Container("pause").Obj(),
			},
			schedule: []string{"p2"},
			delete:   []string{"p2"},
			rmGates:  []string{"p1"},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Use zero backoff seconds to bypass the backoffQ.
			// We also intentionally don't start the scheduler's queue, and hence
			// none of its flushing logic; we pop and schedule the Pods manually later.
			testCtx := testutils.InitTestSchedulerWithOptions(
				t,
				testutils.InitTestAPIServer(t, "pod-scheduling-gates", nil),
				0,
				scheduler.WithPodInitialBackoffSeconds(0),
				scheduler.WithPodMaxBackoffSeconds(0),
			)
			testutils.SyncSchedulerInformerFactory(testCtx)

			cs, ns, ctx := testCtx.ClientSet, testCtx.NS.Name, testCtx.Ctx

			// Create a node, so we can schedule pods.
			node := st.MakeNode().Name("node").Obj()
			if _, err := cs.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil {
				t.Fatalf("Failed to create node: %v", err)
			}

			// Create pods.
			for _, p := range tt.pods {
				p.Namespace = ns
				if _, err := cs.CoreV1().Pods(ns).Create(ctx, p, metav1.CreateOptions{}); err != nil {
					t.Fatalf("Failed to create Pod %q: %v", p.Name, err)
				}
			}

			// Wait for the pods to be present in the scheduling queue.
			if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
				pendingPods, _ := testCtx.Scheduler.SchedulingQueue.PendingPods()
				return len(pendingPods) == len(tt.pods), nil
			}); err != nil {
				t.Fatal(err)
			}

			// Schedule pods.
			for _, podName := range tt.schedule {
				testCtx.Scheduler.ScheduleOne(testCtx.Ctx)
				if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, testutils.PodScheduled(cs, ns, podName)); err != nil {
					t.Fatalf("Failed to schedule %s", podName)
				}
			}

			// Delete pods, which triggers an AssignedPodDelete event in the scheduling queue.
			for _, podName := range tt.delete {
				if err := cs.CoreV1().Pods(ns).Delete(ctx, podName, metav1.DeleteOptions{}); err != nil {
					t.Fatalf("Failed to delete %s: %v", podName, err)
				}
				if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, testutils.PodDeleted(ctx, cs, ns, podName)); err != nil {
					t.Fatalf("Failed to wait for %s to be deleted", podName)
				}
			}

			// Ensure gated pods are not in the activeQ.
			if len(testCtx.Scheduler.SchedulingQueue.PodsInActiveQ()) > 0 {
				t.Fatal("Expected no schedulable pods")
			}

			// Remove scheduling gates from the pod spec.
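			// Note: in a strategic merge patch, setting a field to null removes it,
			// so the patch below clears .spec.schedulingGates entirely.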
			for _, podName := range tt.rmGates {
				patch := `{"spec": {"schedulingGates": null}}`
				if _, err := cs.CoreV1().Pods(ns).Patch(ctx, podName, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{}); err != nil {
					t.Fatalf("Failed to patch pod %v: %v", podName, err)
				}
			}

			// Schedule pods which no longer have gates.
			for _, podName := range tt.rmGates {
				testCtx.Scheduler.ScheduleOne(testCtx.Ctx)
				if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, testutils.PodScheduled(cs, ns, podName)); err != nil {
					t.Fatalf("Failed to schedule %s", podName)
				}
			}
		})
	}
}

// TestCoreResourceEnqueue verifies that Pods rejected by in-tree default
// plugins can be moved properly upon their registered events.
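// Each test case creates the initial Nodes/Pods, lets every Pod in `pods` fail
// a scheduling attempt, fires a cluster event via triggerFn, and then asserts
// that exactly wantRequeuedPods are moved back to activeQ.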
func TestCoreResourceEnqueue(t *testing.T) {
	tests := []struct {
		name string
		// initialNodes is the list of Nodes to be created at first.
		initialNodes []*v1.Node
		// initialPods is the list of Pods to be created at first, if non-empty.
		// Note that the scheduler won't schedule these Pods;
		// they must already be assigned, i.e., have .spec.nodeName set.
		initialPods []*v1.Pod
		// pods is the list of Pods to be created.
		// All of them are expected to be unschedulable at first.
		pods []*v1.Pod
		// triggerFn is the function that triggers the event to move Pods.
		triggerFn func(testCtx *testutils.TestContext) error
		// wantRequeuedPods is the set of Pods that are expected to be requeued after triggerFn.
		wantRequeuedPods sets.Set[string]
		// enableSchedulingQueueHint indicates which feature-gate value(s) the test case should run with.
		// By default, it's {true, false}.
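		// When SchedulerQueueingHints is enabled, each plugin can supply a
		// QueueingHintFn (roughly: func(logger, pod, oldObj, newObj) (QueueingHint, error))
		// that decides, per cluster event, whether a Pod it rejected may now be
		// schedulable; when disabled, the queue falls back to coarser
		// event-to-plugin mappings.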
		enableSchedulingQueueHint []bool
	}{
		{
			name:         "Pod without a required toleration to a node isn't requeued to activeQ",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Taints([]v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoSchedule}}).Obj()},
			pods: []*v1.Pod{
				// - Pod1 doesn't have the required toleration and will be rejected by the TaintToleration plugin.
				//   (TaintToleration plugin is evaluated before NodeResourcesFit plugin.)
				// - Pod2 has the required toleration, but requests a large amount of CPU - will be rejected by the NodeResourcesFit plugin.
				st.MakePod().Name("pod1").Req(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Container("image").Obj(),
				st.MakePod().Name("pod2").Toleration(v1.TaintNodeNotReady).Req(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a NodeChange event by increasing CPU capacity.
				// It makes Pod2 schedulable.
				// Pod1 is not requeued because the Node is still unready and it doesn't have the required toleration.
				if _, err := testCtx.ClientSet.CoreV1().Nodes().UpdateStatus(testCtx.Ctx, st.MakeNode().Name("fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Taints([]v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoSchedule}}).Obj(), metav1.UpdateOptions{}); err != nil {
					return fmt.Errorf("failed to update the node: %w", err)
				}
				return nil
			},
			wantRequeuedPods: sets.New("pod2"),
		},
		{
			name:         "Pod rejected by the PodAffinity plugin is requeued when a new Node is created and turned ready",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Label("node", "fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Obj()},
			initialPods: []*v1.Pod{
				st.MakePod().Label("anti", "anti").Name("pod1").PodAntiAffinityExists("anti", "node", st.PodAntiAffinityWithRequiredReq).Container("image").Node("fake-node").Obj(),
			},
			pods: []*v1.Pod{
				// - Pod2 will be rejected by the PodAffinity plugin.
				st.MakePod().Label("anti", "anti").Name("pod2").PodAntiAffinityExists("anti", "node", st.PodAntiAffinityWithRequiredReq).Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a NodeCreated event.
				// Note that this Node has a not-ready taint, and ideally pod2 should be requeued
				// because the only unschedulable plugin registered for pod2 is PodAffinity.
				// However, due to preCheck, pod2 is not requeued to activeQ.
				// This will be fixed by the removal of preCheck in the future.
				// https://github.com/kubernetes/kubernetes/issues/110175
				node := st.MakeNode().Name("fake-node2").Label("node", "fake-node2").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Taints([]v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoSchedule}}).Obj()
				if _, err := testCtx.ClientSet.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{}); err != nil {
					return fmt.Errorf("failed to create a new node: %w", err)
				}

				// As a mitigation for the issue described above, all plugins that subscribe to the
				// Node/Add event also register for UpdateNodeTaint.
				// So removing the taint here moves pod2 to activeQ.
				node.Spec.Taints = nil
				if _, err := testCtx.ClientSet.CoreV1().Nodes().Update(testCtx.Ctx, node, metav1.UpdateOptions{}); err != nil {
					return fmt.Errorf("failed to remove taints off the node: %w", err)
				}
				return nil
			},
			wantRequeuedPods: sets.New("pod2"),
		},
		{
			name:         "Pod rejected by the NodeAffinity plugin is requeued when a Node is updated",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node1").Label("group", "a").Obj()},
			pods: []*v1.Pod{
				// - Pod1 will be rejected by the NodeAffinity plugin.
				st.MakePod().Name("pod1").NodeAffinityIn("group", []string{"b"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(),
				// - Pod2 will be rejected by the NodeAffinity plugin.
				st.MakePod().Name("pod2").NodeAffinityIn("group", []string{"c"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a NodeUpdate event that changes the label to group=b.
				// This requeues pod1, but not pod2.
				if _, err := testCtx.ClientSet.CoreV1().Nodes().Update(testCtx.Ctx, st.MakeNode().Name("fake-node1").Label("group", "b").Obj(), metav1.UpdateOptions{}); err != nil {
					return fmt.Errorf("failed to update the node: %w", err)
				}
				return nil
			},
			wantRequeuedPods: sets.New("pod1"),
		},
		{
			name:         "Pod updated with a toleration is requeued to activeQ",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Taints([]v1.Taint{{Key: "taint-key", Effect: v1.TaintEffectNoSchedule}}).Obj()},
			pods: []*v1.Pod{
				// - Pod1 doesn't have the required toleration and will be rejected by the TaintToleration plugin.
				st.MakePod().Name("pod1").Req(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a PodUpdate event by adding a toleration to Pod1.
				// It makes Pod1 schedulable.
				if _, err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).Update(testCtx.Ctx, st.MakePod().Name("pod1").Req(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Container("image").Toleration("taint-key").Obj(), metav1.UpdateOptions{}); err != nil {
					return fmt.Errorf("failed to update the pod: %w", err)
				}
				return nil
			},
			wantRequeuedPods: sets.New("pod1"),
		},
		{
			name:         "Pod rejected by the NodeResourcesFit plugin is requeued when the Pod is updated to scale down",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Obj()},
			pods: []*v1.Pod{
				// - Pod1 requests a large amount of CPU and will be rejected by the NodeResourcesFit plugin.
				st.MakePod().Name("pod1").Req(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a PodUpdate event by reducing the CPU requested by pod1.
				// It makes Pod1 schedulable.
				if _, err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).Update(testCtx.Ctx, st.MakePod().Name("pod1").Req(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Container("image").Obj(), metav1.UpdateOptions{}); err != nil {
					return fmt.Errorf("failed to update the pod: %w", err)
				}
				return nil
			},
			wantRequeuedPods: sets.New("pod1"),
		},
		{
			name:         "Pod rejected by the NodeResourcesFit plugin is requeued when a Pod is deleted",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Obj()},
			initialPods: []*v1.Pod{
				st.MakePod().Name("pod1").Req(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Container("image").Node("fake-node").Obj(),
			},
			pods: []*v1.Pod{
				// - Pod2 will be rejected because there are not enough free resources on the Node.
				st.MakePod().Name("pod2").Req(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a delete event for the assigned Pod1.
				// It makes Pod2 schedulable.
				if err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).Delete(testCtx.Ctx, "pod1", metav1.DeleteOptions{GracePeriodSeconds: new(int64)}); err != nil {
					return fmt.Errorf("failed to delete pod1: %w", err)
				}
				return nil
			},
			wantRequeuedPods: sets.New("pod2"),
		},
		{
			name:         "Pod rejected by the NodeResourcesFit plugin is requeued when a Node is created",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node1").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Obj()},
			pods: []*v1.Pod{
				// - Pod1 requests a large amount of CPU and will be rejected by the NodeResourcesFit plugin.
				st.MakePod().Name("pod1").Req(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Container("image").Obj(),
				// - Pod2 requests a large amount of CPU and will be rejected by the NodeResourcesFit plugin.
				st.MakePod().Name("pod2").Req(map[v1.ResourceName]string{v1.ResourceCPU: "5"}).Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a NodeCreated event.
				// It makes Pod1 schedulable. Pod2 is not requeued because its request is still too high.
				node := st.MakeNode().Name("fake-node2").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Obj()
				if _, err := testCtx.ClientSet.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{}); err != nil {
					return fmt.Errorf("failed to create a new node: %w", err)
				}
				return nil
			},
			wantRequeuedPods: sets.New("pod1"),
		},
		{
			name:         "Pod rejected by the NodeResourcesFit plugin is requeued when a Node is updated",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Obj()},
			pods: []*v1.Pod{
				// - Pod1 requests a large amount of CPU and will be rejected by the NodeResourcesFit plugin.
				st.MakePod().Name("pod1").Req(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Container("image").Obj(),
				// - Pod2 requests a large amount of CPU and will be rejected by the NodeResourcesFit plugin.
				st.MakePod().Name("pod2").Req(map[v1.ResourceName]string{v1.ResourceCPU: "5"}).Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a NodeUpdate event that increases the CPU capacity of fake-node.
				// It makes Pod1 schedulable. Pod2 is not requeued because its request is still too high.
				if _, err := testCtx.ClientSet.CoreV1().Nodes().UpdateStatus(testCtx.Ctx, st.MakeNode().Name("fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Obj(), metav1.UpdateOptions{}); err != nil {
					return fmt.Errorf("failed to update fake-node: %w", err)
				}
				return nil
			},
			wantRequeuedPods: sets.New("pod1"),
		},
		{
			name:         "Updating pod condition doesn't retry scheduling if the Pod was rejected by TaintToleration",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Taints([]v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoSchedule}}).Obj()},
			pods: []*v1.Pod{
				// - Pod1 doesn't have the required toleration and will be rejected by the TaintToleration plugin.
				st.MakePod().Name("pod1").Container("image").Obj(),
			},
			// Simulate a Pod update by directly calling `SchedulingQueue.Update` instead of actually updating a Pod,
			// because at the moment we don't have a way to confirm that the scheduler has handled a Pod update event.
			// TODO: actually update a Pod and confirm the scheduler has handled the Pod update event via a metric.
			// https://github.com/kubernetes/kubernetes/pull/122234#discussion_r1597456808
			triggerFn: func(testCtx *testutils.TestContext) (err error) {
				// Trigger a Pod Condition update event.
				// It will not make pod1 schedulable.
				var (
					oldPod *v1.Pod
					newPod *v1.Pod
				)
				if oldPod, err = testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).Get(testCtx.Ctx, "pod1", metav1.GetOptions{}); err != nil {
					return fmt.Errorf("failed to get the pod: %w", err)
				}
				newPod = oldPod.DeepCopy()
				newPod.Status.Conditions[0].Message = "injected message"

				testCtx.Scheduler.SchedulingQueue.Update(klog.FromContext(testCtx.Ctx), oldPod, newPod)
				return nil
			},
			wantRequeuedPods: sets.Set[string]{},
			// This behavior holds only when QHint is enabled,
			// because the QueueingHint of TaintToleration decides to ignore such a Pod update.
			enableSchedulingQueueHint: []bool{true},
		},
		{
			// This test case makes sure that PreFilter plugins returning a PreFilterResult are also
			// recorded in pInfo.UnschedulablePlugins, meaning they're taken into consideration
			// during the queue's requeueing flow.
			// https://github.com/kubernetes/kubernetes/issues/122018
			name: "Pod rejected by the PreFilter of NodeAffinity plugin and Filter of NodeResourcesFit is requeued based on both plugins",
			initialNodes: []*v1.Node{
				st.MakeNode().Name("fake-node").Label("node", "fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Obj(),
				st.MakeNode().Name("fake-node2").Label("node", "fake-node2").Label("zone", "zone1").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Obj(),
			},
			pods: []*v1.Pod{
				// - Pod1 will be rejected by the NodeAffinity plugin and the NodeResourcesFit plugin.
				st.MakePod().Label("unscheduled", "plugins").Name("pod1").NodeAffinityIn("metadata.name", []string{"fake-node"}, st.NodeSelectorTypeMatchFields).Req(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Because of preCheck, we cannot determine which unschedulable plugins are registered
				// for pod1 by triggering cluster events. So, although it's not ideal for an integration
				// test, we check the unschedulable plugins by directly calling the SchedulingQueue
				// function for now. We can switch to asserting them by triggering cluster events, like
				// the other test cases, once QHint is graduated and preCheck is removed.
				pInfo, ok := testCtx.Scheduler.SchedulingQueue.GetPod("pod1", testCtx.NS.Name)
				if !ok || pInfo == nil {
					return fmt.Errorf("pod1 is not found in the scheduling queue")
				}

				if pInfo.Pod.Name != "pod1" {
					return fmt.Errorf("unexpected pod info: %#v", pInfo)
				}

				if pInfo.UnschedulablePlugins.Difference(sets.New(names.NodeAffinity, names.NodeResourcesFit)).Len() != 0 {
					return fmt.Errorf("unexpected unschedulable plugin(s) registered for pod1: %v", pInfo.UnschedulablePlugins.UnsortedList())
				}

				return nil
			},
		},
		{
			name:         "Pod rejected by the PodAffinity plugin is requeued when the existing pod's label is deleted so it no longer matches the podAntiAffinity",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Label("node", "fake-node").Obj()},
			initialPods: []*v1.Pod{
				st.MakePod().Name("pod1").Label("anti1", "anti1").Label("anti2", "anti2").Container("image").Node("fake-node").Obj(),
			},
			pods: []*v1.Pod{
				// - Pod2 and pod3 will be rejected by the PodAffinity plugin.
				st.MakePod().Name("pod2").Label("anti1", "anti1").PodAntiAffinityExists("anti1", "node", st.PodAntiAffinityWithRequiredReq).Container("image").Obj(),
				st.MakePod().Name("pod3").Label("anti2", "anti2").PodAntiAffinityExists("anti2", "node", st.PodAntiAffinityWithRequiredReq).Container("image").Obj(),
			},

			triggerFn: func(testCtx *testutils.TestContext) error {
				// Delete pod1's label 'anti1', so that it no longer matches pod2's antiAffinity.
				if _, err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).Update(testCtx.Ctx, st.MakePod().Name("pod1").Label("anti2", "anti2").Container("image").Node("fake-node").Obj(), metav1.UpdateOptions{}); err != nil {
					return fmt.Errorf("failed to update pod1: %w", err)
				}
				return nil
			},
			wantRequeuedPods:          sets.New("pod2"),
			enableSchedulingQueueHint: []bool{true},
		},
		{
			name:         "Pod rejected by the PodAffinity plugin is requeued when the existing pod's label is updated to match the pod's podAffinity",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Label("node", "fake-node").Obj()},
			initialPods: []*v1.Pod{
				st.MakePod().Name("pod1").Container("image").Node("fake-node").Obj(),
			},
			pods: []*v1.Pod{
				// - Pod2 and pod3 will be rejected by the PodAffinity plugin.
				st.MakePod().Name("pod2").PodAffinityExists("aaa", "node", st.PodAffinityWithRequiredReq).Container("image").Obj(),
				st.MakePod().Name("pod3").PodAffinityExists("bbb", "node", st.PodAffinityWithRequiredReq).Container("image").Obj(),
			},

			triggerFn: func(testCtx *testutils.TestContext) error {
				// Add a label to pod1 so that it matches pod2's podAffinity.
				if _, err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).Update(testCtx.Ctx, st.MakePod().Name("pod1").Label("aaa", "bbb").Container("image").Node("fake-node").Obj(), metav1.UpdateOptions{}); err != nil {
					return fmt.Errorf("failed to update pod1: %w", err)
				}
				return nil
			},
			wantRequeuedPods:          sets.New("pod2"),
			enableSchedulingQueueHint: []bool{true},
		},

		{
			name:         "Pod rejected by the PodAffinity plugin is requeued when the label of the node is updated to match the pod affinity",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Label("node", "fake-node").Obj()},
			pods: []*v1.Pod{
				// - pod1 and pod2 will be rejected by the PodAffinity plugin.
				st.MakePod().Name("pod1").PodAffinityExists("bbb", "zone", st.PodAffinityWithRequiredReq).Container("image").Obj(),
				st.MakePod().Name("pod2").PodAffinityExists("ccc", "region", st.PodAffinityWithRequiredReq).Container("image").Obj(),
			},

			triggerFn: func(testCtx *testutils.TestContext) error {
				// Add a label to the node so that it matches pod1's podAffinity topology key.
				if _, err := testCtx.ClientSet.CoreV1().Nodes().Update(testCtx.Ctx, st.MakeNode().Name("fake-node").Label("zone", "zone1").Obj(), metav1.UpdateOptions{}); err != nil {
					return fmt.Errorf("failed to update the node: %w", err)
				}
				return nil
			},
			wantRequeuedPods:          sets.New("pod1"),
			enableSchedulingQueueHint: []bool{true},
		},
		{
			name:         "Pod rejected with hostport by the NodePorts plugin is requeued when pod with common hostport is deleted",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Label("node", "fake-node").Obj()},
			initialPods: []*v1.Pod{
				st.MakePod().Name("pod1").Container("image").ContainerPort([]v1.ContainerPort{{ContainerPort: 8080, HostPort: 8080}}).Node("fake-node").Obj(),
				st.MakePod().Name("pod2").Container("image").ContainerPort([]v1.ContainerPort{{ContainerPort: 8080, HostPort: 8081}}).Node("fake-node").Obj(),
			},
			pods: []*v1.Pod{
				st.MakePod().Name("pod3").Container("image").ContainerPort([]v1.ContainerPort{{ContainerPort: 8080, HostPort: 8080}}).Obj(),
				st.MakePod().Name("pod4").Container("image").ContainerPort([]v1.ContainerPort{{ContainerPort: 8080, HostPort: 8081}}).Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a delete event for the assigned Pod1.
				// Because Pod1 and Pod3 use the same host port, deleting Pod1 makes Pod3 schedulable.
				// GracePeriodSeconds is set to 0 so that Pod3 can be requeued immediately after Pod1 is deleted.
				if err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).Delete(testCtx.Ctx, "pod1", metav1.DeleteOptions{GracePeriodSeconds: new(int64)}); err != nil {
					return fmt.Errorf("failed to delete Pod: %w", err)
				}
				return nil
			},
			wantRequeuedPods:          sets.New("pod3"),
			enableSchedulingQueueHint: []bool{true},
		},
		{
			name:         "Pod rejected with hostport by the NodePorts plugin is requeued when a new node is created",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Label("node", "fake-node").Obj()},
			initialPods: []*v1.Pod{
				st.MakePod().Name("pod1").Container("image").ContainerPort([]v1.ContainerPort{{ContainerPort: 8080, HostPort: 8080}}).Node("fake-node").Obj(),
			},
			pods: []*v1.Pod{
				st.MakePod().Name("pod2").Container("image").ContainerPort([]v1.ContainerPort{{ContainerPort: 8080, HostPort: 8080}}).Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a NodeCreated event.
				// It makes Pod2 schedulable.
				node := st.MakeNode().Name("fake-node2").Label("node", "fake-node2").Obj()
				if _, err := testCtx.ClientSet.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{}); err != nil {
					return fmt.Errorf("failed to create a new node: %w", err)
				}
				return nil
			},
			wantRequeuedPods:          sets.New("pod2"),
			enableSchedulingQueueHint: []bool{true},
		},
		{
			name:         "Pod rejected by the NodeUnschedulable plugin is requeued when the node becomes schedulable again",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Unschedulable(true).Obj()},
			pods: []*v1.Pod{
				st.MakePod().Name("pod1").Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a NodeUpdate event that marks the Node schedulable.
				// It makes Pod1 schedulable.
				if _, err := testCtx.ClientSet.CoreV1().Nodes().Update(testCtx.Ctx, st.MakeNode().Name("fake-node").Unschedulable(false).Obj(), metav1.UpdateOptions{}); err != nil {
					return fmt.Errorf("failed to update the node: %w", err)
				}
				return nil
			},
			wantRequeuedPods: sets.New("pod1"),
		},
		{
			name:         "Pod rejected by the NodeUnschedulable plugin is requeued when a new node is created",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node1").Unschedulable(true).Obj()},
			pods: []*v1.Pod{
				st.MakePod().Name("pod1").Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a NodeCreated event.
				// It makes Pod1 schedulable.
				node := st.MakeNode().Name("fake-node2").Obj()
				if _, err := testCtx.ClientSet.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{}); err != nil {
					return fmt.Errorf("failed to create a new node: %w", err)
				}
				return nil
			},
			wantRequeuedPods: sets.New("pod1"),
		},
		{
			name:         "Pod rejected by the NodeUnschedulable plugin isn't requeued when another unschedulable node is created",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node1").Unschedulable(true).Obj()},
			pods: []*v1.Pod{
				st.MakePod().Name("pod1").Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a NodeCreated event, but with an unschedulable node, which doesn't make Pod1 schedulable.
				node := st.MakeNode().Name("fake-node2").Unschedulable(true).Obj()
				if _, err := testCtx.ClientSet.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{}); err != nil {
					return fmt.Errorf("failed to create a new node: %w", err)
				}
				return nil
			},
			wantRequeuedPods: sets.Set[string]{},
			// This test case is valid only when QHint is enabled,
			// because QHint filters out the node creation made in triggerFn.
			enableSchedulingQueueHint: []bool{true},
		},
		{
			name:         "Pods with PodTopologySpread should be requeued when a Pod with matching label is scheduled",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Label("node", "fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Obj()},
			initialPods: []*v1.Pod{
				st.MakePod().Name("pod1").Label("key1", "val").Container("image").Node("fake-node").Obj(),
				st.MakePod().Name("pod2").Label("key2", "val").Container("image").Node("fake-node").Obj(),
			},
			pods: []*v1.Pod{
				// - Pod3 and Pod4 will be rejected by the PodTopologySpread plugin.
				st.MakePod().Name("pod3").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
				st.MakePod().Name("pod4").Label("key2", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key2").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger an assigned Pod add event.
				// It should requeue pod3 only, because the new pod has only the key1 label that pod3's
				// topology spread constraint checks, and not the key2 label that pod4's checks.
				pod := st.MakePod().Name("pod5").Label("key1", "val").Node("fake-node").Container("image").Obj()
				if _, err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).Create(testCtx.Ctx, pod, metav1.CreateOptions{}); err != nil {
					return fmt.Errorf("failed to create Pod %q: %w", pod.Name, err)
				}

				return nil
			},
			wantRequeuedPods:          sets.New("pod3"),
			enableSchedulingQueueHint: []bool{true},
		},
		{
			name:         "Pods with PodTopologySpread should be requeued when a scheduled Pod's label is updated to match the selector",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Label("node", "fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Obj()},
			initialPods: []*v1.Pod{
				st.MakePod().Name("pod1").Label("key1", "val").Container("image").Node("fake-node").Obj(),
				st.MakePod().Name("pod2").Label("key2", "val").Container("image").Node("fake-node").Obj(),
			},
			pods: []*v1.Pod{
				// - Pod3 and Pod4 will be rejected by the PodTopologySpread plugin.
				st.MakePod().Name("pod3").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
				st.MakePod().Name("pod4").Label("key2", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key2").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger an assigned Pod update event.
				// It should requeue pod3 only, because the updated pod had the key1 label,
				// which is related only to the label selector of pod3's topology spread constraint.
				if _, err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).Update(testCtx.Ctx, st.MakePod().Name("pod1").Label("key3", "val").Container("image").Node("fake-node").Obj(), metav1.UpdateOptions{}); err != nil {
					return fmt.Errorf("failed to update the pod: %w", err)
				}
				return nil
			},
			wantRequeuedPods:          sets.New("pod3"),
			enableSchedulingQueueHint: []bool{true},
		},
		{
			name:         "Pods with PodTopologySpread should be requeued when a scheduled Pod with matching label is deleted",
			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node").Label("node", "fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Obj()},
			initialPods: []*v1.Pod{
				st.MakePod().Name("pod1").Label("key1", "val").Container("image").Node("fake-node").Obj(),
				st.MakePod().Name("pod2").Label("key2", "val").Container("image").Node("fake-node").Obj(),
			},
			pods: []*v1.Pod{
				// - Pod3 and Pod4 will be rejected by the PodTopologySpread plugin.
				st.MakePod().Name("pod3").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(2)), nil, nil, nil).Container("image").Obj(),
				st.MakePod().Name("pod4").Label("key2", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key2").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger an assigned Pod delete event.
				// It should requeue pod3 only, because the deleted pod has only the key1 label that pod3's
				// topology spread constraint checks, and not the key2 label that pod4's checks.
				if err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).Delete(testCtx.Ctx, "pod1", metav1.DeleteOptions{GracePeriodSeconds: new(int64)}); err != nil {
					return fmt.Errorf("failed to delete Pod: %w", err)
				}
				return nil
			},
			wantRequeuedPods:          sets.New("pod3"),
			enableSchedulingQueueHint: []bool{true},
		},
		{
			name: "Pods with PodTopologySpread should be requeued when a Node with a topology label is created",
			initialNodes: []*v1.Node{
				st.MakeNode().Name("fake-node1").Label("node", "fake-node").Obj(),
				st.MakeNode().Name("fake-node2").Label("zone", "fake-zone").Obj(),
			},
			initialPods: []*v1.Pod{
				st.MakePod().Name("pod1").Label("key1", "val").Container("image").Node("fake-node1").Obj(),
				st.MakePod().Name("pod2").Label("key1", "val").Container("image").Node("fake-node2").Obj(),
			},
			pods: []*v1.Pod{
				// - Pod3 and Pod4 will be rejected by the PodTopologySpread plugin.
				st.MakePod().Name("pod3").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(2)), nil, nil, nil).Container("image").Obj(),
				st.MakePod().Name("pod4").Label("key1", "val").SpreadConstraint(1, "zone", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(2)), nil, nil, nil).Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a Node add event.
				// It should requeue pod3 only, because the new node has only the node label,
				// and not the zone label that pod4's topology spread constraint requires.
				node := st.MakeNode().Name("fake-node3").Label("node", "fake-node").Obj()
				if _, err := testCtx.ClientSet.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{}); err != nil {
					return fmt.Errorf("failed to create a new node: %w", err)
				}
				return nil
			},
			wantRequeuedPods:          sets.New("pod3"),
			enableSchedulingQueueHint: []bool{true},
		},
		{
			name: "Pods with PodTopologySpread should be requeued when a Node is updated to have the topology label",
			initialNodes: []*v1.Node{
				st.MakeNode().Name("fake-node1").Label("node", "fake-node").Obj(),
				st.MakeNode().Name("fake-node2").Label("node", "fake-node").Obj(),
			},
			initialPods: []*v1.Pod{
				st.MakePod().Name("pod1").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), nil, nil, nil, nil).Container("image").Node("fake-node1").Obj(),
				st.MakePod().Name("pod2").Label("key1", "val").SpreadConstraint(1, "zone", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), nil, nil, nil, nil).Container("image").Node("fake-node2").Obj(),
			},
			pods: []*v1.Pod{
				// - Pod3 and Pod4 will be rejected by the PodTopologySpread plugin.
				st.MakePod().Name("pod3").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
				st.MakePod().Name("pod4").Label("key1", "val").SpreadConstraint(1, "zone", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a Node update event.
				// It should requeue pod4 only, because the updated node has only the zone label,
				// and not the node label that pod3 requires.
				node := st.MakeNode().Name("fake-node2").Label("zone", "fake-node").Obj()
				if _, err := testCtx.ClientSet.CoreV1().Nodes().Update(testCtx.Ctx, node, metav1.UpdateOptions{}); err != nil {
					return fmt.Errorf("failed to update node: %w", err)
				}
				return nil
			},
			wantRequeuedPods:          sets.New("pod4"),
			enableSchedulingQueueHint: []bool{true},
		},
		{
			name: "Pods with PodTopologySpread should be requeued when a NodeTaint of a Node with a topology label has been updated",
			initialNodes: []*v1.Node{
				st.MakeNode().Name("fake-node1").Label("node", "fake-node").Obj(),
				st.MakeNode().Name("fake-node2").Label("zone", "fake-node").Obj(),
				st.MakeNode().Name("fake-node3").Label("zone", "fake-node").Taints([]v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoSchedule}}).Obj(),
			},
			initialPods: []*v1.Pod{
				st.MakePod().Name("pod1").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), nil, nil, nil, nil).Container("image").Node("fake-node1").Obj(),
				st.MakePod().Name("pod2").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), nil, nil, nil, nil).Container("image").Node("fake-node2").Obj(),
			},
			pods: []*v1.Pod{
				// - Pod3 and Pod4 will be rejected by the PodTopologySpread plugin.
				st.MakePod().Name("pod3").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
				st.MakePod().Name("pod4").Label("key1", "val").SpreadConstraint(1, "zone", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Toleration("aaa").Obj(),
			},
			triggerFn: func(testCtx *testutils.TestContext) error {
				// Trigger a NodeTaint update event.
				// It should requeue pod4 only, because the updated node has only the zone label,
				// and not the node label that pod3 requires; pod4 also tolerates the new taint.
				node := st.MakeNode().Name("fake-node3").Label("zone", "fake-node").Taints([]v1.Taint{{Key: "aaa", Value: "bbb", Effect: v1.TaintEffectNoSchedule}}).Obj()
				if _, err := testCtx.ClientSet.CoreV1().Nodes().Update(testCtx.Ctx, node, metav1.UpdateOptions{}); err != nil {
					return fmt.Errorf("failed to update node: %w", err)
				}
				return nil
			},
			wantRequeuedPods:          sets.New("pod4"),
			enableSchedulingQueueHint: []bool{true},
		},
	}

	for _, tt := range tests {
		if len(tt.enableSchedulingQueueHint) == 0 {
			tt.enableSchedulingQueueHint = []bool{true, false}
		}

		for _, featureEnabled := range tt.enableSchedulingQueueHint {
			t.Run(fmt.Sprintf("%s [SchedulerQueueingHints enabled: %v]", tt.name, featureEnabled), func(t *testing.T) {
				featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerQueueingHints, featureEnabled)
				featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)

				// Use zero backoff seconds to bypass the backoffQ.
				// We also intentionally don't start the scheduler's queue, and hence
				// none of its flushing logic; we pop and schedule the Pods manually later.
				testCtx := testutils.InitTestSchedulerWithOptions(
					t,
					testutils.InitTestAPIServer(t, "core-res-enqueue", nil),
					0,
					scheduler.WithPodInitialBackoffSeconds(0),
					scheduler.WithPodMaxBackoffSeconds(0),
				)
				testutils.SyncSchedulerInformerFactory(testCtx)

				defer testCtx.Scheduler.SchedulingQueue.Close()

				cs, ns, ctx := testCtx.ClientSet, testCtx.NS.Name, testCtx.Ctx
				// Create the initial Nodes.
				for _, node := range tt.initialNodes {
					if _, err := cs.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil {
						t.Fatalf("Failed to create an initial Node %q: %v", node.Name, err)
					}
				}

				// Create the initial Pods.
				for _, pod := range tt.initialPods {
					if _, err := cs.CoreV1().Pods(ns).Create(ctx, pod, metav1.CreateOptions{}); err != nil {
						t.Fatalf("Failed to create an initial Pod %q: %v", pod.Name, err)
					}
				}

				for _, pod := range tt.pods {
					if _, err := cs.CoreV1().Pods(ns).Create(ctx, pod, metav1.CreateOptions{}); err != nil {
						t.Fatalf("Failed to create Pod %q: %v", pod.Name, err)
					}
				}

				// Wait for the tt.pods to be present in the scheduling queue.
				if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
					pendingPods, _ := testCtx.Scheduler.SchedulingQueue.PendingPods()
					return len(pendingPods) == len(tt.pods), nil
				}); err != nil {
					t.Fatal(err)
				}

				t.Log("Confirmed Pods in the scheduling queue, starting to schedule them")

				// Pop all the pods out. They should be unschedulable.
				for i := 0; i < len(tt.pods); i++ {
					testCtx.Scheduler.ScheduleOne(testCtx.Ctx)
				}
				// Confirm the tt.pods are still present in the scheduling queue.
				if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
					pendingPods, _ := testCtx.Scheduler.SchedulingQueue.PendingPods()
					return len(pendingPods) == len(tt.pods), nil
				}); err != nil {
					t.Fatal(err)
				}

				t.Log("Finished the initial scheduling attempts for all Pods, will trigger triggerFn")

				err := tt.triggerFn(testCtx)
				if err != nil {
					t.Fatalf("Failed to trigger the event: %v", err)
				}

				t.Log("Triggered tt.triggerFn, will check if tt.wantRequeuedPods are requeued")

				// Wait until the expected Pods are requeued to activeQ.
				var requeuedPods sets.Set[string]
				if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
					requeuedPods = sets.Set[string]{} // reset
					for _, requeuedPod := range testCtx.Scheduler.SchedulingQueue.PodsInActiveQ() {
						requeuedPods.Insert(requeuedPod.Name)
					}

					return requeuedPods.Equal(tt.wantRequeuedPods), nil
				}); err != nil {
					t.Fatalf("Expected Pods %v to be requeued, but actually %v were requeued", tt.wantRequeuedPods, requeuedPods)
				}
			})
		}
	}
}

var _ framework.FilterPlugin = &fakeCRPlugin{}
var _ framework.EnqueueExtensions = &fakeCRPlugin{}

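// fakeCRPlugin rejects every Pod in Filter and registers interest in events
// on the "foos.v1.example.com" custom resource, so that Pods it rejected can
// be requeued upon Foo events.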
type fakeCRPlugin struct{}

func (f *fakeCRPlugin) Name() string {
	return "fakeCRPlugin"
}

func (f *fakeCRPlugin) Filter(_ context.Context, _ *framework.CycleState, _ *v1.Pod, _ *framework.NodeInfo) *framework.Status {
	return framework.NewStatus(framework.Unschedulable, "always fail")
}

// EventsToRegister returns the possible events that may make a Pod
// failed by this plugin schedulable.
func (f *fakeCRPlugin) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
	return []framework.ClusterEventWithHint{
		{Event: framework.ClusterEvent{Resource: "foos.v1.example.com", ActionType: framework.All}},
	}, nil
}

// TestCustomResourceEnqueue constructs a fake plugin that registers custom resources
// to verify that Pods rejected by this plugin can be moved properly upon CR events.
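// The flow: the Pod fails scheduling due to fakeCRPlugin, gets parked in
// unschedulablePods, and is expected to move back to activeQ once a Foo
// custom resource is created.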
func TestCustomResourceEnqueue(t *testing.T) {
	// Start API Server with apiextensions supported.
	server := apiservertesting.StartTestServerOrDie(
		t, apiservertesting.NewDefaultTestServerOptions(),
		[]string{"--disable-admission-plugins=ServiceAccount,TaintNodesByCondition", "--runtime-config=api/all=true"},
		testfwk.SharedEtcd(),
	)
	testCtx := &testutils.TestContext{}
	ctx, cancel := context.WithCancel(context.Background())
	testCtx.Ctx = ctx
	testCtx.CloseFn = func() {
		cancel()
		server.TearDownFn()
	}

	apiExtensionClient := apiextensionsclient.NewForConfigOrDie(server.ClientConfig)
	dynamicClient := dynamic.NewForConfigOrDie(server.ClientConfig)

	// Create a Foo CRD.
	fooCRD := &apiextensionsv1.CustomResourceDefinition{
		ObjectMeta: metav1.ObjectMeta{
			Name: "foos.example.com",
		},
		Spec: apiextensionsv1.CustomResourceDefinitionSpec{
			Group: "example.com",
			Scope: apiextensionsv1.NamespaceScoped,
			Names: apiextensionsv1.CustomResourceDefinitionNames{
				Plural: "foos",
				Kind:   "Foo",
			},
			Versions: []apiextensionsv1.CustomResourceDefinitionVersion{
				{
					Name:    "v1",
					Served:  true,
					Storage: true,
					Schema: &apiextensionsv1.CustomResourceValidation{
						OpenAPIV3Schema: &apiextensionsv1.JSONSchemaProps{
							Type: "object",
							Properties: map[string]apiextensionsv1.JSONSchemaProps{
								"field": {Type: "string"},
							},
						},
					},
				},
			},
		},
	}
	var err error
	fooCRD, err = apiExtensionClient.ApiextensionsV1().CustomResourceDefinitions().Create(testCtx.Ctx, fooCRD, metav1.CreateOptions{})
	if err != nil {
		t.Fatal(err)
	}

	registry := frameworkruntime.Registry{
		"fakeCRPlugin": func(_ context.Context, _ runtime.Object, fh framework.Handle) (framework.Plugin, error) {
			return &fakeCRPlugin{}, nil
		},
	}
	cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{
		Profiles: []configv1.KubeSchedulerProfile{{
			SchedulerName: pointer.String(v1.DefaultSchedulerName),
			Plugins: &configv1.Plugins{
				Filter: configv1.PluginSet{
					Enabled: []configv1.Plugin{
						{Name: "fakeCRPlugin"},
					},
				},
			},
		}}})

	testCtx.KubeConfig = server.ClientConfig
	testCtx.ClientSet = kubernetes.NewForConfigOrDie(server.ClientConfig)
	testCtx.NS, err = testCtx.ClientSet.CoreV1().Namespaces().Create(testCtx.Ctx, &v1.Namespace{
		ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("cr-enqueue-%v", string(uuid.NewUUID()))}}, metav1.CreateOptions{})
	if err != nil && !errors.IsAlreadyExists(err) {
		t.Fatalf("Failed to create the integration test namespace: %v", err)
	}

	// Use zero backoff seconds to bypass the backoffQ.
	// We also intentionally don't start the scheduler's queue, and hence
	// none of its flushing logic; we pop and schedule the Pods manually later.
	testCtx = testutils.InitTestSchedulerWithOptions(
		t,
		testCtx,
		0,
		scheduler.WithProfiles(cfg.Profiles...),
		scheduler.WithFrameworkOutOfTreeRegistry(registry),
		scheduler.WithPodInitialBackoffSeconds(0),
		scheduler.WithPodMaxBackoffSeconds(0),
	)
	testutils.SyncSchedulerInformerFactory(testCtx)

	defer testutils.CleanupTest(t, testCtx)

	cs, ns, ctx := testCtx.ClientSet, testCtx.NS.Name, testCtx.Ctx
	logger := klog.FromContext(ctx)
	// Create one Node.
	node := st.MakeNode().Name("fake-node").Obj()
	if _, err := cs.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil {
		t.Fatalf("Failed to create Node %q: %v", node.Name, err)
	}

	// Create a testing Pod.
	pause := imageutils.GetPauseImageName()
	pod := st.MakePod().Namespace(ns).Name("fake-pod").Container(pause).Obj()
	if _, err := cs.CoreV1().Pods(ns).Create(ctx, pod, metav1.CreateOptions{}); err != nil {
		t.Fatalf("Failed to create Pod %q: %v", pod.Name, err)
	}

	// Wait for the testing Pod to be present in the scheduling queue.
	if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
		pendingPods, _ := testCtx.Scheduler.SchedulingQueue.PendingPods()
		return len(pendingPods) == 1, nil
	}); err != nil {
		t.Fatal(err)
	}
| 
 | |
| 	// Pop fake-pod out. It should be unschedulable.
 | |
| 	podInfo := testutils.NextPodOrDie(t, testCtx)
 | |
| 	fwk, ok := testCtx.Scheduler.Profiles[podInfo.Pod.Spec.SchedulerName]
 | |
| 	if !ok {
 | |
| 		t.Fatalf("Cannot find the profile for Pod %v", podInfo.Pod.Name)
 | |
| 	}
 | |
| 	// Schedule the Pod manually.
 | |
| 	_, fitError := testCtx.Scheduler.SchedulePod(ctx, fwk, framework.NewCycleState(), podInfo.Pod)
 | |
| 	// The fitError is expected to be non-nil as it failed the fakeCRPlugin plugin.
 | |
| 	if fitError == nil {
 | |
| 		t.Fatalf("Expect Pod %v to fail at scheduling.", podInfo.Pod.Name)
 | |
| 	}
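	// Report the failure through the scheduler's failure handler, as a real
	// scheduling cycle would do on a fit error.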
	testCtx.Scheduler.FailureHandler(ctx, fwk, podInfo, framework.NewStatus(framework.Unschedulable).WithError(fitError), nil, time.Now())

	// Scheduling cycle is incremented from 0 to 1 after NextPod() is called, so
	// pass a number larger than 1 to move Pod to unschedulablePods.
	testCtx.Scheduler.SchedulingQueue.AddUnschedulableIfNotPresent(logger, podInfo, 10)

	// Trigger a Custom Resource event.
	// We expect this event to trigger moving the test Pod from unschedulablePods to activeQ.
	crdGVR := schema.GroupVersionResource{Group: fooCRD.Spec.Group, Version: fooCRD.Spec.Versions[0].Name, Resource: "foos"}
	crClient := dynamicClient.Resource(crdGVR).Namespace(ns)
	if _, err := crClient.Create(ctx, &unstructured.Unstructured{
		Object: map[string]interface{}{
			"apiVersion": "example.com/v1",
			"kind":       "Foo",
			"metadata":   map[string]interface{}{"name": "foo1"},
		},
	}, metav1.CreateOptions{}); err != nil {
		t.Fatalf("Unable to create CR: %v", err)
	}

	// Now we should be able to pop the Pod from activeQ again.
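	// Its Attempts counter should be 2: one from the first pop above and one
	// from this pop.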
	podInfo = testutils.NextPodOrDie(t, testCtx)
	if podInfo.Attempts != 2 {
		t.Errorf("Expected the Pod to be attempted 2 times, but got %v", podInfo.Attempts)
	}
}

// TestRequeueByBindFailure verifies that Pods that failed in the bind plugin are
// put back to the queue, regardless of whether any cluster event happens.
func TestRequeueByBindFailure(t *testing.T) {
	fakeBind := &firstFailBindPlugin{}
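	// firstFailBindPlugin (defined below) wraps the default binder: its first
	// Bind call fails, and subsequent calls are delegated to the real binder.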
	registry := frameworkruntime.Registry{
		"firstFailBindPlugin": func(ctx context.Context, o runtime.Object, fh framework.Handle) (framework.Plugin, error) {
			binder, err := defaultbinder.New(ctx, nil, fh)
			if err != nil {
				return nil, err
			}

			fakeBind.defaultBinderPlugin = binder.(framework.BindPlugin)
			return fakeBind, nil
		},
	}

	cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{
		Profiles: []configv1.KubeSchedulerProfile{{
			SchedulerName: pointer.String(v1.DefaultSchedulerName),
			Plugins: &configv1.Plugins{
				MultiPoint: configv1.PluginSet{
					Enabled: []configv1.Plugin{
						{Name: "firstFailBindPlugin"},
					},
					Disabled: []configv1.Plugin{
						{Name: names.DefaultBinder},
					},
				},
			},
		}}})

	// Use zero backoff seconds to bypass backoffQ.
	testCtx := testutils.InitTestSchedulerWithOptions(
		t,
		testutils.InitTestAPIServer(t, "core-res-enqueue", nil),
		0,
		scheduler.WithPodInitialBackoffSeconds(0),
		scheduler.WithPodMaxBackoffSeconds(0),
		scheduler.WithProfiles(cfg.Profiles...),
		scheduler.WithFrameworkOutOfTreeRegistry(registry),
	)
	testutils.SyncSchedulerInformerFactory(testCtx)

	go testCtx.Scheduler.Run(testCtx.Ctx)

	cs, ns, ctx := testCtx.ClientSet, testCtx.NS.Name, testCtx.Ctx
	node := st.MakeNode().Name("fake-node").Obj()
	if _, err := cs.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil {
		t.Fatalf("Failed to create Node %q: %v", node.Name, err)
	}
	// Create a pod.
	pod := st.MakePod().Namespace(ns).Name("pod-1").Container(imageutils.GetPauseImageName()).Obj()
	if _, err := cs.CoreV1().Pods(ns).Create(ctx, pod, metav1.CreateOptions{}); err != nil {
		t.Fatalf("Failed to create Pod %q: %v", pod.Name, err)
	}

	// 1. The first binding try should fail.
	// 2. The pod should be enqueued to activeQ/backoffQ without any event.
	// 3. The pod should be scheduled in the second binding try.
	// Here, wait until (3).
	err := wait.PollUntilContextTimeout(ctx, 200*time.Millisecond, wait.ForeverTestTimeout, false, testutils.PodScheduled(cs, ns, pod.Name))
	if err != nil {
		t.Fatalf("Expect pod-1 to be scheduled by the bind plugin: %v", err)
	}

	// Make sure the first binding trial failed, and the pod was scheduled at the second trial.
	if fakeBind.counter != 1 {
		t.Fatalf("Expected the first binding trial to fail, but the bind plugin's counter is %d", fakeBind.counter)
	}
}

// firstFailBindPlugin rejects the Pod in the first Bind call.
type firstFailBindPlugin struct {
	counter             int
	defaultBinderPlugin framework.BindPlugin
}

func (*firstFailBindPlugin) Name() string {
	return "firstFailBindPlugin"
}

func (p *firstFailBindPlugin) Bind(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodename string) *framework.Status {
	if p.counter == 0 {
		// Fail in the first Bind call.
		p.counter++
		return framework.NewStatus(framework.Error, "firstFailBindPlugin rejects the Pod")
	}

	return p.defaultBinderPlugin.Bind(ctx, state, pod, nodename)
}

// TestRequeueByPermitRejection verifies that Pods rejected by permit plugins in the
// binding cycle are put back to the queue with the correct scheduling cycle number.
func TestRequeueByPermitRejection(t *testing.T) {
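	// Queueing hints are used only when the SchedulerQueueingHints feature gate
	// is on, so enable it for the duration of this test.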
	featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerQueueingHints, true)
	queueingHintCalledCounter := 0
	fakePermit := &fakePermitPlugin{}
	registry := frameworkruntime.Registry{
		fakePermitPluginName: func(ctx context.Context, o runtime.Object, fh framework.Handle) (framework.Plugin, error) {
			fakePermit = &fakePermitPlugin{
				frameworkHandler: fh,
				schedulingHint: func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
					queueingHintCalledCounter++
					return framework.Queue, nil
				},
			}
			return fakePermit, nil
		},
	}
	cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{
		Profiles: []configv1.KubeSchedulerProfile{{
			SchedulerName: pointer.String(v1.DefaultSchedulerName),
			Plugins: &configv1.Plugins{
				MultiPoint: configv1.PluginSet{
					Enabled: []configv1.Plugin{
						{Name: fakePermitPluginName},
					},
				},
			},
		}}})

	// Use zero backoff seconds to bypass backoffQ.
	testCtx := testutils.InitTestSchedulerWithOptions(
		t,
		testutils.InitTestAPIServer(t, "core-res-enqueue", nil),
		0,
		scheduler.WithPodInitialBackoffSeconds(0),
		scheduler.WithPodMaxBackoffSeconds(0),
		scheduler.WithProfiles(cfg.Profiles...),
		scheduler.WithFrameworkOutOfTreeRegistry(registry),
	)
	testutils.SyncSchedulerInformerFactory(testCtx)

	go testCtx.Scheduler.Run(testCtx.Ctx)

	cs, ns, ctx := testCtx.ClientSet, testCtx.NS.Name, testCtx.Ctx
	node := st.MakeNode().Name("fake-node").Obj()
	if _, err := cs.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil {
		t.Fatalf("Failed to create Node %q: %v", node.Name, err)
	}
	// Create a pod.
	pod := st.MakePod().Namespace(ns).Name("pod-1").Container(imageutils.GetPauseImageName()).Obj()
	if _, err := cs.CoreV1().Pods(ns).Create(ctx, pod, metav1.CreateOptions{}); err != nil {
		t.Fatalf("Failed to create Pod %q: %v", pod.Name, err)
	}

	// Update the node label (this causes a NodeUpdate event).
	node.Labels = map[string]string{"updated": ""}
	if _, err := cs.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{}); err != nil {
		t.Fatalf("Failed to add labels to the node: %v", err)
	}

	// Create another pod to increment the scheduling cycle number in the scheduling queue.
	// This makes sure the NodeUpdate event, which happened in the previous scheduling
	// cycle, enqueues pod-1 to activeQ via the scheduling queue.
	pod = st.MakePod().Namespace(ns).Name("pod-2").Container(imageutils.GetPauseImageName()).Obj()
	if _, err := cs.CoreV1().Pods(ns).Create(ctx, pod, metav1.CreateOptions{}); err != nil {
		t.Fatalf("Failed to create Pod %q: %v", pod.Name, err)
	}

	// Reject pod-1 to simulate a failure in the Permit plugins.
	// pod-1 should be enqueued to activeQ because the NodeUpdate event has happened.
	fakePermit.frameworkHandler.IterateOverWaitingPods(func(wp framework.WaitingPod) {
		if wp.GetPod().Name == "pod-1" {
			wp.Reject(fakePermitPluginName, "fakePermitPlugin rejects the Pod")
			return
		}
	})

	// Wait for pod-2 to be scheduled.
	err := wait.PollUntilContextTimeout(ctx, 200*time.Millisecond, wait.ForeverTestTimeout, false, func(ctx context.Context) (done bool, err error) {
		fakePermit.frameworkHandler.IterateOverWaitingPods(func(wp framework.WaitingPod) {
			if wp.GetPod().Name == "pod-2" {
				wp.Allow(fakePermitPluginName)
			}
		})

		return testutils.PodScheduled(cs, ns, "pod-2")(ctx)
	})
	if err != nil {
		t.Fatalf("Expect pod-2 to be scheduled: %v", err)
	}

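	// The rejection above plus the NodeUpdate event should requeue pod-1, so it
	// eventually reaches the Permit phase again; allow it this time.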
	err = wait.PollUntilContextTimeout(ctx, 200*time.Millisecond, wait.ForeverTestTimeout, false, func(ctx context.Context) (done bool, err error) {
		pod1Found := false
		fakePermit.frameworkHandler.IterateOverWaitingPods(func(wp framework.WaitingPod) {
			if wp.GetPod().Name == "pod-1" {
				pod1Found = true
				wp.Allow(fakePermitPluginName)
			}
		})
		return pod1Found, nil
	})
	if err != nil {
		t.Fatal("Expect pod-1 to be requeued and reach the Permit phase again")
	}

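	// The queueing hint should have fired exactly once: for the NodeUpdate event
	// that requeued the rejected pod-1.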
	if queueingHintCalledCounter != 1 {
		t.Fatalf("Expected the queueing hint to be called once, but it was called %v times", queueingHintCalledCounter)
	}
}

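// fakePermitPlugin makes every Pod wait in the Permit phase and records
// queueing-hint invocations via schedulingHint.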
type fakePermitPlugin struct {
	frameworkHandler framework.Handle
	schedulingHint   framework.QueueingHintFn
}

const fakePermitPluginName = "fakePermitPlugin"

func (p *fakePermitPlugin) Name() string {
	return fakePermitPluginName
}

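// Permit holds every Pod in the waiting state until the test explicitly
// allows or rejects it.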
func (p *fakePermitPlugin) Permit(ctx context.Context, state *framework.CycleState, _ *v1.Pod, _ string) (*framework.Status, time.Duration) {
	return framework.NewStatus(framework.Wait), wait.ForeverTestTimeout
}

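// EventsToRegister subscribes to Node label updates and routes them through the
// test's queueing hint.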
func (p *fakePermitPlugin) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
	return []framework.ClusterEventWithHint{
		{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.UpdateNodeLabel}, QueueingHintFn: p.schedulingHint},
	}, nil
}