Merge pull request #132886 from macsko/send_api_calls_through_dispatcher

KEP-5229: Send API calls through dispatcher and cache
Kubernetes Prow Robot
2025-07-28 03:58:28 -07:00
committed by GitHub
34 changed files with 3135 additions and 1841 deletions

View File

@@ -0,0 +1,72 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package apicache
import (
"context"
v1 "k8s.io/api/core/v1"
fwk "k8s.io/kube-scheduler/framework"
internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
"k8s.io/kubernetes/pkg/scheduler/framework"
)
// APICache is responsible for sending API call requests through the scheduling queue or the cache.
type APICache struct {
schedulingQueue internalqueue.SchedulingQueue
cache internalcache.Cache
}
func New(schedulingQueue internalqueue.SchedulingQueue, cache internalcache.Cache) *APICache {
return &APICache{
schedulingQueue: schedulingQueue,
cache: cache,
}
}
// PatchPodStatus sends a patch request for a Pod's status through a scheduling queue.
// The patch could be first applied to the cached Pod object and then the API call is executed asynchronously.
// It returns a channel that can be used to wait for the call's completion.
func (c *APICache) PatchPodStatus(pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *framework.NominatingInfo) (<-chan error, error) {
return c.schedulingQueue.PatchPodStatus(pod, condition, nominatingInfo)
}
// BindPod sends a binding request through a cache. The binding could be first applied to the cached Pod object
// and then the API call is executed asynchronously.
// It returns a channel that can be used to wait for the call's completion.
func (c *APICache) BindPod(binding *v1.Binding) (<-chan error, error) {
return c.cache.BindPod(binding)
}
// WaitOnFinish blocks until the result of an API call is sent to the given onFinish channel
// (returned by methods BindPod or PreemptPod).
//
// It returns the error received from the channel.
// It also returns nil if the call was skipped or overwritten,
// as these are considered successful lifecycle outcomes.
func (c *APICache) WaitOnFinish(ctx context.Context, onFinish <-chan error) error {
select {
case err := <-onFinish:
if fwk.IsUnexpectedError(err) {
return err
}
case <-ctx.Done():
return ctx.Err()
}
return nil
}
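
For orientation, below is a minimal usage sketch of the APICache above. It is not part of this change; the bindThroughAPICache helper and the example package are made up for illustration.

package example

import (
    "context"

    v1 "k8s.io/api/core/v1"
    apicache "k8s.io/kubernetes/pkg/scheduler/backend/api_cache"
    internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
    internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
)

// bindThroughAPICache binds a pod through the APICache and waits for the
// asynchronous call to finish. Illustrative only; it assumes the caller
// already holds the scheduler's SchedulingQueue and Cache.
func bindThroughAPICache(ctx context.Context, queue internalqueue.SchedulingQueue, schedCache internalcache.Cache, binding *v1.Binding) error {
    c := apicache.New(queue, schedCache)
    onFinish, err := c.BindPod(binding)
    if err != nil {
        return err
    }
    // Block until the dispatcher reports the call's result or ctx is done.
    return c.WaitOnFinish(ctx, onFinish)
}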

View File

@@ -65,11 +65,11 @@ func (ad *APIDispatcher) SyncObject(obj metav1.Object) (metav1.Object, error) {
// Run starts the main processing loop of the APIDispatcher, which pops calls
// from the queue and dispatches them to worker goroutines for execution.
func (ad *APIDispatcher) Run(logger klog.Logger) {
go func() {
// Create a new context to allow cancelling the APICalls' execution when the APIDispatcher is closed.
ctx, cancel := context.WithCancel(context.Background())
ad.cancel = cancel
// Create a new context to allow cancelling the APICalls' execution when the APIDispatcher is closed.
ctx, cancel := context.WithCancel(context.Background())
ad.cancel = cancel
go func() {
for {
select {
case <-ctx.Done():

View File

@@ -30,6 +30,7 @@ import (
"k8s.io/klog/v2"
fwk "k8s.io/kube-scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
"k8s.io/kubernetes/pkg/scheduler/metrics"
)
@@ -41,9 +42,9 @@ var (
// It automatically starts a goroutine that manages expiration of assumed pods.
// "ttl" is how long an assumed pod is kept before it expires.
// "ctx" is the context that closes the background goroutine.
func New(ctx context.Context, ttl time.Duration) Cache {
func New(ctx context.Context, ttl time.Duration, apiDispatcher fwk.APIDispatcher) Cache {
logger := klog.FromContext(ctx)
cache := newCache(ctx, ttl, cleanAssumedPeriod)
cache := newCache(ctx, ttl, cleanAssumedPeriod, apiDispatcher)
cache.run(logger)
return cache
}
@@ -76,6 +77,10 @@ type cacheImpl struct {
nodeTree *nodeTree
// A map from image name to its ImageStateSummary.
imageStates map[string]*fwk.ImageStateSummary
// apiDispatcher is used for the methods that are expected to send API calls.
// It's non-nil only if the SchedulerAsyncAPICalls feature gate is enabled.
apiDispatcher fwk.APIDispatcher
}
type podState struct {
@@ -87,18 +92,19 @@ type podState struct {
bindingFinished bool
}
func newCache(ctx context.Context, ttl, period time.Duration) *cacheImpl {
func newCache(ctx context.Context, ttl, period time.Duration, apiDispatcher fwk.APIDispatcher) *cacheImpl {
logger := klog.FromContext(ctx)
return &cacheImpl{
ttl: ttl,
period: period,
stop: ctx.Done(),
nodes: make(map[string]*nodeInfoListItem),
nodeTree: newNodeTree(logger, nil),
assumedPods: sets.New[string](),
podStates: make(map[string]*podState),
imageStates: make(map[string]*fwk.ImageStateSummary),
nodes: make(map[string]*nodeInfoListItem),
nodeTree: newNodeTree(logger, nil),
assumedPods: sets.New[string](),
podStates: make(map[string]*podState),
imageStates: make(map[string]*fwk.ImageStateSummary),
apiDispatcher: apiDispatcher,
}
}
@@ -759,3 +765,17 @@ func (cache *cacheImpl) updateMetrics() {
metrics.CacheSize.WithLabelValues("pods").Set(float64(len(cache.podStates)))
metrics.CacheSize.WithLabelValues("nodes").Set(float64(len(cache.nodes)))
}
// BindPod handles the pod binding by adding a bind API call to the dispatcher.
// This method should be used only if the SchedulerAsyncAPICalls feature gate is enabled.
func (cache *cacheImpl) BindPod(binding *v1.Binding) (<-chan error, error) {
// Don't store anything in the cache, as the pod is already assumed, and in case of a binding failure, it will be forgotten.
onFinish := make(chan error, 1)
err := cache.apiDispatcher.Add(apicalls.Implementations.PodBinding(binding), fwk.APICallOptions{
OnFinish: onFinish,
})
if fwk.IsUnexpectedError(err) {
return onFinish, err
}
return onFinish, nil
}

View File

@@ -235,7 +235,7 @@ func TestAssumePodScheduled(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
cache := newCache(ctx, time.Second, time.Second)
cache := newCache(ctx, time.Second, time.Second, nil)
for _, pod := range tc.pods {
if err := cache.AssumePod(logger, pod); err != nil {
t.Fatalf("AssumePod failed: %v", err)
@@ -354,7 +354,7 @@ func TestExpirePod(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
cache := newCache(ctx, tc.ttl, time.Second)
cache := newCache(ctx, tc.ttl, time.Second, nil)
for _, pod := range tc.pods {
if err := cache.AssumePod(logger, pod.pod); err != nil {
@@ -415,7 +415,7 @@ func TestAddPodWillConfirm(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
cache := newCache(ctx, ttl, time.Second)
cache := newCache(ctx, ttl, time.Second, nil)
for _, podToAssume := range test.podsToAssume {
if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
t.Fatalf("assumePod failed: %v", err)
@@ -458,7 +458,7 @@ func TestDump(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
cache := newCache(ctx, ttl, time.Second)
cache := newCache(ctx, ttl, time.Second, nil)
for _, podToAssume := range test.podsToAssume {
if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
t.Errorf("assumePod failed: %v", err)
@@ -526,7 +526,7 @@ func TestAddPodAlwaysUpdatesPodInfoInNodeInfo(t *testing.T) {
},
}
cache := newCache(ctx, ttl, time.Second)
cache := newCache(ctx, ttl, time.Second, nil)
for _, podToAssume := range test.podsToAssume {
if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
t.Fatalf("assumePod failed: %v", err)
@@ -585,7 +585,7 @@ func TestAddPodWillReplaceAssumed(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
cache := newCache(ctx, ttl, time.Second)
cache := newCache(ctx, ttl, time.Second, nil)
for _, podToAssume := range test.podsToAssume {
if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
t.Fatalf("assumePod failed: %v", err)
@@ -638,7 +638,7 @@ func TestAddPodAfterExpiration(t *testing.T) {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
now := time.Now()
cache := newCache(ctx, ttl, time.Second)
cache := newCache(ctx, ttl, time.Second, nil)
if err := assumeAndFinishBinding(logger, cache, test.pod, now); err != nil {
t.Fatalf("assumePod failed: %v", err)
}
@@ -703,7 +703,7 @@ func TestUpdatePod(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
cache := newCache(ctx, ttl, time.Second)
cache := newCache(ctx, ttl, time.Second, nil)
for _, podToAdd := range test.podsToAdd {
if err := cache.AddPod(logger, podToAdd); err != nil {
t.Fatalf("AddPod failed: %v", err)
@@ -765,7 +765,7 @@ func TestUpdatePodAndGet(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
cache := newCache(ctx, ttl, time.Second)
cache := newCache(ctx, ttl, time.Second, nil)
// trying to get an unknown pod should return an error
// podToUpdate has not been added yet
if _, err := cache.GetPod(tc.podToUpdate); err == nil {
@@ -848,7 +848,7 @@ func TestExpireAddUpdatePod(t *testing.T) {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
now := time.Now()
cache := newCache(ctx, ttl, time.Second)
cache := newCache(ctx, ttl, time.Second, nil)
for _, podToAssume := range test.podsToAssume {
if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
t.Fatalf("assumePod failed: %v", err)
@@ -909,7 +909,7 @@ func TestEphemeralStorageResource(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
cache := newCache(ctx, time.Second, time.Second)
cache := newCache(ctx, time.Second, time.Second, nil)
if err := cache.AddPod(logger, test.pod); err != nil {
t.Fatalf("AddPod failed: %v", err)
}
@@ -963,7 +963,7 @@ func TestRemovePod(t *testing.T) {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
nodeName := pod.Spec.NodeName
cache := newCache(ctx, time.Second, time.Second)
cache := newCache(ctx, time.Second, time.Second, nil)
// Add/Assume pod succeeds even before adding the nodes.
if tt.assume {
if err := cache.AddPod(logger, pod); err != nil {
@@ -1013,7 +1013,7 @@ func TestForgetPod(t *testing.T) {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
cache := newCache(ctx, ttl, time.Second)
cache := newCache(ctx, ttl, time.Second, nil)
for _, pod := range pods {
if err := assumeAndFinishBinding(logger, cache, pod, now); err != nil {
t.Fatalf("assumePod failed: %v", err)
@@ -1226,7 +1226,7 @@ func TestNodeOperators(t *testing.T) {
imageStates := buildImageStates(tc.nodes)
expected := buildNodeInfo(node, tc.pods, imageStates)
cache := newCache(ctx, time.Second, time.Second)
cache := newCache(ctx, time.Second, time.Second, nil)
for _, nodeItem := range tc.nodes {
cache.AddNode(logger, nodeItem)
}
@@ -1720,7 +1720,7 @@ func TestSchedulerCache_UpdateSnapshot(t *testing.T) {
_, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
cache = newCache(ctx, time.Second, time.Second)
cache = newCache(ctx, time.Second, time.Second, nil)
snapshot = NewEmptySnapshot()
for _, op := range test.operations {
@@ -1954,7 +1954,7 @@ func TestSchedulerCache_updateNodeInfoSnapshotList(t *testing.T) {
_, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
cache = newCache(ctx, time.Second, time.Second)
cache = newCache(ctx, time.Second, time.Second, nil)
snapshot = NewEmptySnapshot()
test.operations(t)
@@ -2060,7 +2060,7 @@ func setupCacheOf1kNodes30kPods(b *testing.B) Cache {
logger, ctx := ktesting.NewTestContext(b)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
cache := newCache(ctx, time.Second, time.Second)
cache := newCache(ctx, time.Second, time.Second, nil)
for i := 0; i < 1000; i++ {
nodeName := fmt.Sprintf("node-%d", i)
cache.AddNode(logger, st.MakeNode().Name(nodeName).Obj())
@@ -2081,7 +2081,7 @@ func setupCacheWithAssumedPods(b *testing.B, podNum int, assumedTime time.Time)
ctx, cancel := context.WithCancel(ctx)
addedNodes := make(map[string]struct{})
defer cancel()
cache := newCache(ctx, time.Second, time.Second)
cache := newCache(ctx, time.Second, time.Second, nil)
for i := 0; i < podNum; i++ {
nodeName := fmt.Sprintf("node-%d", i/10)
if _, ok := addedNodes[nodeName]; !ok {

View File

@@ -20,74 +20,47 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
"k8s.io/kubernetes/pkg/scheduler/framework"
)
// Cache is used for testing
type Cache struct {
internalcache.Cache
AssumeFunc func(*v1.Pod)
ForgetFunc func(*v1.Pod)
IsAssumedPodFunc func(*v1.Pod) bool
GetPodFunc func(*v1.Pod) *v1.Pod
}
// AssumePod is a fake method for testing.
// AssumePod allows mocking this method for testing.
func (c *Cache) AssumePod(logger klog.Logger, pod *v1.Pod) error {
c.AssumeFunc(pod)
return nil
if c.AssumeFunc != nil {
c.AssumeFunc(pod)
return nil
}
return c.Cache.AssumePod(logger, pod)
}
// FinishBinding is a fake method for testing.
func (c *Cache) FinishBinding(logger klog.Logger, pod *v1.Pod) error { return nil }
// ForgetPod is a fake method for testing.
// ForgetPod allows mocking this method for testing.
func (c *Cache) ForgetPod(logger klog.Logger, pod *v1.Pod) error {
c.ForgetFunc(pod)
return nil
if c.ForgetFunc != nil {
c.ForgetFunc(pod)
return nil
}
return c.Cache.ForgetPod(logger, pod)
}
// AddPod is a fake method for testing.
func (c *Cache) AddPod(logger klog.Logger, pod *v1.Pod) error { return nil }
// UpdatePod is a fake method for testing.
func (c *Cache) UpdatePod(logger klog.Logger, oldPod, newPod *v1.Pod) error { return nil }
// RemovePod is a fake method for testing.
func (c *Cache) RemovePod(logger klog.Logger, pod *v1.Pod) error { return nil }
// IsAssumedPod is a fake method for testing.
// IsAssumedPod allows mocking this method for testing.
func (c *Cache) IsAssumedPod(pod *v1.Pod) (bool, error) {
return c.IsAssumedPodFunc(pod), nil
if c.IsAssumedPodFunc != nil {
return c.IsAssumedPodFunc(pod), nil
}
return c.Cache.IsAssumedPod(pod)
}
// GetPod is a fake method for testing.
// GetPod allows mocking this method for testing.
func (c *Cache) GetPod(pod *v1.Pod) (*v1.Pod, error) {
return c.GetPodFunc(pod), nil
}
// AddNode is a fake method for testing.
func (c *Cache) AddNode(logger klog.Logger, node *v1.Node) *framework.NodeInfo { return nil }
// UpdateNode is a fake method for testing.
func (c *Cache) UpdateNode(logger klog.Logger, oldNode, newNode *v1.Node) *framework.NodeInfo {
return nil
}
// RemoveNode is a fake method for testing.
func (c *Cache) RemoveNode(logger klog.Logger, node *v1.Node) error { return nil }
// UpdateSnapshot is a fake method for testing.
func (c *Cache) UpdateSnapshot(logger klog.Logger, snapshot *internalcache.Snapshot) error {
return nil
}
// NodeCount is a fake method for testing.
func (c *Cache) NodeCount() int { return 0 }
// PodCount is a fake method for testing.
func (c *Cache) PodCount() (int, error) { return 0, nil }
// Dump is a fake method for testing.
func (c *Cache) Dump() *internalcache.Dump {
return &internalcache.Dump{}
if c.GetPodFunc != nil {
return c.GetPodFunc(pod), nil
}
return c.Cache.GetPod(pod)
}

View File

@@ -114,6 +114,10 @@ type Cache interface {
// Dump produces a dump of the current cache.
Dump() *Dump
// BindPod handles the pod binding by adding a bind API call to the dispatcher.
// This method should be used only if the SchedulerAsyncAPICalls feature gate is enabled.
BindPod(binding *v1.Binding) (<-chan error, error)
}
// Dump is a dump of the cache state.

View File

@@ -49,6 +49,7 @@ import (
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/backend/heap"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/podtopologyspread"
"k8s.io/kubernetes/pkg/scheduler/metrics"
@@ -128,6 +129,10 @@ type SchedulingQueue interface {
// Run starts the goroutines managing the queue.
Run(logger klog.Logger)
// PatchPodStatus handles the pod status update by sending an update API call through the API dispatcher.
// This method should be used only if the SchedulerAsyncAPICalls feature gate is enabled.
PatchPodStatus(pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *framework.NominatingInfo) (<-chan error, error)
// The following functions are supposed to be used only for testing or debugging.
GetPod(name, namespace string) (*framework.QueuedPodInfo, bool)
PendingPods() ([]*v1.Pod, string)
@@ -193,6 +198,10 @@ type PriorityQueue struct {
// pluginMetricsSamplePercent is the percentage of plugin metrics to be sampled.
pluginMetricsSamplePercent int
// apiDispatcher is used for the methods that are expected to send API calls.
// It's non-nil only if the SchedulerAsyncAPICalls feature gate is enabled.
apiDispatcher fwk.APIDispatcher
// isSchedulingQueueHintEnabled indicates whether the feature gate for the scheduling queue is enabled.
isSchedulingQueueHintEnabled bool
// isPopFromBackoffQEnabled indicates whether the feature gate SchedulerPopFromBackoffQ is enabled.
@@ -224,6 +233,7 @@ type priorityQueueOptions struct {
pluginMetricsSamplePercent int
preEnqueuePluginMap map[string]map[string]framework.PreEnqueuePlugin
queueingHintMap QueueingHintMapPerProfile
apiDispatcher fwk.APIDispatcher
}
// Option configures a PriorityQueue
@@ -298,6 +308,13 @@ func WithPluginMetricsSamplePercent(percent int) Option {
}
}
// WithAPIDispatcher sets the API dispatcher.
func WithAPIDispatcher(apiDispatcher fwk.APIDispatcher) Option {
return func(o *priorityQueueOptions) {
o.apiDispatcher = apiDispatcher
}
}
var defaultPriorityQueueOptions = priorityQueueOptions{
clock: clock.RealClock{},
podInitialBackoffDuration: DefaultPodInitialBackoffDuration,
@@ -349,6 +366,7 @@ func NewPriorityQueue(
metricsRecorder: options.metricsRecorder,
pluginMetricsSamplePercent: options.pluginMetricsSamplePercent,
moveRequestCycle: -1,
apiDispatcher: options.apiDispatcher,
isSchedulingQueueHintEnabled: isSchedulingQueueHintEnabled,
isPopFromBackoffQEnabled: isPopFromBackoffQEnabled,
}
@@ -1307,6 +1325,20 @@ func (p *PriorityQueue) PendingPods() ([]*v1.Pod, string) {
return result, fmt.Sprintf(pendingPodsSummary, activeQLen, backoffQLen, len(p.unschedulablePods.podInfoMap))
}
// PatchPodStatus handles the pod status update by sending an update API call through the API dispatcher.
// This method should be used only if the SchedulerAsyncAPICalls feature gate is enabled.
func (p *PriorityQueue) PatchPodStatus(pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *framework.NominatingInfo) (<-chan error, error) {
// Don't store anything in the cache. This might be extended in future releases.
onFinish := make(chan error, 1)
err := p.apiDispatcher.Add(apicalls.Implementations.PodStatusPatch(pod, condition, nominatingInfo), fwk.APICallOptions{
OnFinish: onFinish,
})
if fwk.IsUnexpectedError(err) {
return onFinish, err
}
return onFinish, nil
}
// Note: this function assumes the caller locks both p.lock.RLock and p.activeQ.getLock().RLock.
func (p *PriorityQueue) nominatedPodToInfo(np podRef, unlockedActiveQ unlockedActiveQueueReader) *framework.PodInfo {
pod := np.toPod()

View File

@@ -136,6 +136,20 @@ func (sched *Scheduler) addPodToSchedulingQueue(obj interface{}) {
sched.SchedulingQueue.Add(logger, pod)
}
func (sched *Scheduler) syncPodWithDispatcher(pod *v1.Pod) *v1.Pod {
enrichedObj, err := sched.APIDispatcher.SyncObject(pod)
if err != nil {
utilruntime.HandleError(fmt.Errorf("failed to sync pod %s/%s with API dispatcher: %w", pod.Namespace, pod.Name, err))
return pod
}
enrichedPod, ok := enrichedObj.(*v1.Pod)
if !ok {
utilruntime.HandleError(fmt.Errorf("cannot convert enrichedObj of type %T to *v1.Pod", enrichedObj))
return pod
}
return enrichedPod
}
func (sched *Scheduler) updatePodInSchedulingQueue(oldObj, newObj interface{}) {
start := time.Now()
logger := sched.logger
@@ -153,6 +167,12 @@ func (sched *Scheduler) updatePodInSchedulingQueue(oldObj, newObj interface{}) {
}
}
if sched.APIDispatcher != nil {
// If the API dispatcher is available, sync the new pod with the details of its in-flight API calls.
// However, at the moment the updated newPod is discarded; this logic will be handled in future releases.
_ = sched.syncPodWithDispatcher(newPod)
}
isAssumed, err := sched.Cache.IsAssumedPod(newPod)
if err != nil {
utilruntime.HandleErrorWithLogger(logger, err, "Failed to check whether pod is assumed", "pod", klog.KObj(newPod))
@@ -274,6 +294,12 @@ func (sched *Scheduler) updatePodInCache(oldObj, newObj interface{}) {
return
}
if sched.APIDispatcher != nil {
// If the API dispatcher is available, sync the new pod with the details of its in-flight API calls.
// However, at the moment the updated newPod is discarded; this logic will be handled in future releases.
_ = sched.syncPodWithDispatcher(newPod)
}
logger.V(4).Info("Update event for scheduled pod", "pod", klog.KObj(oldPod))
if err := sched.Cache.UpdatePod(logger, oldPod, newPod); err != nil {
utilruntime.HandleErrorWithLogger(logger, err, "Scheduler cache UpdatePod failed", "pod", klog.KObj(oldPod))

View File

@@ -32,26 +32,26 @@ import (
resourcealphaapi "k8s.io/api/resource/v1alpha3"
storagev1 "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/version"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
resourceslicetracker "k8s.io/dynamic-resource-allocation/resourceslice/tracker"
"k8s.io/klog/v2"
"k8s.io/klog/v2/ktesting"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/dynamic/dynamicinformer"
dyfake "k8s.io/client-go/dynamic/fake"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
featuregatetesting "k8s.io/component-base/featuregate/testing"
resourceslicetracker "k8s.io/dynamic-resource-allocation/resourceslice/tracker"
"k8s.io/klog/v2"
"k8s.io/klog/v2/ktesting"
fwk "k8s.io/kube-scheduler/framework"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodename"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeports"
@@ -162,16 +162,22 @@ func TestEventHandlers_MoveToActiveOnNominatedNodeUpdate(t *testing.T) {
client := fake.NewClientset(objs...)
informerFactory := informers.NewSharedInformerFactory(client, 0)
// apiDispatcher is unused in the test, but initialize it anyway.
apiDispatcher := apidispatcher.New(client, 16, apicalls.Relevances)
apiDispatcher.Run(logger)
defer apiDispatcher.Close()
recorder := metrics.NewMetricsAsyncRecorder(3, 20*time.Microsecond, ctx.Done())
queue := internalqueue.NewPriorityQueue(
newDefaultQueueSort(),
informerFactory,
internalqueue.WithMetricsRecorder(*recorder),
internalqueue.WithQueueingHintMapPerProfile(queueingHintMap),
internalqueue.WithAPIDispatcher(apiDispatcher),
// disable backoff queue
internalqueue.WithPodInitialBackoffDuration(0),
internalqueue.WithPodMaxBackoffDuration(0))
schedulerCache := internalcache.New(ctx, 30*time.Second)
schedulerCache := internalcache.New(ctx, 30*time.Second, nil)
// Put test pods into unschedulable queue
for _, pod := range unschedulablePods {
@@ -186,7 +192,7 @@ func TestEventHandlers_MoveToActiveOnNominatedNodeUpdate(t *testing.T) {
}
}
s, _, err := initScheduler(ctx, schedulerCache, queue, client, informerFactory)
s, _, err := initScheduler(ctx, schedulerCache, queue, apiDispatcher, client, informerFactory)
if err != nil {
t.Fatalf("Failed to initialize test scheduler: %v", err)
}
@@ -242,7 +248,7 @@ func TestUpdatePodInCache(t *testing.T) {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
sched := &Scheduler{
Cache: internalcache.New(ctx, ttl),
Cache: internalcache.New(ctx, ttl, nil),
SchedulingQueue: internalqueue.NewTestQueue(ctx, nil),
logger: logger,
}

View File

@@ -333,7 +333,7 @@ func TestSchedulerWithExtenders(t *testing.T) {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
cache := internalcache.New(ctx, time.Duration(0))
cache := internalcache.New(ctx, time.Duration(0), nil)
for _, name := range test.nodes {
cache.AddNode(logger, createNode(name))
}

View File

@@ -0,0 +1,45 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package apicalls
import (
fwk "k8s.io/kube-scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework"
)
const (
PodStatusPatch fwk.APICallType = "pod_status_patch"
PodBinding fwk.APICallType = "pod_binding"
)
// Relevances is a built-in mapping from call types to relevances.
// Types of the same relevance should only be defined for different object types.
// Misconfiguration of this map can lead to unexpected system behavior,
// so any change has to be well tested and done with care.
// This mapping can be replaced by an out-of-tree plugin in its init() function, if needed.
var Relevances = fwk.APICallRelevances{
PodStatusPatch: 1,
PodBinding: 2,
}
// Implementations is a built-in mapping from call types to call constructors.
// It's used to construct call objects in the scheduler framework and to make them easier to replace.
// This mapping can be replaced by an out-of-tree plugin in its init() function, if needed.
var Implementations = framework.APICallImplementations[*PodStatusPatchCall, *PodBindingCall]{
PodStatusPatch: NewPodStatusPatchCall,
PodBinding: NewPodBindingCall,
}
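
The comments above note that these mappings can be replaced by an out-of-tree plugin in its init() function. A hedged sketch of the Relevances part only, using a purely hypothetical CustomCall type:

package customcalls

import (
    fwk "k8s.io/kube-scheduler/framework"
    apicalls "k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
)

// CustomCall is a hypothetical out-of-tree call type, used only for illustration.
const CustomCall fwk.APICallType = "custom_call"

func init() {
    // Replace the built-in relevance map with one that also ranks the custom call.
    // As noted above, types of the same relevance should only be defined for different object types.
    apicalls.Relevances = fwk.APICallRelevances{
        apicalls.PodStatusPatch: 1,
        apicalls.PodBinding:     2,
        CustomCall:              3,
    }
}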

View File

@@ -0,0 +1,68 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package apicalls
import (
"context"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
fwk "k8s.io/kube-scheduler/framework"
)
// PodBindingCall is used to bind the pod using the binding details.
type PodBindingCall struct {
binding *v1.Binding
}
func NewPodBindingCall(binding *v1.Binding) *PodBindingCall {
return &PodBindingCall{
binding: binding,
}
}
func (pbc *PodBindingCall) CallType() fwk.APICallType {
return PodBinding
}
func (pbc *PodBindingCall) UID() types.UID {
return pbc.binding.UID
}
func (pbc *PodBindingCall) Execute(ctx context.Context, client clientset.Interface) error {
logger := klog.FromContext(ctx)
logger.V(3).Info("Attempting to bind pod to node", "pod", klog.KObj(&pbc.binding.ObjectMeta), "node", pbc.binding.Target.Name)
return client.CoreV1().Pods(pbc.binding.Namespace).Bind(ctx, pbc.binding, metav1.CreateOptions{})
}
func (pbc *PodBindingCall) Sync(obj metav1.Object) (metav1.Object, error) {
// There is no need to store or update an object.
return obj, nil
}
func (pbc *PodBindingCall) Merge(oldCall fwk.APICall) error {
// Merge should just overwrite the previous call.
return nil
}
func (pbc *PodBindingCall) IsNoOp() bool {
return false
}
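
For reference, a binding call like the one above can also be handed to an fwk.APIDispatcher directly, mirroring cacheImpl.BindPod earlier in this PR. A hedged sketch, assuming an already-running dispatcher; the enqueueBinding helper and the example package are hypothetical:

package example

import (
    v1 "k8s.io/api/core/v1"
    fwk "k8s.io/kube-scheduler/framework"
    apicalls "k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
)

// enqueueBinding wraps the binding in a PodBindingCall and adds it to the
// dispatcher with a buffered OnFinish channel, following the same pattern
// as cacheImpl.BindPod. Illustrative only.
func enqueueBinding(dispatcher fwk.APIDispatcher, binding *v1.Binding) (<-chan error, error) {
    onFinish := make(chan error, 1)
    err := dispatcher.Add(apicalls.NewPodBindingCall(binding), fwk.APICallOptions{OnFinish: onFinish})
    if fwk.IsUnexpectedError(err) {
        return onFinish, err
    }
    return onFinish, nil
}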

View File

@@ -0,0 +1,66 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package apicalls
import (
"testing"
"github.com/google/go-cmp/cmp"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
clienttesting "k8s.io/client-go/testing"
"k8s.io/klog/v2/ktesting"
)
func TestPodBindingCall_Execute(t *testing.T) {
_, ctx := ktesting.NewTestContext(t)
binding := &v1.Binding{
ObjectMeta: metav1.ObjectMeta{
Name: "pod",
Namespace: "ns",
},
Target: v1.ObjectReference{
Name: "node",
},
}
client := fake.NewClientset()
bound := false
client.PrependReactor("create", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
createAction := action.(clienttesting.CreateActionImpl)
if createAction.Subresource != "binding" {
return false, nil, nil
}
bound = true
gotBinding := createAction.GetObject().(*v1.Binding)
if diff := cmp.Diff(binding, gotBinding); diff != "" {
t.Errorf("Execute() sent incorrect binding object (-want,+got):\n%s", diff)
}
return true, nil, nil
})
call := NewPodBindingCall(binding)
if err := call.Execute(ctx, client); err != nil {
t.Fatalf("Execute() returned unexpected error: %v", err)
}
if !bound {
t.Error("Expected binding API to be called")
}
}

View File

@@ -0,0 +1,183 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package apicalls
import (
"context"
"fmt"
"sync"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
fwk "k8s.io/kube-scheduler/framework"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/util"
)
// PodStatusPatchCall is used to patch the pod status.
type PodStatusPatchCall struct {
lock sync.Mutex
// executed is set at the beginning of the call's Execute
// and is used by Sync to know if the podStatus should be updated.
executed bool
// podUID is the UID of the pod.
podUID types.UID
// podRef is a reference to the pod.
podRef klog.ObjectRef
// podStatus contains the actual status of the pod.
podStatus *v1.PodStatus
// newCondition is a condition to update.
newCondition *v1.PodCondition
// nominatingInfo is a nominating info to update.
nominatingInfo *framework.NominatingInfo
}
func NewPodStatusPatchCall(pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *framework.NominatingInfo) *PodStatusPatchCall {
return &PodStatusPatchCall{
podUID: pod.UID,
podRef: klog.KObj(pod),
podStatus: pod.Status.DeepCopy(),
newCondition: condition,
nominatingInfo: nominatingInfo,
}
}
func (psuc *PodStatusPatchCall) CallType() fwk.APICallType {
return PodStatusPatch
}
func (psuc *PodStatusPatchCall) UID() types.UID {
return psuc.podUID
}
// syncStatus syncs the given status with condition and nominatingInfo. It returns true if anything was actually updated.
func syncStatus(status *v1.PodStatus, condition *v1.PodCondition, nominatingInfo *framework.NominatingInfo) bool {
nnnNeedsUpdate := nominatingInfo.Mode() == framework.ModeOverride && status.NominatedNodeName != nominatingInfo.NominatedNodeName
if condition != nil {
if !podutil.UpdatePodCondition(status, condition) && !nnnNeedsUpdate {
return false
}
} else if !nnnNeedsUpdate {
return false
}
if nnnNeedsUpdate {
status.NominatedNodeName = nominatingInfo.NominatedNodeName
}
return true
}
func (psuc *PodStatusPatchCall) Execute(ctx context.Context, client clientset.Interface) error {
psuc.lock.Lock()
// The executed flag is set here to avoid racing with the podStatus write in Sync afterwards.
psuc.executed = true
condition := psuc.newCondition.DeepCopy()
podStatusCopy := psuc.podStatus.DeepCopy()
psuc.lock.Unlock()
logger := klog.FromContext(ctx)
if condition != nil {
logger.V(3).Info("Updating pod condition", "pod", psuc.podRef, "conditionType", condition.Type, "conditionStatus", condition.Status, "conditionReason", condition.Reason)
}
// Sync the status so that the condition and nominatingInfo are applied to podStatusCopy.
synced := syncStatus(podStatusCopy, condition, psuc.nominatingInfo)
if !synced {
logger.V(5).Info("Pod status patch call does not need to be executed because it has no effect", "pod", psuc.podRef)
return nil
}
// It's safe to run PatchPodStatus even on an outdated pod object.
err := util.PatchPodStatus(ctx, client, psuc.podRef.Name, psuc.podRef.Namespace, psuc.podStatus, podStatusCopy)
if err != nil {
logger.Error(err, "Failed to patch pod status", "pod", psuc.podRef)
return err
}
return nil
}
func (psuc *PodStatusPatchCall) Sync(obj metav1.Object) (metav1.Object, error) {
pod, ok := obj.(*v1.Pod)
if !ok {
return obj, fmt.Errorf("unexpected error: object of type %T is not of type *v1.Pod", obj)
}
psuc.lock.Lock()
if !psuc.executed {
// Set podStatus only if the call execution hasn't started yet,
// because otherwise it's irrelevant and might race.
psuc.podStatus = pod.Status.DeepCopy()
}
psuc.lock.Unlock()
podCopy := pod.DeepCopy()
// Sync the passed pod's status with the call's condition and nominatingInfo.
synced := syncStatus(&podCopy.Status, psuc.newCondition, psuc.nominatingInfo)
if !synced {
return pod, nil
}
return podCopy, nil
}
func (psuc *PodStatusPatchCall) Merge(oldCall fwk.APICall) error {
oldPsuc, ok := oldCall.(*PodStatusPatchCall)
if !ok {
return fmt.Errorf("unexpected error: call of type %T is not of type *PodStatusPatchCall", oldCall)
}
if psuc.nominatingInfo.Mode() == framework.ModeNoop && oldPsuc.nominatingInfo.Mode() == framework.ModeOverride {
// Set a nominatingInfo from an old call if the new one is no-op.
psuc.nominatingInfo = oldPsuc.nominatingInfo
}
if psuc.newCondition == nil && oldPsuc.newCondition != nil {
// Set a condition from an old call if the new one is nil.
psuc.newCondition = oldPsuc.newCondition
}
return nil
}
// conditionNeedsUpdate checks if the pod condition needs update.
func conditionNeedsUpdate(status *v1.PodStatus, condition *v1.PodCondition) bool {
// Try to find this pod condition.
_, oldCondition := podutil.GetPodCondition(status, condition.Type)
if oldCondition == nil {
return true
}
isEqual := condition.Status == oldCondition.Status &&
condition.Reason == oldCondition.Reason &&
condition.Message == oldCondition.Message &&
condition.LastProbeTime.Equal(&oldCondition.LastProbeTime)
// Return true if any of the fields has changed.
return !isEqual
}
func (psuc *PodStatusPatchCall) IsNoOp() bool {
nnnNeedsUpdate := psuc.nominatingInfo.Mode() == framework.ModeOverride && psuc.podStatus.NominatedNodeName != psuc.nominatingInfo.NominatedNodeName
if nnnNeedsUpdate {
return false
}
if psuc.newCondition == nil {
return true
}
return !conditionNeedsUpdate(psuc.podStatus, psuc.newCondition)
}

View File

@@ -0,0 +1,250 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package apicalls
import (
"testing"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
clienttesting "k8s.io/client-go/testing"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/pkg/scheduler/framework"
)
func TestPodStatusPatchCall_IsNoOp(t *testing.T) {
podWithNode := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "uid",
},
Status: v1.PodStatus{
NominatedNodeName: "node-a",
Conditions: []v1.PodCondition{
{
Type: v1.PodScheduled,
Status: v1.ConditionFalse,
},
},
},
}
tests := []struct {
name string
pod *v1.Pod
condition *v1.PodCondition
nominatingInfo *framework.NominatingInfo
want bool
}{
{
name: "No-op when condition and node name match",
pod: podWithNode,
condition: &v1.PodCondition{Type: v1.PodScheduled, Status: v1.ConditionFalse},
nominatingInfo: &framework.NominatingInfo{NominatedNodeName: "node-a", NominatingMode: framework.ModeOverride},
want: true,
},
{
name: "Not no-op when condition is different",
pod: podWithNode,
condition: &v1.PodCondition{Type: v1.PodScheduled, Status: v1.ConditionTrue},
nominatingInfo: &framework.NominatingInfo{NominatedNodeName: "node-a", NominatingMode: framework.ModeOverride},
want: false,
},
{
name: "Not no-op when nominated node name is different",
pod: podWithNode,
condition: &v1.PodCondition{Type: v1.PodScheduled, Status: v1.ConditionFalse},
nominatingInfo: &framework.NominatingInfo{NominatedNodeName: "node-b", NominatingMode: framework.ModeOverride},
want: false,
},
{
name: "No-op when condition is nil and node name matches",
pod: podWithNode,
condition: nil,
nominatingInfo: &framework.NominatingInfo{NominatedNodeName: "node-a", NominatingMode: framework.ModeOverride},
want: true,
},
{
name: "Not no-op when condition is nil but node name differs",
pod: podWithNode,
condition: nil,
nominatingInfo: &framework.NominatingInfo{NominatedNodeName: "node-b", NominatingMode: framework.ModeOverride},
want: false,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
call := NewPodStatusPatchCall(test.pod, test.condition, test.nominatingInfo)
if got := call.IsNoOp(); got != test.want {
t.Errorf("Expected IsNoOp() to return %v, but got %v", test.want, got)
}
})
}
}
func TestPodStatusPatchCall_Merge(t *testing.T) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "uid",
},
}
t.Run("Merges nominating info and condition from the old call", func(t *testing.T) {
oldCall := NewPodStatusPatchCall(pod, &v1.PodCondition{Type: v1.PodScheduled, Status: v1.ConditionFalse},
&framework.NominatingInfo{NominatedNodeName: "node-a", NominatingMode: framework.ModeOverride},
)
newCall := NewPodStatusPatchCall(pod, nil, &framework.NominatingInfo{NominatingMode: framework.ModeNoop})
if err := newCall.Merge(oldCall); err != nil {
t.Fatalf("Unexpected error returned by Merge(): %v", err)
}
if newCall.nominatingInfo.NominatedNodeName != "node-a" {
t.Errorf("Expected NominatedNodeName to be node-a, but got: %v", newCall.nominatingInfo.NominatedNodeName)
}
if newCall.newCondition == nil || newCall.newCondition.Type != v1.PodScheduled {
t.Errorf("Expected PodScheduled condition, but got: %v", newCall.newCondition)
}
})
t.Run("Doesn't overwrite nominating info and condition of a new call", func(t *testing.T) {
oldCall := NewPodStatusPatchCall(pod, nil, &framework.NominatingInfo{NominatingMode: framework.ModeNoop})
newCall := NewPodStatusPatchCall(pod, &v1.PodCondition{Type: v1.PodScheduled, Status: v1.ConditionFalse},
&framework.NominatingInfo{NominatedNodeName: "node-b", NominatingMode: framework.ModeOverride})
if err := newCall.Merge(oldCall); err != nil {
t.Fatalf("Unexpected error returned by Merge(): %v", err)
}
if newCall.nominatingInfo.NominatedNodeName != "node-b" {
t.Errorf("Expected NominatedNodeName to be node-b, but got: %v", newCall.nominatingInfo.NominatedNodeName)
}
if newCall.newCondition == nil || newCall.newCondition.Type != v1.PodScheduled {
t.Errorf("Expected PodScheduled condition, but got: %v", newCall.newCondition)
}
})
}
func TestPodStatusPatchCall_Sync(t *testing.T) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "uid",
},
Status: v1.PodStatus{
NominatedNodeName: "node-a",
Conditions: []v1.PodCondition{
{
Type: v1.PodScheduled,
Status: v1.ConditionFalse,
},
},
},
}
t.Run("Syncs the status before execution and updates the pod", func(t *testing.T) {
call := NewPodStatusPatchCall(pod, nil,
&framework.NominatingInfo{NominatedNodeName: "node-c", NominatingMode: framework.ModeOverride})
updatedPod := pod.DeepCopy()
updatedPod.Status.NominatedNodeName = "node-b"
syncedObj, err := call.Sync(updatedPod)
if err != nil {
t.Fatalf("Unexpected error returned by Sync(): %v", err)
}
if call.podStatus.NominatedNodeName != "node-b" {
t.Errorf("Expected podStatus NominatedNodeName to be node-b, but got: %v", call.podStatus.NominatedNodeName)
}
syncedPod := syncedObj.(*v1.Pod)
if syncedPod.Status.NominatedNodeName != "node-c" {
t.Errorf("Expected synced pod's NominatedNodeName to be node-c, but got: %v", syncedPod.Status.NominatedNodeName)
}
})
t.Run("Doesn't sync internal status during or after execution, but updates the pod", func(t *testing.T) {
call := NewPodStatusPatchCall(pod, nil,
&framework.NominatingInfo{NominatedNodeName: "node-c", NominatingMode: framework.ModeOverride})
call.executed = true
updatedPod := pod.DeepCopy()
updatedPod.Status.NominatedNodeName = "node-b"
syncedObj, err := call.Sync(updatedPod)
if err != nil {
t.Fatalf("Unexpected error returned by Sync(): %v", err)
}
if call.podStatus.NominatedNodeName != "node-a" {
t.Errorf("Expected podStatus NominatedNodeName to be node-a, but got: %v", call.podStatus.NominatedNodeName)
}
syncedPod := syncedObj.(*v1.Pod)
if syncedPod.Status.NominatedNodeName != "node-c" {
t.Errorf("Expected synced pod's NominatedNodeName to be node-c, but got: %v", syncedPod.Status.NominatedNodeName)
}
})
}
func TestPodStatusPatchCall_Execute(t *testing.T) {
_, ctx := ktesting.NewTestContext(t)
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "uid",
Name: "pod",
Namespace: "ns",
},
Status: v1.PodStatus{
NominatedNodeName: "node-a",
},
}
t.Run("Successful patch", func(t *testing.T) {
client := fake.NewClientset()
patched := false
client.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
patched = true
return true, nil, nil
})
call := NewPodStatusPatchCall(pod, &v1.PodCondition{Type: v1.PodScheduled, Status: v1.ConditionFalse},
&framework.NominatingInfo{NominatingMode: framework.ModeNoop})
if err := call.Execute(ctx, client); err != nil {
t.Fatalf("Unexpected error returned by Execute(): %v", err)
}
if !patched {
t.Error("Expected patch API to be called")
}
if !call.executed {
t.Error("Expected 'executed' flag to be set during execution")
}
})
t.Run("Skip API call if patch is not needed", func(t *testing.T) {
client := fake.NewClientset()
patched := false
client.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
patched = true
return true, nil, nil
})
noOpCall := NewPodStatusPatchCall(pod, nil,
&framework.NominatingInfo{NominatedNodeName: "node-a", NominatingMode: framework.ModeOverride})
if err := noOpCall.Execute(ctx, client); err != nil {
t.Fatalf("Unexpected error returned by Execute(): %v", err)
}
if patched {
t.Error("Expected patch API not to be called if the call is no-op")
}
})
}

View File

@@ -579,6 +579,8 @@ type Framework interface {
SetPodNominator(nominator PodNominator)
// SetPodActivator sets the PodActivator
SetPodActivator(activator PodActivator)
// SetAPICacher sets the APICacher
SetAPICacher(apiCacher APICacher)
// Close calls Close method of each plugin.
Close() error
@@ -642,6 +644,49 @@ type Handle interface {
// Parallelizer returns a parallelizer holding parallelism for scheduler.
Parallelizer() parallelize.Parallelizer
// APIDispatcher returns a fwk.APIDispatcher that can be used to dispatch API calls directly.
// This is non-nil if the SchedulerAsyncAPICalls feature gate is enabled.
APIDispatcher() fwk.APIDispatcher
// APICacher returns an APICacher that coordinates API calls with the scheduler's internal cache.
// Use this to ensure the scheduler's view of the cluster remains consistent.
// This is non-nil if the SchedulerAsyncAPICalls feature gate is enabled.
APICacher() APICacher
}
// APICacher defines methods that send API calls through the scheduler's cache
// before they are executed asynchronously by the APIDispatcher.
// This ensures the scheduler's internal state is updated optimistically,
// reflecting the intended outcome of the call.
// These methods should be used only if the SchedulerAsyncAPICalls feature gate is enabled.
type APICacher interface {
// PatchPodStatus sends a patch request for a Pod's status.
// The patch could be first applied to the cached Pod object and then the API call is executed asynchronously.
// It returns a channel that can be used to wait for the call's completion.
PatchPodStatus(pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *NominatingInfo) (<-chan error, error)
// BindPod sends a binding request. The binding could be first applied to the cached Pod object
// and then the API call is executed asynchronously.
// It returns a channel that can be used to wait for the call's completion.
BindPod(binding *v1.Binding) (<-chan error, error)
// WaitOnFinish blocks until the result of an API call is sent to the given onFinish channel
// (returned by methods BindPod or PreemptPod).
//
// It returns the error received from the channel.
// It also returns nil if the call was skipped or overwritten,
// as these are considered successful lifecycle outcomes.
// Direct onFinish channel read can be used to access these results.
WaitOnFinish(ctx context.Context, onFinish <-chan error) error
}
// APICallImplementations defines constructors for each fwk.APICall that is used by the scheduler internally.
type APICallImplementations[T, K fwk.APICall] struct {
// PodStatusPatch is a constructor used to create fwk.APICall object for pod status patch.
PodStatusPatch func(pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *NominatingInfo) T
// PodBinding is a constructor used to create fwk.APICall object for pod binding.
PodBinding func(binding *v1.Binding) K
}
// PreFilterResult wraps needed info for scheduler framework to act upon PreFilter phase.
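
A hedged sketch of how a plugin could consume the APICacher from its Handle when the SchedulerAsyncAPICalls feature gate is enabled; the patchStatus helper and the example package are hypothetical, and the direct-client fallback that existing plugins use is omitted:

package example

import (
    "context"

    v1 "k8s.io/api/core/v1"
    fwk "k8s.io/kube-scheduler/framework"
    "k8s.io/kubernetes/pkg/scheduler/framework"
)

// patchStatus sends a pod status patch through the APICacher and waits for the
// asynchronous call to complete. Illustrative only.
func patchStatus(ctx context.Context, h framework.Handle, pod *v1.Pod, condition *v1.PodCondition, ni *framework.NominatingInfo) *fwk.Status {
    cacher := h.APICacher()
    if cacher == nil {
        // Feature gate disabled; a real plugin would fall back to direct client calls here.
        return nil
    }
    onFinish, err := cacher.PatchPodStatus(pod, condition, ni)
    if err != nil {
        return fwk.AsStatus(err)
    }
    if err := cacher.WaitOnFinish(ctx, onFinish); err != nil {
        return fwk.AsStatus(err)
    }
    return nil
}

DefaultBinder below takes the same shape for BindPod, falling back to a direct clientset call when APICacher() is nil.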

View File

@@ -51,11 +51,23 @@ func (b DefaultBinder) Name() string {
// Bind binds pods to nodes using the k8s client.
func (b DefaultBinder) Bind(ctx context.Context, state fwk.CycleState, p *v1.Pod, nodeName string) *fwk.Status {
logger := klog.FromContext(ctx)
logger.V(3).Info("Attempting to bind pod to node", "pod", klog.KObj(p), "node", klog.KRef("", nodeName))
binding := &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Namespace: p.Namespace, Name: p.Name, UID: p.UID},
Target: v1.ObjectReference{Kind: "Node", Name: nodeName},
}
if b.handle.APICacher() != nil {
// When the APICacher is available, use it to bind the pod.
onFinish, err := b.handle.APICacher().BindPod(binding)
if err != nil {
return fwk.AsStatus(err)
}
err = b.handle.APICacher().WaitOnFinish(ctx, onFinish)
if err != nil {
return fwk.AsStatus(err)
}
return nil
}
logger.V(3).Info("Attempting to bind pod to node", "pod", klog.KObj(p), "node", klog.KRef("", nodeName))
err := b.handle.ClientSet().CoreV1().Pods(binding.Namespace).Bind(ctx, binding, metav1.CreateOptions{})
if err != nil {
return fwk.AsStatus(err)

View File

@@ -19,7 +19,9 @@ package defaultbinder
import (
"context"
"errors"
"fmt"
"testing"
"time"
"github.com/google/go-cmp/cmp"
@@ -29,10 +31,19 @@ import (
"k8s.io/client-go/kubernetes/fake"
clienttesting "k8s.io/client-go/testing"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/pkg/scheduler/backend/api_cache"
"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
"k8s.io/kubernetes/pkg/scheduler/metrics"
st "k8s.io/kubernetes/pkg/scheduler/testing"
)
func init() {
metrics.Register()
}
func TestDefaultBinder(t *testing.T) {
testPod := st.MakePod().Name("foo").Namespace("ns").Obj()
testNode := "foohost.kubernetes.mydomain.com"
@@ -52,37 +63,51 @@ func TestDefaultBinder(t *testing.T) {
injectErr: errors.New("binding error"),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
for _, asyncAPICallsEnabled := range []bool{true, false} {
for _, tt := range tests {
t.Run(fmt.Sprintf("%s (Async API calls enabled: %v)", tt.name, asyncAPICallsEnabled), func(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
var gotBinding *v1.Binding
client := fake.NewClientset(testPod)
client.PrependReactor("create", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
if action.GetSubresource() != "binding" {
return false, nil, nil
var gotBinding *v1.Binding
client := fake.NewClientset(testPod)
client.PrependReactor("create", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
if action.GetSubresource() != "binding" {
return false, nil, nil
}
if tt.injectErr != nil {
return true, nil, tt.injectErr
}
gotBinding = action.(clienttesting.CreateAction).GetObject().(*v1.Binding)
return true, gotBinding, nil
})
var apiDispatcher *apidispatcher.APIDispatcher
if asyncAPICallsEnabled {
apiDispatcher = apidispatcher.New(client, 16, apicalls.Relevances)
apiDispatcher.Run(logger)
defer apiDispatcher.Close()
}
if tt.injectErr != nil {
return true, nil, tt.injectErr
fh, err := frameworkruntime.NewFramework(ctx, nil, nil, frameworkruntime.WithClientSet(client), frameworkruntime.WithAPIDispatcher(apiDispatcher))
if err != nil {
t.Fatal(err)
}
if asyncAPICallsEnabled {
cache := internalcache.New(ctx, time.Duration(0), apiDispatcher)
fh.SetAPICacher(apicache.New(nil, cache))
}
binder := &DefaultBinder{handle: fh}
status := binder.Bind(ctx, nil, testPod, testNode)
if got := status.AsError(); (tt.injectErr != nil) != (got != nil) {
t.Errorf("got error %q, want %q", got, tt.injectErr)
}
if diff := cmp.Diff(tt.wantBinding, gotBinding); diff != "" {
t.Errorf("got different binding (-want, +got): %s", diff)
}
gotBinding = action.(clienttesting.CreateAction).GetObject().(*v1.Binding)
return true, gotBinding, nil
})
fh, err := frameworkruntime.NewFramework(ctx, nil, nil, frameworkruntime.WithClientSet(client))
if err != nil {
t.Fatal(err)
}
binder := &DefaultBinder{handle: fh}
status := binder.Bind(ctx, nil, testPod, testNode)
if got := status.AsError(); (tt.injectErr != nil) != (got != nil) {
t.Errorf("got error %q, want %q", got, tt.injectErr)
}
if diff := cmp.Diff(tt.wantBinding, gotBinding); diff != "" {
t.Errorf("got different binding (-want, +got): %s", diff)
}
})
}
}
}

View File

@@ -52,9 +52,12 @@ import (
apipod "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
configv1 "k8s.io/kubernetes/pkg/scheduler/apis/config/v1"
"k8s.io/kubernetes/pkg/scheduler/backend/api_cache"
"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
@@ -390,80 +393,99 @@ func TestPostFilter(t *testing.T) {
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// index the potential victim pods in the fake client so that the victims' deletion logic does not fail
podItems := []v1.Pod{}
for _, pod := range tt.pods {
podItems = append(podItems, *pod)
}
cs := clientsetfake.NewClientset(&v1.PodList{Items: podItems})
informerFactory := informers.NewSharedInformerFactory(cs, 0)
podInformer := informerFactory.Core().V1().Pods().Informer()
podInformer.GetStore().Add(tt.pod)
for i := range tt.pods {
podInformer.GetStore().Add(tt.pods[i])
}
pdbInformer := informerFactory.Policy().V1().PodDisruptionBudgets().Informer()
for i := range tt.pdbs {
if err := pdbInformer.GetStore().Add(tt.pdbs[i]); err != nil {
for _, asyncAPICallsEnabled := range []bool{true, false} {
for _, tt := range tests {
t.Run(fmt.Sprintf("%s (Async API calls enabled: %v)", tt.name, asyncAPICallsEnabled), func(t *testing.T) {
// index the potential victim pods in the fake client so that the victims' deletion logic does not fail
podItems := []v1.Pod{}
for _, pod := range tt.pods {
podItems = append(podItems, *pod)
}
cs := clientsetfake.NewClientset(&v1.PodList{Items: podItems})
informerFactory := informers.NewSharedInformerFactory(cs, 0)
podInformer := informerFactory.Core().V1().Pods().Informer()
if err := podInformer.GetStore().Add(tt.pod); err != nil {
t.Fatal(err)
}
}
// Register NodeResourceFit as the Filter & PreFilter plugin.
registeredPlugins := []tf.RegisterPluginFunc{
tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),
tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"),
tf.RegisterPluginAsExtensions("test-plugin", newTestPlugin, "PreFilter"),
tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New),
}
var extenders []framework.Extender
if tt.extender != nil {
extenders = append(extenders, tt.extender)
}
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
f, err := tf.NewFramework(ctx, registeredPlugins, "",
frameworkruntime.WithClientSet(cs),
frameworkruntime.WithEventRecorder(&events.FakeRecorder{}),
frameworkruntime.WithInformerFactory(informerFactory),
frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)),
frameworkruntime.WithExtenders(extenders),
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(tt.pods, tt.nodes)),
frameworkruntime.WithLogger(logger),
frameworkruntime.WithWaitingPods(frameworkruntime.NewWaitingPodsMap()),
)
if err != nil {
t.Fatal(err)
}
p, err := New(ctx, getDefaultDefaultPreemptionArgs(), f, feature.Features{})
if err != nil {
t.Fatal(err)
}
state := framework.NewCycleState()
// Ensure <state> is populated.
if _, status, _ := f.RunPreFilterPlugins(ctx, state, tt.pod); !status.IsSuccess() {
t.Errorf("Unexpected PreFilter Status: %v", status)
}
gotResult, gotStatus := p.PostFilter(ctx, state, tt.pod, tt.filteredNodesStatuses)
// Errors cannot be compared directly (there is no equality method for them), so compare only the reasons.
if gotStatus.Code() == fwk.Error {
if diff := cmp.Diff(tt.wantStatus.Reasons(), gotStatus.Reasons()); diff != "" {
t.Errorf("Unexpected status (-want, +got):\n%s", diff)
for i := range tt.pods {
if err := podInformer.GetStore().Add(tt.pods[i]); err != nil {
t.Fatal(err)
}
}
} else {
if diff := cmp.Diff(tt.wantStatus, gotStatus); diff != "" {
t.Errorf("Unexpected status (-want, +got):\n%s", diff)
pdbInformer := informerFactory.Policy().V1().PodDisruptionBudgets().Informer()
for i := range tt.pdbs {
if err := pdbInformer.GetStore().Add(tt.pdbs[i]); err != nil {
t.Fatal(err)
}
}
}
if diff := cmp.Diff(tt.wantResult, gotResult); diff != "" {
t.Errorf("Unexpected postFilterResult (-want, +got):\n%s", diff)
}
})
// Register NodeResourceFit as the Filter & PreFilter plugin.
registeredPlugins := []tf.RegisterPluginFunc{
tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),
tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"),
tf.RegisterPluginAsExtensions("test-plugin", newTestPlugin, "PreFilter"),
tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New),
}
var extenders []framework.Extender
if tt.extender != nil {
extenders = append(extenders, tt.extender)
}
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
var apiDispatcher *apidispatcher.APIDispatcher
if asyncAPICallsEnabled {
apiDispatcher = apidispatcher.New(cs, 16, apicalls.Relevances)
apiDispatcher.Run(logger)
defer apiDispatcher.Close()
}
f, err := tf.NewFramework(ctx, registeredPlugins, "",
frameworkruntime.WithClientSet(cs),
frameworkruntime.WithAPIDispatcher(apiDispatcher),
frameworkruntime.WithEventRecorder(&events.FakeRecorder{}),
frameworkruntime.WithInformerFactory(informerFactory),
frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)),
frameworkruntime.WithExtenders(extenders),
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(tt.pods, tt.nodes)),
frameworkruntime.WithLogger(logger),
frameworkruntime.WithWaitingPods(frameworkruntime.NewWaitingPodsMap()),
)
if err != nil {
t.Fatal(err)
}
if asyncAPICallsEnabled {
cache := internalcache.New(ctx, 100*time.Millisecond, apiDispatcher)
f.SetAPICacher(apicache.New(nil, cache))
}
p, err := New(ctx, getDefaultDefaultPreemptionArgs(), f, feature.Features{})
if err != nil {
t.Fatal(err)
}
state := framework.NewCycleState()
// Ensure <state> is populated.
if _, status, _ := f.RunPreFilterPlugins(ctx, state, tt.pod); !status.IsSuccess() {
t.Errorf("Unexpected PreFilter Status: %v", status)
}
gotResult, gotStatus := p.PostFilter(ctx, state, tt.pod, tt.filteredNodesStatuses)
// Two errors cannot be compared directly because they lack an equality method, so compare only the reasons.
if gotStatus.Code() == fwk.Error {
if diff := cmp.Diff(tt.wantStatus.Reasons(), gotStatus.Reasons()); diff != "" {
t.Errorf("Unexpected status (-want, +got):\n%s", diff)
}
} else {
if diff := cmp.Diff(tt.wantStatus, gotStatus); diff != "" {
t.Errorf("Unexpected status (-want, +got):\n%s", diff)
}
}
if diff := cmp.Diff(tt.wantResult, gotResult); diff != "" {
t.Errorf("Unexpected postFilterResult (-want, +got):\n%s", diff)
}
})
}
}
}
@@ -2075,228 +2097,241 @@ func TestPreempt(t *testing.T) {
labelKeys := []string{"hostname", "zone", "region"}
for _, asyncPreemptionEnabled := range []bool{true, false} {
for _, test := range tests {
t.Run(fmt.Sprintf("%s (Async preemption enabled: %v)", test.name, asyncPreemptionEnabled), func(t *testing.T) {
client := clientsetfake.NewClientset()
informerFactory := informers.NewSharedInformerFactory(client, 0)
podInformer := informerFactory.Core().V1().Pods().Informer()
testPod := test.pod.DeepCopy()
testPods := make([]*v1.Pod, len(test.pods))
for i := range test.pods {
testPods[i] = test.pods[i].DeepCopy()
}
if err := podInformer.GetStore().Add(testPod); err != nil {
t.Fatalf("Failed to add test pod %s: %v", testPod.Name, err)
}
for i := range testPods {
if err := podInformer.GetStore().Add(testPods[i]); err != nil {
t.Fatalf("Failed to add test pod %s: %v", testPods[i], err)
}
}
// Need to protect deletedPodNames and patchedPodNames with a mutex to prevent a data race.
var mu sync.RWMutex
deletedPodNames := sets.New[string]()
patchedPodNames := sets.New[string]()
patchedPods := []*v1.Pod{}
client.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
patchAction := action.(clienttesting.PatchAction)
podName := patchAction.GetName()
namespace := patchAction.GetNamespace()
patch := patchAction.GetPatch()
pod, err := informerFactory.Core().V1().Pods().Lister().Pods(namespace).Get(podName)
if err != nil {
t.Fatalf("Failed to get the original pod %s/%s before patching: %v\n", namespace, podName, err)
}
marshalledPod, err := json.Marshal(pod)
if err != nil {
t.Fatalf("Failed to marshal the original pod %s/%s: %v", namespace, podName, err)
}
updated, err := strategicpatch.StrategicMergePatch(marshalledPod, patch, v1.Pod{})
if err != nil {
t.Fatalf("Failed to apply strategic merge patch %q on pod %#v: %v", patch, marshalledPod, err)
}
updatedPod := &v1.Pod{}
if err := json.Unmarshal(updated, updatedPod); err != nil {
t.Fatalf("Failed to unmarshal updated pod %q: %v", updated, err)
}
patchedPods = append(patchedPods, updatedPod)
mu.Lock()
defer mu.Unlock()
patchedPodNames.Insert(podName)
return true, nil, nil
})
client.PrependReactor("delete", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
mu.Lock()
defer mu.Unlock()
deletedPodNames.Insert(action.(clienttesting.DeleteAction).GetName())
return true, nil, nil
})
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
waitingPods := frameworkruntime.NewWaitingPodsMap()
cache := internalcache.New(ctx, time.Duration(0))
for _, pod := range testPods {
if err := cache.AddPod(logger, pod.DeepCopy()); err != nil {
t.Fatalf("Failed to add pod %s: %v", pod.Name, err)
}
}
cachedNodeInfoMap := map[string]*framework.NodeInfo{}
nodes := make([]*v1.Node, len(test.nodeNames))
for i, name := range test.nodeNames {
node := st.MakeNode().Name(name).Capacity(veryLargeRes).Obj()
// Split node name by '/' to form labels in a format of
// {"hostname": node.Name[0], "zone": node.Name[1], "region": node.Name[2]}
node.ObjectMeta.Labels = make(map[string]string)
for i, label := range strings.Split(node.Name, "/") {
node.ObjectMeta.Labels[labelKeys[i]] = label
}
node.Name = node.ObjectMeta.Labels["hostname"]
t.Logf("node is added: %v. labels: %#v", node.Name, node.ObjectMeta.Labels)
cache.AddNode(logger, node)
nodes[i] = node
// Set nodeInfo to extenders to mock extenders' cache for preemption.
cachedNodeInfo := framework.NewNodeInfo()
cachedNodeInfo.SetNode(node)
cachedNodeInfoMap[node.Name] = cachedNodeInfo
}
var extenders []framework.Extender
for _, extender := range test.extenders {
// Set nodeInfoMap as extenders cached node information.
extender.CachedNodeNameToInfo = cachedNodeInfoMap
extenders = append(extenders, extender)
}
schedFramework, err := tf.NewFramework(
ctx,
[]tf.RegisterPluginFunc{
test.registerPlugin,
tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),
tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New),
},
"",
frameworkruntime.WithClientSet(client),
frameworkruntime.WithEventRecorder(&events.FakeRecorder{}),
frameworkruntime.WithExtenders(extenders),
frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)),
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(testPods, nodes)),
frameworkruntime.WithInformerFactory(informerFactory),
frameworkruntime.WithWaitingPods(waitingPods),
frameworkruntime.WithLogger(logger),
frameworkruntime.WithPodActivator(&fakePodActivator{}),
)
if err != nil {
t.Fatal(err)
}
state := framework.NewCycleState()
// Some tests rely on PreFilter plugin to compute its CycleState.
if _, s, _ := schedFramework.RunPreFilterPlugins(ctx, state, testPod); !s.IsSuccess() {
t.Errorf("Unexpected preFilterStatus: %v", s)
}
// Call preempt and check the expected results.
features := feature.Features{
EnableAsyncPreemption: asyncPreemptionEnabled,
}
pl, err := New(ctx, getDefaultDefaultPreemptionArgs(), schedFramework, features)
if err != nil {
t.Fatal(err)
}
// Set the nodes' statuses to Unschedulable so that they are eligible for preemption.
nodeToStatusMap := framework.NewDefaultNodeToStatus()
for _, n := range nodes {
nodeToStatusMap.Set(n.Name, fwk.NewStatus(fwk.Unschedulable))
}
res, status := pl.Evaluator.Preempt(ctx, state, testPod, nodeToStatusMap)
if !status.IsSuccess() && !status.IsRejected() {
t.Errorf("unexpected error in preemption: %v", status.AsError())
}
if diff := cmp.Diff(test.want, res); diff != "" {
t.Errorf("Unexpected status (-want, +got):\n%s", diff)
}
if asyncPreemptionEnabled {
// Wait for the pod to be deleted.
if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
mu.RLock()
defer mu.RUnlock()
return len(deletedPodNames) == len(test.expectedPods), nil
}); err != nil {
t.Errorf("expected %v pods to be deleted, got %v.", len(test.expectedPods), len(deletedPodNames))
}
} else {
mu.RLock()
// If async preemption is disabled, the pod should be deleted immediately.
if len(deletedPodNames) != len(test.expectedPods) {
t.Errorf("expected %v pods to be deleted, got %v.", len(test.expectedPods), len(deletedPodNames))
}
mu.RUnlock()
}
mu.RLock()
if diff := cmp.Diff(sets.List(patchedPodNames), sets.List(deletedPodNames)); diff != "" {
t.Errorf("unexpected difference in the set of patched and deleted pods: %s", diff)
}
// Make sure that the DisruptionTarget condition has been added to the pod status
for _, patchedPod := range patchedPods {
expectedPodCondition := &v1.PodCondition{
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: v1.PodReasonPreemptionByScheduler,
Message: fmt.Sprintf("%s: preempting to accommodate a higher priority pod", patchedPod.Spec.SchedulerName),
for _, asyncAPICallsEnabled := range []bool{true, false} {
for _, test := range tests {
t.Run(fmt.Sprintf("%s (Async preemption enabled: %v, Async API calls enabled: %v)", test.name, asyncPreemptionEnabled, asyncAPICallsEnabled), func(t *testing.T) {
client := clientsetfake.NewClientset()
informerFactory := informers.NewSharedInformerFactory(client, 0)
podInformer := informerFactory.Core().V1().Pods().Informer()
testPod := test.pod.DeepCopy()
testPods := make([]*v1.Pod, len(test.pods))
for i := range test.pods {
testPods[i] = test.pods[i].DeepCopy()
}
_, condition := apipod.GetPodCondition(&patchedPod.Status, v1.DisruptionTarget)
if diff := cmp.Diff(condition, expectedPodCondition, cmpopts.IgnoreFields(v1.PodCondition{}, "LastTransitionTime")); diff != "" {
t.Fatalf("unexpected difference in the pod %q DisruptionTarget condition: %s", patchedPod.Name, diff)
if err := podInformer.GetStore().Add(testPod); err != nil {
t.Fatalf("Failed to add test pod %s: %v", testPod.Name, err)
}
}
for victimName := range deletedPodNames {
found := false
for _, expPod := range test.expectedPods {
if expPod == victimName {
found = true
break
for i := range testPods {
if err := podInformer.GetStore().Add(testPods[i]); err != nil {
t.Fatalf("Failed to add test pod %s: %v", testPods[i], err)
}
}
if !found {
t.Errorf("pod %v is not expected to be a victim.", victimName)
}
}
if res != nil && res.NominatingInfo != nil {
testPod.Status.NominatedNodeName = res.NominatedNodeName
}
// Manually set the deleted Pods' deletionTimestamp to non-nil.
for _, pod := range testPods {
if deletedPodNames.Has(pod.Name) {
now := metav1.Now()
pod.DeletionTimestamp = &now
deletedPodNames.Delete(pod.Name)
}
}
mu.RUnlock()
// Need to protect deletedPodNames and patchedPodNames with a mutex to prevent a data race.
var mu sync.RWMutex
deletedPodNames := sets.New[string]()
patchedPodNames := sets.New[string]()
patchedPods := []*v1.Pod{}
client.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
patchAction := action.(clienttesting.PatchAction)
podName := patchAction.GetName()
namespace := patchAction.GetNamespace()
patch := patchAction.GetPatch()
pod, err := informerFactory.Core().V1().Pods().Lister().Pods(namespace).Get(podName)
if err != nil {
t.Fatalf("Failed to get the original pod %s/%s before patching: %v\n", namespace, podName, err)
}
marshalledPod, err := json.Marshal(pod)
if err != nil {
t.Fatalf("Failed to marshal the original pod %s/%s: %v", namespace, podName, err)
}
updated, err := strategicpatch.StrategicMergePatch(marshalledPod, patch, v1.Pod{})
if err != nil {
t.Fatalf("Failed to apply strategic merge patch %q on pod %#v: %v", patch, marshalledPod, err)
}
updatedPod := &v1.Pod{}
if err := json.Unmarshal(updated, updatedPod); err != nil {
t.Fatalf("Failed to unmarshal updated pod %q: %v", updated, err)
}
patchedPods = append(patchedPods, updatedPod)
mu.Lock()
defer mu.Unlock()
patchedPodNames.Insert(podName)
return true, nil, nil
})
client.PrependReactor("delete", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
mu.Lock()
defer mu.Unlock()
deletedPodNames.Insert(action.(clienttesting.DeleteAction).GetName())
return true, nil, nil
})
// Call preempt again and make sure it doesn't preempt any more pods.
res, status = pl.Evaluator.Preempt(ctx, state, testPod, framework.NewDefaultNodeToStatus())
if !status.IsSuccess() && !status.IsRejected() {
t.Errorf("unexpected error in preemption: %v", status.AsError())
}
if res != nil && res.NominatingInfo != nil && len(deletedPodNames) > 0 {
t.Errorf("didn't expect any more preemption. Node %v is selected for preemption.", res.NominatedNodeName)
}
})
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
waitingPods := frameworkruntime.NewWaitingPodsMap()
var apiDispatcher *apidispatcher.APIDispatcher
if asyncAPICallsEnabled {
apiDispatcher = apidispatcher.New(client, 16, apicalls.Relevances)
apiDispatcher.Run(logger)
defer apiDispatcher.Close()
}
cache := internalcache.New(ctx, time.Duration(0), apiDispatcher)
for _, pod := range testPods {
if err := cache.AddPod(logger, pod.DeepCopy()); err != nil {
t.Fatalf("Failed to add pod %s: %v", pod.Name, err)
}
}
cachedNodeInfoMap := map[string]*framework.NodeInfo{}
nodes := make([]*v1.Node, len(test.nodeNames))
for i, name := range test.nodeNames {
node := st.MakeNode().Name(name).Capacity(veryLargeRes).Obj()
// Split node name by '/' to form labels in a format of
// {"hostname": node.Name[0], "zone": node.Name[1], "region": node.Name[2]}
node.Labels = make(map[string]string)
for i, label := range strings.Split(node.Name, "/") {
node.Labels[labelKeys[i]] = label
}
node.Name = node.Labels["hostname"]
t.Logf("node is added: %v. labels: %#v", node.Name, node.Labels)
cache.AddNode(logger, node)
nodes[i] = node
// Set nodeInfo to extenders to mock extenders' cache for preemption.
cachedNodeInfo := framework.NewNodeInfo()
cachedNodeInfo.SetNode(node)
cachedNodeInfoMap[node.Name] = cachedNodeInfo
}
var extenders []framework.Extender
for _, extender := range test.extenders {
// Set nodeInfoMap as extenders cached node information.
extender.CachedNodeNameToInfo = cachedNodeInfoMap
extenders = append(extenders, extender)
}
schedFramework, err := tf.NewFramework(
ctx,
[]tf.RegisterPluginFunc{
test.registerPlugin,
tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),
tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New),
},
"",
frameworkruntime.WithClientSet(client),
frameworkruntime.WithAPIDispatcher(apiDispatcher),
frameworkruntime.WithEventRecorder(&events.FakeRecorder{}),
frameworkruntime.WithExtenders(extenders),
frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)),
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(testPods, nodes)),
frameworkruntime.WithInformerFactory(informerFactory),
frameworkruntime.WithWaitingPods(waitingPods),
frameworkruntime.WithLogger(logger),
frameworkruntime.WithPodActivator(&fakePodActivator{}),
)
if err != nil {
t.Fatal(err)
}
if asyncAPICallsEnabled {
schedFramework.SetAPICacher(apicache.New(nil, cache))
}
state := framework.NewCycleState()
// Some tests rely on PreFilter plugin to compute its CycleState.
if _, s, _ := schedFramework.RunPreFilterPlugins(ctx, state, testPod); !s.IsSuccess() {
t.Errorf("Unexpected preFilterStatus: %v", s)
}
// Call preempt and check the expected results.
features := feature.Features{
EnableAsyncPreemption: asyncPreemptionEnabled,
}
pl, err := New(ctx, getDefaultDefaultPreemptionArgs(), schedFramework, features)
if err != nil {
t.Fatal(err)
}
// Set the nodes' statuses to Unschedulable so that they are eligible for preemption.
nodeToStatusMap := framework.NewDefaultNodeToStatus()
for _, n := range nodes {
nodeToStatusMap.Set(n.Name, fwk.NewStatus(fwk.Unschedulable))
}
res, status := pl.Evaluator.Preempt(ctx, state, testPod, nodeToStatusMap)
if !status.IsSuccess() && !status.IsRejected() {
t.Errorf("unexpected error in preemption: %v", status.AsError())
}
if diff := cmp.Diff(test.want, res); diff != "" {
t.Errorf("Unexpected status (-want, +got):\n%s", diff)
}
if asyncPreemptionEnabled {
// Wait for the pod to be deleted.
if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
mu.RLock()
defer mu.RUnlock()
return len(deletedPodNames) == len(test.expectedPods), nil
}); err != nil {
t.Errorf("expected %v pods to be deleted, got %v.", len(test.expectedPods), len(deletedPodNames))
}
} else {
mu.RLock()
// If async preemption is disabled, the pod should be deleted immediately.
if len(deletedPodNames) != len(test.expectedPods) {
t.Errorf("expected %v pods to be deleted, got %v.", len(test.expectedPods), len(deletedPodNames))
}
mu.RUnlock()
}
mu.RLock()
if diff := cmp.Diff(sets.List(patchedPodNames), sets.List(deletedPodNames)); diff != "" {
t.Errorf("unexpected difference in the set of patched and deleted pods: %s", diff)
}
// Make sure that the DisruptionTarget condition has been added to the pod status
for _, patchedPod := range patchedPods {
expectedPodCondition := &v1.PodCondition{
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: v1.PodReasonPreemptionByScheduler,
Message: fmt.Sprintf("%s: preempting to accommodate a higher priority pod", patchedPod.Spec.SchedulerName),
}
_, condition := apipod.GetPodCondition(&patchedPod.Status, v1.DisruptionTarget)
if diff := cmp.Diff(condition, expectedPodCondition, cmpopts.IgnoreFields(v1.PodCondition{}, "LastTransitionTime")); diff != "" {
t.Fatalf("unexpected difference in the pod %q DisruptionTarget condition: %s", patchedPod.Name, diff)
}
}
for victimName := range deletedPodNames {
found := false
for _, expPod := range test.expectedPods {
if expPod == victimName {
found = true
break
}
}
if !found {
t.Errorf("pod %v is not expected to be a victim.", victimName)
}
}
if res != nil && res.NominatingInfo != nil {
testPod.Status.NominatedNodeName = res.NominatedNodeName
}
// Manually set the deleted Pods' deletionTimestamp to non-nil.
for _, pod := range testPods {
if deletedPodNames.Has(pod.Name) {
now := metav1.Now()
pod.DeletionTimestamp = &now
deletedPodNames.Delete(pod.Name)
}
}
mu.RUnlock()
// Call preempt again and make sure it doesn't preempt any more pods.
res, status = pl.Evaluator.Preempt(ctx, state, testPod, framework.NewDefaultNodeToStatus())
if !status.IsSuccess() && !status.IsRejected() {
t.Errorf("unexpected error in preemption: %v", status.AsError())
}
if res != nil && res.NominatingInfo != nil && len(deletedPodNames) > 0 {
t.Errorf("didn't expect any more preemption. Node %v is selected for preemption.", res.NominatedNodeName)
}
})
}
}
}
}

View File

@@ -33,6 +33,7 @@ import (
utilerrors "k8s.io/apimachinery/pkg/util/errors"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
clientset "k8s.io/client-go/kubernetes"
corelisters "k8s.io/client-go/listers/core/v1"
policylisters "k8s.io/client-go/listers/policy/v1"
corev1helpers "k8s.io/component-helpers/scheduling/corev1"
@@ -182,7 +183,7 @@ func NewEvaluator(pluginName string, fh framework.Handle, i Interface, enableAsy
newStatus := victim.Status.DeepCopy()
updated := apipod.UpdatePodCondition(newStatus, condition)
if updated {
if err := util.PatchPodStatus(ctx, ev.Handler.ClientSet(), victim, newStatus); err != nil {
if err := util.PatchPodStatus(ctx, ev.Handler.ClientSet(), victim.Name, victim.Namespace, &victim.Status, newStatus); err != nil {
logger.Error(err, "Could not add DisruptionTarget condition due to preemption", "pod", klog.KObj(victim), "preemptor", klog.KObj(preemptor))
return err
}
@@ -450,7 +451,7 @@ func (ev *Evaluator) prepareCandidate(ctx context.Context, c Candidate, pod *v1.
// nomination updates these pods and moves them to the active queue. It
// lets scheduler find another place for them.
nominatedPods := getLowerPriorityNominatedPods(logger, fh, pod, c.Name())
if err := util.ClearNominatedNodeName(ctx, cs, nominatedPods...); err != nil {
if err := clearNominatedNodeName(ctx, cs, ev.Handler.APICacher(), nominatedPods...); err != nil {
utilruntime.HandleErrorWithContext(ctx, err, "Cannot clear 'NominatedNodeName' field")
// We do not return as this error is not critical.
}
@@ -458,6 +459,31 @@ func (ev *Evaluator) prepareCandidate(ctx context.Context, c Candidate, pod *v1.
return nil
}
// clearNominatedNodeName internally submits a patch request to the API server
// to set each pod's Status.NominatedNodeName to "".
func clearNominatedNodeName(ctx context.Context, cs clientset.Interface, apiCacher framework.APICacher, pods ...*v1.Pod) utilerrors.Aggregate {
var errs []error
for _, p := range pods {
if apiCacher != nil {
// When API cacher is available, use it to clear the NominatedNodeName.
_, err := apiCacher.PatchPodStatus(p, nil, &framework.NominatingInfo{NominatedNodeName: "", NominatingMode: framework.ModeOverride})
if err != nil {
errs = append(errs, err)
}
} else {
if len(p.Status.NominatedNodeName) == 0 {
continue
}
podStatusCopy := p.Status.DeepCopy()
podStatusCopy.NominatedNodeName = ""
if err := util.PatchPodStatus(ctx, cs, p.Name, p.Namespace, &p.Status, podStatusCopy); err != nil {
errs = append(errs, err)
}
}
}
return utilerrors.NewAggregate(errs)
}
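For reference, the fallback (non-cacher) path above boils down to a strategic merge patch against the Pod status. The following standalone sketch is not part of the change; it reproduces the payload that clearing NominatedNodeName generates, and the printed result matches the expectation asserted in TestRemoveNominatedNodeName further down.
package main

import (
	"encoding/json"
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/util/strategicpatch"
)

func main() {
	// The old status nominates node1; the new status clears the field.
	oldPod := v1.Pod{Status: v1.PodStatus{NominatedNodeName: "node1"}}
	newPod := v1.Pod{Status: v1.PodStatus{NominatedNodeName: ""}}

	oldData, err := json.Marshal(oldPod)
	if err != nil {
		panic(err)
	}
	newData, err := json.Marshal(newPod)
	if err != nil {
		panic(err)
	}

	// Only changed fields end up in the patch; clearing an omitempty string
	// field shows up as an explicit null.
	patch, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Pod{})
	if err != nil {
		panic(err)
	}
	fmt.Println(string(patch)) // {"status":{"nominatedNodeName":null}}
}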
// prepareCandidateAsync triggers a goroutine for some preparation work:
// - Evict the victim pods
// - Reject the victim pods if they are in waitingPod map
@@ -504,7 +530,7 @@ func (ev *Evaluator) prepareCandidateAsync(c Candidate, pod *v1.Pod, pluginName
// nomination updates these pods and moves them to the active queue. It
// lets scheduler find another place for them.
nominatedPods := getLowerPriorityNominatedPods(logger, ev.Handler, pod, c.Name())
if err := util.ClearNominatedNodeName(ctx, ev.Handler.ClientSet(), nominatedPods...); err != nil {
if err := clearNominatedNodeName(ctx, ev.Handler.ClientSet(), ev.Handler.APICacher(), nominatedPods...); err != nil {
utilruntime.HandleErrorWithContext(ctx, err, "Cannot clear 'NominatedNodeName' field from lower priority pods on the same target node", "node", c.Name())
result = metrics.GoroutineResultError
// We do not return as this error is not critical.

View File

@@ -30,6 +30,7 @@ import (
v1 "k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
@@ -43,9 +44,12 @@ import (
"k8s.io/klog/v2/ktesting"
extenderv1 "k8s.io/kube-scheduler/extender/v1"
fwk "k8s.io/kube-scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/backend/api_cache"
"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort"
@@ -442,6 +446,9 @@ func TestPrepareCandidate(t *testing.T) {
SchedulerName(defaultSchedulerName).Priority(highPriority).
Containers([]v1.Container{st.MakeContainer().Name("container1").Obj()}).
Obj()
errDeletePodFailed = errors.New("delete pod failed")
errPatchStatusFailed = errors.New("patch pod status failed")
)
tests := []struct {
@@ -549,7 +556,7 @@ func TestPrepareCandidate(t *testing.T) {
testPods: []*v1.Pod{},
expectedDeletionError: true,
nodeNames: []string{node1Name},
expectedStatus: fwk.AsStatus(errors.New("delete pod failed")),
expectedStatus: fwk.AsStatus(errDeletePodFailed),
expectedPreemptingMap: sets.New(types.UID("preemptor")),
expectedActivatedPods: map[string]*v1.Pod{preemptor.Name: preemptor},
},
@@ -586,7 +593,7 @@ func TestPrepareCandidate(t *testing.T) {
testPods: []*v1.Pod{},
expectedPatchError: true,
nodeNames: []string{node1Name},
expectedStatus: fwk.AsStatus(errors.New("patch pod status failed")),
expectedStatus: fwk.AsStatus(errPatchStatusFailed),
expectedPreemptingMap: sets.New(types.UID("preemptor")),
expectedActivatedPods: map[string]*v1.Pod{preemptor.Name: preemptor},
},
@@ -614,192 +621,203 @@ func TestPrepareCandidate(t *testing.T) {
// which results in the second victim not being deleted.
"",
},
expectedStatus: fwk.AsStatus(errors.New("patch pod status failed")),
expectedStatus: fwk.AsStatus(errPatchStatusFailed),
expectedPreemptingMap: sets.New(types.UID("preemptor")),
expectedActivatedPods: map[string]*v1.Pod{preemptor.Name: preemptor},
},
}
for _, asyncPreemptionEnabled := range []bool{true, false} {
for _, tt := range tests {
t.Run(fmt.Sprintf("%v (Async preemption enabled: %v)", tt.name, asyncPreemptionEnabled), func(t *testing.T) {
metrics.Register()
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
for _, asyncAPICallsEnabled := range []bool{true, false} {
for _, tt := range tests {
t.Run(fmt.Sprintf("%v (Async preemption enabled: %v, Async API calls enabled: %v)", tt.name, asyncPreemptionEnabled, asyncAPICallsEnabled), func(t *testing.T) {
metrics.Register()
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
nodes := make([]*v1.Node, len(tt.nodeNames))
for i, nodeName := range tt.nodeNames {
nodes[i] = st.MakeNode().Name(nodeName).Capacity(veryLargeRes).Obj()
}
registeredPlugins := append([]tf.RegisterPluginFunc{
tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New)},
tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New),
)
var objs []runtime.Object
for _, pod := range tt.testPods {
objs = append(objs, pod)
}
mu := &sync.RWMutex{}
deletedPods := sets.New[string]()
deletionFailure := false // whether any request to delete pod failed
patchFailure := false // whether any request to patch pod status failed
cs := clientsetfake.NewClientset(objs...)
cs.PrependReactor("delete", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
mu.Lock()
defer mu.Unlock()
name := action.(clienttesting.DeleteAction).GetName()
if name == "fail-victim" {
deletionFailure = true
return true, nil, fmt.Errorf("delete pod failed")
nodes := make([]*v1.Node, len(tt.nodeNames))
for i, nodeName := range tt.nodeNames {
nodes[i] = st.MakeNode().Name(nodeName).Capacity(veryLargeRes).Obj()
}
registeredPlugins := append([]tf.RegisterPluginFunc{
tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New)},
tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New),
)
var objs []runtime.Object
for _, pod := range tt.testPods {
objs = append(objs, pod)
}
deletedPods.Insert(name)
return true, nil, nil
})
mu := &sync.RWMutex{}
deletedPods := sets.New[string]()
deletionFailure := false // whether any request to delete pod failed
patchFailure := false // whether any request to patch pod status failed
cs.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
mu.Lock()
defer mu.Unlock()
if action.(clienttesting.PatchAction).GetName() == "fail-victim" {
patchFailure = true
return true, nil, fmt.Errorf("patch pod status failed")
}
return true, nil, nil
})
informerFactory := informers.NewSharedInformerFactory(cs, 0)
eventBroadcaster := events.NewBroadcaster(&events.EventSinkImpl{Interface: cs.EventsV1()})
fakeActivator := &fakePodActivator{activatedPods: make(map[string]*v1.Pod), mu: mu}
// Note: NominatedPodsForNode is called at the beginning of the goroutine in any case.
// fakePodNominator can delay the response of NominatedPodsForNode until the channel is closed,
// which allows us to test the preempting map before the goroutine has done anything yet.
requestStopper := make(chan struct{})
nominator := &fakePodNominator{
SchedulingQueue: internalqueue.NewSchedulingQueue(nil, informerFactory),
requestStopper: requestStopper,
}
fwk, err := tf.NewFramework(
ctx,
registeredPlugins, "",
frameworkruntime.WithClientSet(cs),
frameworkruntime.WithLogger(logger),
frameworkruntime.WithInformerFactory(informerFactory),
frameworkruntime.WithWaitingPods(frameworkruntime.NewWaitingPodsMap()),
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(tt.testPods, nodes)),
frameworkruntime.WithPodNominator(nominator),
frameworkruntime.WithEventRecorder(eventBroadcaster.NewRecorder(scheme.Scheme, "test-scheduler")),
frameworkruntime.WithPodActivator(fakeActivator),
)
if err != nil {
t.Fatal(err)
}
informerFactory.Start(ctx.Done())
informerFactory.WaitForCacheSync(ctx.Done())
fakePreemptionScorePostFilterPlugin := &FakePreemptionScorePostFilterPlugin{}
pe := NewEvaluator("FakePreemptionScorePostFilter", fwk, fakePreemptionScorePostFilterPlugin, asyncPreemptionEnabled)
if asyncPreemptionEnabled {
pe.prepareCandidateAsync(tt.candidate, tt.preemptor, "test-plugin")
pe.mu.Lock()
// The preempting map should be registered synchronously
// so we don't need wait.Poll.
if !tt.expectedPreemptingMap.Equal(pe.preempting) {
t.Errorf("expected preempting map %v, got %v", tt.expectedPreemptingMap, pe.preempting)
close(requestStopper)
pe.mu.Unlock()
return
}
pe.mu.Unlock()
// make the requests complete
close(requestStopper)
} else {
close(requestStopper) // no need to stop requests
status := pe.prepareCandidate(ctx, tt.candidate, tt.preemptor, "test-plugin")
if tt.expectedStatus == nil {
if status != nil {
t.Errorf("expect nil status, but got %v", status)
cs := clientsetfake.NewClientset(objs...)
cs.PrependReactor("delete", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
mu.Lock()
defer mu.Unlock()
name := action.(clienttesting.DeleteAction).GetName()
if name == "fail-victim" {
deletionFailure = true
return true, nil, errDeletePodFailed
}
} else {
if status == nil {
t.Errorf("expect status %v, but got nil", tt.expectedStatus)
} else if status.Code() != tt.expectedStatus.Code() {
t.Errorf("expect status code %v, but got %v", tt.expectedStatus.Code(), status.Code())
} else if status.Message() != tt.expectedStatus.Message() {
t.Errorf("expect status message %v, but got %v", tt.expectedStatus.Message(), status.Message())
deletedPods.Insert(name)
return true, nil, nil
})
cs.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
mu.Lock()
defer mu.Unlock()
if action.(clienttesting.PatchAction).GetName() == "fail-victim" {
patchFailure = true
return true, nil, errPatchStatusFailed
}
return true, nil, nil
})
informerFactory := informers.NewSharedInformerFactory(cs, 0)
eventBroadcaster := events.NewBroadcaster(&events.EventSinkImpl{Interface: cs.EventsV1()})
fakeActivator := &fakePodActivator{activatedPods: make(map[string]*v1.Pod), mu: mu}
// Note: NominatedPodsForNode is called at the beginning of the goroutine in any case.
// fakePodNominator can delay the response of NominatedPodsForNode until the channel is closed,
// which allows us to test the preempting map before the goroutine has done anything yet.
requestStopper := make(chan struct{})
nominator := &fakePodNominator{
SchedulingQueue: internalqueue.NewSchedulingQueue(nil, informerFactory),
requestStopper: requestStopper,
}
}
var lastErrMsg string
if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
mu.RLock()
defer mu.RUnlock()
pe.mu.Lock()
defer pe.mu.Unlock()
if len(pe.preempting) != 0 {
// The preempting map should be empty after the goroutine in all test cases.
lastErrMsg = fmt.Sprintf("expected no preempting pods, got %v", pe.preempting)
return false, nil
var apiDispatcher *apidispatcher.APIDispatcher
if asyncAPICallsEnabled {
apiDispatcher = apidispatcher.New(cs, 16, apicalls.Relevances)
apiDispatcher.Run(logger)
defer apiDispatcher.Close()
}
if tt.expectedDeletionError != deletionFailure {
lastErrMsg = fmt.Sprintf("expected deletion error %v, got %v", tt.expectedDeletionError, deletionFailure)
return false, nil
fwk, err := tf.NewFramework(
ctx,
registeredPlugins, "",
frameworkruntime.WithClientSet(cs),
frameworkruntime.WithAPIDispatcher(apiDispatcher),
frameworkruntime.WithLogger(logger),
frameworkruntime.WithInformerFactory(informerFactory),
frameworkruntime.WithWaitingPods(frameworkruntime.NewWaitingPodsMap()),
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(tt.testPods, nodes)),
frameworkruntime.WithPodNominator(nominator),
frameworkruntime.WithEventRecorder(eventBroadcaster.NewRecorder(scheme.Scheme, "test-scheduler")),
frameworkruntime.WithPodActivator(fakeActivator),
)
if err != nil {
t.Fatal(err)
}
if tt.expectedPatchError != patchFailure {
lastErrMsg = fmt.Sprintf("expected patch error %v, got %v", tt.expectedPatchError, patchFailure)
return false, nil
informerFactory.Start(ctx.Done())
informerFactory.WaitForCacheSync(ctx.Done())
fakePreemptionScorePostFilterPlugin := &FakePreemptionScorePostFilterPlugin{}
if asyncAPICallsEnabled {
cache := internalcache.New(ctx, 100*time.Millisecond, apiDispatcher)
fwk.SetAPICacher(apicache.New(nil, cache))
}
pe := NewEvaluator("FakePreemptionScorePostFilter", fwk, fakePreemptionScorePostFilterPlugin, asyncPreemptionEnabled)
if asyncPreemptionEnabled {
if diff := cmp.Diff(tt.expectedActivatedPods, fakeActivator.activatedPods); tt.expectedActivatedPods != nil && diff != "" {
lastErrMsg = fmt.Sprintf("Unexpected activated pods (-want,+got):\n%s", diff)
return false, nil
pe.prepareCandidateAsync(tt.candidate, tt.preemptor, "test-plugin")
pe.mu.Lock()
// The preempting map should be registered synchronously
// so we don't need wait.Poll.
if !tt.expectedPreemptingMap.Equal(pe.preempting) {
t.Errorf("expected preempting map %v, got %v", tt.expectedPreemptingMap, pe.preempting)
close(requestStopper)
pe.mu.Unlock()
return
}
if tt.expectedActivatedPods == nil && len(fakeActivator.activatedPods) != 0 {
lastErrMsg = fmt.Sprintf("expected no activated pods, got %v", fakeActivator.activatedPods)
return false, nil
pe.mu.Unlock()
// make the requests complete
close(requestStopper)
} else {
close(requestStopper) // no need to stop requests
status := pe.prepareCandidate(ctx, tt.candidate, tt.preemptor, "test-plugin")
if tt.expectedStatus == nil {
if status != nil {
t.Errorf("expect nil status, but got %v", status)
}
} else {
if !cmp.Equal(status, tt.expectedStatus) {
t.Errorf("expect status %v, but got %v", tt.expectedStatus, status)
}
}
}
if deletedPods.Len() > 1 {
// For now, we only expect at most one pod to be deleted in all test cases.
// If we need to test multiple pods deletion, we need to update the test table definition.
return false, fmt.Errorf("expected at most one pod to be deleted, got %v", deletedPods.UnsortedList())
}
var lastErrMsg string
if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
mu.RLock()
defer mu.RUnlock()
if len(tt.expectedDeletedPod) == 0 {
if deletedPods.Len() != 0 {
// When tt.expectedDeletedPod is empty, we expect no pod to be deleted.
return false, fmt.Errorf("expected no pod to be deleted, got %v", deletedPods.UnsortedList())
pe.mu.Lock()
defer pe.mu.Unlock()
if len(pe.preempting) != 0 {
// The preempting map should be empty after the goroutine in all test cases.
lastErrMsg = fmt.Sprintf("expected no preempting pods, got %v", pe.preempting)
return false, nil
}
// nothing further to check.
if tt.expectedDeletionError != deletionFailure {
lastErrMsg = fmt.Sprintf("expected deletion error %v, got %v", tt.expectedDeletionError, deletionFailure)
return false, nil
}
if tt.expectedPatchError != patchFailure {
lastErrMsg = fmt.Sprintf("expected patch error %v, got %v", tt.expectedPatchError, patchFailure)
return false, nil
}
if asyncPreemptionEnabled {
if diff := cmp.Diff(tt.expectedActivatedPods, fakeActivator.activatedPods); tt.expectedActivatedPods != nil && diff != "" {
lastErrMsg = fmt.Sprintf("Unexpected activated pods (-want,+got):\n%s", diff)
return false, nil
}
if tt.expectedActivatedPods == nil && len(fakeActivator.activatedPods) != 0 {
lastErrMsg = fmt.Sprintf("expected no activated pods, got %v", fakeActivator.activatedPods)
return false, nil
}
}
if deletedPods.Len() > 1 {
// For now, we only expect at most one pod to be deleted in all test cases.
// If we need to test multiple pods deletion, we need to update the test table definition.
return false, fmt.Errorf("expected at most one pod to be deleted, got %v", deletedPods.UnsortedList())
}
if len(tt.expectedDeletedPod) == 0 {
if deletedPods.Len() != 0 {
// When tt.expectedDeletedPod is empty, we expect no pod to be deleted.
return false, fmt.Errorf("expected no pod to be deleted, got %v", deletedPods.UnsortedList())
}
// nothing further to check.
return true, nil
}
found := false
for _, podName := range tt.expectedDeletedPod {
if deletedPods.Has(podName) ||
// If podName is empty, we expect no pod to be deleted.
(deletedPods.Len() == 0 && podName == "") {
found = true
}
}
if !found {
lastErrMsg = fmt.Sprintf("expected pod %v to be deleted, but %v is deleted", strings.Join(tt.expectedDeletedPod, " or "), deletedPods.UnsortedList())
return false, nil
}
return true, nil
}); err != nil {
t.Fatal(lastErrMsg)
}
found := false
for _, podName := range tt.expectedDeletedPod {
if deletedPods.Has(podName) ||
// If podName is empty, we expect no pod to be deleted.
(deletedPods.Len() == 0 && podName == "") {
found = true
}
}
if !found {
lastErrMsg = fmt.Sprintf("expected pod %v to be deleted, but %v is deleted", strings.Join(tt.expectedDeletedPod, " or "), deletedPods.UnsortedList())
return false, nil
}
return true, nil
}); err != nil {
t.Fatal(lastErrMsg)
}
})
})
}
}
}
}
@@ -1032,10 +1050,15 @@ func TestCallExtenders(t *testing.T) {
objs = append(objs, preemptor)
cs := clientsetfake.NewClientset(objs...)
informerFactory := informers.NewSharedInformerFactory(cs, 0)
apiDispatcher := apidispatcher.New(cs, 16, apicalls.Relevances)
apiDispatcher.Run(logger)
defer apiDispatcher.Close()
fwk, err := tf.NewFramework(
ctx,
registeredPlugins, "",
frameworkruntime.WithClientSet(cs),
frameworkruntime.WithAPIDispatcher(apiDispatcher),
frameworkruntime.WithLogger(logger),
frameworkruntime.WithExtenders(tt.extenders),
frameworkruntime.WithInformerFactory(informerFactory),
@@ -1047,6 +1070,8 @@ func TestCallExtenders(t *testing.T) {
}
informerFactory.Start(ctx.Done())
informerFactory.WaitForCacheSync(ctx.Done())
cache := internalcache.New(ctx, 100*time.Millisecond, apiDispatcher)
fwk.SetAPICacher(apicache.New(nil, cache))
fakePreemptionScorePostFilterPlugin := &FakePreemptionScorePostFilterPlugin{}
pe := Evaluator{
@@ -1075,3 +1100,85 @@ func TestCallExtenders(t *testing.T) {
})
}
}
func TestRemoveNominatedNodeName(t *testing.T) {
tests := []struct {
name string
currentNominatedNodeName string
newNominatedNodeName string
expectPatchRequest bool
expectedPatchData string
}{
{
name: "Should make patch request to clear node name",
currentNominatedNodeName: "node1",
expectPatchRequest: true,
expectedPatchData: `{"status":{"nominatedNodeName":null}}`,
},
{
name: "Should not make patch request if nominated node is already cleared",
currentNominatedNodeName: "",
expectPatchRequest: false,
},
}
for _, asyncAPICallsEnabled := range []bool{true, false} {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
actualPatchRequests := 0
var actualPatchData string
cs := &clientsetfake.Clientset{}
patchCalled := make(chan struct{}, 1)
cs.AddReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
actualPatchRequests++
patch := action.(clienttesting.PatchAction)
actualPatchData = string(patch.GetPatch())
patchCalled <- struct{}{}
// For this test, we don't care about the result of the patched pod, just that we got the expected
// patch request, so returning &v1.Pod{} here is OK because the scheduler doesn't use the response.
return true, &v1.Pod{}, nil
})
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "foo"},
Status: v1.PodStatus{NominatedNodeName: test.currentNominatedNodeName},
}
ctx, cancel := context.WithCancel(ctx)
defer cancel()
var apiCacher framework.APICacher
if asyncAPICallsEnabled {
apiDispatcher := apidispatcher.New(cs, 16, apicalls.Relevances)
apiDispatcher.Run(logger)
defer apiDispatcher.Close()
informerFactory := informers.NewSharedInformerFactory(cs, 0)
queue := internalqueue.NewSchedulingQueue(nil, informerFactory, internalqueue.WithAPIDispatcher(apiDispatcher))
apiCacher = apicache.New(queue, nil)
}
if err := clearNominatedNodeName(ctx, cs, apiCacher, pod); err != nil {
t.Fatalf("Error calling removeNominatedNodeName: %v", err)
}
if test.expectPatchRequest {
select {
case <-patchCalled:
case <-time.After(time.Second):
t.Fatalf("Timed out while waiting for patch to be called")
}
if actualPatchData != test.expectedPatchData {
t.Fatalf("Patch data mismatch: Actual was %v, but expected %v", actualPatchData, test.expectedPatchData)
}
} else {
select {
case <-patchCalled:
t.Fatalf("Expected patch not to be called, actual patch data: %v", actualPatchData)
case <-time.After(time.Second):
}
}
})
}
}
}

View File

@@ -37,6 +37,7 @@ import (
"k8s.io/klog/v2"
fwk "k8s.io/kube-scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
"k8s.io/kubernetes/pkg/scheduler/metrics"
@@ -86,6 +87,8 @@ type frameworkImpl struct {
extenders []framework.Extender
framework.PodNominator
framework.PodActivator
apiDispatcher *apidispatcher.APIDispatcher
apiCacher framework.APICacher
parallelizer parallelize.Parallelizer
}
@@ -138,6 +141,7 @@ type frameworkOptions struct {
captureProfile CaptureProfile
parallelizer parallelize.Parallelizer
waitingPods *waitingPodsMap
apiDispatcher *apidispatcher.APIDispatcher
logger *klog.Logger
}
@@ -223,6 +227,13 @@ func WithParallelism(parallelism int) Option {
}
}
// WithAPIDispatcher sets API dispatcher for the scheduling frameworkImpl.
func WithAPIDispatcher(apiDispatcher *apidispatcher.APIDispatcher) Option {
return func(o *frameworkOptions) {
o.apiDispatcher = apiDispatcher
}
}
// CaptureProfile is a callback to capture a finalized profile.
type CaptureProfile func(config.KubeSchedulerProfile)
@@ -289,6 +300,7 @@ func NewFramework(ctx context.Context, r Registry, profile *config.KubeScheduler
extenders: options.extenders,
PodNominator: options.podNominator,
PodActivator: options.podActivator,
apiDispatcher: options.apiDispatcher,
parallelizer: options.parallelizer,
logger: logger,
}
@@ -441,6 +453,10 @@ func (f *frameworkImpl) SetPodActivator(a framework.PodActivator) {
f.PodActivator = a
}
func (f *frameworkImpl) SetAPICacher(c framework.APICacher) {
f.apiCacher = c
}
// Close closes each plugin, when they implement io.Closer interface.
func (f *frameworkImpl) Close() error {
var errs []error
@@ -1679,3 +1695,22 @@ func (f *frameworkImpl) PercentageOfNodesToScore() *int32 {
func (f *frameworkImpl) Parallelizer() parallelize.Parallelizer {
return f.parallelizer
}
// APIDispatcher returns an apiDispatcher that can be used to dispatch API calls.
// This requires the SchedulerAsyncAPICalls feature gate to be enabled.
func (f *frameworkImpl) APIDispatcher() fwk.APIDispatcher {
if f.apiDispatcher == nil {
return nil
}
return f.apiDispatcher
}
// APICacher returns an apiCacher that can be used to dispatch API calls through the scheduler's cache
// instead of using APIDispatcher() directly.
// This requires the SchedulerAsyncAPICalls feature gate to be enabled.
func (f *frameworkImpl) APICacher() framework.APICacher {
if f.apiCacher == nil {
return nil
}
return f.apiCacher
}
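To illustrate how these accessors are meant to be consumed, here is a hypothetical helper written in the style of updatePod and clearNominatedNodeName elsewhere in this PR. The helper name setPodCondition, its package, and the nil NominatingInfo argument are illustrative assumptions rather than part of the change.
package exampleplugin // hypothetical package, for illustration only

import (
	"context"

	v1 "k8s.io/api/core/v1"
	apipod "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/util"
)

// setPodCondition records a Pod condition, going through the APICacher when the
// SchedulerAsyncAPICalls feature gate wired one in, and falling back to a direct
// strategic-merge patch against the API server otherwise.
func setPodCondition(ctx context.Context, fh framework.Handle, pod *v1.Pod, condition *v1.PodCondition) error {
	if cacher := fh.APICacher(); cacher != nil {
		// The patch is queued and executed asynchronously; callers that need the
		// result can wait on the returned channel, which is ignored here.
		// Passing nil NominatingInfo is assumed to be acceptable.
		_, err := cacher.PatchPodStatus(pod, condition, nil)
		return err
	}
	newStatus := pod.Status.DeepCopy()
	if !apipod.UpdatePodCondition(newStatus, condition) {
		// Nothing changed, so there is nothing to patch.
		return nil
	}
	return util.PatchPodStatus(ctx, fh.ClientSet(), pod.Name, pod.Namespace, &pod.Status, newStatus)
}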

View File

@@ -1090,7 +1090,7 @@ func (sched *Scheduler) handleSchedulingFailure(ctx context.Context, fwk framewo
msg := truncateMessage(errMsg)
fwk.EventRecorder().Eventf(pod, nil, v1.EventTypeWarning, "FailedScheduling", "Scheduling", msg)
if err := updatePod(ctx, sched.client, pod, &v1.PodCondition{
if err := updatePod(ctx, sched.client, fwk.APICacher(), pod, &v1.PodCondition{
Type: v1.PodScheduled,
ObservedGeneration: podutil.CalculatePodConditionObservedGeneration(&pod.Status, pod.Generation, v1.PodScheduled),
Status: v1.ConditionFalse,
@@ -1111,7 +1111,12 @@ func truncateMessage(message string) string {
return message[:max-len(suffix)] + suffix
}
func updatePod(ctx context.Context, client clientset.Interface, pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *framework.NominatingInfo) error {
func updatePod(ctx context.Context, client clientset.Interface, apiCacher framework.APICacher, pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *framework.NominatingInfo) error {
if apiCacher != nil {
// When API cacher is available, use it to patch the status.
_, err := apiCacher.PatchPodStatus(pod, condition, nominatingInfo)
return err
}
logger := klog.FromContext(ctx)
logger.V(3).Info("Updating pod condition", "pod", klog.KObj(pod), "conditionType", condition.Type, "conditionStatus", condition.Status, "conditionReason", condition.Reason)
podStatusCopy := pod.Status.DeepCopy()
@@ -1124,5 +1129,5 @@ func updatePod(ctx context.Context, client clientset.Interface, pod *v1.Pod, con
if nnnNeedsUpdate {
podStatusCopy.NominatedNodeName = nominatingInfo.NominatedNodeName
}
return util.PatchPodStatus(ctx, client, pod, podStatusCopy)
return util.PatchPodStatus(ctx, client, pod.Name, pod.Namespace, &pod.Status, podStatusCopy)
}

File diff suppressed because it is too large

View File

@@ -40,10 +40,13 @@ import (
"k8s.io/kubernetes/pkg/features"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/apis/config/scheme"
"k8s.io/kubernetes/pkg/scheduler/backend/api_cache"
"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
cachedebugger "k8s.io/kubernetes/pkg/scheduler/backend/cache/debugger"
internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
frameworkplugins "k8s.io/kubernetes/pkg/scheduler/framework/plugins"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/dynamicresources"
@@ -93,6 +96,12 @@ type Scheduler struct {
// SchedulingQueue holds pods to be scheduled
SchedulingQueue internalqueue.SchedulingQueue
// If possible, operate on the APIDispatcher indirectly, e.g. through the SchedulingQueue.
// It is nil iff the SchedulerAsyncAPICalls feature gate is disabled.
// In-tree code should not add calls to the APIDispatcher directly;
// framework.APICacher should be used instead.
APIDispatcher *apidispatcher.APIDispatcher
// Profiles are the scheduling profiles.
Profiles profile.Map
@@ -331,6 +340,10 @@ func New(ctx context.Context,
}
draManager = dynamicresources.NewDRAManager(ctx, resourceClaimCache, resourceSliceTracker, informerFactory)
}
var apiDispatcher *apidispatcher.APIDispatcher
if utilfeature.DefaultFeatureGate.Enabled(features.SchedulerAsyncAPICalls) {
apiDispatcher = apidispatcher.New(client, int(options.parallelism), apicalls.Relevances)
}
profiles, err := profile.NewMap(ctx, options.profiles, registry, recorderFactory,
frameworkruntime.WithComponentConfigVersion(options.componentConfigVersion),
@@ -344,6 +357,7 @@ func New(ctx context.Context,
frameworkruntime.WithExtenders(extenders),
frameworkruntime.WithMetricsRecorder(metricsRecorder),
frameworkruntime.WithWaitingPods(waitingPods),
frameworkruntime.WithAPIDispatcher(apiDispatcher),
)
if err != nil {
return nil, fmt.Errorf("initializing profiles: %v", err)
@@ -385,15 +399,22 @@ func New(ctx context.Context,
internalqueue.WithQueueingHintMapPerProfile(queueingHintsPerProfile),
internalqueue.WithPluginMetricsSamplePercent(pluginMetricsSamplePercent),
internalqueue.WithMetricsRecorder(*metricsRecorder),
internalqueue.WithAPIDispatcher(apiDispatcher),
)
schedulerCache := internalcache.New(ctx, durationToExpireAssumedPod, apiDispatcher)
var apiCache framework.APICacher
if apiDispatcher != nil {
apiCache = apicache.New(podQueue, schedulerCache)
}
for _, fwk := range profiles {
fwk.SetPodNominator(podQueue)
fwk.SetPodActivator(podQueue)
fwk.SetAPICacher(apiCache)
}
schedulerCache := internalcache.New(ctx, durationToExpireAssumedPod)
// Setup cache debugger.
debugger := cachedebugger.New(nodeLister, podLister, schedulerCache, podQueue)
debugger.ListenForSignal(ctx)
@@ -408,6 +429,7 @@ func New(ctx context.Context,
SchedulingQueue: podQueue,
Profiles: profiles,
logger: logger,
APIDispatcher: apiDispatcher,
}
sched.NextPod = podQueue.Pop
sched.applyDefaultHandlers()
@@ -499,6 +521,10 @@ func (sched *Scheduler) Run(ctx context.Context) {
logger := klog.FromContext(ctx)
sched.SchedulingQueue.Run(logger)
if sched.APIDispatcher != nil {
go sched.APIDispatcher.Run(logger)
}
// We need to start scheduleOne loop in a dedicated goroutine,
// because scheduleOne function hangs on getting the next item
// from the SchedulingQueue.
@@ -508,6 +534,9 @@ func (sched *Scheduler) Run(ctx context.Context) {
go wait.UntilWithContext(ctx, sched.ScheduleOne, 0)
<-ctx.Done()
if sched.APIDispatcher != nil {
sched.APIDispatcher.Close()
}
sched.SchedulingQueue.Close()
// If the plugins satisfy the io.Closer interface, they are closed.
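Putting the pieces together, the following condensed sketch mirrors the wiring New performs when the SchedulerAsyncAPICalls feature gate is on. The helper name, the 16-worker and 30-second values, the nil lessFn, and the omission of the recorder, metrics, nominator, and activator options are illustrative assumptions; the caller also remains responsible for Close()-ing the dispatcher at shutdown, as Run does above.
package examplewiring // hypothetical package, for illustration only

import (
	"context"
	"time"

	"k8s.io/client-go/informers"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/scheduler/apis/config"
	"k8s.io/kubernetes/pkg/scheduler/backend/api_cache"
	"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
	internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
	internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
	frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
)

func newFrameworkWithAsyncAPICalls(
	ctx context.Context,
	logger klog.Logger,
	client clientset.Interface,
	informerFactory informers.SharedInformerFactory,
	registry frameworkruntime.Registry,
	profileCfg *config.KubeSchedulerProfile,
) (framework.Framework, error) {
	// One dispatcher per scheduler; 16 workers is an illustrative value.
	apiDispatcher := apidispatcher.New(client, 16, apicalls.Relevances)
	apiDispatcher.Run(logger)

	// The queue and the cache share the dispatcher so that status patches and
	// bindings issued through them are executed asynchronously.
	podQueue := internalqueue.NewSchedulingQueue(nil, informerFactory,
		internalqueue.WithAPIDispatcher(apiDispatcher))
	schedulerCache := internalcache.New(ctx, 30*time.Second, apiDispatcher)

	fwk, err := frameworkruntime.NewFramework(ctx, registry, profileCfg,
		frameworkruntime.WithClientSet(client),
		frameworkruntime.WithInformerFactory(informerFactory),
		frameworkruntime.WithAPIDispatcher(apiDispatcher),
		frameworkruntime.WithLogger(logger),
	)
	if err != nil {
		return nil, err
	}
	// Route API calls from plugins through the scheduling queue and cache.
	fwk.SetAPICacher(apicache.New(podQueue, schedulerCache))
	return fwk, nil
}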

View File

@@ -53,9 +53,12 @@ import (
"k8s.io/kubernetes/pkg/features"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/apis/config/testing/defaults"
"k8s.io/kubernetes/pkg/scheduler/backend/api_cache"
"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort"
@@ -282,97 +285,125 @@ func TestFailureHandler(t *testing.T) {
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
for _, asyncAPICallsEnabled := range []bool{true, false} {
for _, tt := range tests {
t.Run(fmt.Sprintf("%s (Async API calls enabled: %v)", tt.name, asyncAPICallsEnabled), func(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
client := fake.NewClientset(&v1.PodList{Items: []v1.Pod{*testPod}})
informerFactory := informers.NewSharedInformerFactory(client, 0)
podInformer := informerFactory.Core().V1().Pods()
// Need to add/update/delete testPod to the store.
if err := podInformer.Informer().GetStore().Add(testPod); err != nil {
t.Fatal(err)
}
var apiDispatcher *apidispatcher.APIDispatcher
if asyncAPICallsEnabled {
apiDispatcher = apidispatcher.New(client, 16, apicalls.Relevances)
apiDispatcher.Run(logger)
defer apiDispatcher.Close()
}
recorder := metrics.NewMetricsAsyncRecorder(3, 20*time.Microsecond, ctx.Done())
queue := internalqueue.NewPriorityQueue(nil, informerFactory, internalqueue.WithClock(testingclock.NewFakeClock(time.Now())), internalqueue.WithMetricsRecorder(*recorder), internalqueue.WithAPIDispatcher(apiDispatcher))
schedulerCache := internalcache.New(ctx, 30*time.Second, apiDispatcher)
queue.Add(logger, testPod)
if _, err := queue.Pop(logger); err != nil {
t.Fatalf("Pop failed: %v", err)
}
if tt.podUpdatedDuringScheduling {
if err := podInformer.Informer().GetStore().Update(testPodUpdated); err != nil {
t.Fatal(err)
}
queue.Update(logger, testPod, testPodUpdated)
}
if tt.podDeletedDuringScheduling {
if err := podInformer.Informer().GetStore().Delete(testPod); err != nil {
t.Fatal(err)
}
queue.Delete(testPod)
}
s, schedFramework, err := initScheduler(ctx, schedulerCache, queue, apiDispatcher, client, informerFactory)
if err != nil {
t.Fatal(err)
}
testPodInfo := &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(t, testPod)}
s.FailureHandler(ctx, schedFramework, testPodInfo, fwk.NewStatus(fwk.Unschedulable), nil, time.Now())
var got *v1.Pod
if tt.podUpdatedDuringScheduling {
pInfo, ok := queue.GetPod(testPod.Name, testPod.Namespace)
if !ok {
t.Fatalf("Failed to get pod %s/%s from queue", testPod.Namespace, testPod.Name)
}
got = pInfo.Pod
} else {
got = getPodFromPriorityQueue(queue, testPod)
}
if diff := cmp.Diff(tt.expect, got); diff != "" {
t.Errorf("Unexpected pod (-want, +got): %s", diff)
}
})
}
}
}
func TestFailureHandler_PodAlreadyBound(t *testing.T) {
for _, asyncAPICallsEnabled := range []bool{true, false} {
t.Run(fmt.Sprintf("Async API calls enabled: %v", asyncAPICallsEnabled), func(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
client := fake.NewClientset(&v1.PodList{Items: []v1.Pod{*testPod}})
nodeFoo := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
testPod := st.MakePod().Name("test-pod").Namespace(v1.NamespaceDefault).Node("foo").Obj()
client := fake.NewClientset(&v1.PodList{Items: []v1.Pod{*testPod}}, &v1.NodeList{Items: []v1.Node{nodeFoo}})
informerFactory := informers.NewSharedInformerFactory(client, 0)
podInformer := informerFactory.Core().V1().Pods()
// Need to add/update/delete testPod to the store.
podInformer.Informer().GetStore().Add(testPod)
recorder := metrics.NewMetricsAsyncRecorder(3, 20*time.Microsecond, ctx.Done())
queue := internalqueue.NewPriorityQueue(nil, informerFactory, internalqueue.WithClock(testingclock.NewFakeClock(time.Now())), internalqueue.WithMetricsRecorder(*recorder))
schedulerCache := internalcache.New(ctx, 30*time.Second)
queue.Add(logger, testPod)
if _, err := queue.Pop(logger); err != nil {
t.Fatalf("Pop failed: %v", err)
// Need to add testPod to the store.
if err := podInformer.Informer().GetStore().Add(testPod); err != nil {
t.Fatal(err)
}
if tt.podUpdatedDuringScheduling {
podInformer.Informer().GetStore().Update(testPodUpdated)
queue.Update(logger, testPod, testPodUpdated)
}
if tt.podDeletedDuringScheduling {
podInformer.Informer().GetStore().Delete(testPod)
queue.Delete(testPod)
var apiDispatcher *apidispatcher.APIDispatcher
if asyncAPICallsEnabled {
apiDispatcher = apidispatcher.New(client, 16, apicalls.Relevances)
apiDispatcher.Run(logger)
defer apiDispatcher.Close()
}
s, schedFramework, err := initScheduler(ctx, schedulerCache, queue, client, informerFactory)
queue := internalqueue.NewPriorityQueue(nil, informerFactory, internalqueue.WithClock(testingclock.NewFakeClock(time.Now())), internalqueue.WithAPIDispatcher(apiDispatcher))
schedulerCache := internalcache.New(ctx, 30*time.Second, apiDispatcher)
// Add the node to schedulerCache regardless of whether it has been deleted from the API server.
schedulerCache.AddNode(logger, &nodeFoo)
s, schedFramework, err := initScheduler(ctx, schedulerCache, queue, apiDispatcher, client, informerFactory)
if err != nil {
t.Fatal(err)
}
testPodInfo := &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(t, testPod)}
s.FailureHandler(ctx, schedFramework, testPodInfo, fwk.NewStatus(fwk.Unschedulable), nil, time.Now())
s.FailureHandler(ctx, schedFramework, testPodInfo, fwk.NewStatus(fwk.Unschedulable).WithError(fmt.Errorf("binding rejected: timeout")), nil, time.Now())
var got *v1.Pod
if tt.podUpdatedDuringScheduling {
pInfo, ok := queue.GetPod(testPod.Name, testPod.Namespace)
if !ok {
t.Fatalf("Failed to get pod %s/%s from queue", testPod.Namespace, testPod.Name)
}
got = pInfo.Pod
} else {
got = getPodFromPriorityQueue(queue, testPod)
}
if diff := cmp.Diff(tt.expect, got); diff != "" {
t.Errorf("Unexpected pod (-want, +got): %s", diff)
pod := getPodFromPriorityQueue(queue, testPod)
if pod != nil {
t.Fatalf("Unexpected pod: %v should not be in PriorityQueue when the NodeName of pod is not empty", pod.Name)
}
})
}
}
func TestFailureHandler_PodAlreadyBound(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
nodeFoo := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
testPod := st.MakePod().Name("test-pod").Namespace(v1.NamespaceDefault).Node("foo").Obj()
client := fake.NewClientset(&v1.PodList{Items: []v1.Pod{*testPod}}, &v1.NodeList{Items: []v1.Node{nodeFoo}})
informerFactory := informers.NewSharedInformerFactory(client, 0)
podInformer := informerFactory.Core().V1().Pods()
// Need to add testPod to the store.
podInformer.Informer().GetStore().Add(testPod)
queue := internalqueue.NewPriorityQueue(nil, informerFactory, internalqueue.WithClock(testingclock.NewFakeClock(time.Now())))
schedulerCache := internalcache.New(ctx, 30*time.Second)
// Add the node to schedulerCache regardless of whether it has been deleted from the API server.
schedulerCache.AddNode(logger, &nodeFoo)
s, schedFramework, err := initScheduler(ctx, schedulerCache, queue, client, informerFactory)
if err != nil {
t.Fatal(err)
}
testPodInfo := &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(t, testPod)}
s.FailureHandler(ctx, schedFramework, testPodInfo, fwk.NewStatus(fwk.Unschedulable).WithError(fmt.Errorf("binding rejected: timeout")), nil, time.Now())
pod := getPodFromPriorityQueue(queue, testPod)
if pod != nil {
t.Fatalf("Unexpected pod: %v should not be in PriorityQueue when the NodeName of pod is not empty", pod.Name)
}
}
// TestWithPercentageOfNodesToScore tests that the scheduler's PercentageOfNodesToScore is set correctly.
func TestWithPercentageOfNodesToScore(t *testing.T) {
tests := []struct {
@@ -445,7 +476,7 @@ func getPodFromPriorityQueue(queue *internalqueue.PriorityQueue, pod *v1.Pod) *v
return nil
}
func initScheduler(ctx context.Context, cache internalcache.Cache, queue internalqueue.SchedulingQueue,
func initScheduler(ctx context.Context, cache internalcache.Cache, queue internalqueue.SchedulingQueue, apiDispatcher *apidispatcher.APIDispatcher,
client kubernetes.Interface, informerFactory informers.SharedInformerFactory) (*Scheduler, framework.Framework, error) {
logger := klog.FromContext(ctx)
registerPluginFuncs := []tf.RegisterPluginFunc{
@@ -458,6 +489,7 @@ func initScheduler(ctx context.Context, cache internalcache.Cache, queue interna
registerPluginFuncs,
testSchedulerName,
frameworkruntime.WithClientSet(client),
frameworkruntime.WithAPIDispatcher(apiDispatcher),
frameworkruntime.WithInformerFactory(informerFactory),
frameworkruntime.WithEventRecorder(eventBroadcaster.NewRecorder(scheme.Scheme, testSchedulerName)),
frameworkruntime.WithWaitingPods(waitingPods),
@@ -465,12 +497,16 @@ func initScheduler(ctx context.Context, cache internalcache.Cache, queue interna
if err != nil {
return nil, nil, err
}
if apiDispatcher != nil {
fwk.SetAPICacher(apicache.New(queue, cache))
}
s := &Scheduler{
Cache: cache,
client: client,
StopEverything: ctx.Done(),
SchedulingQueue: queue,
APIDispatcher: apiDispatcher,
Profiles: profile.Map{testSchedulerName: fwk},
logger: logger,
}
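
The pattern above recurs throughout this PR: when SchedulerAsyncAPICalls is enabled, a test builds an APIDispatcher, runs it, and threads it through the queue, the cache, and the framework before calling initScheduler. A minimal sketch of that wiring, using only the calls shown in this diff and assuming the same imports and package aliases as the test file above; the helper name, the parallelism (16), and the cache TTL (30s) are illustrative:

// setUpAsyncAPIScaffolding mirrors the test wiring shown above; it is a
// sketch, not part of this change.
func setUpAsyncAPIScaffolding(ctx context.Context, t *testing.T, logger klog.Logger, client kubernetes.Interface, informerFactory informers.SharedInformerFactory) (*internalqueue.PriorityQueue, internalcache.Cache, *apidispatcher.APIDispatcher) {
	// Dispatcher with 16 parallel workers, constructed as in the test above.
	apiDispatcher := apidispatcher.New(client, 16, apicalls.Relevances)
	apiDispatcher.Run(logger)
	t.Cleanup(apiDispatcher.Close)

	// Queue and cache both receive the dispatcher so status patches and
	// bindings can be sent through it asynchronously.
	queue := internalqueue.NewPriorityQueue(nil, informerFactory, internalqueue.WithAPIDispatcher(apiDispatcher))
	schedulerCache := internalcache.New(ctx, 30*time.Second, apiDispatcher)
	return queue, schedulerCache, apiDispatcher
}

The returned values can then be passed to initScheduler together with the dispatcher, matching the call shape shown above.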

View File

@@ -26,7 +26,6 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/net"
"k8s.io/apimachinery/pkg/util/strategicpatch"
"k8s.io/client-go/kubernetes"
@@ -101,12 +100,12 @@ func Retriable(err error) bool {
// PatchPodStatus calculates the delta bytes change from <oldStatus> to <newStatus>,
// and then submits a request to the API server to patch the pod's status.
func PatchPodStatus(ctx context.Context, cs kubernetes.Interface, old *v1.Pod, newStatus *v1.PodStatus) error {
func PatchPodStatus(ctx context.Context, cs kubernetes.Interface, name string, namespace string, oldStatus *v1.PodStatus, newStatus *v1.PodStatus) error {
if newStatus == nil {
return nil
}
oldData, err := json.Marshal(v1.Pod{Status: old.Status})
oldData, err := json.Marshal(v1.Pod{Status: *oldStatus})
if err != nil {
return err
}
@@ -117,7 +116,7 @@ func PatchPodStatus(ctx context.Context, cs kubernetes.Interface, old *v1.Pod, n
}
patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Pod{})
if err != nil {
return fmt.Errorf("failed to create merge patch for pod %q/%q: %v", old.Namespace, old.Name, err)
return fmt.Errorf("failed to create merge patch for pod %q/%q: %w", namespace, name, err)
}
if "{}" == string(patchBytes) {
@@ -125,7 +124,7 @@ func PatchPodStatus(ctx context.Context, cs kubernetes.Interface, old *v1.Pod, n
}
patchFn := func() error {
_, err := cs.CoreV1().Pods(old.Namespace).Patch(ctx, old.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}, "status")
_, err := cs.CoreV1().Pods(namespace).Patch(ctx, name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}, "status")
return err
}
@@ -137,23 +136,6 @@ func DeletePod(ctx context.Context, cs kubernetes.Interface, pod *v1.Pod) error
return cs.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{})
}
// ClearNominatedNodeName internally submit a patch request to API server
// to set each pods[*].Status.NominatedNodeName> to "".
func ClearNominatedNodeName(ctx context.Context, cs kubernetes.Interface, pods ...*v1.Pod) utilerrors.Aggregate {
var errs []error
for _, p := range pods {
if len(p.Status.NominatedNodeName) == 0 {
continue
}
podStatusCopy := p.Status.DeepCopy()
podStatusCopy.NominatedNodeName = ""
if err := PatchPodStatus(ctx, cs, p, podStatusCopy); err != nil {
errs = append(errs, err)
}
}
return utilerrors.NewAggregate(errs)
}
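With ClearNominatedNodeName removed here, a caller that previously relied on it could get the same effect through the reworked PatchPodStatus signature shown above. A minimal sketch under that assumption — a hypothetical helper placed alongside PatchPodStatus in this file, reusing its existing imports; it is not how the scheduler itself now clears the field:

// clearNominatedNodeName clears .status.nominatedNodeName of a single pod by
// diffing the old and new statuses through PatchPodStatus (sketch only).
func clearNominatedNodeName(ctx context.Context, cs kubernetes.Interface, p *v1.Pod) error {
	if len(p.Status.NominatedNodeName) == 0 {
		// Nothing to clear; skip the API call, as the removed helper did.
		return nil
	}
	newStatus := p.Status.DeepCopy()
	newStatus.NominatedNodeName = ""
	return PatchPodStatus(ctx, cs, p.Name, p.Namespace, &p.Status, newStatus)
}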
// IsScalarResourceName validates the resource for Extended, Hugepages, Native and AttachableVolume resources
func IsScalarResourceName(name v1.ResourceName) bool {
return v1helper.IsExtendedResourceName(name) || v1helper.IsHugePageResourceName(name) ||

View File

@@ -161,63 +161,6 @@ func TestMoreImportantPod(t *testing.T) {
}
}
func TestRemoveNominatedNodeName(t *testing.T) {
tests := []struct {
name string
currentNominatedNodeName string
newNominatedNodeName string
expectedPatchRequests int
expectedPatchData string
}{
{
name: "Should make patch request to clear node name",
currentNominatedNodeName: "node1",
expectedPatchRequests: 1,
expectedPatchData: `{"status":{"nominatedNodeName":null}}`,
},
{
name: "Should not make patch request if nominated node is already cleared",
currentNominatedNodeName: "",
expectedPatchRequests: 0,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
_, ctx := ktesting.NewTestContext(t)
actualPatchRequests := 0
var actualPatchData string
cs := &clientsetfake.Clientset{}
cs.AddReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
actualPatchRequests++
patch := action.(clienttesting.PatchAction)
actualPatchData = string(patch.GetPatch())
// For this test, we don't care about the result of the patched pod, just that we got the expected
// patch request, so just returning &v1.Pod{} here is OK because scheduler doesn't use the response.
return true, &v1.Pod{}, nil
})
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "foo"},
Status: v1.PodStatus{NominatedNodeName: test.currentNominatedNodeName},
}
ctx, cancel := context.WithCancel(ctx)
defer cancel()
if err := ClearNominatedNodeName(ctx, cs, pod); err != nil {
t.Fatalf("Error calling removeNominatedNodeName: %v", err)
}
if actualPatchRequests != test.expectedPatchRequests {
t.Fatalf("Actual patch requests (%d) dos not equal expected patch requests (%d)", actualPatchRequests, test.expectedPatchRequests)
}
if test.expectedPatchRequests > 0 && actualPatchData != test.expectedPatchData {
t.Fatalf("Patch data mismatch: Actual was %v, but expected %v", actualPatchData, test.expectedPatchData)
}
})
}
}
func TestPatchPodStatus(t *testing.T) {
tests := []struct {
name string
@@ -366,7 +309,7 @@ func TestPatchPodStatus(t *testing.T) {
_, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
err = PatchPodStatus(ctx, client, &tc.pod, &tc.statusToUpdate)
err = PatchPodStatus(ctx, client, tc.pod.Name, tc.pod.Namespace, &tc.pod.Status, &tc.statusToUpdate)
if err != nil && tc.validateErr == nil {
// shouldn't be error
t.Fatal(err)

View File

@@ -17,54 +17,70 @@ limitations under the License.
package bind
import (
"fmt"
"testing"
corev1 "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
fwk "k8s.io/kube-scheduler/framework"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/framework"
st "k8s.io/kubernetes/pkg/scheduler/testing"
testutil "k8s.io/kubernetes/test/integration/util"
"k8s.io/kubernetes/test/utils/ktesting"
)
// TestDefaultBinder tests the binding process in the scheduler.
func TestDefaultBinder(t *testing.T) {
testCtx := testutil.InitTestSchedulerWithOptions(t, testutil.InitTestAPIServer(t, "", nil), 0)
testutil.SyncSchedulerInformerFactory(testCtx)
for _, asyncAPICallsEnabled := range []bool{true, false} {
t.Run(fmt.Sprintf("Async API calls enabled: %v", asyncAPICallsEnabled), func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled)
// Add a node.
node, err := testutil.CreateNode(testCtx.ClientSet, st.MakeNode().Name("testnode").Obj())
if err != nil {
t.Fatal(err)
}
testCtx := testutil.InitTestSchedulerWithOptions(t, testutil.InitTestAPIServer(t, "", nil), 0)
testutil.SyncSchedulerInformerFactory(testCtx)
if testCtx.Scheduler.APIDispatcher != nil {
logger, _ := ktesting.NewTestContext(t)
testCtx.Scheduler.APIDispatcher.Run(logger)
defer testCtx.Scheduler.APIDispatcher.Close()
}
tests := map[string]struct {
anotherUID bool
wantStatusCode fwk.Code
}{
"same UID": {
wantStatusCode: fwk.Success,
},
"different UID": {
anotherUID: true,
wantStatusCode: fwk.Error,
},
}
for name, tc := range tests {
t.Run(name, func(t *testing.T) {
pod, err := testutil.CreatePausePodWithResource(testCtx.ClientSet, "fixed-name", testCtx.NS.Name, nil)
// Add a node.
node, err := testutil.CreateNode(testCtx.ClientSet, st.MakeNode().Name("testnode").Obj())
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
defer testutil.CleanupPods(testCtx.Ctx, testCtx.ClientSet, t, []*corev1.Pod{pod})
podCopy := pod.DeepCopy()
if tc.anotherUID {
podCopy.UID = "another"
t.Fatal(err)
}
status := testCtx.Scheduler.Profiles["default-scheduler"].RunBindPlugins(testCtx.Ctx, framework.NewCycleState(), podCopy, node.Name)
if code := status.Code(); code != tc.wantStatusCode {
t.Errorf("Bind returned code %s, want %s", code, tc.wantStatusCode)
tests := map[string]struct {
anotherUID bool
wantStatusCode fwk.Code
}{
"same UID": {
wantStatusCode: fwk.Success,
},
"different UID": {
anotherUID: true,
wantStatusCode: fwk.Error,
},
}
for name, tc := range tests {
t.Run(name, func(t *testing.T) {
pod, err := testutil.CreatePausePodWithResource(testCtx.ClientSet, "fixed-name", testCtx.NS.Name, nil)
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
defer testutil.CleanupPods(testCtx.Ctx, testCtx.ClientSet, t, []*corev1.Pod{pod})
podCopy := pod.DeepCopy()
if tc.anotherUID {
podCopy.UID = "another"
}
status := testCtx.Scheduler.Profiles["default-scheduler"].RunBindPlugins(testCtx.Ctx, framework.NewCycleState(), podCopy, node.Name)
if code := status.Code(); code != tc.wantStatusCode {
t.Errorf("Bind returned code %s, want %s", code, tc.wantStatusCode)
}
})
}
})
}

View File

@@ -212,80 +212,89 @@ func TestUpdateNominatedNodeName(t *testing.T) {
for _, tt := range tests {
for _, qHintEnabled := range []bool{false, true} {
t.Run(fmt.Sprintf("%s, with queuehint(%v)", tt.name, qHintEnabled), func(t *testing.T) {
if !qHintEnabled {
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, utilfeature.DefaultFeatureGate, version.MustParse("1.33"))
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerQueueingHints, false)
for _, asyncAPICallsEnabled := range []bool{false, true} {
if !qHintEnabled && asyncAPICallsEnabled {
// This can't happen.
continue
}
// Set the SchedulerPopFromBackoffQ feature to false, because when it's enabled, we can't be sure the pod won't be popped from the backoffQ.
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerPopFromBackoffQ, false)
t.Run(fmt.Sprintf("%s (Queueing hints enabled: %v, Async API calls enabled: %v)", tt.name, qHintEnabled, asyncAPICallsEnabled), func(t *testing.T) {
if !qHintEnabled {
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, utilfeature.DefaultFeatureGate, version.MustParse("1.33"))
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerQueueingHints, false)
} else {
// Handle SchedulerAsyncAPICalls feature only in 1.34+.
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled)
}
// Set the SchedulerPopFromBackoffQ feature to false, because when it's enabled, we can't be sure the pod won't be popped from the backoffQ.
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerPopFromBackoffQ, false)
testCtx, teardown := schedulerutils.InitTestSchedulerForFrameworkTest(t, testContext, 0, true,
scheduler.WithClock(fakeClock),
// UpdateFunc needs to be called when the nominated pod is still in the backoff queue, thus a small, but non-zero, value.
scheduler.WithPodInitialBackoffSeconds(int64(testBackoff.Seconds())),
scheduler.WithPodMaxBackoffSeconds(int64(testBackoff.Seconds())),
)
defer teardown()
testCtx, teardown := schedulerutils.InitTestSchedulerForFrameworkTest(t, testContext, 0, true,
scheduler.WithClock(fakeClock),
// UpdateFunc needs to be called when the nominated pod is still in the backoff queue, thus a small, but non-zero, value.
scheduler.WithPodInitialBackoffSeconds(int64(testBackoff.Seconds())),
scheduler.WithPodMaxBackoffSeconds(int64(testBackoff.Seconds())),
)
defer teardown()
_, err := testutils.CreateNode(testCtx.ClientSet, testNode)
if err != nil {
t.Fatalf("Creating node error: %v", err)
}
_, err := testutils.CreateNode(testCtx.ClientSet, testNode)
if err != nil {
t.Fatalf("Creating node error: %v", err)
}
// Ensure node is present in scheduler cache.
if err := testutils.WaitForNodesInCache(testCtx.Ctx, testCtx.Scheduler, 1); err != nil {
t.Fatalf("Waiting for node in cache error: %v", err)
}
// Ensure node is present in scheduler cache.
if err := testutils.WaitForNodesInCache(testCtx.Ctx, testCtx.Scheduler, 1); err != nil {
t.Fatalf("Waiting for node in cache error: %v", err)
}
// Create initial low-priority pod and wait until it's scheduled.
pod, err := testutils.CreatePausePod(testCtx.ClientSet, podLow)
if err != nil {
t.Fatalf("Creating pod error: %v", err)
}
if err := testutils.WaitForPodToSchedule(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
t.Fatalf("Pod %v was not scheduled: %v", pod.Name, err)
}
// Create initial low-priority pod and wait until it's scheduled.
pod, err := testutils.CreatePausePod(testCtx.ClientSet, podLow)
if err != nil {
t.Fatalf("Creating pod error: %v", err)
}
if err := testutils.WaitForPodToSchedule(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
t.Fatalf("Pod %v was not scheduled: %v", pod.Name, err)
}
// Create a mid-priority pod and wait until it becomes nominated (preempting the low-priority pod) and remains unschedulable.
pod, err = testutils.CreatePausePod(testCtx.ClientSet, podMidNominated)
if err != nil {
t.Fatalf("Creating pod error: %v", err)
}
if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
t.Errorf("NominatedNodeName field was not set for pod %v: %v", pod.Name, err)
}
if err := testutils.WaitForPodUnschedulable(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
t.Errorf("Pod %v haven't become unschedulabe: %v", pod.Name, err)
}
// Create a mid-priority pod and wait until it becomes nominated (preempting the low-priority pod) and remains unschedulable.
pod, err = testutils.CreatePausePod(testCtx.ClientSet, podMidNominated)
if err != nil {
t.Fatalf("Creating pod error: %v", err)
}
if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
t.Errorf("NominatedNodeName field was not set for pod %v: %v", pod.Name, err)
}
if err := testutils.WaitForPodUnschedulable(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
t.Errorf("Pod %v haven't become unschedulabe: %v", pod.Name, err)
}
// Remove the initial low-priority pod, which will move the nominated unschedulable pod back to the backoff queue.
if err := testutils.DeletePod(testCtx.ClientSet, podLow.Name, podLow.Namespace); err != nil {
t.Fatalf("Deleting pod error: %v", err)
}
// Remove the initial low-priority pod, which will move the nominated unschedulable pod back to the backoff queue.
if err := testutils.DeletePod(testCtx.ClientSet, podLow.Name, podLow.Namespace); err != nil {
t.Fatalf("Deleting pod error: %v", err)
}
// Create another low-priority pod, which cannot be scheduled because the mid-priority pod is nominated on the node and the node doesn't have enough resources for both pods.
pod, err = testutils.CreatePausePod(testCtx.ClientSet, podLow)
if err != nil {
t.Fatalf("Creating pod error: %v", err)
}
if err := testutils.WaitForPodUnschedulable(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
t.Fatalf("Pod %v was not scheduled: %v", pod.Name, err)
}
// Create another low-priority pod, which cannot be scheduled because the mid-priority pod is nominated on the node and the node doesn't have enough resources for both pods.
pod, err = testutils.CreatePausePod(testCtx.ClientSet, podLow)
if err != nil {
t.Fatalf("Creating pod error: %v", err)
}
if err := testutils.WaitForPodUnschedulable(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
t.Fatalf("Pod %v was not scheduled: %v", pod.Name, err)
}
// Update causing the nominated pod to be removed or to get its nominated node name removed, which should trigger scheduling of the low priority pod.
// Note that the update has to happen while the nominated pod is still in the backoffQ to actually test updates of nominated, but not yet bound, pods.
tt.updateFunc(testCtx)
// Update causing the nominated pod to be removed or to get its nominated node name removed, which should trigger scheduling of the low priority pod.
// Note that the update has to happen while the nominated pod is still in the backoffQ to actually test updates of nominated, but not yet bound, pods.
tt.updateFunc(testCtx)
// Advance time by the 2 * maxPodBackoffSeconds to move low priority pod out of the backoff queue.
fakeClock.Step(2 * testBackoff)
// Advance time by the 2 * maxPodBackoffSeconds to move low priority pod out of the backoff queue.
fakeClock.Step(2 * testBackoff)
// Expect the low-priority pod to be notified about the un-nominated mid-priority pod and get scheduled, as it should fit this time.
if err := testutils.WaitForPodToSchedule(testCtx.Ctx, testCtx.ClientSet, podLow); err != nil {
t.Fatalf("Pod %v was not scheduled: %v", podLow.Name, err)
}
testutils.CleanupPods(testCtx.Ctx, testCtx.ClientSet, t, cleanupPods)
})
// Expect the low-priority pod to be notified about the un-nominated mid-priority pod and get scheduled, as it should fit this time.
if err := testutils.WaitForPodToSchedule(testCtx.Ctx, testCtx.ClientSet, podLow); err != nil {
t.Fatalf("Pod %v was not scheduled: %v", podLow.Name, err)
}
testutils.CleanupPods(testCtx.Ctx, testCtx.ClientSet, t, cleanupPods)
})
}
}
}
}

View File

@@ -171,16 +171,6 @@ func TestPreemption(t *testing.T) {
}},
})
testCtx := testutils.InitTestSchedulerWithOptions(t,
testutils.InitTestAPIServer(t, "preemption", nil),
0,
scheduler.WithProfiles(cfg.Profiles...),
scheduler.WithFrameworkOutOfTreeRegistry(registry))
testutils.SyncSchedulerInformerFactory(testCtx)
go testCtx.Scheduler.Run(testCtx.Ctx)
cs := testCtx.ClientSet
defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(100, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(100, resource.DecimalSI)},
@@ -201,9 +191,8 @@ func TestPreemption(t *testing.T) {
initTokens: maxTokens,
existingPods: []*v1.Pod{
initPausePod(&testutils.PausePodConfig{
Name: "victim-pod",
Namespace: testCtx.NS.Name,
Priority: &lowPriority,
Name: "victim-pod",
Priority: &lowPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(400, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
@@ -211,9 +200,8 @@ func TestPreemption(t *testing.T) {
}),
},
pod: initPausePod(&testutils.PausePodConfig{
Name: "preemptor-pod",
Namespace: testCtx.NS.Name,
Priority: &highPriority,
Name: "preemptor-pod",
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(300, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
@@ -226,9 +214,8 @@ func TestPreemption(t *testing.T) {
initTokens: 1,
existingPods: []*v1.Pod{
initPausePod(&testutils.PausePodConfig{
Name: "victim-pod",
Namespace: testCtx.NS.Name,
Priority: &lowPriority,
Name: "victim-pod",
Priority: &lowPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
@@ -236,9 +223,8 @@ func TestPreemption(t *testing.T) {
}),
},
pod: initPausePod(&testutils.PausePodConfig{
Name: "preemptor-pod",
Namespace: testCtx.NS.Name,
Priority: &highPriority,
Name: "preemptor-pod",
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
@@ -256,9 +242,8 @@ func TestPreemption(t *testing.T) {
enablePreFilter: true,
existingPods: []*v1.Pod{
initPausePod(&testutils.PausePodConfig{
Name: "victim-pod",
Namespace: testCtx.NS.Name,
Priority: &lowPriority,
Name: "victim-pod",
Priority: &lowPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
@@ -266,9 +251,8 @@ func TestPreemption(t *testing.T) {
}),
},
pod: initPausePod(&testutils.PausePodConfig{
Name: "preemptor-pod",
Namespace: testCtx.NS.Name,
Priority: &highPriority,
Name: "preemptor-pod",
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
@@ -283,9 +267,8 @@ func TestPreemption(t *testing.T) {
unresolvable: true,
existingPods: []*v1.Pod{
initPausePod(&testutils.PausePodConfig{
Name: "victim-pod",
Namespace: testCtx.NS.Name,
Priority: &lowPriority,
Name: "victim-pod",
Priority: &lowPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
@@ -293,9 +276,8 @@ func TestPreemption(t *testing.T) {
}),
},
pod: initPausePod(&testutils.PausePodConfig{
Name: "preemptor-pod",
Namespace: testCtx.NS.Name,
Priority: &highPriority,
Name: "preemptor-pod",
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
@@ -308,13 +290,13 @@ func TestPreemption(t *testing.T) {
initTokens: maxTokens,
existingPods: []*v1.Pod{
initPausePod(&testutils.PausePodConfig{
Name: "pod-0", Namespace: testCtx.NS.Name,
Name: "pod-0",
Priority: &mediumPriority,
Labels: map[string]string{"pod": "p0"},
Resources: defaultPodRes,
}),
initPausePod(&testutils.PausePodConfig{
Name: "pod-1", Namespace: testCtx.NS.Name,
Name: "pod-1",
Priority: &lowPriority,
Labels: map[string]string{"pod": "p1"},
Resources: defaultPodRes,
@@ -341,7 +323,6 @@ func TestPreemption(t *testing.T) {
// A higher priority pod with anti-affinity.
pod: initPausePod(&testutils.PausePodConfig{
Name: "preemptor-pod",
Namespace: testCtx.NS.Name,
Priority: &highPriority,
Labels: map[string]string{"pod": "preemptor"},
Resources: defaultPodRes,
@@ -372,13 +353,13 @@ func TestPreemption(t *testing.T) {
initTokens: maxTokens,
existingPods: []*v1.Pod{
initPausePod(&testutils.PausePodConfig{
Name: "pod-0", Namespace: testCtx.NS.Name,
Name: "pod-0",
Priority: &mediumPriority,
Labels: map[string]string{"pod": "p0"},
Resources: defaultPodRes,
}),
initPausePod(&testutils.PausePodConfig{
Name: "pod-1", Namespace: testCtx.NS.Name,
Name: "pod-1",
Priority: &highPriority,
Labels: map[string]string{"pod": "p1"},
Resources: defaultPodRes,
@@ -405,7 +386,6 @@ func TestPreemption(t *testing.T) {
// A higher priority pod with anti-affinity.
pod: initPausePod(&testutils.PausePodConfig{
Name: "preemptor-pod",
Namespace: testCtx.NS.Name,
Priority: &highPriority,
Labels: map[string]string{"pod": "preemptor"},
Resources: defaultPodRes,
@@ -439,61 +419,77 @@ func TestPreemption(t *testing.T) {
v1.ResourceMemory: "500",
}
nodeObject := st.MakeNode().Name("node1").Capacity(nodeRes).Label("node", "node1").Obj()
if _, err := createNode(testCtx.ClientSet, nodeObject); err != nil {
t.Fatalf("Error creating node: %v", err)
}
for _, asyncPreemptionEnabled := range []bool{true, false} {
for _, test := range tests {
t.Run(fmt.Sprintf("%s (Async preemption enabled: %v)", test.name, asyncPreemptionEnabled), func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled)
for _, asyncAPICallsEnabled := range []bool{true, false} {
for _, test := range tests {
t.Run(fmt.Sprintf("%s (Async preemption enabled: %v, Async API calls enabled: %v)", test.name, asyncPreemptionEnabled, asyncAPICallsEnabled), func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled)
filter.Tokens = test.initTokens
filter.EnablePreFilter = test.enablePreFilter
filter.Unresolvable = test.unresolvable
pods := make([]*v1.Pod, len(test.existingPods))
// Create and run existingPods.
for i, p := range test.existingPods {
pods[i], err = runPausePod(cs, p)
if err != nil {
t.Fatalf("Error running pause pod: %v", err)
testCtx := testutils.InitTestSchedulerWithOptions(t,
testutils.InitTestAPIServer(t, "preemption", nil),
0,
scheduler.WithProfiles(cfg.Profiles...),
scheduler.WithFrameworkOutOfTreeRegistry(registry))
testutils.SyncSchedulerInformerFactory(testCtx)
go testCtx.Scheduler.Run(testCtx.Ctx)
if _, err := createNode(testCtx.ClientSet, nodeObject); err != nil {
t.Fatalf("Error creating node: %v", err)
}
}
// Create the "pod".
preemptor, err := createPausePod(cs, test.pod)
if err != nil {
t.Errorf("Error while creating high priority pod: %v", err)
}
// Wait for preemption of pods and make sure the other ones are not preempted.
for i, p := range pods {
if _, found := test.preemptedPodIndexes[i]; found {
if err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, wait.ForeverTestTimeout, false,
podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil {
t.Errorf("Pod %v/%v is not getting evicted.", p.Namespace, p.Name)
}
pod, err := cs.CoreV1().Pods(p.Namespace).Get(testCtx.Ctx, p.Name, metav1.GetOptions{})
cs := testCtx.ClientSet
filter.Tokens = test.initTokens
filter.EnablePreFilter = test.enablePreFilter
filter.Unresolvable = test.unresolvable
pods := make([]*v1.Pod, len(test.existingPods))
// Create and run existingPods.
for i, p := range test.existingPods {
p.Namespace = testCtx.NS.Name
pods[i], err = runPausePod(cs, p)
if err != nil {
t.Errorf("Error %v when getting the updated status for pod %v/%v ", err, p.Namespace, p.Name)
t.Fatalf("Error running pause pod: %v", err)
}
_, cond := podutil.GetPodCondition(&pod.Status, v1.DisruptionTarget)
if cond == nil {
t.Errorf("Pod %q does not have the expected condition: %q", klog.KObj(pod), v1.DisruptionTarget)
}
// Create the "pod".
test.pod.Namespace = testCtx.NS.Name
preemptor, err := createPausePod(cs, test.pod)
if err != nil {
t.Errorf("Error while creating high priority pod: %v", err)
}
// Wait for preemption of pods and make sure the other ones are not preempted.
for i, p := range pods {
if _, found := test.preemptedPodIndexes[i]; found {
if err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, wait.ForeverTestTimeout, false,
podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil {
t.Errorf("Pod %v/%v is not getting evicted.", p.Namespace, p.Name)
}
pod, err := cs.CoreV1().Pods(p.Namespace).Get(testCtx.Ctx, p.Name, metav1.GetOptions{})
if err != nil {
t.Errorf("Error %v when getting the updated status for pod %v/%v ", err, p.Namespace, p.Name)
}
_, cond := podutil.GetPodCondition(&pod.Status, v1.DisruptionTarget)
if cond == nil {
t.Errorf("Pod %q does not have the expected condition: %q", klog.KObj(pod), v1.DisruptionTarget)
}
} else if p.DeletionTimestamp != nil {
t.Errorf("Didn't expect pod %v to get preempted.", p.Name)
}
} else if p.DeletionTimestamp != nil {
t.Errorf("Didn't expect pod %v to get preempted.", p.Name)
}
}
// Also check that the preemptor pod gets the NominatedNodeName field set.
if len(test.preemptedPodIndexes) > 0 {
if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, cs, preemptor); err != nil {
t.Errorf("NominatedNodeName field was not set for pod %v: %v", preemptor.Name, err)
// Also check that the preemptor pod gets the NominatedNodeName field set.
if len(test.preemptedPodIndexes) > 0 {
if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, cs, preemptor); err != nil {
t.Errorf("NominatedNodeName field was not set for pod %v: %v", preemptor.Name, err)
}
}
}
// Cleanup
pods = append(pods, preemptor)
testutils.CleanupPods(testCtx.Ctx, cs, t, pods)
})
// Cleanup
pods = append(pods, preemptor)
testutils.CleanupPods(testCtx.Ctx, cs, t, pods)
})
}
}
}
}
@@ -809,177 +805,185 @@ func TestAsyncPreemption(t *testing.T) {
// All test cases have the same node.
node := st.MakeNode().Name("node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Obj()
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
// We need to use a custom preemption plugin to test async preemption behavior
delayedPreemptionPluginName := "delay-preemption"
// keyed by the pod name
preemptionDoneChannels := make(map[string]chan struct{})
defer func() {
for _, ch := range preemptionDoneChannels {
close(ch)
}
}()
registry := make(frameworkruntime.Registry)
var preemptionPlugin *defaultpreemption.DefaultPreemption
err := registry.Register(delayedPreemptionPluginName, func(c context.Context, r runtime.Object, fh framework.Handle) (framework.Plugin, error) {
p, err := frameworkruntime.FactoryAdapter(plfeature.Features{EnableAsyncPreemption: true}, defaultpreemption.New)(c, &config.DefaultPreemptionArgs{
// Set default values to pass the validation at the initialization, not related to the test.
MinCandidateNodesPercentage: 10,
MinCandidateNodesAbsolute: 100,
}, fh)
if err != nil {
return nil, fmt.Errorf("error creating default preemption plugin: %w", err)
}
for _, asyncAPICallsEnabled := range []bool{true, false} {
for _, test := range tests {
t.Run(fmt.Sprintf("%s (Async API calls enabled: %v)", test.name, asyncAPICallsEnabled), func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled)
var ok bool
preemptionPlugin, ok = p.(*defaultpreemption.DefaultPreemption)
if !ok {
return nil, fmt.Errorf("unexpected plugin type %T", p)
}
preemptPodFn := preemptionPlugin.Evaluator.PreemptPod
preemptionPlugin.Evaluator.PreemptPod = func(ctx context.Context, c preemption.Candidate, preemptor, victim *v1.Pod, pluginName string) error {
// block the preemption goroutine from completing until the test case allows it to proceed.
if ch, ok := preemptionDoneChannels[preemptor.Name]; ok {
<-ch
// We need to use a custom preemption plugin to test async preemption behavior
delayedPreemptionPluginName := "delay-preemption"
// keyed by the pod name
preemptionDoneChannels := make(map[string]chan struct{})
defer func() {
for _, ch := range preemptionDoneChannels {
close(ch)
}
}()
registry := make(frameworkruntime.Registry)
var preemptionPlugin *defaultpreemption.DefaultPreemption
err := registry.Register(delayedPreemptionPluginName, func(c context.Context, r runtime.Object, fh framework.Handle) (framework.Plugin, error) {
p, err := frameworkruntime.FactoryAdapter(plfeature.Features{EnableAsyncPreemption: true}, defaultpreemption.New)(c, &config.DefaultPreemptionArgs{
// Set default values to pass the validation at the initialization, not related to the test.
MinCandidateNodesPercentage: 10,
MinCandidateNodesAbsolute: 100,
}, fh)
if err != nil {
return nil, fmt.Errorf("error creating default preemption plugin: %w", err)
}
return preemptPodFn(ctx, c, preemptor, victim, pluginName)
}
return preemptionPlugin, nil
})
if err != nil {
t.Fatalf("Error registering a filter: %v", err)
}
cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{
Profiles: []configv1.KubeSchedulerProfile{{
SchedulerName: ptr.To(v1.DefaultSchedulerName),
Plugins: &configv1.Plugins{
MultiPoint: configv1.PluginSet{
Enabled: []configv1.Plugin{
{Name: delayedPreemptionPluginName},
},
Disabled: []configv1.Plugin{
{Name: names.DefaultPreemption},
var ok bool
preemptionPlugin, ok = p.(*defaultpreemption.DefaultPreemption)
if !ok {
return nil, fmt.Errorf("unexpected plugin type %T", p)
}
preemptPodFn := preemptionPlugin.Evaluator.PreemptPod
preemptionPlugin.Evaluator.PreemptPod = func(ctx context.Context, c preemption.Candidate, preemptor, victim *v1.Pod, pluginName string) error {
// block the preemption goroutine from completing until the test case allows it to proceed.
if ch, ok := preemptionDoneChannels[preemptor.Name]; ok {
<-ch
}
return preemptPodFn(ctx, c, preemptor, victim, pluginName)
}
return preemptionPlugin, nil
})
if err != nil {
t.Fatalf("Error registering a filter: %v", err)
}
cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{
Profiles: []configv1.KubeSchedulerProfile{{
SchedulerName: ptr.To(v1.DefaultSchedulerName),
Plugins: &configv1.Plugins{
MultiPoint: configv1.PluginSet{
Enabled: []configv1.Plugin{
{Name: delayedPreemptionPluginName},
},
Disabled: []configv1.Plugin{
{Name: names.DefaultPreemption},
},
},
},
},
}},
}},
})
// It initializes the scheduler, but doesn't start it.
// We manually trigger the scheduling cycle.
testCtx := testutils.InitTestSchedulerWithOptions(t,
testutils.InitTestAPIServer(t, "preemption", nil),
0,
scheduler.WithProfiles(cfg.Profiles...),
scheduler.WithFrameworkOutOfTreeRegistry(registry),
// disable backoff
scheduler.WithPodMaxBackoffSeconds(0),
scheduler.WithPodInitialBackoffSeconds(0),
)
testutils.SyncSchedulerInformerFactory(testCtx)
cs := testCtx.ClientSet
if preemptionPlugin == nil {
t.Fatalf("the preemption plugin should be initialized")
}
logger, _ := ktesting.NewTestContext(t)
if testCtx.Scheduler.APIDispatcher != nil {
testCtx.Scheduler.APIDispatcher.Run(logger)
defer testCtx.Scheduler.APIDispatcher.Close()
}
testCtx.Scheduler.SchedulingQueue.Run(logger)
defer testCtx.Scheduler.SchedulingQueue.Close()
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, true)
createdPods := []*v1.Pod{}
defer testutils.CleanupPods(testCtx.Ctx, cs, t, createdPods)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
if _, err := cs.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil {
t.Fatalf("Failed to create an initial Node %q: %v", node.Name, err)
}
defer func() {
if err := cs.CoreV1().Nodes().Delete(ctx, node.Name, metav1.DeleteOptions{}); err != nil {
t.Fatalf("Failed to delete the Node %q: %v", node.Name, err)
}
}()
for _, scenario := range test.scenarios {
t.Logf("Running scenario: %s", scenario.name)
switch {
case scenario.createPod != nil:
if scenario.createPod.count == nil {
scenario.createPod.count = ptr.To(1)
}
for i := 0; i < *scenario.createPod.count; i++ {
pod, err := cs.CoreV1().Pods(testCtx.NS.Name).Create(ctx, scenario.createPod.pod, metav1.CreateOptions{})
if err != nil {
t.Fatalf("Failed to create a Pod %q: %v", pod.Name, err)
}
createdPods = append(createdPods, pod)
}
case scenario.schedulePod != nil:
lastFailure := ""
if err := wait.PollUntilContextTimeout(testCtx.Ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
if len(testCtx.Scheduler.SchedulingQueue.PodsInActiveQ()) == 0 {
lastFailure = fmt.Sprintf("Expected the pod %s to be scheduled, but no pod arrives at the activeQ", scenario.schedulePod.podName)
return false, nil
}
if testCtx.Scheduler.SchedulingQueue.PodsInActiveQ()[0].Name != scenario.schedulePod.podName {
// need to wait more because the queue might get another Pod with higher priority than the current top pod.
lastFailure = fmt.Sprintf("The pod %s is expected to be scheduled, but the top Pod is %s", scenario.schedulePod.podName, testCtx.Scheduler.SchedulingQueue.PodsInActiveQ()[0].Name)
return false, nil
}
return true, nil
}); err != nil {
t.Fatal(lastFailure)
}
preemptionDoneChannels[scenario.schedulePod.podName] = make(chan struct{})
testCtx.Scheduler.ScheduleOne(testCtx.Ctx)
if scenario.schedulePod.expectSuccess {
if err := wait.PollUntilContextTimeout(testCtx.Ctx, 200*time.Millisecond, wait.ForeverTestTimeout, false, testutils.PodScheduled(cs, testCtx.NS.Name, scenario.schedulePod.podName)); err != nil {
t.Fatalf("Expected the pod %s to be scheduled", scenario.schedulePod.podName)
}
} else {
if !podInUnschedulablePodPool(t, testCtx.Scheduler.SchedulingQueue, scenario.schedulePod.podName) {
t.Fatalf("Expected the pod %s to be in the queue after the scheduling attempt", scenario.schedulePod.podName)
}
}
case scenario.completePreemption != "":
if _, ok := preemptionDoneChannels[scenario.completePreemption]; !ok {
t.Fatalf("The preemptor Pod %q is not running preemption", scenario.completePreemption)
}
close(preemptionDoneChannels[scenario.completePreemption])
delete(preemptionDoneChannels, scenario.completePreemption)
case scenario.podGatedInQueue != "":
// make sure the Pod is in the queue in the first place.
if !podInUnschedulablePodPool(t, testCtx.Scheduler.SchedulingQueue, scenario.podGatedInQueue) {
t.Fatalf("Expected the pod %s to be in the queue", scenario.podGatedInQueue)
}
// Make sure this Pod is gated by the preemption at the PreEnqueue extension point
// by activating the Pod and seeing if it's still in the unsched pod pool.
testCtx.Scheduler.SchedulingQueue.Activate(logger, map[string]*v1.Pod{scenario.podGatedInQueue: st.MakePod().Namespace(testCtx.NS.Name).Name(scenario.podGatedInQueue).Obj()})
if !podInUnschedulablePodPool(t, testCtx.Scheduler.SchedulingQueue, scenario.podGatedInQueue) {
t.Fatalf("Expected the pod %s to be in the queue even after the activation", scenario.podGatedInQueue)
}
case scenario.podRunningPreemption != nil:
if err := wait.PollUntilContextTimeout(testCtx.Ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
return preemptionPlugin.Evaluator.IsPodRunningPreemption(createdPods[*scenario.podRunningPreemption].GetUID()), nil
}); err != nil {
t.Fatalf("Expected the pod %s to be running preemption", createdPods[*scenario.podRunningPreemption].Name)
}
}
}
})
// It initializes the scheduler, but doesn't start it.
// We manually trigger the scheduling cycle.
testCtx := testutils.InitTestSchedulerWithOptions(t,
testutils.InitTestAPIServer(t, "preemption", nil),
0,
scheduler.WithProfiles(cfg.Profiles...),
scheduler.WithFrameworkOutOfTreeRegistry(registry),
// disable backoff
scheduler.WithPodMaxBackoffSeconds(0),
scheduler.WithPodInitialBackoffSeconds(0),
)
testutils.SyncSchedulerInformerFactory(testCtx)
cs := testCtx.ClientSet
if preemptionPlugin == nil {
t.Fatalf("the preemption plugin should be initialized")
}
logger, _ := ktesting.NewTestContext(t)
testCtx.Scheduler.SchedulingQueue.Run(logger)
defer testCtx.Scheduler.SchedulingQueue.Close()
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, true)
createdPods := []*v1.Pod{}
defer testutils.CleanupPods(testCtx.Ctx, cs, t, createdPods)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
if _, err := cs.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil {
t.Fatalf("Failed to create an initial Node %q: %v", node.Name, err)
}
defer func() {
if err := cs.CoreV1().Nodes().Delete(ctx, node.Name, metav1.DeleteOptions{}); err != nil {
t.Fatalf("Failed to delete the Node %q: %v", node.Name, err)
}
}()
for _, scenario := range test.scenarios {
t.Logf("Running scenario: %s", scenario.name)
switch {
case scenario.createPod != nil:
if scenario.createPod.count == nil {
scenario.createPod.count = ptr.To(1)
}
for i := 0; i < *scenario.createPod.count; i++ {
pod, err := cs.CoreV1().Pods(testCtx.NS.Name).Create(ctx, scenario.createPod.pod, metav1.CreateOptions{})
if err != nil {
t.Fatalf("Failed to create a Pod %q: %v", pod.Name, err)
}
createdPods = append(createdPods, pod)
}
case scenario.schedulePod != nil:
lastFailure := ""
if err := wait.PollUntilContextTimeout(testCtx.Ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
if len(testCtx.Scheduler.SchedulingQueue.PodsInActiveQ()) == 0 {
lastFailure = fmt.Sprintf("Expected the pod %s to be scheduled, but no pod arrives at the activeQ", scenario.schedulePod.podName)
return false, nil
}
if testCtx.Scheduler.SchedulingQueue.PodsInActiveQ()[0].Name != scenario.schedulePod.podName {
// need to wait more because the queue might get another Pod with higher priority than the current top pod.
lastFailure = fmt.Sprintf("The pod %s is expected to be scheduled, but the top Pod is %s", scenario.schedulePod.podName, testCtx.Scheduler.SchedulingQueue.PodsInActiveQ()[0].Name)
return false, nil
}
return true, nil
}); err != nil {
t.Fatal(lastFailure)
}
preemptionDoneChannels[scenario.schedulePod.podName] = make(chan struct{})
testCtx.Scheduler.ScheduleOne(testCtx.Ctx)
if scenario.schedulePod.expectSuccess {
if err := wait.PollUntilContextTimeout(testCtx.Ctx, 200*time.Millisecond, wait.ForeverTestTimeout, false, testutils.PodScheduled(cs, testCtx.NS.Name, scenario.schedulePod.podName)); err != nil {
t.Fatalf("Expected the pod %s to be scheduled", scenario.schedulePod.podName)
}
} else {
if !podInUnschedulablePodPool(t, testCtx.Scheduler.SchedulingQueue, scenario.schedulePod.podName) {
t.Fatalf("Expected the pod %s to be in the queue after the scheduling attempt", scenario.schedulePod.podName)
}
}
case scenario.completePreemption != "":
if _, ok := preemptionDoneChannels[scenario.completePreemption]; !ok {
t.Fatalf("The preemptor Pod %q is not running preemption", scenario.completePreemption)
}
close(preemptionDoneChannels[scenario.completePreemption])
delete(preemptionDoneChannels, scenario.completePreemption)
case scenario.podGatedInQueue != "":
// make sure the Pod is in the queue in the first place.
if !podInUnschedulablePodPool(t, testCtx.Scheduler.SchedulingQueue, scenario.podGatedInQueue) {
t.Fatalf("Expected the pod %s to be in the queue", scenario.podGatedInQueue)
}
// Make sure this Pod is gated by the preemption at the PreEnqueue extension point
// by activating the Pod and seeing if it's still in the unsched pod pool.
testCtx.Scheduler.SchedulingQueue.Activate(logger, map[string]*v1.Pod{scenario.podGatedInQueue: st.MakePod().Namespace(testCtx.NS.Name).Name(scenario.podGatedInQueue).Obj()})
if !podInUnschedulablePodPool(t, testCtx.Scheduler.SchedulingQueue, scenario.podGatedInQueue) {
t.Fatalf("Expected the pod %s to be in the queue even after the activation", scenario.podGatedInQueue)
}
case scenario.podRunningPreemption != nil:
if err := wait.PollUntilContextTimeout(testCtx.Ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
return preemptionPlugin.Evaluator.IsPodRunningPreemption(createdPods[*scenario.podRunningPreemption].GetUID()), nil
}); err != nil {
t.Fatalf("Expected the pod %s to be running preemption", createdPods[*scenario.podRunningPreemption].Name)
}
}
}
})
}
}
}
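The synchronization trick used above — holding each preemption goroutine on a per-pod channel so the test can observe intermediate state deterministically before releasing it — can be distilled into a self-contained sketch. The names below are stand-ins for illustration, not the scheduler framework's types:

package main

import (
	"fmt"
	"sync"
)

func main() {
	// Keyed by preemptor pod name, as in the test above.
	preemptionDone := map[string]chan struct{}{"preemptor": make(chan struct{})}
	var wg sync.WaitGroup

	preemptPod := func(preemptor string) {
		defer wg.Done()
		if ch, ok := preemptionDone[preemptor]; ok {
			<-ch // block until the test releases this preemptor
		}
		fmt.Printf("preemption for %s completed\n", preemptor)
	}

	wg.Add(1)
	go preemptPod("preemptor") // the async preemption goroutine

	// ...the test can assert intermediate state here while preemption is still blocked...

	close(preemptionDone["preemptor"]) // the "completePreemption" scenario step
	wg.Wait()
}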
@@ -1680,83 +1684,86 @@ func TestNominatedNodeCleanUp(t *testing.T) {
}
for _, asyncPreemptionEnabled := range []bool{true, false} {
for _, tt := range tests {
t.Run(fmt.Sprintf("%s (Async preemption enabled: %v)", tt.name, asyncPreemptionEnabled), func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled)
for _, asyncAPICallsEnabled := range []bool{true, false} {
for _, tt := range tests {
t.Run(fmt.Sprintf("%s (Async preemption enabled: %v, Async API calls enabled: %v)", tt.name, asyncPreemptionEnabled, asyncAPICallsEnabled), func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled)
cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{
Profiles: []configv1.KubeSchedulerProfile{{
SchedulerName: ptr.To(v1.DefaultSchedulerName),
Plugins: tt.customPlugins,
}},
})
testCtx := initTest(
t,
"preemption",
scheduler.WithProfiles(cfg.Profiles...),
scheduler.WithFrameworkOutOfTreeRegistry(tt.outOfTreeRegistry),
)
cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{
Profiles: []configv1.KubeSchedulerProfile{{
SchedulerName: ptr.To(v1.DefaultSchedulerName),
Plugins: tt.customPlugins,
}},
})
testCtx := initTest(
t,
"preemption",
scheduler.WithProfiles(cfg.Profiles...),
scheduler.WithFrameworkOutOfTreeRegistry(tt.outOfTreeRegistry),
)
cs, ns := testCtx.ClientSet, testCtx.NS.Name
for _, node := range tt.initNodes {
if _, err := createNode(cs, node); err != nil {
t.Fatalf("Error creating initial node %v: %v", node.Name, err)
}
}
// Create a node with the specified capacity.
nodeName := "fake-node"
if _, err := createNode(cs, st.MakeNode().Name(nodeName).Capacity(tt.nodeCapacity).Obj()); err != nil {
t.Fatalf("Error creating node %v: %v", nodeName, err)
}
// Create pods and run post check if necessary.
for i, pods := range tt.podsToCreate {
for _, p := range pods {
p.Namespace = ns
if _, err := createPausePod(cs, p); err != nil {
t.Fatalf("Error creating pod %v: %v", p.Name, err)
cs, ns := testCtx.ClientSet, testCtx.NS.Name
for _, node := range tt.initNodes {
if _, err := createNode(cs, node); err != nil {
t.Fatalf("Error creating initial node %v: %v", node.Name, err)
}
}
// If necessary, run the post check function.
if len(tt.postChecks) > i && tt.postChecks[i] != nil {
// Create a node with the specified capacity.
nodeName := "fake-node"
if _, err := createNode(cs, st.MakeNode().Name(nodeName).Capacity(tt.nodeCapacity).Obj()); err != nil {
t.Fatalf("Error creating node %v: %v", nodeName, err)
}
// Create pods and run post check if necessary.
for i, pods := range tt.podsToCreate {
for _, p := range pods {
if err := tt.postChecks[i](testCtx.Ctx, cs, p); err != nil {
t.Fatalf("Pod %v didn't pass the postChecks[%v]: %v", p.Name, i, err)
p.Namespace = ns
if _, err := createPausePod(cs, p); err != nil {
t.Fatalf("Error creating pod %v: %v", p.Name, err)
}
}
// If necessary, run the post check function.
if len(tt.postChecks) > i && tt.postChecks[i] != nil {
for _, p := range pods {
if err := tt.postChecks[i](testCtx.Ctx, cs, p); err != nil {
t.Fatalf("Pod %v didn't pass the postChecks[%v]: %v", p.Name, i, err)
}
}
}
}
}
// Delete the fake node if necessary.
if tt.deleteFakeNode {
if err := cs.CoreV1().Nodes().Delete(testCtx.Ctx, nodeName, *metav1.NewDeleteOptions(0)); err != nil {
t.Fatalf("Node %v cannot be deleted: %v", nodeName, err)
// Delete the fake node if necessary.
if tt.deleteFakeNode {
if err := cs.CoreV1().Nodes().Delete(testCtx.Ctx, nodeName, *metav1.NewDeleteOptions(0)); err != nil {
t.Fatalf("Node %v cannot be deleted: %v", nodeName, err)
}
}
}
// Force deleting the terminating pods if necessary.
// This is required if the terminating Pods need to be physically deleted.
for _, podName := range tt.podNamesToDelete {
if err := deletePod(cs, podName, ns); err != nil {
t.Fatalf("Pod %v cannot be deleted: %v", podName, err)
// Force deleting the terminating pods if necessary.
// This is required if the terminating Pods need to be physically deleted.
for _, podName := range tt.podNamesToDelete {
if err := deletePod(cs, podName, ns); err != nil {
t.Fatalf("Pod %v cannot be deleted: %v", podName, err)
}
}
}
// Verify if .status.nominatedNodeName is cleared.
if err := wait.PollUntilContextTimeout(testCtx.Ctx, 100*time.Millisecond, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
pod, err := cs.CoreV1().Pods(ns).Get(ctx, "medium", metav1.GetOptions{})
if err != nil {
t.Errorf("Error getting the medium pod: %v", err)
// Verify if .status.nominatedNodeName is cleared.
if err := wait.PollUntilContextTimeout(testCtx.Ctx, 100*time.Millisecond, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
pod, err := cs.CoreV1().Pods(ns).Get(ctx, "medium", metav1.GetOptions{})
if err != nil {
t.Errorf("Error getting the medium pod: %v", err)
}
if len(pod.Status.NominatedNodeName) == 0 {
return true, nil
}
return false, err
}); err != nil {
t.Errorf(".status.nominatedNodeName of the medium pod was not cleared: %v", err)
}
if len(pod.Status.NominatedNodeName) == 0 {
return true, nil
}
return false, err
}); err != nil {
t.Errorf(".status.nominatedNodeName of the medium pod was not cleared: %v", err)
}
})
})
}
}
}
}

View File

@@ -49,6 +49,7 @@ import (
testfwk "k8s.io/kubernetes/test/integration/framework"
testutils "k8s.io/kubernetes/test/integration/util"
imageutils "k8s.io/kubernetes/test/utils/image"
"k8s.io/kubernetes/test/utils/ktesting"
"k8s.io/utils/ptr"
)
@@ -112,6 +113,12 @@ func TestSchedulingGates(t *testing.T) {
)
testutils.SyncSchedulerInformerFactory(testCtx)
if testCtx.Scheduler.APIDispatcher != nil {
logger, _ := ktesting.NewTestContext(t)
testCtx.Scheduler.APIDispatcher.Run(logger)
defer testCtx.Scheduler.APIDispatcher.Close()
}
cs, ns, ctx := testCtx.ClientSet, testCtx.NS.Name, testCtx.Ctx
// Create node, so we can schedule pods.

View File

@@ -18,13 +18,17 @@ package scheduler
import (
"context"
"fmt"
"testing"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/klog/v2"
fwk "k8s.io/kube-scheduler/framework"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler"
"k8s.io/kubernetes/pkg/scheduler/framework"
st "k8s.io/kubernetes/pkg/scheduler/testing"
@@ -36,6 +40,11 @@ var _ framework.EnqueueExtensions = &PermitPlugin{}
var _ framework.ReservePlugin = &ReservePlugin{}
var _ framework.EnqueueExtensions = &ReservePlugin{}
type ResettablePlugin interface {
framework.Plugin
Reset()
}
type ReservePlugin struct {
name string
statusCode fwk.Code
@@ -78,6 +87,11 @@ func (rp *ReservePlugin) EventsToRegister(_ context.Context) ([]fwk.ClusterEvent
}, nil
}
func (rp *ReservePlugin) Reset() {
rp.numReserveCalled = 0
rp.numUnreserveCalled = 0
}
type PermitPlugin struct {
name string
statusCode fwk.Code
@@ -115,12 +129,16 @@ func (pp *PermitPlugin) EventsToRegister(_ context.Context) ([]fwk.ClusterEventW
}, nil
}
func (pp *PermitPlugin) Reset() {
pp.numPermitCalled = 0
}
func TestReScheduling(t *testing.T) {
testContext := testutils.InitTestAPIServer(t, "permit-plugin", nil)
tests := []struct {
name string
plugins []framework.Plugin
action func() error
name string
plugin ResettablePlugin
action func() error
// For the first scheduling attempt, we make the pod hit a scheduling error or become unschedulable on purpose.
// This is controlled by wantFirstSchedulingError. By default, the pod is unschedulable.
wantFirstSchedulingError bool
@@ -130,10 +148,8 @@ func TestReScheduling(t *testing.T) {
wantError bool
}{
{
name: "Rescheduling pod rejected by Permit Plugin",
plugins: []framework.Plugin{
&PermitPlugin{name: "permit", statusCode: fwk.Unschedulable},
},
name: "Rescheduling pod rejected by Permit Plugin",
plugin: &PermitPlugin{name: "permit", statusCode: fwk.Unschedulable},
action: func() error {
_, err := testutils.CreateNode(testContext.ClientSet, st.MakeNode().Name("fake-node").Obj())
return err
@@ -141,10 +157,8 @@ func TestReScheduling(t *testing.T) {
wantScheduled: true,
},
{
name: "Rescheduling pod rejected by Permit Plugin with unrelated event",
plugins: []framework.Plugin{
&PermitPlugin{name: "permit", statusCode: fwk.Unschedulable},
},
name: "Rescheduling pod rejected by Permit Plugin with unrelated event",
plugin: &PermitPlugin{name: "permit", statusCode: fwk.Unschedulable},
action: func() error {
_, err := testutils.CreatePausePod(testContext.ClientSet,
testutils.InitPausePod(&testutils.PausePodConfig{Name: "test-pod-2", Namespace: testContext.NS.Name}))
@@ -153,10 +167,8 @@ func TestReScheduling(t *testing.T) {
wantScheduled: false,
},
{
name: "Rescheduling pod failed by Permit Plugin",
plugins: []framework.Plugin{
&PermitPlugin{name: "permit", statusCode: fwk.Error},
},
name: "Rescheduling pod failed by Permit Plugin",
plugin: &PermitPlugin{name: "permit", statusCode: fwk.Error},
action: func() error {
_, err := testutils.CreateNode(testContext.ClientSet, st.MakeNode().Name("fake-node").Obj())
return err
@@ -165,10 +177,8 @@ func TestReScheduling(t *testing.T) {
wantError: true,
},
{
name: "Rescheduling pod rejected by Reserve Plugin",
plugins: []framework.Plugin{
&ReservePlugin{name: "reserve", statusCode: fwk.Unschedulable},
},
name: "Rescheduling pod rejected by Reserve Plugin",
plugin: &ReservePlugin{name: "reserve", statusCode: fwk.Unschedulable},
action: func() error {
_, err := testutils.CreateNode(testContext.ClientSet, st.MakeNode().Name("fake-node").Obj())
return err
@@ -176,10 +186,8 @@ func TestReScheduling(t *testing.T) {
wantScheduled: true,
},
{
name: "Rescheduling pod rejected by Reserve Plugin with unrelated event",
plugins: []framework.Plugin{
&ReservePlugin{name: "reserve", statusCode: fwk.Unschedulable},
},
name: "Rescheduling pod rejected by Reserve Plugin with unrelated event",
plugin: &ReservePlugin{name: "reserve", statusCode: fwk.Unschedulable},
action: func() error {
_, err := testutils.CreatePausePod(testContext.ClientSet,
testutils.InitPausePod(&testutils.PausePodConfig{Name: "test-pod-2", Namespace: testContext.NS.Name}))
@@ -188,10 +196,8 @@ func TestReScheduling(t *testing.T) {
wantScheduled: false,
},
{
name: "Rescheduling pod failed by Reserve Plugin",
plugins: []framework.Plugin{
&ReservePlugin{name: "reserve", statusCode: fwk.Error},
},
name: "Rescheduling pod failed by Reserve Plugin",
plugin: &ReservePlugin{name: "reserve", statusCode: fwk.Error},
action: func() error {
_, err := testutils.CreateNode(testContext.ClientSet, st.MakeNode().Name("fake-node").Obj())
return err
@@ -201,54 +207,59 @@ func TestReScheduling(t *testing.T) {
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
// Create a plugin registry for testing. Register only a permit plugin.
registry, prof := InitRegistryAndConfig(t, nil, test.plugins...)
for _, asyncAPICallsEnabled := range []bool{true, false} {
for _, test := range tests {
t.Run(fmt.Sprintf("%s (Async API calls enabled: %v)", test.name, asyncAPICallsEnabled), func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled)
testCtx, teardown := InitTestSchedulerForFrameworkTest(t, testContext, 2, true,
scheduler.WithProfiles(prof),
scheduler.WithFrameworkOutOfTreeRegistry(registry))
defer teardown()
// Create a plugin registry for testing. Register only the test plugin.
registry, prof := InitRegistryAndConfig(t, nil, test.plugin)
t.Cleanup(test.plugin.Reset)
pod, err := testutils.CreatePausePod(testCtx.ClientSet,
testutils.InitPausePod(&testutils.PausePodConfig{Name: "test-pod", Namespace: testCtx.NS.Name}))
if err != nil {
t.Errorf("Error while creating a test pod: %v", err)
}
testCtx, teardown := InitTestSchedulerForFrameworkTest(t, testContext, 2, true,
scheduler.WithProfiles(prof),
scheduler.WithFrameworkOutOfTreeRegistry(registry))
defer teardown()
// On the first scheduling attempt, the pod errors out or is unschedulable, controlled by wantFirstSchedulingError
if test.wantFirstSchedulingError {
if err = wait.PollUntilContextTimeout(testCtx.Ctx, 10*time.Millisecond, 30*time.Second, false,
testutils.PodSchedulingError(testCtx.ClientSet, pod.Namespace, pod.Name)); err != nil {
t.Errorf("Expected a scheduling error, but got: %v", err)
pod, err := testutils.CreatePausePod(testCtx.ClientSet,
testutils.InitPausePod(&testutils.PausePodConfig{Name: "test-pod", Namespace: testCtx.NS.Name}))
if err != nil {
t.Errorf("Error while creating a test pod: %v", err)
}
} else {
if err = testutils.WaitForPodUnschedulable(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
t.Errorf("Didn't expect the pod to be scheduled. error: %v", err)
}
}
if test.action() != nil {
if err = test.action(); err != nil {
t.Errorf("Perform action() error: %v", err)
// On the first scheduling attempt, the pod errors out or is unschedulable, controlled by wantFirstSchedulingError
if test.wantFirstSchedulingError {
if err = wait.PollUntilContextTimeout(testCtx.Ctx, 10*time.Millisecond, 30*time.Second, false,
testutils.PodSchedulingError(testCtx.ClientSet, pod.Namespace, pod.Name)); err != nil {
t.Errorf("Expected a scheduling error, but got: %v", err)
}
} else {
if err = testutils.WaitForPodUnschedulable(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
t.Errorf("Didn't expect the pod to be scheduled. error: %v", err)
}
}
}
if test.wantScheduled {
if err = testutils.WaitForPodToSchedule(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
t.Errorf("Didn't expect the pod to be unschedulable. error: %v", err)
if test.action != nil {
if err = test.action(); err != nil {
t.Errorf("Perform action() error: %v", err)
}
}
} else if test.wantError {
if err = wait.PollUntilContextTimeout(testCtx.Ctx, 10*time.Millisecond, 30*time.Second, false,
testutils.PodSchedulingError(testCtx.ClientSet, pod.Namespace, pod.Name)); err != nil {
t.Errorf("Expected a scheduling error, but got: %v", err)
if test.wantScheduled {
if err = testutils.WaitForPodToSchedule(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
t.Errorf("Didn't expect the pod to be unschedulable. error: %v", err)
}
} else if test.wantError {
if err = wait.PollUntilContextTimeout(testCtx.Ctx, 10*time.Millisecond, 30*time.Second, false,
testutils.PodSchedulingError(testCtx.ClientSet, pod.Namespace, pod.Name)); err != nil {
t.Errorf("Expected a scheduling error, but got: %v", err)
}
} else {
if err = testutils.WaitForPodUnschedulable(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
t.Errorf("Didn't expect the pod to be scheduled. error: %v", err)
}
}
} else {
if err = testutils.WaitForPodUnschedulable(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
t.Errorf("Didn't expect the pod to be scheduled. error: %v", err)
}
}
})
})
}
}
}
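Aside: the refactored loop above runs each TestReScheduling case under both values of the SchedulerAsyncAPICalls feature gate, so the synchronous and asynchronous API call paths are both exercised. A minimal, self-contained sketch of that pattern follows; the package name and the placeholder test case are hypothetical and not part of this PR.

// Sketch only: the real test lives in the scheduler integration suite shown above.
package rescheduling_test

import (
	"fmt"
	"testing"

	utilfeature "k8s.io/apiserver/pkg/util/feature"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	"k8s.io/kubernetes/pkg/features"
)

func TestGateMatrix(t *testing.T) {
	tests := []struct {
		name string
	}{
		{name: "placeholder case"}, // hypothetical test case
	}
	// Run every case twice: once with asynchronous API calls enabled,
	// once with them disabled, so both code paths stay covered.
	for _, asyncAPICallsEnabled := range []bool{true, false} {
		for _, test := range tests {
			t.Run(fmt.Sprintf("%s (Async API calls enabled: %v)", test.name, asyncAPICallsEnabled), func(t *testing.T) {
				// Restores the previous gate value automatically when the subtest ends.
				featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled)
				// ... set up the scheduler, create pods, and assert on the expected outcome ...
			})
		}
	}
}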

View File

@@ -39,6 +39,16 @@
- name: 5Nodes_QueueingHintsEnabled
featureGates:
SchedulerQueueingHints: true
SchedulerAsyncAPICalls: false
labels: [integration-test, short]
params:
initNodes: 5
initPods: 5
measurePods: 10
- name: 5Nodes_AsyncAPICallsEnabled
featureGates:
SchedulerQueueingHints: true
SchedulerAsyncAPICalls: true
labels: [integration-test, short]
params:
initNodes: 5
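Aside: the workload entries above pin the existing QueueingHintsEnabled cases to SchedulerAsyncAPICalls: false and add *_AsyncAPICallsEnabled variants with the gate turned on; the remaining hunks below repeat the same pattern for the other workloads. As a rough illustration only (not code from this PR), the gate these workloads toggle can be queried at runtime like so:

// Illustrative only; the actual wiring lives in the scheduler setup code.
package main

import (
	"fmt"

	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/kubernetes/pkg/features"
)

func main() {
	// True when API calls should be routed through the asynchronous dispatcher.
	async := utilfeature.DefaultFeatureGate.Enabled(features.SchedulerAsyncAPICalls)
	fmt.Println("SchedulerAsyncAPICalls enabled:", async)
}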
@@ -77,6 +87,16 @@
- name: 5000Nodes_50000Pods_QueueingHintsEnabled
featureGates:
SchedulerQueueingHints: true
SchedulerAsyncAPICalls: false
labels: [performance]
params:
initNodes: 5000
initPods: 5000
measurePods: 50000
- name: 5000Nodes_50000Pods_AsyncAPICallsEnabled
featureGates:
SchedulerQueueingHints: true
SchedulerAsyncAPICalls: true
labels: [performance]
params:
initNodes: 5000
@@ -158,6 +178,16 @@
- name: 5Nodes_QueueingHintsEnabled
featureGates:
SchedulerQueueingHints: true
SchedulerAsyncAPICalls: false
labels: [integration-test, short]
params:
initNodes: 5
initPods: 20
measurePods: 5
- name: 5Nodes_AsyncAPICallsEnabled
featureGates:
SchedulerQueueingHints: true
SchedulerAsyncAPICalls: true
labels: [integration-test, short]
params:
initNodes: 5
@@ -174,6 +204,16 @@
- name: 1000Nodes_QueueingHintsEnabled
featureGates:
SchedulerQueueingHints: true
SchedulerAsyncAPICalls: false
labels: [performance, short]
params:
initNodes: 1000
initPods: 4000
measurePods: 1000
- name: 1000Nodes_AsyncAPICallsEnabled
featureGates:
SchedulerQueueingHints: true
SchedulerAsyncAPICalls: true
labels: [performance, short]
params:
initNodes: 1000
@@ -222,6 +262,16 @@
- name: 5Nodes_QueueingHintsEnabled
featureGates:
SchedulerQueueingHints: true
SchedulerAsyncAPICalls: false
labels: [integration-test, short]
params:
initNodes: 5
initPods: 20
measurePods: 5
- name: 5Nodes_AsyncAPICallsEnabled
featureGates:
SchedulerQueueingHints: true
SchedulerAsyncAPICalls: true
labels: [integration-test, short]
params:
initNodes: 5
@@ -248,6 +298,7 @@
labels: [performance]
featureGates:
SchedulerAsyncPreemption: true
SchedulerAsyncAPICalls: false
params:
initNodes: 5000
initPods: 20000
@@ -255,6 +306,17 @@
- name: 5000Nodes_QueueingHintsEnabled
featureGates:
SchedulerQueueingHints: true
SchedulerAsyncAPICalls: false
labels: [performance]
threshold: 160
params:
initNodes: 5000
initPods: 20000
measurePods: 5000
- name: 5000Nodes_AsyncAPICallsEnabled
featureGates:
SchedulerQueueingHints: true
SchedulerAsyncAPICalls: true
labels: [performance]
threshold: 160
params: