Merge pull request #132886 from macsko/send_api_calls_through_dispatcher
KEP-5229: Send API calls through dispatcher and cache
pkg/scheduler/backend/api_cache/api_cache.go (new file, 72 lines)
@@ -0,0 +1,72 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package apicache

import (
	"context"

	v1 "k8s.io/api/core/v1"
	fwk "k8s.io/kube-scheduler/framework"
	internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
	internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

// APICache is responsible for sending API calls' requests through scheduling queue or cache.
type APICache struct {
	schedulingQueue internalqueue.SchedulingQueue
	cache           internalcache.Cache
}

func New(schedulingQueue internalqueue.SchedulingQueue, cache internalcache.Cache) *APICache {
	return &APICache{
		schedulingQueue: schedulingQueue,
		cache:           cache,
	}
}

// PatchPodStatus sends a patch request for a Pod's status through a scheduling queue.
// The patch could be first applied to the cached Pod object and then the API call is executed asynchronously.
// It returns a channel that can be used to wait for the call's completion.
func (c *APICache) PatchPodStatus(pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *framework.NominatingInfo) (<-chan error, error) {
	return c.schedulingQueue.PatchPodStatus(pod, condition, nominatingInfo)
}

// BindPod sends a binding request through a cache. The binding could be first applied to the cached Pod object
// and then the API call is executed asynchronously.
// It returns a channel that can be used to wait for the call's completion.
func (c *APICache) BindPod(binding *v1.Binding) (<-chan error, error) {
	return c.cache.BindPod(binding)
}

// WaitOnFinish blocks until the result of an API call is sent to the given onFinish channel
// (returned by methods BindPod or PreemptPod).
//
// It returns the error received from the channel.
// It also returns nil if the call was skipped or overwritten,
// as these are considered successful lifecycle outcomes.
func (c *APICache) WaitOnFinish(ctx context.Context, onFinish <-chan error) error {
	select {
	case err := <-onFinish:
		if fwk.IsUnexpectedError(err) {
			return err
		}
	case <-ctx.Done():
		return ctx.Err()
	}
	return nil
}
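A minimal usage sketch of the wrapper above, assuming a SchedulingQueue, a Cache and a prepared *v1.Binding already exist in the caller; the helper name is illustrative and not part of the change:

// bindViaAPICache shows the intended call pattern: enqueue the bind, then wait
// for the dispatcher's result via WaitOnFinish.
func bindViaAPICache(ctx context.Context, q internalqueue.SchedulingQueue, c internalcache.Cache, binding *v1.Binding) error {
	apiCache := apicache.New(q, c)
	// The binding is only queued here; the API call itself runs asynchronously.
	onFinish, err := apiCache.BindPod(binding)
	if err != nil {
		return err
	}
	// Skipped or overwritten calls are reported as success by WaitOnFinish.
	return apiCache.WaitOnFinish(ctx, onFinish)
}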
@@ -65,11 +65,11 @@ func (ad *APIDispatcher) SyncObject(obj metav1.Object) (metav1.Object, error) {
// Run starts the main processing loop of the APIDispatcher, which pops calls
// from the queue and dispatches them to worker goroutines for execution.
func (ad *APIDispatcher) Run(logger klog.Logger) {
	go func() {
		// Create a new context to allow to cancel the APICalls' execution when the APIDispatcher is closed.
		ctx, cancel := context.WithCancel(context.Background())
		ad.cancel = cancel
	// Create a new context to allow to cancel the APICalls' execution when the APIDispatcher is closed.
	ctx, cancel := context.WithCancel(context.Background())
	ad.cancel = cancel

	go func() {
		for {
			select {
			case <-ctx.Done():
pkg/scheduler/backend/cache/cache.go (vendored, 36 lines changed)
@@ -30,6 +30,7 @@ import (
	"k8s.io/klog/v2"
	fwk "k8s.io/kube-scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
	"k8s.io/kubernetes/pkg/scheduler/metrics"
)
@@ -41,9 +42,9 @@ var (
// It automatically starts a go routine that manages expiration of assumed pods.
// "ttl" is how long the assumed pod will get expired.
// "ctx" is the context that would close the background goroutine.
func New(ctx context.Context, ttl time.Duration) Cache {
func New(ctx context.Context, ttl time.Duration, apiDispatcher fwk.APIDispatcher) Cache {
	logger := klog.FromContext(ctx)
	cache := newCache(ctx, ttl, cleanAssumedPeriod)
	cache := newCache(ctx, ttl, cleanAssumedPeriod, apiDispatcher)
	cache.run(logger)
	return cache
}
@@ -76,6 +77,10 @@ type cacheImpl struct {
	nodeTree *nodeTree
	// A map from image name to its ImageStateSummary.
	imageStates map[string]*fwk.ImageStateSummary

	// apiDispatcher is used for the methods that are expected to send API calls.
	// It's non-nil only if the SchedulerAsyncAPICalls feature gate is enabled.
	apiDispatcher fwk.APIDispatcher
}

type podState struct {
@@ -87,18 +92,19 @@ type podState struct {
	bindingFinished bool
}

func newCache(ctx context.Context, ttl, period time.Duration) *cacheImpl {
func newCache(ctx context.Context, ttl, period time.Duration, apiDispatcher fwk.APIDispatcher) *cacheImpl {
	logger := klog.FromContext(ctx)
	return &cacheImpl{
		ttl:    ttl,
		period: period,
		stop:   ctx.Done(),

		nodes:       make(map[string]*nodeInfoListItem),
		nodeTree:    newNodeTree(logger, nil),
		assumedPods: sets.New[string](),
		podStates:   make(map[string]*podState),
		imageStates: make(map[string]*fwk.ImageStateSummary),
		nodes:         make(map[string]*nodeInfoListItem),
		nodeTree:      newNodeTree(logger, nil),
		assumedPods:   sets.New[string](),
		podStates:     make(map[string]*podState),
		imageStates:   make(map[string]*fwk.ImageStateSummary),
		apiDispatcher: apiDispatcher,
	}
}
@@ -759,3 +765,17 @@ func (cache *cacheImpl) updateMetrics() {
	metrics.CacheSize.WithLabelValues("pods").Set(float64(len(cache.podStates)))
	metrics.CacheSize.WithLabelValues("nodes").Set(float64(len(cache.nodes)))
}

// BindPod handles the pod binding by adding a bind API call to the dispatcher.
// This method should be used only if the SchedulerAsyncAPICalls feature gate is enabled.
func (cache *cacheImpl) BindPod(binding *v1.Binding) (<-chan error, error) {
	// Don't store anything in the cache, as the pod is already assumed, and in case of a binding failure, it will be forgotten.
	onFinish := make(chan error, 1)
	err := cache.apiDispatcher.Add(apicalls.Implementations.PodBinding(binding), fwk.APICallOptions{
		OnFinish: onFinish,
	})
	if fwk.IsUnexpectedError(err) {
		return onFinish, err
	}
	return onFinish, nil
}
pkg/scheduler/backend/cache/cache_test.go (vendored, 36 lines changed)
@@ -235,7 +235,7 @@ func TestAssumePodScheduled(t *testing.T) {
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, time.Second, time.Second)
	cache := newCache(ctx, time.Second, time.Second, nil)
	for _, pod := range tc.pods {
		if err := cache.AssumePod(logger, pod); err != nil {
			t.Fatalf("AssumePod failed: %v", err)
@@ -354,7 +354,7 @@ func TestExpirePod(t *testing.T) {
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, tc.ttl, time.Second)
	cache := newCache(ctx, tc.ttl, time.Second, nil)

	for _, pod := range tc.pods {
		if err := cache.AssumePod(logger, pod.pod); err != nil {
@@ -415,7 +415,7 @@ func TestAddPodWillConfirm(t *testing.T) {
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, ttl, time.Second)
	cache := newCache(ctx, ttl, time.Second, nil)
	for _, podToAssume := range test.podsToAssume {
		if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
			t.Fatalf("assumePod failed: %v", err)
@@ -458,7 +458,7 @@ func TestDump(t *testing.T) {
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, ttl, time.Second)
	cache := newCache(ctx, ttl, time.Second, nil)
	for _, podToAssume := range test.podsToAssume {
		if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
			t.Errorf("assumePod failed: %v", err)
@@ -526,7 +526,7 @@ func TestAddPodAlwaysUpdatesPodInfoInNodeInfo(t *testing.T) {
		},
	}

	cache := newCache(ctx, ttl, time.Second)
	cache := newCache(ctx, ttl, time.Second, nil)
	for _, podToAssume := range test.podsToAssume {
		if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
			t.Fatalf("assumePod failed: %v", err)
@@ -585,7 +585,7 @@ func TestAddPodWillReplaceAssumed(t *testing.T) {
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, ttl, time.Second)
	cache := newCache(ctx, ttl, time.Second, nil)
	for _, podToAssume := range test.podsToAssume {
		if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
			t.Fatalf("assumePod failed: %v", err)
@@ -638,7 +638,7 @@ func TestAddPodAfterExpiration(t *testing.T) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	now := time.Now()
	cache := newCache(ctx, ttl, time.Second)
	cache := newCache(ctx, ttl, time.Second, nil)
	if err := assumeAndFinishBinding(logger, cache, test.pod, now); err != nil {
		t.Fatalf("assumePod failed: %v", err)
	}
@@ -703,7 +703,7 @@ func TestUpdatePod(t *testing.T) {
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, ttl, time.Second)
	cache := newCache(ctx, ttl, time.Second, nil)
	for _, podToAdd := range test.podsToAdd {
		if err := cache.AddPod(logger, podToAdd); err != nil {
			t.Fatalf("AddPod failed: %v", err)
@@ -765,7 +765,7 @@ func TestUpdatePodAndGet(t *testing.T) {
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, ttl, time.Second)
	cache := newCache(ctx, ttl, time.Second, nil)
	// trying to get an unknown pod should return an error
	// podToUpdate has not been added yet
	if _, err := cache.GetPod(tc.podToUpdate); err == nil {
@@ -848,7 +848,7 @@ func TestExpireAddUpdatePod(t *testing.T) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	now := time.Now()
	cache := newCache(ctx, ttl, time.Second)
	cache := newCache(ctx, ttl, time.Second, nil)
	for _, podToAssume := range test.podsToAssume {
		if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
			t.Fatalf("assumePod failed: %v", err)
@@ -909,7 +909,7 @@ func TestEphemeralStorageResource(t *testing.T) {
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, time.Second, time.Second)
	cache := newCache(ctx, time.Second, time.Second, nil)
	if err := cache.AddPod(logger, test.pod); err != nil {
		t.Fatalf("AddPod failed: %v", err)
	}
@@ -963,7 +963,7 @@ func TestRemovePod(t *testing.T) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	nodeName := pod.Spec.NodeName
	cache := newCache(ctx, time.Second, time.Second)
	cache := newCache(ctx, time.Second, time.Second, nil)
	// Add/Assume pod succeeds even before adding the nodes.
	if tt.assume {
		if err := cache.AddPod(logger, pod); err != nil {
@@ -1013,7 +1013,7 @@ func TestForgetPod(t *testing.T) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	cache := newCache(ctx, ttl, time.Second)
	cache := newCache(ctx, ttl, time.Second, nil)
	for _, pod := range pods {
		if err := assumeAndFinishBinding(logger, cache, pod, now); err != nil {
			t.Fatalf("assumePod failed: %v", err)
@@ -1226,7 +1226,7 @@ func TestNodeOperators(t *testing.T) {
	imageStates := buildImageStates(tc.nodes)
	expected := buildNodeInfo(node, tc.pods, imageStates)

	cache := newCache(ctx, time.Second, time.Second)
	cache := newCache(ctx, time.Second, time.Second, nil)
	for _, nodeItem := range tc.nodes {
		cache.AddNode(logger, nodeItem)
	}
@@ -1720,7 +1720,7 @@ func TestSchedulerCache_UpdateSnapshot(t *testing.T) {
	_, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache = newCache(ctx, time.Second, time.Second)
	cache = newCache(ctx, time.Second, time.Second, nil)
	snapshot = NewEmptySnapshot()

	for _, op := range test.operations {
@@ -1954,7 +1954,7 @@ func TestSchedulerCache_updateNodeInfoSnapshotList(t *testing.T) {
	_, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache = newCache(ctx, time.Second, time.Second)
	cache = newCache(ctx, time.Second, time.Second, nil)
	snapshot = NewEmptySnapshot()

	test.operations(t)
@@ -2060,7 +2060,7 @@ func setupCacheOf1kNodes30kPods(b *testing.B) Cache {
	logger, ctx := ktesting.NewTestContext(b)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, time.Second, time.Second)
	cache := newCache(ctx, time.Second, time.Second, nil)
	for i := 0; i < 1000; i++ {
		nodeName := fmt.Sprintf("node-%d", i)
		cache.AddNode(logger, st.MakeNode().Name(nodeName).Obj())
@@ -2081,7 +2081,7 @@ func setupCacheWithAssumedPods(b *testing.B, podNum int, assumedTime time.Time)
	ctx, cancel := context.WithCancel(ctx)
	addedNodes := make(map[string]struct{})
	defer cancel()
	cache := newCache(ctx, time.Second, time.Second)
	cache := newCache(ctx, time.Second, time.Second, nil)
	for i := 0; i < podNum; i++ {
		nodeName := fmt.Sprintf("node-%d", i/10)
		if _, ok := addedNodes[nodeName]; !ok {
pkg/scheduler/backend/cache/fake/fake_cache.go (vendored, 73 lines changed)
@@ -20,74 +20,47 @@ import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"
	internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

// Cache is used for testing
type Cache struct {
	internalcache.Cache
	AssumeFunc       func(*v1.Pod)
	ForgetFunc       func(*v1.Pod)
	IsAssumedPodFunc func(*v1.Pod) bool
	GetPodFunc       func(*v1.Pod) *v1.Pod
}

// AssumePod is a fake method for testing.
// AssumePod allows to mock this method for testing.
func (c *Cache) AssumePod(logger klog.Logger, pod *v1.Pod) error {
	c.AssumeFunc(pod)
	return nil
	if c.AssumeFunc != nil {
		c.AssumeFunc(pod)
		return nil
	}
	return c.Cache.AssumePod(logger, pod)
}

// FinishBinding is a fake method for testing.
func (c *Cache) FinishBinding(logger klog.Logger, pod *v1.Pod) error { return nil }

// ForgetPod is a fake method for testing.
// ForgetPod allows to mock this method for testing.
func (c *Cache) ForgetPod(logger klog.Logger, pod *v1.Pod) error {
	c.ForgetFunc(pod)
	return nil
	if c.ForgetFunc != nil {
		c.ForgetFunc(pod)
		return nil
	}
	return c.Cache.ForgetPod(logger, pod)
}

// AddPod is a fake method for testing.
func (c *Cache) AddPod(logger klog.Logger, pod *v1.Pod) error { return nil }

// UpdatePod is a fake method for testing.
func (c *Cache) UpdatePod(logger klog.Logger, oldPod, newPod *v1.Pod) error { return nil }

// RemovePod is a fake method for testing.
func (c *Cache) RemovePod(logger klog.Logger, pod *v1.Pod) error { return nil }

// IsAssumedPod is a fake method for testing.
// IsAssumedPod allows to mock this method for testing.
func (c *Cache) IsAssumedPod(pod *v1.Pod) (bool, error) {
	return c.IsAssumedPodFunc(pod), nil
	if c.IsAssumedPodFunc != nil {
		return c.IsAssumedPodFunc(pod), nil
	}
	return c.Cache.IsAssumedPod(pod)
}

// GetPod is a fake method for testing.
// GetPod allows to mock this method for testing.
func (c *Cache) GetPod(pod *v1.Pod) (*v1.Pod, error) {
	return c.GetPodFunc(pod), nil
}

// AddNode is a fake method for testing.
func (c *Cache) AddNode(logger klog.Logger, node *v1.Node) *framework.NodeInfo { return nil }

// UpdateNode is a fake method for testing.
func (c *Cache) UpdateNode(logger klog.Logger, oldNode, newNode *v1.Node) *framework.NodeInfo {
	return nil
}

// RemoveNode is a fake method for testing.
func (c *Cache) RemoveNode(logger klog.Logger, node *v1.Node) error { return nil }

// UpdateSnapshot is a fake method for testing.
func (c *Cache) UpdateSnapshot(logger klog.Logger, snapshot *internalcache.Snapshot) error {
	return nil
}

// NodeCount is a fake method for testing.
func (c *Cache) NodeCount() int { return 0 }

// PodCount is a fake method for testing.
func (c *Cache) PodCount() (int, error) { return 0, nil }

// Dump is a fake method for testing.
func (c *Cache) Dump() *internalcache.Dump {
	return &internalcache.Dump{}
	if c.GetPodFunc != nil {
		return c.GetPodFunc(pod), nil
	}
	return c.Cache.GetPod(pod)
}
pkg/scheduler/backend/cache/interface.go (vendored, 4 lines changed)
@@ -114,6 +114,10 @@ type Cache interface {

	// Dump produces a dump of the current cache.
	Dump() *Dump

	// BindPod handles the pod binding by adding a bind API call to the dispatcher.
	// This method should be used only if the SchedulerAsyncAPICalls feature gate is enabled.
	BindPod(binding *v1.Binding) (<-chan error, error)
}

// Dump is a dump of the cache state.
@@ -49,6 +49,7 @@ import (
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/scheduler/backend/heap"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity"
	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/podtopologyspread"
	"k8s.io/kubernetes/pkg/scheduler/metrics"
@@ -128,6 +129,10 @@ type SchedulingQueue interface {
	// Run starts the goroutines managing the queue.
	Run(logger klog.Logger)

	// PatchPodStatus handles the pod status update by sending an update API call through API dispatcher.
	// This method should be used only if the SchedulerAsyncAPICalls feature gate is enabled.
	PatchPodStatus(pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *framework.NominatingInfo) (<-chan error, error)

	// The following functions are supposed to be used only for testing or debugging.
	GetPod(name, namespace string) (*framework.QueuedPodInfo, bool)
	PendingPods() ([]*v1.Pod, string)
@@ -193,6 +198,10 @@ type PriorityQueue struct {
	// pluginMetricsSamplePercent is the percentage of plugin metrics to be sampled.
	pluginMetricsSamplePercent int

	// apiDispatcher is used for the methods that are expected to send API calls.
	// It's non-nil only if the SchedulerAsyncAPICalls feature gate is enabled.
	apiDispatcher fwk.APIDispatcher

	// isSchedulingQueueHintEnabled indicates whether the feature gate for the scheduling queue is enabled.
	isSchedulingQueueHintEnabled bool
	// isPopFromBackoffQEnabled indicates whether the feature gate SchedulerPopFromBackoffQ is enabled.
@@ -224,6 +233,7 @@ type priorityQueueOptions struct {
	pluginMetricsSamplePercent int
	preEnqueuePluginMap        map[string]map[string]framework.PreEnqueuePlugin
	queueingHintMap            QueueingHintMapPerProfile
	apiDispatcher              fwk.APIDispatcher
}

// Option configures a PriorityQueue
@@ -298,6 +308,13 @@ func WithPluginMetricsSamplePercent(percent int) Option {
	}
}

// WithAPIDispatcher sets the API dispatcher.
func WithAPIDispatcher(apiDispatcher fwk.APIDispatcher) Option {
	return func(o *priorityQueueOptions) {
		o.apiDispatcher = apiDispatcher
	}
}

var defaultPriorityQueueOptions = priorityQueueOptions{
	clock:                     clock.RealClock{},
	podInitialBackoffDuration: DefaultPodInitialBackoffDuration,
@@ -349,6 +366,7 @@ func NewPriorityQueue(
		metricsRecorder:              options.metricsRecorder,
		pluginMetricsSamplePercent:   options.pluginMetricsSamplePercent,
		moveRequestCycle:             -1,
		apiDispatcher:                options.apiDispatcher,
		isSchedulingQueueHintEnabled: isSchedulingQueueHintEnabled,
		isPopFromBackoffQEnabled:     isPopFromBackoffQEnabled,
	}
@@ -1307,6 +1325,20 @@ func (p *PriorityQueue) PendingPods() ([]*v1.Pod, string) {
	return result, fmt.Sprintf(pendingPodsSummary, activeQLen, backoffQLen, len(p.unschedulablePods.podInfoMap))
}

// PatchPodStatus handles the pod status update by sending an update API call through API dispatcher.
// This method should be used only if the SchedulerAsyncAPICalls feature gate is enabled.
func (p *PriorityQueue) PatchPodStatus(pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *framework.NominatingInfo) (<-chan error, error) {
	// Don't store anything in the cache. This might be extended in the next releases.
	onFinish := make(chan error, 1)
	err := p.apiDispatcher.Add(apicalls.Implementations.PodStatusPatch(pod, condition, nominatingInfo), fwk.APICallOptions{
		OnFinish: onFinish,
	})
	if fwk.IsUnexpectedError(err) {
		return onFinish, err
	}
	return onFinish, nil
}

// Note: this function assumes the caller locks both p.lock.RLock and p.activeQ.getLock().RLock.
func (p *PriorityQueue) nominatedPodToInfo(np podRef, unlockedActiveQ unlockedActiveQueueReader) *framework.PodInfo {
	pod := np.toPod()
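A hedged wiring sketch for the new WithAPIDispatcher option, mirroring what the tests later in this PR do; client, informerFactory, lessFn and logger are assumed to exist in the caller, and the helper name is illustrative:

func newQueueWithDispatcher(logger klog.Logger, client clientset.Interface, informerFactory informers.SharedInformerFactory, lessFn framework.LessFunc) (*internalqueue.PriorityQueue, *apidispatcher.APIDispatcher) {
	// The dispatcher executes the queued API calls asynchronously (16 workers here,
	// as in the tests); Close() should be called by the owner on shutdown.
	apiDispatcher := apidispatcher.New(client, 16, apicalls.Relevances)
	apiDispatcher.Run(logger)

	q := internalqueue.NewPriorityQueue(
		lessFn,
		informerFactory,
		internalqueue.WithAPIDispatcher(apiDispatcher), // enables PriorityQueue.PatchPodStatus above
	)
	return q, apiDispatcher
}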
@@ -136,6 +136,20 @@ func (sched *Scheduler) addPodToSchedulingQueue(obj interface{}) {
	sched.SchedulingQueue.Add(logger, pod)
}

func (sched *Scheduler) syncPodWithDispatcher(pod *v1.Pod) *v1.Pod {
	enrichedObj, err := sched.APIDispatcher.SyncObject(pod)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("failed to sync pod %s/%s with API dispatcher: %w", pod.Namespace, pod.Name, err))
		return pod
	}
	enrichedPod, ok := enrichedObj.(*v1.Pod)
	if !ok {
		utilruntime.HandleError(fmt.Errorf("cannot convert enrichedObj of type %T to *v1.Pod", enrichedObj))
		return pod
	}
	return enrichedPod
}

func (sched *Scheduler) updatePodInSchedulingQueue(oldObj, newObj interface{}) {
	start := time.Now()
	logger := sched.logger
@@ -153,6 +167,12 @@ func (sched *Scheduler) updatePodInSchedulingQueue(oldObj, newObj interface{}) {
		}
	}

	if sched.APIDispatcher != nil {
		// If the API dispatcher is available, sync the new pod with the details.
		// However, at the moment the updated newPod is discarded; this logic will be handled in future releases.
		_ = sched.syncPodWithDispatcher(newPod)
	}

	isAssumed, err := sched.Cache.IsAssumedPod(newPod)
	if err != nil {
		utilruntime.HandleErrorWithLogger(logger, err, "Failed to check whether pod is assumed", "pod", klog.KObj(newPod))
@@ -274,6 +294,12 @@ func (sched *Scheduler) updatePodInCache(oldObj, newObj interface{}) {
		return
	}

	if sched.APIDispatcher != nil {
		// If the API dispatcher is available, sync the new pod with the details.
		// However, at the moment the updated newPod is discarded; this logic will be handled in future releases.
		_ = sched.syncPodWithDispatcher(newPod)
	}

	logger.V(4).Info("Update event for scheduled pod", "pod", klog.KObj(oldPod))
	if err := sched.Cache.UpdatePod(logger, oldPod, newPod); err != nil {
		utilruntime.HandleErrorWithLogger(logger, err, "Scheduler cache UpdatePod failed", "pod", klog.KObj(oldPod))
@@ -32,26 +32,26 @@ import (
	resourcealphaapi "k8s.io/api/resource/v1alpha3"
	storagev1 "k8s.io/api/storage/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/version"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	resourceslicetracker "k8s.io/dynamic-resource-allocation/resourceslice/tracker"
	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"

	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/client-go/dynamic/dynamicinformer"
	dyfake "k8s.io/client-go/dynamic/fake"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"

	featuregatetesting "k8s.io/component-base/featuregate/testing"
	resourceslicetracker "k8s.io/dynamic-resource-allocation/resourceslice/tracker"
	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"
	fwk "k8s.io/kube-scheduler/framework"
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
	internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
	internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity"
	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodename"
	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeports"
@@ -162,16 +162,22 @@ func TestEventHandlers_MoveToActiveOnNominatedNodeUpdate(t *testing.T) {
	client := fake.NewClientset(objs...)
	informerFactory := informers.NewSharedInformerFactory(client, 0)

	// apiDispatcher is unused in the test, but initializing it anyway.
	apiDispatcher := apidispatcher.New(client, 16, apicalls.Relevances)
	apiDispatcher.Run(logger)
	defer apiDispatcher.Close()

	recorder := metrics.NewMetricsAsyncRecorder(3, 20*time.Microsecond, ctx.Done())
	queue := internalqueue.NewPriorityQueue(
		newDefaultQueueSort(),
		informerFactory,
		internalqueue.WithMetricsRecorder(*recorder),
		internalqueue.WithQueueingHintMapPerProfile(queueingHintMap),
		internalqueue.WithAPIDispatcher(apiDispatcher),
		// disable backoff queue
		internalqueue.WithPodInitialBackoffDuration(0),
		internalqueue.WithPodMaxBackoffDuration(0))
	schedulerCache := internalcache.New(ctx, 30*time.Second)
	schedulerCache := internalcache.New(ctx, 30*time.Second, nil)

	// Put test pods into unschedulable queue
	for _, pod := range unschedulablePods {
@@ -186,7 +192,7 @@
		}
	}

	s, _, err := initScheduler(ctx, schedulerCache, queue, client, informerFactory)
	s, _, err := initScheduler(ctx, schedulerCache, queue, apiDispatcher, client, informerFactory)
	if err != nil {
		t.Fatalf("Failed to initialize test scheduler: %v", err)
	}
@@ -242,7 +248,7 @@ func TestUpdatePodInCache(t *testing.T) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	sched := &Scheduler{
		Cache:           internalcache.New(ctx, ttl),
		Cache:           internalcache.New(ctx, ttl, nil),
		SchedulingQueue: internalqueue.NewTestQueue(ctx, nil),
		logger:          logger,
	}

@@ -333,7 +333,7 @@ func TestSchedulerWithExtenders(t *testing.T) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	cache := internalcache.New(ctx, time.Duration(0))
	cache := internalcache.New(ctx, time.Duration(0), nil)
	for _, name := range test.nodes {
		cache.AddNode(logger, createNode(name))
	}
pkg/scheduler/framework/api_calls/api_calls.go (new file, 45 lines)
@@ -0,0 +1,45 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package apicalls

import (
	fwk "k8s.io/kube-scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

const (
	PodStatusPatch fwk.APICallType = "pod_status_patch"
	PodBinding     fwk.APICallType = "pod_binding"
)

// Relevances is a built-in mapping from call types to relevances.
// Types of the same relevance should only be defined for different object types.
// Misconfiguration of this map can lead to unexpected system behavior,
// so any change has to be well tested and done with care.
// This mapping can be replaced by an out-of-tree plugin in its init() function, if needed.
var Relevances = fwk.APICallRelevances{
	PodStatusPatch: 1,
	PodBinding:     2,
}

// Implementations is a built-in mapping from call types to calls' constructors.
// It's used to construct calls' objects in the scheduler framework and for easier replacement of those.
// This mapping can be replaced by an out-of-tree plugin in its init() function, if needed.
var Implementations = framework.APICallImplementations[*PodStatusPatchCall, *PodBindingCall]{
	PodStatusPatch: NewPodStatusPatchCall,
	PodBinding:     NewPodBindingCall,
}
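Both variables are plain package-level values, so an out-of-tree integration can swap them in an init() function, as the comments above note. A sketch under that assumption; the package name and the pod_eviction call type are hypothetical:

package mycalls // hypothetical out-of-tree package

import (
	fwk "k8s.io/kube-scheduler/framework"
	apicalls "k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
)

// PodEviction is a hypothetical custom call type.
const PodEviction fwk.APICallType = "pod_eviction"

func init() {
	// Replace the built-in relevance map with one that also ranks the custom call.
	// Types of the same relevance must only be used for different object types.
	apicalls.Relevances = fwk.APICallRelevances{
		apicalls.PodStatusPatch: 1,
		apicalls.PodBinding:     2,
		PodEviction:             3,
	}
}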
pkg/scheduler/framework/api_calls/pod_binding.go (new file, 68 lines)
@@ -0,0 +1,68 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package apicalls

import (
	"context"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/klog/v2"
	fwk "k8s.io/kube-scheduler/framework"
)

// PodBindingCall is used to bind the pod using the binding details.
type PodBindingCall struct {
	binding *v1.Binding
}

func NewPodBindingCall(binding *v1.Binding) *PodBindingCall {
	return &PodBindingCall{
		binding: binding,
	}
}

func (pbc *PodBindingCall) CallType() fwk.APICallType {
	return PodBinding
}

func (pbc *PodBindingCall) UID() types.UID {
	return pbc.binding.UID
}

func (pbc *PodBindingCall) Execute(ctx context.Context, client clientset.Interface) error {
	logger := klog.FromContext(ctx)
	logger.V(3).Info("Attempting to bind pod to node", "pod", klog.KObj(&pbc.binding.ObjectMeta), "node", pbc.binding.Target.Name)

	return client.CoreV1().Pods(pbc.binding.Namespace).Bind(ctx, pbc.binding, metav1.CreateOptions{})
}

func (pbc *PodBindingCall) Sync(obj metav1.Object) (metav1.Object, error) {
	// Don't need to store or update an object.
	return obj, nil
}

func (pbc *PodBindingCall) Merge(oldCall fwk.APICall) error {
	// Merge should just overwrite the previous call.
	return nil
}

func (pbc *PodBindingCall) IsNoOp() bool {
	return false
}
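PodBindingCall shows the full shape of a call: CallType, UID, Execute, Sync, Merge and IsNoOp. A hypothetical call following the same shape could look like the sketch below, reusing the PodEviction type from the earlier sketch and the same imports as the file above; this assumes fwk.APICall requires exactly these methods, which is implied but not shown in this diff:

// PodEvictionCall is a hypothetical fwk.APICall shaped after PodBindingCall above.
type PodEvictionCall struct {
	podUID    types.UID
	namespace string
	name      string
}

func NewPodEvictionCall(pod *v1.Pod) *PodEvictionCall {
	return &PodEvictionCall{podUID: pod.UID, namespace: pod.Namespace, name: pod.Name}
}

func (pec *PodEvictionCall) CallType() fwk.APICallType { return PodEviction }

func (pec *PodEvictionCall) UID() types.UID { return pec.podUID }

func (pec *PodEvictionCall) Execute(ctx context.Context, client clientset.Interface) error {
	// A plain delete stands in for a real eviction to keep the sketch short.
	return client.CoreV1().Pods(pec.namespace).Delete(ctx, pec.name, metav1.DeleteOptions{})
}

func (pec *PodEvictionCall) Sync(obj metav1.Object) (metav1.Object, error) {
	// Nothing to reconcile with newer versions of the object.
	return obj, nil
}

func (pec *PodEvictionCall) Merge(oldCall fwk.APICall) error {
	// A newer eviction simply supersedes the older one.
	return nil
}

func (pec *PodEvictionCall) IsNoOp() bool { return false }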
pkg/scheduler/framework/api_calls/pod_binding_test.go (new file, 66 lines)
@@ -0,0 +1,66 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package apicalls

import (
	"testing"

	"github.com/google/go-cmp/cmp"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/kubernetes/fake"
	clienttesting "k8s.io/client-go/testing"
	"k8s.io/klog/v2/ktesting"
)

func TestPodBindingCall_Execute(t *testing.T) {
	_, ctx := ktesting.NewTestContext(t)
	binding := &v1.Binding{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "pod",
			Namespace: "ns",
		},
		Target: v1.ObjectReference{
			Name: "node",
		},
	}

	client := fake.NewClientset()
	bound := false
	client.PrependReactor("create", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
		createAction := action.(clienttesting.CreateActionImpl)
		if createAction.Subresource != "binding" {
			return false, nil, nil
		}
		bound = true

		gotBinding := createAction.GetObject().(*v1.Binding)
		if diff := cmp.Diff(binding, gotBinding); diff != "" {
			t.Errorf("Execute() sent incorrect binding object (-want,+got):\n%s", diff)
		}
		return true, nil, nil
	})

	call := NewPodBindingCall(binding)
	if err := call.Execute(ctx, client); err != nil {
		t.Fatalf("Execute() returned unexpected error: %v", err)
	}
	if !bound {
		t.Error("Expected binding API to be called")
	}
}
pkg/scheduler/framework/api_calls/pod_status_patch.go (new file, 183 lines)
@@ -0,0 +1,183 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package apicalls

import (
	"context"
	"fmt"
	"sync"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/klog/v2"
	fwk "k8s.io/kube-scheduler/framework"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/util"
)

// PodStatusPatchCall is used to patch the pod status.
type PodStatusPatchCall struct {
	lock sync.Mutex
	// executed is set at the beginning of the call's Execute
	// and is used by Sync to know if the podStatus should be updated.
	executed bool

	// podUID is the UID of the pod.
	podUID types.UID
	// podRef is a reference to the pod.
	podRef klog.ObjectRef
	// podStatus contains the actual status of the pod.
	podStatus *v1.PodStatus
	// newCondition is a condition to update.
	newCondition *v1.PodCondition
	// nominatingInfo is a nominating info to update.
	nominatingInfo *framework.NominatingInfo
}

func NewPodStatusPatchCall(pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *framework.NominatingInfo) *PodStatusPatchCall {
	return &PodStatusPatchCall{
		podUID:         pod.UID,
		podRef:         klog.KObj(pod),
		podStatus:      pod.Status.DeepCopy(),
		newCondition:   condition,
		nominatingInfo: nominatingInfo,
	}
}

func (psuc *PodStatusPatchCall) CallType() fwk.APICallType {
	return PodStatusPatch
}

func (psuc *PodStatusPatchCall) UID() types.UID {
	return psuc.podUID
}

// syncStatus syncs the given status with condition and nominatingInfo. It returns true if anything was actually updated.
func syncStatus(status *v1.PodStatus, condition *v1.PodCondition, nominatingInfo *framework.NominatingInfo) bool {
	nnnNeedsUpdate := nominatingInfo.Mode() == framework.ModeOverride && status.NominatedNodeName != nominatingInfo.NominatedNodeName
	if condition != nil {
		if !podutil.UpdatePodCondition(status, condition) && !nnnNeedsUpdate {
			return false
		}
	} else if !nnnNeedsUpdate {
		return false
	}
	if nnnNeedsUpdate {
		status.NominatedNodeName = nominatingInfo.NominatedNodeName
	}
	return true
}

func (psuc *PodStatusPatchCall) Execute(ctx context.Context, client clientset.Interface) error {
	psuc.lock.Lock()
	// The executed flag is set so as not to race with the podStatus write in Sync afterwards.
	psuc.executed = true
	condition := psuc.newCondition.DeepCopy()
	podStatusCopy := psuc.podStatus.DeepCopy()
	psuc.lock.Unlock()

	logger := klog.FromContext(ctx)
	if condition != nil {
		logger.V(3).Info("Updating pod condition", "pod", psuc.podRef, "conditionType", condition.Type, "conditionStatus", condition.Status, "conditionReason", condition.Reason)
	}

	// Sync status to have the condition and nominatingInfo applied on a podStatusCopy.
	synced := syncStatus(podStatusCopy, condition, psuc.nominatingInfo)
	if !synced {
		logger.V(5).Info("Pod status patch call does not need to be executed because it has no effect", "pod", psuc.podRef)
		return nil
	}

	// It's safe to run PatchPodStatus even on an outdated pod object.
	err := util.PatchPodStatus(ctx, client, psuc.podRef.Name, psuc.podRef.Namespace, psuc.podStatus, podStatusCopy)
	if err != nil {
		logger.Error(err, "Failed to patch pod status", "pod", psuc.podRef)
		return err
	}

	return nil
}

func (psuc *PodStatusPatchCall) Sync(obj metav1.Object) (metav1.Object, error) {
	pod, ok := obj.(*v1.Pod)
	if !ok {
		return obj, fmt.Errorf("unexpected error: object of type %T is not of type *v1.Pod", obj)
	}

	psuc.lock.Lock()
	if !psuc.executed {
		// Set podStatus only if the call's execution hasn't started yet,
		// because otherwise it's irrelevant and might race.
		psuc.podStatus = pod.Status.DeepCopy()
	}
	psuc.lock.Unlock()

	podCopy := pod.DeepCopy()
	// Sync the passed pod's status with the call's condition and nominatingInfo.
	synced := syncStatus(&podCopy.Status, psuc.newCondition, psuc.nominatingInfo)
	if !synced {
		return pod, nil
	}
	return podCopy, nil
}

func (psuc *PodStatusPatchCall) Merge(oldCall fwk.APICall) error {
	oldPsuc, ok := oldCall.(*PodStatusPatchCall)
	if !ok {
		return fmt.Errorf("unexpected error: call of type %T is not of type *PodStatusPatchCall", oldCall)
	}
	if psuc.nominatingInfo.Mode() == framework.ModeNoop && oldPsuc.nominatingInfo.Mode() == framework.ModeOverride {
		// Set a nominatingInfo from an old call if the new one is no-op.
		psuc.nominatingInfo = oldPsuc.nominatingInfo
	}
	if psuc.newCondition == nil && oldPsuc.newCondition != nil {
		// Set a condition from an old call if the new one is nil.
		psuc.newCondition = oldPsuc.newCondition
	}
	return nil
}

// conditionNeedsUpdate checks if the pod condition needs an update.
func conditionNeedsUpdate(status *v1.PodStatus, condition *v1.PodCondition) bool {
	// Try to find this pod condition.
	_, oldCondition := podutil.GetPodCondition(status, condition.Type)
	if oldCondition == nil {
		return true
	}

	isEqual := condition.Status == oldCondition.Status &&
		condition.Reason == oldCondition.Reason &&
		condition.Message == oldCondition.Message &&
		condition.LastProbeTime.Equal(&oldCondition.LastProbeTime)

	// Return true if one of the fields has changed.
	return !isEqual
}

func (psuc *PodStatusPatchCall) IsNoOp() bool {
	nnnNeedsUpdate := psuc.nominatingInfo.Mode() == framework.ModeOverride && psuc.podStatus.NominatedNodeName != psuc.nominatingInfo.NominatedNodeName
	if nnnNeedsUpdate {
		return false
	}
	if psuc.newCondition == nil {
		return true
	}
	return !conditionNeedsUpdate(psuc.podStatus, psuc.newCondition)
}
pkg/scheduler/framework/api_calls/pod_status_patch_test.go (new file, 250 lines)
@@ -0,0 +1,250 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package apicalls

import (
	"testing"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/kubernetes/fake"
	clienttesting "k8s.io/client-go/testing"
	"k8s.io/klog/v2/ktesting"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

func TestPodStatusPatchCall_IsNoOp(t *testing.T) {
	podWithNode := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID: "uid",
		},
		Status: v1.PodStatus{
			NominatedNodeName: "node-a",
			Conditions: []v1.PodCondition{
				{
					Type:   v1.PodScheduled,
					Status: v1.ConditionFalse,
				},
			},
		},
	}

	tests := []struct {
		name           string
		pod            *v1.Pod
		condition      *v1.PodCondition
		nominatingInfo *framework.NominatingInfo
		want           bool
	}{
		{
			name:           "No-op when condition and node name match",
			pod:            podWithNode,
			condition:      &v1.PodCondition{Type: v1.PodScheduled, Status: v1.ConditionFalse},
			nominatingInfo: &framework.NominatingInfo{NominatedNodeName: "node-a", NominatingMode: framework.ModeOverride},
			want:           true,
		},
		{
			name:           "Not no-op when condition is different",
			pod:            podWithNode,
			condition:      &v1.PodCondition{Type: v1.PodScheduled, Status: v1.ConditionTrue},
			nominatingInfo: &framework.NominatingInfo{NominatedNodeName: "node-a", NominatingMode: framework.ModeOverride},
			want:           false,
		},
		{
			name:           "Not no-op when nominated node name is different",
			pod:            podWithNode,
			condition:      &v1.PodCondition{Type: v1.PodScheduled, Status: v1.ConditionFalse},
			nominatingInfo: &framework.NominatingInfo{NominatedNodeName: "node-b", NominatingMode: framework.ModeOverride},
			want:           false,
		},
		{
			name:           "No-op when condition is nil and node name matches",
			pod:            podWithNode,
			condition:      nil,
			nominatingInfo: &framework.NominatingInfo{NominatedNodeName: "node-a", NominatingMode: framework.ModeOverride},
			want:           true,
		},
		{
			name:           "Not no-op when condition is nil but node name differs",
			pod:            podWithNode,
			condition:      nil,
			nominatingInfo: &framework.NominatingInfo{NominatedNodeName: "node-b", NominatingMode: framework.ModeOverride},
			want:           false,
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			call := NewPodStatusPatchCall(test.pod, test.condition, test.nominatingInfo)
			if got := call.IsNoOp(); got != test.want {
				t.Errorf("Expected IsNoOp() to return %v, but got %v", test.want, got)
			}
		})
	}
}

func TestPodStatusPatchCall_Merge(t *testing.T) {
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID: "uid",
		},
	}

	t.Run("Merges nominating info and condition from the old call", func(t *testing.T) {
		oldCall := NewPodStatusPatchCall(pod, &v1.PodCondition{Type: v1.PodScheduled, Status: v1.ConditionFalse},
			&framework.NominatingInfo{NominatedNodeName: "node-a", NominatingMode: framework.ModeOverride},
		)
		newCall := NewPodStatusPatchCall(pod, nil, &framework.NominatingInfo{NominatingMode: framework.ModeNoop})

		if err := newCall.Merge(oldCall); err != nil {
			t.Fatalf("Unexpected error returned by Merge(): %v", err)
		}
		if newCall.nominatingInfo.NominatedNodeName != "node-a" {
			t.Errorf("Expected NominatedNodeName to be node-a, but got: %v", newCall.nominatingInfo.NominatedNodeName)
		}
		if newCall.newCondition == nil || newCall.newCondition.Type != v1.PodScheduled {
			t.Errorf("Expected PodScheduled condition, but got: %v", newCall.newCondition)
		}
	})

	t.Run("Doesn't overwrite nominating info and condition of a new call", func(t *testing.T) {
		oldCall := NewPodStatusPatchCall(pod, nil, &framework.NominatingInfo{NominatingMode: framework.ModeNoop})
		newCall := NewPodStatusPatchCall(pod, &v1.PodCondition{Type: v1.PodScheduled, Status: v1.ConditionFalse},
			&framework.NominatingInfo{NominatedNodeName: "node-b", NominatingMode: framework.ModeOverride})

		if err := newCall.Merge(oldCall); err != nil {
			t.Fatalf("Unexpected error returned by Merge(): %v", err)
		}
		if newCall.nominatingInfo.NominatedNodeName != "node-b" {
			t.Errorf("Expected NominatedNodeName to be node-b, but got: %v", newCall.nominatingInfo.NominatedNodeName)
		}
		if newCall.newCondition == nil || newCall.newCondition.Type != v1.PodScheduled {
			t.Errorf("Expected PodScheduled condition, but got: %v", newCall.newCondition)
		}
	})
}

func TestPodStatusPatchCall_Sync(t *testing.T) {
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID: "uid",
		},
		Status: v1.PodStatus{
			NominatedNodeName: "node-a",
			Conditions: []v1.PodCondition{
				{
					Type:   v1.PodScheduled,
					Status: v1.ConditionFalse,
				},
			},
		},
	}

	t.Run("Syncs the status before execution and updates the pod", func(t *testing.T) {
		call := NewPodStatusPatchCall(pod, nil,
			&framework.NominatingInfo{NominatedNodeName: "node-c", NominatingMode: framework.ModeOverride})

		updatedPod := pod.DeepCopy()
		updatedPod.Status.NominatedNodeName = "node-b"

		syncedObj, err := call.Sync(updatedPod)
		if err != nil {
			t.Fatalf("Unexpected error returned by Sync(): %v", err)
		}
		if call.podStatus.NominatedNodeName != "node-b" {
			t.Errorf("Expected podStatus NominatedNodeName to be node-b, but got: %v", call.podStatus.NominatedNodeName)
		}
		syncedPod := syncedObj.(*v1.Pod)
		if syncedPod.Status.NominatedNodeName != "node-c" {
			t.Errorf("Expected synced pod's NominatedNodeName to be node-c, but got: %v", syncedPod.Status.NominatedNodeName)
		}
	})

	t.Run("Doesn't sync internal status during or after execution, but updates the pod", func(t *testing.T) {
		call := NewPodStatusPatchCall(pod, nil,
			&framework.NominatingInfo{NominatedNodeName: "node-c", NominatingMode: framework.ModeOverride})
		call.executed = true

		updatedPod := pod.DeepCopy()
		updatedPod.Status.NominatedNodeName = "node-b"

		syncedObj, err := call.Sync(updatedPod)
		if err != nil {
			t.Fatalf("Unexpected error returned by Sync(): %v", err)
		}
		if call.podStatus.NominatedNodeName != "node-a" {
			t.Errorf("Expected podStatus NominatedNodeName to be node-a, but got: %v", call.podStatus.NominatedNodeName)
		}
		syncedPod := syncedObj.(*v1.Pod)
		if syncedPod.Status.NominatedNodeName != "node-c" {
			t.Errorf("Expected synced pod's NominatedNodeName to be node-c, but got: %v", syncedPod.Status.NominatedNodeName)
		}
	})
}

func TestPodStatusPatchCall_Execute(t *testing.T) {
	_, ctx := ktesting.NewTestContext(t)
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID:       "uid",
			Name:      "pod",
			Namespace: "ns",
		},
		Status: v1.PodStatus{
			NominatedNodeName: "node-a",
		},
	}

	t.Run("Successful patch", func(t *testing.T) {
		client := fake.NewClientset()
		patched := false
		client.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
			patched = true
			return true, nil, nil
		})

		call := NewPodStatusPatchCall(pod, &v1.PodCondition{Type: v1.PodScheduled, Status: v1.ConditionFalse},
			&framework.NominatingInfo{NominatingMode: framework.ModeNoop})
		if err := call.Execute(ctx, client); err != nil {
			t.Fatalf("Unexpected error returned by Execute(): %v", err)
		}
		if !patched {
			t.Error("Expected patch API to be called")
		}
		if !call.executed {
			t.Error("Expected 'executed' flag to be set during execution")
		}
	})

	t.Run("Skip API call if patch is not needed", func(t *testing.T) {
		client := fake.NewClientset()
		patched := false
		client.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
			patched = true
			return true, nil, nil
		})

		noOpCall := NewPodStatusPatchCall(pod, nil,
			&framework.NominatingInfo{NominatedNodeName: "node-a", NominatingMode: framework.ModeOverride})
		if err := noOpCall.Execute(ctx, client); err != nil {
			t.Fatalf("Unexpected error returned by Execute(): %v", err)
		}
		if patched {
			t.Error("Expected patch API not to be called if the call is no-op")
		}
	})
}
@@ -579,6 +579,8 @@ type Framework interface {
	SetPodNominator(nominator PodNominator)
	// SetPodActivator sets the PodActivator
	SetPodActivator(activator PodActivator)
	// SetAPICacher sets the APICacher
	SetAPICacher(apiCacher APICacher)

	// Close calls Close method of each plugin.
	Close() error
@@ -642,6 +644,49 @@ type Handle interface {

	// Parallelizer returns a parallelizer holding parallelism for scheduler.
	Parallelizer() parallelize.Parallelizer

	// APIDispatcher returns a fwk.APIDispatcher that can be used to dispatch API calls directly.
	// This is non-nil if the SchedulerAsyncAPICalls feature gate is enabled.
	APIDispatcher() fwk.APIDispatcher

	// APICacher returns an APICacher that coordinates API calls with the scheduler's internal cache.
	// Use this to ensure the scheduler's view of the cluster remains consistent.
	// This is non-nil if the SchedulerAsyncAPICalls feature gate is enabled.
	APICacher() APICacher
}

// APICacher defines methods that send API calls through the scheduler's cache
// before they are executed asynchronously by the APIDispatcher.
// This ensures the scheduler's internal state is updated optimistically,
// reflecting the intended outcome of the call.
// These methods should be used only if the SchedulerAsyncAPICalls feature gate is enabled.
type APICacher interface {
	// PatchPodStatus sends a patch request for a Pod's status.
	// The patch could be first applied to the cached Pod object and then the API call is executed asynchronously.
	// It returns a channel that can be used to wait for the call's completion.
	PatchPodStatus(pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *NominatingInfo) (<-chan error, error)

	// BindPod sends a binding request. The binding could be first applied to the cached Pod object
	// and then the API call is executed asynchronously.
	// It returns a channel that can be used to wait for the call's completion.
	BindPod(binding *v1.Binding) (<-chan error, error)

	// WaitOnFinish blocks until the result of an API call is sent to the given onFinish channel
	// (returned by methods BindPod or PreemptPod).
	//
	// It returns the error received from the channel.
	// It also returns nil if the call was skipped or overwritten,
	// as these are considered successful lifecycle outcomes.
	// A direct read from the onFinish channel can be used to access these results.
	WaitOnFinish(ctx context.Context, onFinish <-chan error) error
}

// APICallImplementations defines constructors for each fwk.APICall that is used by the scheduler internally.
type APICallImplementations[T, K fwk.APICall] struct {
	// PodStatusPatch is a constructor used to create a fwk.APICall object for pod status patch.
	PodStatusPatch func(pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *NominatingInfo) T
	// PodBinding is a constructor used to create a fwk.APICall object for pod binding.
	PodBinding func(binding *v1.Binding) K
}

// PreFilterResult wraps needed info for scheduler framework to act upon PreFilter phase.
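// The sketch below is not part of the diff. It illustrates, under the assumption that the
// SchedulerAsyncAPICalls feature gate is enabled, how code holding a framework.Handle could
// route a binding through the APICacher declared above. The helper name bindThroughCache is
// invented for this example; assumed imports are "context", "errors",
// v1 "k8s.io/api/core/v1", and "k8s.io/kubernetes/pkg/scheduler/framework".
func bindThroughCache(ctx context.Context, h framework.Handle, binding *v1.Binding) error {
	cacher := h.APICacher()
	if cacher == nil {
		// Feature gate disabled; real callers fall back to a direct clientset Bind call.
		return errors.New("APICacher is not available")
	}
	// The binding is first applied to the cached Pod object; the API call runs asynchronously.
	onFinish, err := cacher.BindPod(binding)
	if err != nil {
		return err
	}
	// Block until the dispatcher reports the result; skipped or overwritten calls count as success.
	return cacher.WaitOnFinish(ctx, onFinish)
}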
@@ -51,11 +51,23 @@ func (b DefaultBinder) Name() string {
// Bind binds pods to nodes using the k8s client.
func (b DefaultBinder) Bind(ctx context.Context, state fwk.CycleState, p *v1.Pod, nodeName string) *fwk.Status {
	logger := klog.FromContext(ctx)
	logger.V(3).Info("Attempting to bind pod to node", "pod", klog.KObj(p), "node", klog.KRef("", nodeName))
	binding := &v1.Binding{
		ObjectMeta: metav1.ObjectMeta{Namespace: p.Namespace, Name: p.Name, UID: p.UID},
		Target:     v1.ObjectReference{Kind: "Node", Name: nodeName},
	}
	if b.handle.APICacher() != nil {
		// When API cacher is available, use it to bind the pod.
		onFinish, err := b.handle.APICacher().BindPod(binding)
		if err != nil {
			return fwk.AsStatus(err)
		}
		err = b.handle.APICacher().WaitOnFinish(ctx, onFinish)
		if err != nil {
			return fwk.AsStatus(err)
		}
		return nil
	}
	logger.V(3).Info("Attempting to bind pod to node", "pod", klog.KObj(p), "node", klog.KRef("", nodeName))
	err := b.handle.ClientSet().CoreV1().Pods(binding.Namespace).Bind(ctx, binding, metav1.CreateOptions{})
	if err != nil {
		return fwk.AsStatus(err)
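// Illustrative sketch (not part of the diff): how the asynchronous path above gets wired up.
// An APIDispatcher is started against a client, the scheduler cache is built on top of it, and
// the resulting APICache is injected into the framework, which makes APICacher() non-nil in
// Bind. The helper name newHandleWithAPICacher, the fake client, and the queue size of 16
// mirror the test setup in this PR and are assumptions of this sketch.
func newHandleWithAPICacher(ctx context.Context, t *testing.T, logger klog.Logger) framework.Framework {
	client := fake.NewClientset()
	apiDispatcher := apidispatcher.New(client, 16, apicalls.Relevances)
	apiDispatcher.Run(logger)
	t.Cleanup(func() { apiDispatcher.Close() })

	fh, err := frameworkruntime.NewFramework(ctx, nil, nil,
		frameworkruntime.WithClientSet(client),
		frameworkruntime.WithAPIDispatcher(apiDispatcher))
	if err != nil {
		t.Fatal(err)
	}
	cache := internalcache.New(ctx, time.Duration(0), apiDispatcher)
	fh.SetAPICacher(apicache.New(nil, cache))
	return fh
}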
@@ -19,7 +19,9 @@ package defaultbinder
import (
	"context"
	"errors"
	"fmt"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"

@@ -29,10 +31,19 @@ import (
	"k8s.io/client-go/kubernetes/fake"
	clienttesting "k8s.io/client-go/testing"
	"k8s.io/klog/v2/ktesting"
	"k8s.io/kubernetes/pkg/scheduler/backend/api_cache"
	"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
	internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
	"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
	frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
	"k8s.io/kubernetes/pkg/scheduler/metrics"
	st "k8s.io/kubernetes/pkg/scheduler/testing"
)

func init() {
	metrics.Register()
}

func TestDefaultBinder(t *testing.T) {
	testPod := st.MakePod().Name("foo").Namespace("ns").Obj()
	testNode := "foohost.kubernetes.mydomain.com"
@@ -52,37 +63,51 @@ func TestDefaultBinder(t *testing.T) {
|
||||
injectErr: errors.New("binding error"),
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
_, ctx := ktesting.NewTestContext(t)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
for _, asyncAPICallsEnabled := range []bool{true, false} {
|
||||
for _, tt := range tests {
|
||||
t.Run(fmt.Sprintf("%s (Async API calls enabled: %v)", tt.name, asyncAPICallsEnabled), func(t *testing.T) {
|
||||
logger, ctx := ktesting.NewTestContext(t)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
var gotBinding *v1.Binding
|
||||
client := fake.NewClientset(testPod)
|
||||
client.PrependReactor("create", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
|
||||
if action.GetSubresource() != "binding" {
|
||||
return false, nil, nil
|
||||
var gotBinding *v1.Binding
|
||||
client := fake.NewClientset(testPod)
|
||||
client.PrependReactor("create", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
|
||||
if action.GetSubresource() != "binding" {
|
||||
return false, nil, nil
|
||||
}
|
||||
if tt.injectErr != nil {
|
||||
return true, nil, tt.injectErr
|
||||
}
|
||||
gotBinding = action.(clienttesting.CreateAction).GetObject().(*v1.Binding)
|
||||
return true, gotBinding, nil
|
||||
})
|
||||
|
||||
var apiDispatcher *apidispatcher.APIDispatcher
|
||||
if asyncAPICallsEnabled {
|
||||
apiDispatcher = apidispatcher.New(client, 16, apicalls.Relevances)
|
||||
apiDispatcher.Run(logger)
|
||||
defer apiDispatcher.Close()
|
||||
}
|
||||
if tt.injectErr != nil {
|
||||
return true, nil, tt.injectErr
|
||||
|
||||
fh, err := frameworkruntime.NewFramework(ctx, nil, nil, frameworkruntime.WithClientSet(client), frameworkruntime.WithAPIDispatcher(apiDispatcher))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if asyncAPICallsEnabled {
|
||||
cache := internalcache.New(ctx, time.Duration(0), apiDispatcher)
|
||||
fh.SetAPICacher(apicache.New(nil, cache))
|
||||
}
|
||||
|
||||
binder := &DefaultBinder{handle: fh}
|
||||
status := binder.Bind(ctx, nil, testPod, testNode)
|
||||
if got := status.AsError(); (tt.injectErr != nil) != (got != nil) {
|
||||
t.Errorf("got error %q, want %q", got, tt.injectErr)
|
||||
}
|
||||
if diff := cmp.Diff(tt.wantBinding, gotBinding); diff != "" {
|
||||
t.Errorf("got different binding (-want, +got): %s", diff)
|
||||
}
|
||||
gotBinding = action.(clienttesting.CreateAction).GetObject().(*v1.Binding)
|
||||
return true, gotBinding, nil
|
||||
})
|
||||
|
||||
fh, err := frameworkruntime.NewFramework(ctx, nil, nil, frameworkruntime.WithClientSet(client))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
binder := &DefaultBinder{handle: fh}
|
||||
status := binder.Bind(ctx, nil, testPod, testNode)
|
||||
if got := status.AsError(); (tt.injectErr != nil) != (got != nil) {
|
||||
t.Errorf("got error %q, want %q", got, tt.injectErr)
|
||||
}
|
||||
if diff := cmp.Diff(tt.wantBinding, gotBinding); diff != "" {
|
||||
t.Errorf("got different binding (-want, +got): %s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,9 +52,12 @@ import (
	apipod "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/scheduler/apis/config"
	configv1 "k8s.io/kubernetes/pkg/scheduler/apis/config/v1"
	"k8s.io/kubernetes/pkg/scheduler/backend/api_cache"
	"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
	internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
	internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
	"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder"
	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
@@ -390,80 +393,99 @@ func TestPostFilter(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// index the potential victim pods in the fake client so that the victims deletion logic does not fail
|
||||
podItems := []v1.Pod{}
|
||||
for _, pod := range tt.pods {
|
||||
podItems = append(podItems, *pod)
|
||||
}
|
||||
cs := clientsetfake.NewClientset(&v1.PodList{Items: podItems})
|
||||
informerFactory := informers.NewSharedInformerFactory(cs, 0)
|
||||
podInformer := informerFactory.Core().V1().Pods().Informer()
|
||||
podInformer.GetStore().Add(tt.pod)
|
||||
for i := range tt.pods {
|
||||
podInformer.GetStore().Add(tt.pods[i])
|
||||
}
|
||||
pdbInformer := informerFactory.Policy().V1().PodDisruptionBudgets().Informer()
|
||||
for i := range tt.pdbs {
|
||||
if err := pdbInformer.GetStore().Add(tt.pdbs[i]); err != nil {
|
||||
for _, asyncAPICallsEnabled := range []bool{true, false} {
|
||||
for _, tt := range tests {
|
||||
t.Run(fmt.Sprintf("%s (Async API calls enabled: %v)", tt.name, asyncAPICallsEnabled), func(t *testing.T) {
|
||||
// index the potential victim pods in the fake client so that the victims deletion logic does not fail
|
||||
podItems := []v1.Pod{}
|
||||
for _, pod := range tt.pods {
|
||||
podItems = append(podItems, *pod)
|
||||
}
|
||||
cs := clientsetfake.NewClientset(&v1.PodList{Items: podItems})
|
||||
informerFactory := informers.NewSharedInformerFactory(cs, 0)
|
||||
podInformer := informerFactory.Core().V1().Pods().Informer()
|
||||
if err := podInformer.GetStore().Add(tt.pod); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Register NodeResourceFit as the Filter & PreFilter plugin.
|
||||
registeredPlugins := []tf.RegisterPluginFunc{
|
||||
tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),
|
||||
tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"),
|
||||
tf.RegisterPluginAsExtensions("test-plugin", newTestPlugin, "PreFilter"),
|
||||
tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New),
|
||||
}
|
||||
var extenders []framework.Extender
|
||||
if tt.extender != nil {
|
||||
extenders = append(extenders, tt.extender)
|
||||
}
|
||||
logger, ctx := ktesting.NewTestContext(t)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
f, err := tf.NewFramework(ctx, registeredPlugins, "",
|
||||
frameworkruntime.WithClientSet(cs),
|
||||
frameworkruntime.WithEventRecorder(&events.FakeRecorder{}),
|
||||
frameworkruntime.WithInformerFactory(informerFactory),
|
||||
frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)),
|
||||
frameworkruntime.WithExtenders(extenders),
|
||||
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(tt.pods, tt.nodes)),
|
||||
frameworkruntime.WithLogger(logger),
|
||||
frameworkruntime.WithWaitingPods(frameworkruntime.NewWaitingPodsMap()),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
p, err := New(ctx, getDefaultDefaultPreemptionArgs(), f, feature.Features{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
state := framework.NewCycleState()
|
||||
// Ensure <state> is populated.
|
||||
if _, status, _ := f.RunPreFilterPlugins(ctx, state, tt.pod); !status.IsSuccess() {
|
||||
t.Errorf("Unexpected PreFilter Status: %v", status)
|
||||
}
|
||||
|
||||
gotResult, gotStatus := p.PostFilter(ctx, state, tt.pod, tt.filteredNodesStatuses)
|
||||
// As we cannot compare two errors directly due to miss the equal method for how to compare two errors, so just need to compare the reasons.
|
||||
if gotStatus.Code() == fwk.Error {
|
||||
if diff := cmp.Diff(tt.wantStatus.Reasons(), gotStatus.Reasons()); diff != "" {
|
||||
t.Errorf("Unexpected status (-want, +got):\n%s", diff)
|
||||
for i := range tt.pods {
|
||||
if err := podInformer.GetStore().Add(tt.pods[i]); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if diff := cmp.Diff(tt.wantStatus, gotStatus); diff != "" {
|
||||
t.Errorf("Unexpected status (-want, +got):\n%s", diff)
|
||||
pdbInformer := informerFactory.Policy().V1().PodDisruptionBudgets().Informer()
|
||||
for i := range tt.pdbs {
|
||||
if err := pdbInformer.GetStore().Add(tt.pdbs[i]); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
if diff := cmp.Diff(tt.wantResult, gotResult); diff != "" {
|
||||
t.Errorf("Unexpected postFilterResult (-want, +got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
|
||||
// Register NodeResourceFit as the Filter & PreFilter plugin.
|
||||
registeredPlugins := []tf.RegisterPluginFunc{
|
||||
tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),
|
||||
tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"),
|
||||
tf.RegisterPluginAsExtensions("test-plugin", newTestPlugin, "PreFilter"),
|
||||
tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New),
|
||||
}
|
||||
var extenders []framework.Extender
|
||||
if tt.extender != nil {
|
||||
extenders = append(extenders, tt.extender)
|
||||
}
|
||||
logger, ctx := ktesting.NewTestContext(t)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
var apiDispatcher *apidispatcher.APIDispatcher
|
||||
if asyncAPICallsEnabled {
|
||||
apiDispatcher = apidispatcher.New(cs, 16, apicalls.Relevances)
|
||||
apiDispatcher.Run(logger)
|
||||
defer apiDispatcher.Close()
|
||||
}
|
||||
|
||||
f, err := tf.NewFramework(ctx, registeredPlugins, "",
|
||||
frameworkruntime.WithClientSet(cs),
|
||||
frameworkruntime.WithAPIDispatcher(apiDispatcher),
|
||||
frameworkruntime.WithEventRecorder(&events.FakeRecorder{}),
|
||||
frameworkruntime.WithInformerFactory(informerFactory),
|
||||
frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)),
|
||||
frameworkruntime.WithExtenders(extenders),
|
||||
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(tt.pods, tt.nodes)),
|
||||
frameworkruntime.WithLogger(logger),
|
||||
frameworkruntime.WithWaitingPods(frameworkruntime.NewWaitingPodsMap()),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if asyncAPICallsEnabled {
|
||||
cache := internalcache.New(ctx, 100*time.Millisecond, apiDispatcher)
|
||||
f.SetAPICacher(apicache.New(nil, cache))
|
||||
}
|
||||
|
||||
p, err := New(ctx, getDefaultDefaultPreemptionArgs(), f, feature.Features{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
state := framework.NewCycleState()
|
||||
// Ensure <state> is populated.
|
||||
if _, status, _ := f.RunPreFilterPlugins(ctx, state, tt.pod); !status.IsSuccess() {
|
||||
t.Errorf("Unexpected PreFilter Status: %v", status)
|
||||
}
|
||||
|
||||
gotResult, gotStatus := p.PostFilter(ctx, state, tt.pod, tt.filteredNodesStatuses)
|
||||
// As we cannot compare two errors directly due to miss the equal method for how to compare two errors, so just need to compare the reasons.
|
||||
if gotStatus.Code() == fwk.Error {
|
||||
if diff := cmp.Diff(tt.wantStatus.Reasons(), gotStatus.Reasons()); diff != "" {
|
||||
t.Errorf("Unexpected status (-want, +got):\n%s", diff)
|
||||
}
|
||||
} else {
|
||||
if diff := cmp.Diff(tt.wantStatus, gotStatus); diff != "" {
|
||||
t.Errorf("Unexpected status (-want, +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
if diff := cmp.Diff(tt.wantResult, gotResult); diff != "" {
|
||||
t.Errorf("Unexpected postFilterResult (-want, +got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2075,228 +2097,241 @@ func TestPreempt(t *testing.T) {
|
||||
|
||||
labelKeys := []string{"hostname", "zone", "region"}
|
||||
for _, asyncPreemptionEnabled := range []bool{true, false} {
|
||||
for _, test := range tests {
|
||||
t.Run(fmt.Sprintf("%s (Async preemption enabled: %v)", test.name, asyncPreemptionEnabled), func(t *testing.T) {
|
||||
client := clientsetfake.NewClientset()
|
||||
informerFactory := informers.NewSharedInformerFactory(client, 0)
|
||||
podInformer := informerFactory.Core().V1().Pods().Informer()
|
||||
testPod := test.pod.DeepCopy()
|
||||
testPods := make([]*v1.Pod, len(test.pods))
|
||||
for i := range test.pods {
|
||||
testPods[i] = test.pods[i].DeepCopy()
|
||||
}
|
||||
|
||||
if err := podInformer.GetStore().Add(testPod); err != nil {
|
||||
t.Fatalf("Failed to add test pod %s: %v", testPod.Name, err)
|
||||
}
|
||||
for i := range testPods {
|
||||
if err := podInformer.GetStore().Add(testPods[i]); err != nil {
|
||||
t.Fatalf("Failed to add test pod %s: %v", testPods[i], err)
|
||||
}
|
||||
}
|
||||
|
||||
// Need to protect deletedPodNames and patchedPodNames to prevent DATA RACE panic.
|
||||
var mu sync.RWMutex
|
||||
deletedPodNames := sets.New[string]()
|
||||
patchedPodNames := sets.New[string]()
|
||||
patchedPods := []*v1.Pod{}
|
||||
client.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
|
||||
patchAction := action.(clienttesting.PatchAction)
|
||||
podName := patchAction.GetName()
|
||||
namespace := patchAction.GetNamespace()
|
||||
patch := patchAction.GetPatch()
|
||||
pod, err := informerFactory.Core().V1().Pods().Lister().Pods(namespace).Get(podName)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to get the original pod %s/%s before patching: %v\n", namespace, podName, err)
|
||||
}
|
||||
marshalledPod, err := json.Marshal(pod)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal the original pod %s/%s: %v", namespace, podName, err)
|
||||
}
|
||||
updated, err := strategicpatch.StrategicMergePatch(marshalledPod, patch, v1.Pod{})
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to apply strategic merge patch %q on pod %#v: %v", patch, marshalledPod, err)
|
||||
}
|
||||
updatedPod := &v1.Pod{}
|
||||
if err := json.Unmarshal(updated, updatedPod); err != nil {
|
||||
t.Fatalf("Failed to unmarshal updated pod %q: %v", updated, err)
|
||||
}
|
||||
patchedPods = append(patchedPods, updatedPod)
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
patchedPodNames.Insert(podName)
|
||||
return true, nil, nil
|
||||
})
|
||||
client.PrependReactor("delete", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
deletedPodNames.Insert(action.(clienttesting.DeleteAction).GetName())
|
||||
return true, nil, nil
|
||||
})
|
||||
|
||||
logger, ctx := ktesting.NewTestContext(t)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
waitingPods := frameworkruntime.NewWaitingPodsMap()
|
||||
|
||||
cache := internalcache.New(ctx, time.Duration(0))
|
||||
for _, pod := range testPods {
|
||||
if err := cache.AddPod(logger, pod.DeepCopy()); err != nil {
|
||||
t.Fatalf("Failed to add pod %s: %v", pod.Name, err)
|
||||
}
|
||||
}
|
||||
cachedNodeInfoMap := map[string]*framework.NodeInfo{}
|
||||
nodes := make([]*v1.Node, len(test.nodeNames))
|
||||
for i, name := range test.nodeNames {
|
||||
node := st.MakeNode().Name(name).Capacity(veryLargeRes).Obj()
|
||||
// Split node name by '/' to form labels in a format of
|
||||
// {"hostname": node.Name[0], "zone": node.Name[1], "region": node.Name[2]}
|
||||
node.ObjectMeta.Labels = make(map[string]string)
|
||||
for i, label := range strings.Split(node.Name, "/") {
|
||||
node.ObjectMeta.Labels[labelKeys[i]] = label
|
||||
}
|
||||
node.Name = node.ObjectMeta.Labels["hostname"]
|
||||
t.Logf("node is added: %v. labels: %#v", node.Name, node.ObjectMeta.Labels)
|
||||
cache.AddNode(logger, node)
|
||||
nodes[i] = node
|
||||
|
||||
// Set nodeInfo to extenders to mock extenders' cache for preemption.
|
||||
cachedNodeInfo := framework.NewNodeInfo()
|
||||
cachedNodeInfo.SetNode(node)
|
||||
cachedNodeInfoMap[node.Name] = cachedNodeInfo
|
||||
}
|
||||
var extenders []framework.Extender
|
||||
for _, extender := range test.extenders {
|
||||
// Set nodeInfoMap as extenders cached node information.
|
||||
extender.CachedNodeNameToInfo = cachedNodeInfoMap
|
||||
extenders = append(extenders, extender)
|
||||
}
|
||||
schedFramework, err := tf.NewFramework(
|
||||
ctx,
|
||||
[]tf.RegisterPluginFunc{
|
||||
test.registerPlugin,
|
||||
tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),
|
||||
tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New),
|
||||
},
|
||||
"",
|
||||
frameworkruntime.WithClientSet(client),
|
||||
frameworkruntime.WithEventRecorder(&events.FakeRecorder{}),
|
||||
frameworkruntime.WithExtenders(extenders),
|
||||
frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)),
|
||||
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(testPods, nodes)),
|
||||
frameworkruntime.WithInformerFactory(informerFactory),
|
||||
frameworkruntime.WithWaitingPods(waitingPods),
|
||||
frameworkruntime.WithLogger(logger),
|
||||
frameworkruntime.WithPodActivator(&fakePodActivator{}),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
state := framework.NewCycleState()
|
||||
// Some tests rely on PreFilter plugin to compute its CycleState.
|
||||
if _, s, _ := schedFramework.RunPreFilterPlugins(ctx, state, testPod); !s.IsSuccess() {
|
||||
t.Errorf("Unexpected preFilterStatus: %v", s)
|
||||
}
|
||||
// Call preempt and check the expected results.
|
||||
features := feature.Features{
|
||||
EnableAsyncPreemption: asyncPreemptionEnabled,
|
||||
}
|
||||
pl, err := New(ctx, getDefaultDefaultPreemptionArgs(), schedFramework, features)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// so that these nodes are eligible for preemption, we set their status
|
||||
// to Unschedulable.
|
||||
|
||||
nodeToStatusMap := framework.NewDefaultNodeToStatus()
|
||||
for _, n := range nodes {
|
||||
nodeToStatusMap.Set(n.Name, fwk.NewStatus(fwk.Unschedulable))
|
||||
}
|
||||
|
||||
res, status := pl.Evaluator.Preempt(ctx, state, testPod, nodeToStatusMap)
|
||||
if !status.IsSuccess() && !status.IsRejected() {
|
||||
t.Errorf("unexpected error in preemption: %v", status.AsError())
|
||||
}
|
||||
if diff := cmp.Diff(test.want, res); diff != "" {
|
||||
t.Errorf("Unexpected status (-want, +got):\n%s", diff)
|
||||
}
|
||||
|
||||
if asyncPreemptionEnabled {
|
||||
// Wait for the pod to be deleted.
|
||||
if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
|
||||
mu.RLock()
|
||||
defer mu.RUnlock()
|
||||
return len(deletedPodNames) == len(test.expectedPods), nil
|
||||
}); err != nil {
|
||||
t.Errorf("expected %v pods to be deleted, got %v.", len(test.expectedPods), len(deletedPodNames))
|
||||
}
|
||||
} else {
|
||||
mu.RLock()
|
||||
// If async preemption is disabled, the pod should be deleted immediately.
|
||||
if len(deletedPodNames) != len(test.expectedPods) {
|
||||
t.Errorf("expected %v pods to be deleted, got %v.", len(test.expectedPods), len(deletedPodNames))
|
||||
}
|
||||
mu.RUnlock()
|
||||
}
|
||||
|
||||
mu.RLock()
|
||||
if diff := cmp.Diff(sets.List(patchedPodNames), sets.List(deletedPodNames)); diff != "" {
|
||||
t.Errorf("unexpected difference in the set of patched and deleted pods: %s", diff)
|
||||
}
|
||||
|
||||
// Make sure that the DisruptionTarget condition has been added to the pod status
|
||||
for _, patchedPod := range patchedPods {
|
||||
expectedPodCondition := &v1.PodCondition{
|
||||
Type: v1.DisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: v1.PodReasonPreemptionByScheduler,
|
||||
Message: fmt.Sprintf("%s: preempting to accommodate a higher priority pod", patchedPod.Spec.SchedulerName),
|
||||
for _, asyncAPICallsEnabled := range []bool{true, false} {
|
||||
for _, test := range tests {
|
||||
t.Run(fmt.Sprintf("%s (Async preemption enabled: %v, Async API calls enabled: %v)", test.name, asyncPreemptionEnabled, asyncAPICallsEnabled), func(t *testing.T) {
|
||||
client := clientsetfake.NewClientset()
|
||||
informerFactory := informers.NewSharedInformerFactory(client, 0)
|
||||
podInformer := informerFactory.Core().V1().Pods().Informer()
|
||||
testPod := test.pod.DeepCopy()
|
||||
testPods := make([]*v1.Pod, len(test.pods))
|
||||
for i := range test.pods {
|
||||
testPods[i] = test.pods[i].DeepCopy()
|
||||
}
|
||||
|
||||
_, condition := apipod.GetPodCondition(&patchedPod.Status, v1.DisruptionTarget)
|
||||
if diff := cmp.Diff(condition, expectedPodCondition, cmpopts.IgnoreFields(v1.PodCondition{}, "LastTransitionTime")); diff != "" {
|
||||
t.Fatalf("unexpected difference in the pod %q DisruptionTarget condition: %s", patchedPod.Name, diff)
|
||||
if err := podInformer.GetStore().Add(testPod); err != nil {
|
||||
t.Fatalf("Failed to add test pod %s: %v", testPod.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
for victimName := range deletedPodNames {
|
||||
found := false
|
||||
for _, expPod := range test.expectedPods {
|
||||
if expPod == victimName {
|
||||
found = true
|
||||
break
|
||||
for i := range testPods {
|
||||
if err := podInformer.GetStore().Add(testPods[i]); err != nil {
|
||||
t.Fatalf("Failed to add test pod %s: %v", testPods[i], err)
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("pod %v is not expected to be a victim.", victimName)
|
||||
}
|
||||
}
|
||||
if res != nil && res.NominatingInfo != nil {
|
||||
testPod.Status.NominatedNodeName = res.NominatedNodeName
|
||||
}
|
||||
|
||||
// Manually set the deleted Pods' deletionTimestamp to non-nil.
|
||||
for _, pod := range testPods {
|
||||
if deletedPodNames.Has(pod.Name) {
|
||||
now := metav1.Now()
|
||||
pod.DeletionTimestamp = &now
|
||||
deletedPodNames.Delete(pod.Name)
|
||||
}
|
||||
}
|
||||
mu.RUnlock()
|
||||
// Need to protect deletedPodNames and patchedPodNames to prevent DATA RACE panic.
|
||||
var mu sync.RWMutex
|
||||
deletedPodNames := sets.New[string]()
|
||||
patchedPodNames := sets.New[string]()
|
||||
patchedPods := []*v1.Pod{}
|
||||
client.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
|
||||
patchAction := action.(clienttesting.PatchAction)
|
||||
podName := patchAction.GetName()
|
||||
namespace := patchAction.GetNamespace()
|
||||
patch := patchAction.GetPatch()
|
||||
pod, err := informerFactory.Core().V1().Pods().Lister().Pods(namespace).Get(podName)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to get the original pod %s/%s before patching: %v\n", namespace, podName, err)
|
||||
}
|
||||
marshalledPod, err := json.Marshal(pod)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal the original pod %s/%s: %v", namespace, podName, err)
|
||||
}
|
||||
updated, err := strategicpatch.StrategicMergePatch(marshalledPod, patch, v1.Pod{})
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to apply strategic merge patch %q on pod %#v: %v", patch, marshalledPod, err)
|
||||
}
|
||||
updatedPod := &v1.Pod{}
|
||||
if err := json.Unmarshal(updated, updatedPod); err != nil {
|
||||
t.Fatalf("Failed to unmarshal updated pod %q: %v", updated, err)
|
||||
}
|
||||
patchedPods = append(patchedPods, updatedPod)
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
patchedPodNames.Insert(podName)
|
||||
return true, nil, nil
|
||||
})
|
||||
client.PrependReactor("delete", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
deletedPodNames.Insert(action.(clienttesting.DeleteAction).GetName())
|
||||
return true, nil, nil
|
||||
})
|
||||
|
||||
// Call preempt again and make sure it doesn't preempt any more pods.
|
||||
res, status = pl.Evaluator.Preempt(ctx, state, testPod, framework.NewDefaultNodeToStatus())
|
||||
if !status.IsSuccess() && !status.IsRejected() {
|
||||
t.Errorf("unexpected error in preemption: %v", status.AsError())
|
||||
}
|
||||
if res != nil && res.NominatingInfo != nil && len(deletedPodNames) > 0 {
|
||||
t.Errorf("didn't expect any more preemption. Node %v is selected for preemption.", res.NominatedNodeName)
|
||||
}
|
||||
})
|
||||
logger, ctx := ktesting.NewTestContext(t)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
waitingPods := frameworkruntime.NewWaitingPodsMap()
|
||||
|
||||
var apiDispatcher *apidispatcher.APIDispatcher
|
||||
if asyncAPICallsEnabled {
|
||||
apiDispatcher = apidispatcher.New(client, 16, apicalls.Relevances)
|
||||
apiDispatcher.Run(logger)
|
||||
defer apiDispatcher.Close()
|
||||
}
|
||||
|
||||
cache := internalcache.New(ctx, time.Duration(0), apiDispatcher)
|
||||
for _, pod := range testPods {
|
||||
if err := cache.AddPod(logger, pod.DeepCopy()); err != nil {
|
||||
t.Fatalf("Failed to add pod %s: %v", pod.Name, err)
|
||||
}
|
||||
}
|
||||
cachedNodeInfoMap := map[string]*framework.NodeInfo{}
|
||||
nodes := make([]*v1.Node, len(test.nodeNames))
|
||||
for i, name := range test.nodeNames {
|
||||
node := st.MakeNode().Name(name).Capacity(veryLargeRes).Obj()
|
||||
// Split node name by '/' to form labels in a format of
|
||||
// {"hostname": node.Name[0], "zone": node.Name[1], "region": node.Name[2]}
|
||||
node.Labels = make(map[string]string)
|
||||
for i, label := range strings.Split(node.Name, "/") {
|
||||
node.Labels[labelKeys[i]] = label
|
||||
}
|
||||
node.Name = node.Labels["hostname"]
|
||||
t.Logf("node is added: %v. labels: %#v", node.Name, node.Labels)
|
||||
cache.AddNode(logger, node)
|
||||
nodes[i] = node
|
||||
|
||||
// Set nodeInfo to extenders to mock extenders' cache for preemption.
|
||||
cachedNodeInfo := framework.NewNodeInfo()
|
||||
cachedNodeInfo.SetNode(node)
|
||||
cachedNodeInfoMap[node.Name] = cachedNodeInfo
|
||||
}
|
||||
var extenders []framework.Extender
|
||||
for _, extender := range test.extenders {
|
||||
// Set nodeInfoMap as extenders cached node information.
|
||||
extender.CachedNodeNameToInfo = cachedNodeInfoMap
|
||||
extenders = append(extenders, extender)
|
||||
}
|
||||
schedFramework, err := tf.NewFramework(
|
||||
ctx,
|
||||
[]tf.RegisterPluginFunc{
|
||||
test.registerPlugin,
|
||||
tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),
|
||||
tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New),
|
||||
},
|
||||
"",
|
||||
frameworkruntime.WithClientSet(client),
|
||||
frameworkruntime.WithAPIDispatcher(apiDispatcher),
|
||||
frameworkruntime.WithEventRecorder(&events.FakeRecorder{}),
|
||||
frameworkruntime.WithExtenders(extenders),
|
||||
frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)),
|
||||
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(testPods, nodes)),
|
||||
frameworkruntime.WithInformerFactory(informerFactory),
|
||||
frameworkruntime.WithWaitingPods(waitingPods),
|
||||
frameworkruntime.WithLogger(logger),
|
||||
frameworkruntime.WithPodActivator(&fakePodActivator{}),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if asyncAPICallsEnabled {
|
||||
schedFramework.SetAPICacher(apicache.New(nil, cache))
|
||||
}
|
||||
|
||||
state := framework.NewCycleState()
|
||||
// Some tests rely on PreFilter plugin to compute its CycleState.
|
||||
if _, s, _ := schedFramework.RunPreFilterPlugins(ctx, state, testPod); !s.IsSuccess() {
|
||||
t.Errorf("Unexpected preFilterStatus: %v", s)
|
||||
}
|
||||
// Call preempt and check the expected results.
|
||||
features := feature.Features{
|
||||
EnableAsyncPreemption: asyncPreemptionEnabled,
|
||||
}
|
||||
pl, err := New(ctx, getDefaultDefaultPreemptionArgs(), schedFramework, features)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// so that these nodes are eligible for preemption, we set their status
|
||||
// to Unschedulable.
|
||||
|
||||
nodeToStatusMap := framework.NewDefaultNodeToStatus()
|
||||
for _, n := range nodes {
|
||||
nodeToStatusMap.Set(n.Name, fwk.NewStatus(fwk.Unschedulable))
|
||||
}
|
||||
|
||||
res, status := pl.Evaluator.Preempt(ctx, state, testPod, nodeToStatusMap)
|
||||
if !status.IsSuccess() && !status.IsRejected() {
|
||||
t.Errorf("unexpected error in preemption: %v", status.AsError())
|
||||
}
|
||||
if diff := cmp.Diff(test.want, res); diff != "" {
|
||||
t.Errorf("Unexpected status (-want, +got):\n%s", diff)
|
||||
}
|
||||
|
||||
if asyncPreemptionEnabled {
|
||||
// Wait for the pod to be deleted.
|
||||
if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
|
||||
mu.RLock()
|
||||
defer mu.RUnlock()
|
||||
return len(deletedPodNames) == len(test.expectedPods), nil
|
||||
}); err != nil {
|
||||
t.Errorf("expected %v pods to be deleted, got %v.", len(test.expectedPods), len(deletedPodNames))
|
||||
}
|
||||
} else {
|
||||
mu.RLock()
|
||||
// If async preemption is disabled, the pod should be deleted immediately.
|
||||
if len(deletedPodNames) != len(test.expectedPods) {
|
||||
t.Errorf("expected %v pods to be deleted, got %v.", len(test.expectedPods), len(deletedPodNames))
|
||||
}
|
||||
mu.RUnlock()
|
||||
}
|
||||
|
||||
mu.RLock()
|
||||
if diff := cmp.Diff(sets.List(patchedPodNames), sets.List(deletedPodNames)); diff != "" {
|
||||
t.Errorf("unexpected difference in the set of patched and deleted pods: %s", diff)
|
||||
}
|
||||
|
||||
// Make sure that the DisruptionTarget condition has been added to the pod status
|
||||
for _, patchedPod := range patchedPods {
|
||||
expectedPodCondition := &v1.PodCondition{
|
||||
Type: v1.DisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: v1.PodReasonPreemptionByScheduler,
|
||||
Message: fmt.Sprintf("%s: preempting to accommodate a higher priority pod", patchedPod.Spec.SchedulerName),
|
||||
}
|
||||
|
||||
_, condition := apipod.GetPodCondition(&patchedPod.Status, v1.DisruptionTarget)
|
||||
if diff := cmp.Diff(condition, expectedPodCondition, cmpopts.IgnoreFields(v1.PodCondition{}, "LastTransitionTime")); diff != "" {
|
||||
t.Fatalf("unexpected difference in the pod %q DisruptionTarget condition: %s", patchedPod.Name, diff)
|
||||
}
|
||||
}
|
||||
|
||||
for victimName := range deletedPodNames {
|
||||
found := false
|
||||
for _, expPod := range test.expectedPods {
|
||||
if expPod == victimName {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("pod %v is not expected to be a victim.", victimName)
|
||||
}
|
||||
}
|
||||
if res != nil && res.NominatingInfo != nil {
|
||||
testPod.Status.NominatedNodeName = res.NominatedNodeName
|
||||
}
|
||||
|
||||
// Manually set the deleted Pods' deletionTimestamp to non-nil.
|
||||
for _, pod := range testPods {
|
||||
if deletedPodNames.Has(pod.Name) {
|
||||
now := metav1.Now()
|
||||
pod.DeletionTimestamp = &now
|
||||
deletedPodNames.Delete(pod.Name)
|
||||
}
|
||||
}
|
||||
mu.RUnlock()
|
||||
|
||||
// Call preempt again and make sure it doesn't preempt any more pods.
|
||||
res, status = pl.Evaluator.Preempt(ctx, state, testPod, framework.NewDefaultNodeToStatus())
|
||||
if !status.IsSuccess() && !status.IsRejected() {
|
||||
t.Errorf("unexpected error in preemption: %v", status.AsError())
|
||||
}
|
||||
if res != nil && res.NominatingInfo != nil && len(deletedPodNames) > 0 {
|
||||
t.Errorf("didn't expect any more preemption. Node %v is selected for preemption.", res.NominatedNodeName)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,6 +33,7 @@ import (
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/sets"
	clientset "k8s.io/client-go/kubernetes"
	corelisters "k8s.io/client-go/listers/core/v1"
	policylisters "k8s.io/client-go/listers/policy/v1"
	corev1helpers "k8s.io/component-helpers/scheduling/corev1"
@@ -182,7 +183,7 @@ func NewEvaluator(pluginName string, fh framework.Handle, i Interface, enableAsy
		newStatus := victim.Status.DeepCopy()
		updated := apipod.UpdatePodCondition(newStatus, condition)
		if updated {
			if err := util.PatchPodStatus(ctx, ev.Handler.ClientSet(), victim, newStatus); err != nil {
			if err := util.PatchPodStatus(ctx, ev.Handler.ClientSet(), victim.Name, victim.Namespace, &victim.Status, newStatus); err != nil {
				logger.Error(err, "Could not add DisruptionTarget condition due to preemption", "pod", klog.KObj(victim), "preemptor", klog.KObj(preemptor))
				return err
			}
@@ -450,7 +451,7 @@ func (ev *Evaluator) prepareCandidate(ctx context.Context, c Candidate, pod *v1.
	// nomination updates these pods and moves them to the active queue. It
	// lets scheduler find another place for them.
	nominatedPods := getLowerPriorityNominatedPods(logger, fh, pod, c.Name())
	if err := util.ClearNominatedNodeName(ctx, cs, nominatedPods...); err != nil {
	if err := clearNominatedNodeName(ctx, cs, ev.Handler.APICacher(), nominatedPods...); err != nil {
		utilruntime.HandleErrorWithContext(ctx, err, "Cannot clear 'NominatedNodeName' field")
		// We do not return as this error is not critical.
	}
@@ -458,6 +459,31 @@ func (ev *Evaluator) prepareCandidate(ctx context.Context, c Candidate, pod *v1.
	return nil
}

// clearNominatedNodeName internally submits a patch request to the API server
// to set each pods[*].Status.NominatedNodeName to "".
func clearNominatedNodeName(ctx context.Context, cs clientset.Interface, apiCacher framework.APICacher, pods ...*v1.Pod) utilerrors.Aggregate {
	var errs []error
	for _, p := range pods {
		if apiCacher != nil {
			// When API cacher is available, use it to clear the NominatedNodeName.
			_, err := apiCacher.PatchPodStatus(p, nil, &framework.NominatingInfo{NominatedNodeName: "", NominatingMode: framework.ModeOverride})
			if err != nil {
				errs = append(errs, err)
			}
		} else {
			if len(p.Status.NominatedNodeName) == 0 {
				continue
			}
			podStatusCopy := p.Status.DeepCopy()
			podStatusCopy.NominatedNodeName = ""
			if err := util.PatchPodStatus(ctx, cs, p.Name, p.Namespace, &p.Status, podStatusCopy); err != nil {
				errs = append(errs, err)
			}
		}
	}
	return utilerrors.NewAggregate(errs)
}
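// Illustrative sketch (not part of the diff): WaitOnFinish reports skipped and overwritten
// calls as nil, so a caller that needs to inspect those lifecycle outcomes can read the
// onFinish channel directly, mirroring what the APICache helper does internally. The function
// name clearNominationAndInspect is invented for this example.
func clearNominationAndInspect(ctx context.Context, apiCacher framework.APICacher, pod *v1.Pod) error {
	onFinish, err := apiCacher.PatchPodStatus(pod, nil,
		&framework.NominatingInfo{NominatedNodeName: "", NominatingMode: framework.ModeOverride})
	if err != nil {
		return err
	}
	select {
	case callErr := <-onFinish:
		// fwk.IsUnexpectedError filters out the skipped and overwritten results of the call.
		if fwk.IsUnexpectedError(callErr) {
			return callErr
		}
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}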
// prepareCandidateAsync triggers a goroutine for some preparation work:
// - Evict the victim pods
// - Reject the victim pods if they are in waitingPod map
@@ -504,7 +530,7 @@ func (ev *Evaluator) prepareCandidateAsync(c Candidate, pod *v1.Pod, pluginName
	// nomination updates these pods and moves them to the active queue. It
	// lets scheduler find another place for them.
	nominatedPods := getLowerPriorityNominatedPods(logger, ev.Handler, pod, c.Name())
	if err := util.ClearNominatedNodeName(ctx, ev.Handler.ClientSet(), nominatedPods...); err != nil {
	if err := clearNominatedNodeName(ctx, ev.Handler.ClientSet(), ev.Handler.APICacher(), nominatedPods...); err != nil {
		utilruntime.HandleErrorWithContext(ctx, err, "Cannot clear 'NominatedNodeName' field from lower priority pods on the same target node", "node", c.Name())
		result = metrics.GoroutineResultError
		// We do not return as this error is not critical.
@@ -30,6 +30,7 @@ import (

	v1 "k8s.io/api/core/v1"
	policy "k8s.io/api/policy/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
@@ -43,9 +44,12 @@ import (
	"k8s.io/klog/v2/ktesting"
	extenderv1 "k8s.io/kube-scheduler/extender/v1"
	fwk "k8s.io/kube-scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/backend/api_cache"
	"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
	internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
	internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
	"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder"
	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort"
@@ -442,6 +446,9 @@ func TestPrepareCandidate(t *testing.T) {
		SchedulerName(defaultSchedulerName).Priority(highPriority).
		Containers([]v1.Container{st.MakeContainer().Name("container1").Obj()}).
		Obj()

		errDeletePodFailed = errors.New("delete pod failed")
		errPatchStatusFailed = errors.New("patch pod status failed")
	)

	tests := []struct {
@@ -549,7 +556,7 @@ func TestPrepareCandidate(t *testing.T) {
			testPods: []*v1.Pod{},
			expectedDeletionError: true,
			nodeNames: []string{node1Name},
			expectedStatus: fwk.AsStatus(errors.New("delete pod failed")),
			expectedStatus: fwk.AsStatus(errDeletePodFailed),
			expectedPreemptingMap: sets.New(types.UID("preemptor")),
			expectedActivatedPods: map[string]*v1.Pod{preemptor.Name: preemptor},
		},
@@ -586,7 +593,7 @@ func TestPrepareCandidate(t *testing.T) {
			testPods: []*v1.Pod{},
			expectedPatchError: true,
			nodeNames: []string{node1Name},
			expectedStatus: fwk.AsStatus(errors.New("patch pod status failed")),
			expectedStatus: fwk.AsStatus(errPatchStatusFailed),
			expectedPreemptingMap: sets.New(types.UID("preemptor")),
			expectedActivatedPods: map[string]*v1.Pod{preemptor.Name: preemptor},
		},
@@ -614,192 +621,203 @@ func TestPrepareCandidate(t *testing.T) {
				// which results in the second victim not being deleted.
				"",
			},
			expectedStatus: fwk.AsStatus(errors.New("patch pod status failed")),
			expectedStatus: fwk.AsStatus(errPatchStatusFailed),
			expectedPreemptingMap: sets.New(types.UID("preemptor")),
			expectedActivatedPods: map[string]*v1.Pod{preemptor.Name: preemptor},
		},
	}
for _, asyncPreemptionEnabled := range []bool{true, false} {
|
||||
for _, tt := range tests {
|
||||
t.Run(fmt.Sprintf("%v (Async preemption enabled: %v)", tt.name, asyncPreemptionEnabled), func(t *testing.T) {
|
||||
metrics.Register()
|
||||
logger, ctx := ktesting.NewTestContext(t)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
for _, asyncAPICallsEnabled := range []bool{true, false} {
|
||||
for _, tt := range tests {
|
||||
t.Run(fmt.Sprintf("%v (Async preemption enabled: %v, Async API calls enabled: %v)", tt.name, asyncPreemptionEnabled, asyncAPICallsEnabled), func(t *testing.T) {
|
||||
metrics.Register()
|
||||
logger, ctx := ktesting.NewTestContext(t)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
nodes := make([]*v1.Node, len(tt.nodeNames))
|
||||
for i, nodeName := range tt.nodeNames {
|
||||
nodes[i] = st.MakeNode().Name(nodeName).Capacity(veryLargeRes).Obj()
|
||||
}
|
||||
registeredPlugins := append([]tf.RegisterPluginFunc{
|
||||
tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New)},
|
||||
tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New),
|
||||
)
|
||||
var objs []runtime.Object
|
||||
for _, pod := range tt.testPods {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
|
||||
mu := &sync.RWMutex{}
|
||||
deletedPods := sets.New[string]()
|
||||
deletionFailure := false // whether any request to delete pod failed
|
||||
patchFailure := false // whether any request to patch pod status failed
|
||||
|
||||
cs := clientsetfake.NewClientset(objs...)
|
||||
cs.PrependReactor("delete", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
name := action.(clienttesting.DeleteAction).GetName()
|
||||
if name == "fail-victim" {
|
||||
deletionFailure = true
|
||||
return true, nil, fmt.Errorf("delete pod failed")
|
||||
nodes := make([]*v1.Node, len(tt.nodeNames))
|
||||
for i, nodeName := range tt.nodeNames {
|
||||
nodes[i] = st.MakeNode().Name(nodeName).Capacity(veryLargeRes).Obj()
|
||||
}
|
||||
registeredPlugins := append([]tf.RegisterPluginFunc{
|
||||
tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New)},
|
||||
tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New),
|
||||
)
|
||||
var objs []runtime.Object
|
||||
for _, pod := range tt.testPods {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
|
||||
deletedPods.Insert(name)
|
||||
return true, nil, nil
|
||||
})
|
||||
mu := &sync.RWMutex{}
|
||||
deletedPods := sets.New[string]()
|
||||
deletionFailure := false // whether any request to delete pod failed
|
||||
patchFailure := false // whether any request to patch pod status failed
|
||||
|
||||
cs.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
if action.(clienttesting.PatchAction).GetName() == "fail-victim" {
|
||||
patchFailure = true
|
||||
return true, nil, fmt.Errorf("patch pod status failed")
|
||||
}
|
||||
return true, nil, nil
|
||||
})
|
||||
|
||||
informerFactory := informers.NewSharedInformerFactory(cs, 0)
|
||||
eventBroadcaster := events.NewBroadcaster(&events.EventSinkImpl{Interface: cs.EventsV1()})
|
||||
fakeActivator := &fakePodActivator{activatedPods: make(map[string]*v1.Pod), mu: mu}
|
||||
|
||||
// Note: NominatedPodsForNode is called at the beginning of the goroutine in any case.
|
||||
// fakePodNominator can delay the response of NominatedPodsForNode until the channel is closed,
|
||||
// which allows us to test the preempting map before the goroutine does nothing yet.
|
||||
requestStopper := make(chan struct{})
|
||||
nominator := &fakePodNominator{
|
||||
SchedulingQueue: internalqueue.NewSchedulingQueue(nil, informerFactory),
|
||||
requestStopper: requestStopper,
|
||||
}
|
||||
fwk, err := tf.NewFramework(
|
||||
ctx,
|
||||
registeredPlugins, "",
|
||||
frameworkruntime.WithClientSet(cs),
|
||||
frameworkruntime.WithLogger(logger),
|
||||
frameworkruntime.WithInformerFactory(informerFactory),
|
||||
frameworkruntime.WithWaitingPods(frameworkruntime.NewWaitingPodsMap()),
|
||||
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(tt.testPods, nodes)),
|
||||
frameworkruntime.WithPodNominator(nominator),
|
||||
frameworkruntime.WithEventRecorder(eventBroadcaster.NewRecorder(scheme.Scheme, "test-scheduler")),
|
||||
frameworkruntime.WithPodActivator(fakeActivator),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
informerFactory.Start(ctx.Done())
|
||||
informerFactory.WaitForCacheSync(ctx.Done())
|
||||
fakePreemptionScorePostFilterPlugin := &FakePreemptionScorePostFilterPlugin{}
|
||||
pe := NewEvaluator("FakePreemptionScorePostFilter", fwk, fakePreemptionScorePostFilterPlugin, asyncPreemptionEnabled)
|
||||
|
||||
if asyncPreemptionEnabled {
|
||||
pe.prepareCandidateAsync(tt.candidate, tt.preemptor, "test-plugin")
|
||||
pe.mu.Lock()
|
||||
// The preempting map should be registered synchronously
|
||||
// so we don't need wait.Poll.
|
||||
if !tt.expectedPreemptingMap.Equal(pe.preempting) {
|
||||
t.Errorf("expected preempting map %v, got %v", tt.expectedPreemptingMap, pe.preempting)
|
||||
close(requestStopper)
|
||||
pe.mu.Unlock()
|
||||
return
|
||||
}
|
||||
pe.mu.Unlock()
|
||||
// make the requests complete
|
||||
close(requestStopper)
|
||||
} else {
|
||||
close(requestStopper) // no need to stop requests
|
||||
status := pe.prepareCandidate(ctx, tt.candidate, tt.preemptor, "test-plugin")
|
||||
if tt.expectedStatus == nil {
|
||||
if status != nil {
|
||||
t.Errorf("expect nil status, but got %v", status)
|
||||
cs := clientsetfake.NewClientset(objs...)
|
||||
cs.PrependReactor("delete", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
name := action.(clienttesting.DeleteAction).GetName()
|
||||
if name == "fail-victim" {
|
||||
deletionFailure = true
|
||||
return true, nil, errDeletePodFailed
|
||||
}
|
||||
} else {
|
||||
if status == nil {
|
||||
t.Errorf("expect status %v, but got nil", tt.expectedStatus)
|
||||
} else if status.Code() != tt.expectedStatus.Code() {
|
||||
t.Errorf("expect status code %v, but got %v", tt.expectedStatus.Code(), status.Code())
|
||||
} else if status.Message() != tt.expectedStatus.Message() {
|
||||
t.Errorf("expect status message %v, but got %v", tt.expectedStatus.Message(), status.Message())
|
||||
|
||||
deletedPods.Insert(name)
|
||||
return true, nil, nil
|
||||
})
|
||||
|
||||
cs.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
if action.(clienttesting.PatchAction).GetName() == "fail-victim" {
|
||||
patchFailure = true
|
||||
return true, nil, errPatchStatusFailed
|
||||
}
|
||||
return true, nil, nil
|
||||
})
|
||||
|
||||
informerFactory := informers.NewSharedInformerFactory(cs, 0)
|
||||
eventBroadcaster := events.NewBroadcaster(&events.EventSinkImpl{Interface: cs.EventsV1()})
|
||||
fakeActivator := &fakePodActivator{activatedPods: make(map[string]*v1.Pod), mu: mu}
|
||||
|
||||
// Note: NominatedPodsForNode is called at the beginning of the goroutine in any case.
|
||||
// fakePodNominator can delay the response of NominatedPodsForNode until the channel is closed,
|
||||
// which allows us to test the preempting map before the goroutine does nothing yet.
|
||||
requestStopper := make(chan struct{})
|
||||
nominator := &fakePodNominator{
|
||||
SchedulingQueue: internalqueue.NewSchedulingQueue(nil, informerFactory),
|
||||
requestStopper: requestStopper,
|
||||
}
|
||||
}
|
||||
|
||||
var lastErrMsg string
|
||||
if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
|
||||
mu.RLock()
|
||||
defer mu.RUnlock()
|
||||
|
||||
pe.mu.Lock()
|
||||
defer pe.mu.Unlock()
|
||||
if len(pe.preempting) != 0 {
|
||||
// The preempting map should be empty after the goroutine in all test cases.
|
||||
lastErrMsg = fmt.Sprintf("expected no preempting pods, got %v", pe.preempting)
|
||||
return false, nil
|
||||
var apiDispatcher *apidispatcher.APIDispatcher
|
||||
if asyncAPICallsEnabled {
|
||||
apiDispatcher = apidispatcher.New(cs, 16, apicalls.Relevances)
|
||||
apiDispatcher.Run(logger)
|
||||
defer apiDispatcher.Close()
|
||||
}
|
||||
|
||||
if tt.expectedDeletionError != deletionFailure {
|
||||
lastErrMsg = fmt.Sprintf("expected deletion error %v, got %v", tt.expectedDeletionError, deletionFailure)
|
||||
return false, nil
|
||||
fwk, err := tf.NewFramework(
|
||||
ctx,
|
||||
registeredPlugins, "",
|
||||
frameworkruntime.WithClientSet(cs),
|
||||
frameworkruntime.WithAPIDispatcher(apiDispatcher),
|
||||
frameworkruntime.WithLogger(logger),
|
||||
frameworkruntime.WithInformerFactory(informerFactory),
|
||||
frameworkruntime.WithWaitingPods(frameworkruntime.NewWaitingPodsMap()),
|
||||
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(tt.testPods, nodes)),
|
||||
frameworkruntime.WithPodNominator(nominator),
|
||||
frameworkruntime.WithEventRecorder(eventBroadcaster.NewRecorder(scheme.Scheme, "test-scheduler")),
|
||||
frameworkruntime.WithPodActivator(fakeActivator),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if tt.expectedPatchError != patchFailure {
|
||||
lastErrMsg = fmt.Sprintf("expected patch error %v, got %v", tt.expectedPatchError, patchFailure)
|
||||
return false, nil
|
||||
informerFactory.Start(ctx.Done())
|
||||
informerFactory.WaitForCacheSync(ctx.Done())
|
||||
fakePreemptionScorePostFilterPlugin := &FakePreemptionScorePostFilterPlugin{}
|
||||
if asyncAPICallsEnabled {
|
||||
cache := internalcache.New(ctx, 100*time.Millisecond, apiDispatcher)
|
||||
fwk.SetAPICacher(apicache.New(nil, cache))
|
||||
}
|
||||
|
||||
pe := NewEvaluator("FakePreemptionScorePostFilter", fwk, fakePreemptionScorePostFilterPlugin, asyncPreemptionEnabled)
|
||||
|
||||
if asyncPreemptionEnabled {
|
||||
if diff := cmp.Diff(tt.expectedActivatedPods, fakeActivator.activatedPods); tt.expectedActivatedPods != nil && diff != "" {
|
||||
lastErrMsg = fmt.Sprintf("Unexpected activated pods (-want,+got):\n%s", diff)
|
||||
return false, nil
|
||||
pe.prepareCandidateAsync(tt.candidate, tt.preemptor, "test-plugin")
|
||||
pe.mu.Lock()
|
||||
// The preempting map should be registered synchronously
|
||||
// so we don't need wait.Poll.
|
||||
if !tt.expectedPreemptingMap.Equal(pe.preempting) {
|
||||
t.Errorf("expected preempting map %v, got %v", tt.expectedPreemptingMap, pe.preempting)
|
||||
close(requestStopper)
|
||||
pe.mu.Unlock()
|
||||
return
|
||||
}
|
||||
if tt.expectedActivatedPods == nil && len(fakeActivator.activatedPods) != 0 {
|
||||
lastErrMsg = fmt.Sprintf("expected no activated pods, got %v", fakeActivator.activatedPods)
|
||||
return false, nil
|
||||
pe.mu.Unlock()
|
||||
// make the requests complete
|
||||
close(requestStopper)
|
||||
} else {
|
||||
close(requestStopper) // no need to stop requests
|
||||
status := pe.prepareCandidate(ctx, tt.candidate, tt.preemptor, "test-plugin")
|
||||
if tt.expectedStatus == nil {
|
||||
if status != nil {
|
||||
t.Errorf("expect nil status, but got %v", status)
|
||||
}
|
||||
} else {
|
||||
if !cmp.Equal(status, tt.expectedStatus) {
|
||||
t.Errorf("expect status %v, but got %v", tt.expectedStatus, status)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if deletedPods.Len() > 1 {
|
||||
// For now, we only expect at most one pod to be deleted in all test cases.
|
||||
// If we need to test multiple pods deletion, we need to update the test table definition.
|
||||
return false, fmt.Errorf("expected at most one pod to be deleted, got %v", deletedPods.UnsortedList())
|
||||
}
|
||||
var lastErrMsg string
|
||||
if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
|
||||
mu.RLock()
|
||||
defer mu.RUnlock()
|
||||
|
||||
if len(tt.expectedDeletedPod) == 0 {
|
||||
if deletedPods.Len() != 0 {
|
||||
// When tt.expectedDeletedPod is empty, we expect no pod to be deleted.
|
||||
return false, fmt.Errorf("expected no pod to be deleted, got %v", deletedPods.UnsortedList())
|
||||
pe.mu.Lock()
|
||||
defer pe.mu.Unlock()
|
||||
if len(pe.preempting) != 0 {
|
||||
// The preempting map should be empty after the goroutine in all test cases.
|
||||
lastErrMsg = fmt.Sprintf("expected no preempting pods, got %v", pe.preempting)
|
||||
return false, nil
|
||||
}
|
||||
// nothing further to check.
|
||||
|
||||
if tt.expectedDeletionError != deletionFailure {
|
||||
lastErrMsg = fmt.Sprintf("expected deletion error %v, got %v", tt.expectedDeletionError, deletionFailure)
|
||||
return false, nil
|
||||
}
|
||||
if tt.expectedPatchError != patchFailure {
|
||||
lastErrMsg = fmt.Sprintf("expected patch error %v, got %v", tt.expectedPatchError, patchFailure)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if asyncPreemptionEnabled {
|
||||
if diff := cmp.Diff(tt.expectedActivatedPods, fakeActivator.activatedPods); tt.expectedActivatedPods != nil && diff != "" {
|
||||
lastErrMsg = fmt.Sprintf("Unexpected activated pods (-want,+got):\n%s", diff)
|
||||
return false, nil
|
||||
}
|
||||
if tt.expectedActivatedPods == nil && len(fakeActivator.activatedPods) != 0 {
|
||||
lastErrMsg = fmt.Sprintf("expected no activated pods, got %v", fakeActivator.activatedPods)
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
if deletedPods.Len() > 1 {
|
||||
// For now, we only expect at most one pod to be deleted in all test cases.
|
||||
// If we need to test multiple pods deletion, we need to update the test table definition.
|
||||
return false, fmt.Errorf("expected at most one pod to be deleted, got %v", deletedPods.UnsortedList())
|
||||
}
|
||||
|
||||
if len(tt.expectedDeletedPod) == 0 {
|
||||
if deletedPods.Len() != 0 {
|
||||
// When tt.expectedDeletedPod is empty, we expect no pod to be deleted.
|
||||
return false, fmt.Errorf("expected no pod to be deleted, got %v", deletedPods.UnsortedList())
|
||||
}
|
||||
// nothing further to check.
|
||||
return true, nil
|
||||
}
|
||||
|
||||
found := false
|
||||
for _, podName := range tt.expectedDeletedPod {
|
||||
if deletedPods.Has(podName) ||
|
||||
// If podName is empty, we expect no pod to be deleted.
|
||||
(deletedPods.Len() == 0 && podName == "") {
|
||||
found = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
lastErrMsg = fmt.Sprintf("expected pod %v to be deleted, but %v is deleted", strings.Join(tt.expectedDeletedPod, " or "), deletedPods.UnsortedList())
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}); err != nil {
|
||||
t.Fatal(lastErrMsg)
|
||||
}
|
||||
|
||||
found := false
|
||||
for _, podName := range tt.expectedDeletedPod {
|
||||
if deletedPods.Has(podName) ||
|
||||
// If podName is empty, we expect no pod to be deleted.
|
||||
(deletedPods.Len() == 0 && podName == "") {
|
||||
found = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
lastErrMsg = fmt.Sprintf("expected pod %v to be deleted, but %v is deleted", strings.Join(tt.expectedDeletedPod, " or "), deletedPods.UnsortedList())
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}); err != nil {
|
||||
t.Fatal(lastErrMsg)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
@@ -1032,10 +1050,15 @@ func TestCallExtenders(t *testing.T) {
objs = append(objs, preemptor)
cs := clientsetfake.NewClientset(objs...)
informerFactory := informers.NewSharedInformerFactory(cs, 0)
apiDispatcher := apidispatcher.New(cs, 16, apicalls.Relevances)
apiDispatcher.Run(logger)
defer apiDispatcher.Close()

fwk, err := tf.NewFramework(
ctx,
registeredPlugins, "",
frameworkruntime.WithClientSet(cs),
frameworkruntime.WithAPIDispatcher(apiDispatcher),
frameworkruntime.WithLogger(logger),
frameworkruntime.WithExtenders(tt.extenders),
frameworkruntime.WithInformerFactory(informerFactory),
@@ -1047,6 +1070,8 @@ func TestCallExtenders(t *testing.T) {
}
informerFactory.Start(ctx.Done())
informerFactory.WaitForCacheSync(ctx.Done())
cache := internalcache.New(ctx, 100*time.Millisecond, apiDispatcher)
fwk.SetAPICacher(apicache.New(nil, cache))

fakePreemptionScorePostFilterPlugin := &FakePreemptionScorePostFilterPlugin{}
pe := Evaluator{
@@ -1075,3 +1100,85 @@ func TestCallExtenders(t *testing.T) {
})
}
}
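Reviewer note on the hunks above: the test now builds an APIDispatcher, hands it to the framework via an option, and wires a cache-backed APICacher into the framework afterwards. A condensed sketch of that wiring, using only the calls shown in the hunks (error handling and unrelated options omitted), looks roughly like this:

	// Condensed wiring sketch; mirrors the test setup above, not a new API.
	apiDispatcher := apidispatcher.New(cs, 16, apicalls.Relevances) // 16 = parallelism used by the test
	apiDispatcher.Run(logger)
	defer apiDispatcher.Close()

	fwk, err := tf.NewFramework(ctx, registeredPlugins, "",
		frameworkruntime.WithClientSet(cs),
		frameworkruntime.WithAPIDispatcher(apiDispatcher),
		frameworkruntime.WithInformerFactory(informerFactory),
	)
	if err != nil {
		t.Fatal(err)
	}
	cache := internalcache.New(ctx, 100*time.Millisecond, apiDispatcher)
	fwk.SetAPICacher(apicache.New(nil, cache)) // nil scheduling queue, as in the test above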

func TestRemoveNominatedNodeName(t *testing.T) {
tests := []struct {
name                     string
currentNominatedNodeName string
newNominatedNodeName     string
expectPatchRequest       bool
expectedPatchData        string
}{
{
name:                     "Should make patch request to clear node name",
currentNominatedNodeName: "node1",
expectPatchRequest:       true,
expectedPatchData:        `{"status":{"nominatedNodeName":null}}`,
},
{
name:                     "Should not make patch request if nominated node is already cleared",
currentNominatedNodeName: "",
expectPatchRequest:       false,
},
}
for _, asyncAPICallsEnabled := range []bool{true, false} {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
actualPatchRequests := 0
var actualPatchData string
cs := &clientsetfake.Clientset{}
patchCalled := make(chan struct{}, 1)
cs.AddReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
actualPatchRequests++
patch := action.(clienttesting.PatchAction)
actualPatchData = string(patch.GetPatch())
patchCalled <- struct{}{}
// For this test, we don't care about the result of the patched pod, just that we got the expected
// patch request, so just returning &v1.Pod{} here is OK because scheduler doesn't use the response.
return true, &v1.Pod{}, nil
})

pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "foo"},
Status:     v1.PodStatus{NominatedNodeName: test.currentNominatedNodeName},
}

ctx, cancel := context.WithCancel(ctx)
defer cancel()

var apiCacher framework.APICacher
if asyncAPICallsEnabled {
apiDispatcher := apidispatcher.New(cs, 16, apicalls.Relevances)
apiDispatcher.Run(logger)
defer apiDispatcher.Close()

informerFactory := informers.NewSharedInformerFactory(cs, 0)
queue := internalqueue.NewSchedulingQueue(nil, informerFactory, internalqueue.WithAPIDispatcher(apiDispatcher))
apiCacher = apicache.New(queue, nil)
}

if err := clearNominatedNodeName(ctx, cs, apiCacher, pod); err != nil {
t.Fatalf("Error calling removeNominatedNodeName: %v", err)
}

if test.expectPatchRequest {
select {
case <-patchCalled:
case <-time.After(time.Second):
t.Fatalf("Timed out while waiting for patch to be called")
}
if actualPatchData != test.expectedPatchData {
t.Fatalf("Patch data mismatch: Actual was %v, but expected %v", actualPatchData, test.expectedPatchData)
}
} else {
select {
case <-patchCalled:
t.Fatalf("Expected patch not to be called, actual patch data: %v", actualPatchData)
case <-time.After(time.Second):
}
}
})
}
}
}
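As a reading aid for the expected patch body in the table above: the scheduler builds the status patch as a two-way strategic merge patch (see the util.PatchPodStatus hunk further down), so clearing a previously set nominatedNodeName serializes as an explicit null. A minimal sketch, assuming the usual v1, json and strategicpatch imports:

	// Minimal sketch showing where `{"status":{"nominatedNodeName":null}}` comes from.
	oldData, err := json.Marshal(v1.Pod{Status: v1.PodStatus{NominatedNodeName: "node1"}})
	if err != nil {
		t.Fatal(err)
	}
	newData, err := json.Marshal(v1.Pod{Status: v1.PodStatus{NominatedNodeName: ""}}) // omitempty, so the field is dropped
	if err != nil {
		t.Fatal(err)
	}
	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Pod{})
	if err != nil {
		t.Fatal(err)
	}
	// patchBytes == []byte(`{"status":{"nominatedNodeName":null}}`)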

@@ -37,6 +37,7 @@ import (
"k8s.io/klog/v2"
fwk "k8s.io/kube-scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
"k8s.io/kubernetes/pkg/scheduler/metrics"
@@ -86,6 +87,8 @@ type frameworkImpl struct {
extenders []framework.Extender
framework.PodNominator
framework.PodActivator
apiDispatcher *apidispatcher.APIDispatcher
apiCacher     framework.APICacher

parallelizer parallelize.Parallelizer
}
@@ -138,6 +141,7 @@ type frameworkOptions struct {
captureProfile       CaptureProfile
parallelizer         parallelize.Parallelizer
waitingPods          *waitingPodsMap
apiDispatcher        *apidispatcher.APIDispatcher
logger               *klog.Logger
}

@@ -223,6 +227,13 @@ func WithParallelism(parallelism int) Option {
}
}

// WithAPIDispatcher sets API dispatcher for the scheduling frameworkImpl.
func WithAPIDispatcher(apiDispatcher *apidispatcher.APIDispatcher) Option {
return func(o *frameworkOptions) {
o.apiDispatcher = apiDispatcher
}
}

// CaptureProfile is a callback to capture a finalized profile.
type CaptureProfile func(config.KubeSchedulerProfile)

@@ -289,6 +300,7 @@ func NewFramework(ctx context.Context, r Registry, profile *config.KubeScheduler
extenders:       options.extenders,
PodNominator:    options.podNominator,
PodActivator:    options.podActivator,
apiDispatcher:   options.apiDispatcher,
parallelizer:    options.parallelizer,
logger:          logger,
}
@@ -441,6 +453,10 @@ func (f *frameworkImpl) SetPodActivator(a framework.PodActivator) {
f.PodActivator = a
}

func (f *frameworkImpl) SetAPICacher(c framework.APICacher) {
f.apiCacher = c
}

// Close closes each plugin, when they implement io.Closer interface.
func (f *frameworkImpl) Close() error {
var errs []error
@@ -1679,3 +1695,22 @@ func (f *frameworkImpl) PercentageOfNodesToScore() *int32 {
func (f *frameworkImpl) Parallelizer() parallelize.Parallelizer {
return f.parallelizer
}

// APIDispatcher returns an apiDispatcher that can be used to dispatch API calls.
// This requires SchedulerAsyncAPICalls feature gate to be enabled.
func (f *frameworkImpl) APIDispatcher() fwk.APIDispatcher {
if f.apiDispatcher == nil {
return nil
}
return f.apiDispatcher
}

// APICacher returns an apiCacher that can be used to dispatch API calls through scheduler's cache
// instead of directly using APIDispatcher().
// This requires SchedulerAsyncAPICalls feature gate to be enabled.
func (f *frameworkImpl) APICacher() framework.APICacher {
if f.apiCacher == nil {
return nil
}
return f.apiCacher
}
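Note on the two accessors added above: both return nil when no dispatcher or cacher was injected, i.e. when the SchedulerAsyncAPICalls feature gate is off, so callers are expected to nil-check and keep the existing client-based path as a fallback. A hypothetical caller might look like this (patchViaClient is an illustrative stand-in, not part of this change):

	// Hypothetical guard pattern; only the nil semantics and the PatchPodStatus call are taken from this PR.
	if cacher := f.APICacher(); cacher != nil {
		// Async path: the status patch is applied to the cached pod and dispatched in the background.
		if _, err := cacher.PatchPodStatus(pod, condition, nominatingInfo); err != nil {
			return err
		}
		return nil
	}
	return patchViaClient(ctx, pod, condition, nominatingInfo) // assumed fallback helper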

@@ -1090,7 +1090,7 @@ func (sched *Scheduler) handleSchedulingFailure(ctx context.Context, fwk framewo

msg := truncateMessage(errMsg)
fwk.EventRecorder().Eventf(pod, nil, v1.EventTypeWarning, "FailedScheduling", "Scheduling", msg)
if err := updatePod(ctx, sched.client, pod, &v1.PodCondition{
if err := updatePod(ctx, sched.client, fwk.APICacher(), pod, &v1.PodCondition{
Type:               v1.PodScheduled,
ObservedGeneration: podutil.CalculatePodConditionObservedGeneration(&pod.Status, pod.Generation, v1.PodScheduled),
Status:             v1.ConditionFalse,
@@ -1111,7 +1111,12 @@ func truncateMessage(message string) string {
return message[:max-len(suffix)] + suffix
}

func updatePod(ctx context.Context, client clientset.Interface, pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *framework.NominatingInfo) error {
func updatePod(ctx context.Context, client clientset.Interface, apiCacher framework.APICacher, pod *v1.Pod, condition *v1.PodCondition, nominatingInfo *framework.NominatingInfo) error {
if apiCacher != nil {
// When API cacher is available, use it to patch the status.
_, err := apiCacher.PatchPodStatus(pod, condition, nominatingInfo)
return err
}
logger := klog.FromContext(ctx)
logger.V(3).Info("Updating pod condition", "pod", klog.KObj(pod), "conditionType", condition.Type, "conditionStatus", condition.Status, "conditionReason", condition.Reason)
podStatusCopy := pod.Status.DeepCopy()
@@ -1124,5 +1129,5 @@ func updatePod(ctx context.Context, client clientset.Interface, pod *v1.Pod, con
if nnnNeedsUpdate {
podStatusCopy.NominatedNodeName = nominatingInfo.NominatedNodeName
}
return util.PatchPodStatus(ctx, client, pod, podStatusCopy)
return util.PatchPodStatus(ctx, client, pod.Name, pod.Namespace, &pod.Status, podStatusCopy)
}
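The hunk above is the concrete instance of that fallback pattern: updatePod now takes the framework's APICacher and only performs the direct strategic-merge patch when it is nil. The updated call site can be read roughly as follows (reconstructed from the two hunks; the Reason and Message fields sit outside the shown context and are assumed):

	// Reconstructed call-site sketch, not a verbatim copy of handleSchedulingFailure.
	if err := updatePod(ctx, sched.client, fwk.APICacher(), pod, &v1.PodCondition{
		Type:               v1.PodScheduled,
		ObservedGeneration: podutil.CalculatePodConditionObservedGeneration(&pod.Status, pod.Generation, v1.PodScheduled),
		Status:             v1.ConditionFalse,
		Reason:             reason, // assumed: unchanged field outside this hunk
		Message:            errMsg, // assumed
	}, nominatingInfo); err != nil {
		klog.FromContext(ctx).Error(err, "Error updating pod", "pod", klog.KObj(pod)) // assumed error handling
	}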

File diff suppressed because it is too large
@@ -40,10 +40,13 @@ import (
"k8s.io/kubernetes/pkg/features"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/apis/config/scheme"
"k8s.io/kubernetes/pkg/scheduler/backend/api_cache"
"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
cachedebugger "k8s.io/kubernetes/pkg/scheduler/backend/cache/debugger"
internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
frameworkplugins "k8s.io/kubernetes/pkg/scheduler/framework/plugins"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/dynamicresources"
@@ -93,6 +96,12 @@ type Scheduler struct {
// SchedulingQueue holds pods to be scheduled
SchedulingQueue internalqueue.SchedulingQueue

// If possible, indirect operation on APIDispatcher, e.g. through SchedulingQueue, is preferred.
// Is nil iff SchedulerAsyncAPICalls feature gate is disabled.
// Adding a call to APIDispatcher should not be done directly by in-tree usages.
// framework.APICache should be used instead.
APIDispatcher *apidispatcher.APIDispatcher

// Profiles are the scheduling profiles.
Profiles profile.Map

@@ -331,6 +340,10 @@ func New(ctx context.Context,
}
draManager = dynamicresources.NewDRAManager(ctx, resourceClaimCache, resourceSliceTracker, informerFactory)
}
var apiDispatcher *apidispatcher.APIDispatcher
if utilfeature.DefaultFeatureGate.Enabled(features.SchedulerAsyncAPICalls) {
apiDispatcher = apidispatcher.New(client, int(options.parallelism), apicalls.Relevances)
}

profiles, err := profile.NewMap(ctx, options.profiles, registry, recorderFactory,
frameworkruntime.WithComponentConfigVersion(options.componentConfigVersion),
@@ -344,6 +357,7 @@ func New(ctx context.Context,
frameworkruntime.WithExtenders(extenders),
frameworkruntime.WithMetricsRecorder(metricsRecorder),
frameworkruntime.WithWaitingPods(waitingPods),
frameworkruntime.WithAPIDispatcher(apiDispatcher),
)
if err != nil {
return nil, fmt.Errorf("initializing profiles: %v", err)
@@ -385,15 +399,22 @@ func New(ctx context.Context,
internalqueue.WithQueueingHintMapPerProfile(queueingHintsPerProfile),
internalqueue.WithPluginMetricsSamplePercent(pluginMetricsSamplePercent),
internalqueue.WithMetricsRecorder(*metricsRecorder),
internalqueue.WithAPIDispatcher(apiDispatcher),
)

schedulerCache := internalcache.New(ctx, durationToExpireAssumedPod, apiDispatcher)

var apiCache framework.APICacher
if apiDispatcher != nil {
apiCache = apicache.New(podQueue, schedulerCache)
}

for _, fwk := range profiles {
fwk.SetPodNominator(podQueue)
fwk.SetPodActivator(podQueue)
fwk.SetAPICacher(apiCache)
}

schedulerCache := internalcache.New(ctx, durationToExpireAssumedPod)

// Setup cache debugger.
debugger := cachedebugger.New(nodeLister, podLister, schedulerCache, podQueue)
debugger.ListenForSignal(ctx)
@@ -408,6 +429,7 @@ func New(ctx context.Context,
SchedulingQueue: podQueue,
Profiles:        profiles,
logger:          logger,
APIDispatcher:   apiDispatcher,
}
sched.NextPod = podQueue.Pop
sched.applyDefaultHandlers()
@@ -499,6 +521,10 @@ func (sched *Scheduler) Run(ctx context.Context) {
logger := klog.FromContext(ctx)
sched.SchedulingQueue.Run(logger)

if sched.APIDispatcher != nil {
go sched.APIDispatcher.Run(logger)
}

// We need to start scheduleOne loop in a dedicated goroutine,
// because scheduleOne function hangs on getting the next item
// from the SchedulingQueue.
@@ -508,6 +534,9 @@ func (sched *Scheduler) Run(ctx context.Context) {
go wait.UntilWithContext(ctx, sched.ScheduleOne, 0)

<-ctx.Done()
if sched.APIDispatcher != nil {
sched.APIDispatcher.Close()
}
sched.SchedulingQueue.Close()

// If the plugins satisfy the io.Closer interface, they are closed.
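Taken together, the New() and Run() hunks above define the dispatcher's lifecycle: it is only constructed when the SchedulerAsyncAPICalls feature gate is enabled, started alongside the scheduling queue, and closed once the scheduler's context is done. A condensed sketch of just that lifecycle, assembled from the hunks (other setup elided, not a standalone function in the PR):

	var apiDispatcher *apidispatcher.APIDispatcher
	if utilfeature.DefaultFeatureGate.Enabled(features.SchedulerAsyncAPICalls) {
		apiDispatcher = apidispatcher.New(client, int(options.parallelism), apicalls.Relevances)
	}
	// ... later, in Run():
	if sched.APIDispatcher != nil {
		go sched.APIDispatcher.Run(logger)
	}
	<-ctx.Done()
	if sched.APIDispatcher != nil {
		sched.APIDispatcher.Close()
	}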

@@ -53,9 +53,12 @@ import (
"k8s.io/kubernetes/pkg/features"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/apis/config/testing/defaults"
"k8s.io/kubernetes/pkg/scheduler/backend/api_cache"
"k8s.io/kubernetes/pkg/scheduler/backend/api_dispatcher"
internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/api_calls"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort"
|
||||
@@ -282,97 +285,125 @@ func TestFailureHandler(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
for _, asyncAPICallsEnabled := range []bool{true, false} {
|
||||
for _, tt := range tests {
|
||||
t.Run(fmt.Sprintf("%s (Async API calls enabled: %v)", tt.name, asyncAPICallsEnabled), func(t *testing.T) {
|
||||
logger, ctx := ktesting.NewTestContext(t)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
client := fake.NewClientset(&v1.PodList{Items: []v1.Pod{*testPod}})
|
||||
informerFactory := informers.NewSharedInformerFactory(client, 0)
|
||||
podInformer := informerFactory.Core().V1().Pods()
|
||||
// Need to add/update/delete testPod to the store.
|
||||
if err := podInformer.Informer().GetStore().Add(testPod); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var apiDispatcher *apidispatcher.APIDispatcher
|
||||
if asyncAPICallsEnabled {
|
||||
apiDispatcher = apidispatcher.New(client, 16, apicalls.Relevances)
|
||||
apiDispatcher.Run(logger)
|
||||
defer apiDispatcher.Close()
|
||||
}
|
||||
|
||||
recorder := metrics.NewMetricsAsyncRecorder(3, 20*time.Microsecond, ctx.Done())
|
||||
queue := internalqueue.NewPriorityQueue(nil, informerFactory, internalqueue.WithClock(testingclock.NewFakeClock(time.Now())), internalqueue.WithMetricsRecorder(*recorder), internalqueue.WithAPIDispatcher(apiDispatcher))
|
||||
schedulerCache := internalcache.New(ctx, 30*time.Second, apiDispatcher)
|
||||
|
||||
queue.Add(logger, testPod)
|
||||
|
||||
if _, err := queue.Pop(logger); err != nil {
|
||||
t.Fatalf("Pop failed: %v", err)
|
||||
}
|
||||
|
||||
if tt.podUpdatedDuringScheduling {
|
||||
if err := podInformer.Informer().GetStore().Update(testPodUpdated); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
queue.Update(logger, testPod, testPodUpdated)
|
||||
}
|
||||
if tt.podDeletedDuringScheduling {
|
||||
if err := podInformer.Informer().GetStore().Delete(testPod); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
queue.Delete(testPod)
|
||||
}
|
||||
|
||||
s, schedFramework, err := initScheduler(ctx, schedulerCache, queue, apiDispatcher, client, informerFactory)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
testPodInfo := &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(t, testPod)}
|
||||
s.FailureHandler(ctx, schedFramework, testPodInfo, fwk.NewStatus(fwk.Unschedulable), nil, time.Now())
|
||||
|
||||
var got *v1.Pod
|
||||
if tt.podUpdatedDuringScheduling {
|
||||
pInfo, ok := queue.GetPod(testPod.Name, testPod.Namespace)
|
||||
if !ok {
|
||||
t.Fatalf("Failed to get pod %s/%s from queue", testPod.Namespace, testPod.Name)
|
||||
}
|
||||
got = pInfo.Pod
|
||||
} else {
|
||||
got = getPodFromPriorityQueue(queue, testPod)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(tt.expect, got); diff != "" {
|
||||
t.Errorf("Unexpected pod (-want, +got): %s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFailureHandler_PodAlreadyBound(t *testing.T) {
|
||||
for _, asyncAPICallsEnabled := range []bool{true, false} {
|
||||
t.Run(fmt.Sprintf("Async API calls enabled: %v", asyncAPICallsEnabled), func(t *testing.T) {
|
||||
logger, ctx := ktesting.NewTestContext(t)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
client := fake.NewClientset(&v1.PodList{Items: []v1.Pod{*testPod}})
|
||||
nodeFoo := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
|
||||
testPod := st.MakePod().Name("test-pod").Namespace(v1.NamespaceDefault).Node("foo").Obj()
|
||||
|
||||
client := fake.NewClientset(&v1.PodList{Items: []v1.Pod{*testPod}}, &v1.NodeList{Items: []v1.Node{nodeFoo}})
|
||||
informerFactory := informers.NewSharedInformerFactory(client, 0)
|
||||
podInformer := informerFactory.Core().V1().Pods()
|
||||
// Need to add/update/delete testPod to the store.
|
||||
podInformer.Informer().GetStore().Add(testPod)
|
||||
|
||||
recorder := metrics.NewMetricsAsyncRecorder(3, 20*time.Microsecond, ctx.Done())
|
||||
queue := internalqueue.NewPriorityQueue(nil, informerFactory, internalqueue.WithClock(testingclock.NewFakeClock(time.Now())), internalqueue.WithMetricsRecorder(*recorder))
|
||||
schedulerCache := internalcache.New(ctx, 30*time.Second)
|
||||
|
||||
queue.Add(logger, testPod)
|
||||
|
||||
if _, err := queue.Pop(logger); err != nil {
|
||||
t.Fatalf("Pop failed: %v", err)
|
||||
// Need to add testPod to the store.
|
||||
if err := podInformer.Informer().GetStore().Add(testPod); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if tt.podUpdatedDuringScheduling {
|
||||
podInformer.Informer().GetStore().Update(testPodUpdated)
|
||||
queue.Update(logger, testPod, testPodUpdated)
|
||||
}
|
||||
if tt.podDeletedDuringScheduling {
|
||||
podInformer.Informer().GetStore().Delete(testPod)
|
||||
queue.Delete(testPod)
|
||||
var apiDispatcher *apidispatcher.APIDispatcher
|
||||
if asyncAPICallsEnabled {
|
||||
apiDispatcher = apidispatcher.New(client, 16, apicalls.Relevances)
|
||||
apiDispatcher.Run(logger)
|
||||
defer apiDispatcher.Close()
|
||||
}
|
||||
|
||||
s, schedFramework, err := initScheduler(ctx, schedulerCache, queue, client, informerFactory)
|
||||
queue := internalqueue.NewPriorityQueue(nil, informerFactory, internalqueue.WithClock(testingclock.NewFakeClock(time.Now())), internalqueue.WithAPIDispatcher(apiDispatcher))
|
||||
schedulerCache := internalcache.New(ctx, 30*time.Second, apiDispatcher)
|
||||
|
||||
// Add node to schedulerCache no matter it's deleted in API server or not.
|
||||
schedulerCache.AddNode(logger, &nodeFoo)
|
||||
|
||||
s, schedFramework, err := initScheduler(ctx, schedulerCache, queue, apiDispatcher, client, informerFactory)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
testPodInfo := &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(t, testPod)}
|
||||
s.FailureHandler(ctx, schedFramework, testPodInfo, fwk.NewStatus(fwk.Unschedulable), nil, time.Now())
|
||||
s.FailureHandler(ctx, schedFramework, testPodInfo, fwk.NewStatus(fwk.Unschedulable).WithError(fmt.Errorf("binding rejected: timeout")), nil, time.Now())
|
||||
|
||||
var got *v1.Pod
|
||||
if tt.podUpdatedDuringScheduling {
|
||||
pInfo, ok := queue.GetPod(testPod.Name, testPod.Namespace)
|
||||
if !ok {
|
||||
t.Fatalf("Failed to get pod %s/%s from queue", testPod.Namespace, testPod.Name)
|
||||
}
|
||||
got = pInfo.Pod
|
||||
} else {
|
||||
got = getPodFromPriorityQueue(queue, testPod)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(tt.expect, got); diff != "" {
|
||||
t.Errorf("Unexpected pod (-want, +got): %s", diff)
|
||||
pod := getPodFromPriorityQueue(queue, testPod)
|
||||
if pod != nil {
|
||||
t.Fatalf("Unexpected pod: %v should not be in PriorityQueue when the NodeName of pod is not empty", pod.Name)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFailureHandler_PodAlreadyBound(t *testing.T) {
|
||||
logger, ctx := ktesting.NewTestContext(t)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
nodeFoo := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
|
||||
testPod := st.MakePod().Name("test-pod").Namespace(v1.NamespaceDefault).Node("foo").Obj()
|
||||
|
||||
client := fake.NewClientset(&v1.PodList{Items: []v1.Pod{*testPod}}, &v1.NodeList{Items: []v1.Node{nodeFoo}})
|
||||
informerFactory := informers.NewSharedInformerFactory(client, 0)
|
||||
podInformer := informerFactory.Core().V1().Pods()
|
||||
// Need to add testPod to the store.
|
||||
podInformer.Informer().GetStore().Add(testPod)
|
||||
|
||||
queue := internalqueue.NewPriorityQueue(nil, informerFactory, internalqueue.WithClock(testingclock.NewFakeClock(time.Now())))
|
||||
schedulerCache := internalcache.New(ctx, 30*time.Second)
|
||||
|
||||
// Add node to schedulerCache no matter it's deleted in API server or not.
|
||||
schedulerCache.AddNode(logger, &nodeFoo)
|
||||
|
||||
s, schedFramework, err := initScheduler(ctx, schedulerCache, queue, client, informerFactory)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
testPodInfo := &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(t, testPod)}
|
||||
s.FailureHandler(ctx, schedFramework, testPodInfo, fwk.NewStatus(fwk.Unschedulable).WithError(fmt.Errorf("binding rejected: timeout")), nil, time.Now())
|
||||
|
||||
pod := getPodFromPriorityQueue(queue, testPod)
|
||||
if pod != nil {
|
||||
t.Fatalf("Unexpected pod: %v should not be in PriorityQueue when the NodeName of pod is not empty", pod.Name)
|
||||
}
|
||||
}
|
||||
|
||||
// TestWithPercentageOfNodesToScore tests scheduler's PercentageOfNodesToScore is set correctly.
|
||||
func TestWithPercentageOfNodesToScore(t *testing.T) {
|
||||
tests := []struct {
|
||||
@@ -445,7 +476,7 @@ func getPodFromPriorityQueue(queue *internalqueue.PriorityQueue, pod *v1.Pod) *v
return nil
}

func initScheduler(ctx context.Context, cache internalcache.Cache, queue internalqueue.SchedulingQueue,
func initScheduler(ctx context.Context, cache internalcache.Cache, queue internalqueue.SchedulingQueue, apiDispatcher *apidispatcher.APIDispatcher,
client kubernetes.Interface, informerFactory informers.SharedInformerFactory) (*Scheduler, framework.Framework, error) {
logger := klog.FromContext(ctx)
registerPluginFuncs := []tf.RegisterPluginFunc{
@@ -458,6 +489,7 @@ func initScheduler(ctx context.Context, cache internalcache.Cache, queue interna
registerPluginFuncs,
testSchedulerName,
frameworkruntime.WithClientSet(client),
frameworkruntime.WithAPIDispatcher(apiDispatcher),
frameworkruntime.WithInformerFactory(informerFactory),
frameworkruntime.WithEventRecorder(eventBroadcaster.NewRecorder(scheme.Scheme, testSchedulerName)),
frameworkruntime.WithWaitingPods(waitingPods),
@@ -465,12 +497,16 @@ func initScheduler(ctx context.Context, cache internalcache.Cache, queue interna
if err != nil {
return nil, nil, err
}
if apiDispatcher != nil {
fwk.SetAPICacher(apicache.New(queue, cache))
}

s := &Scheduler{
Cache:           cache,
client:          client,
StopEverything:  ctx.Done(),
SchedulingQueue: queue,
APIDispatcher:   apiDispatcher,
Profiles:        profile.Map{testSchedulerName: fwk},
logger:          logger,
}

@@ -26,7 +26,6 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/net"
"k8s.io/apimachinery/pkg/util/strategicpatch"
"k8s.io/client-go/kubernetes"
@@ -101,12 +100,12 @@ func Retriable(err error) bool {

// PatchPodStatus calculates the delta bytes change from <old.Status> to <newStatus>,
// and then submit a request to API server to patch the pod changes.
func PatchPodStatus(ctx context.Context, cs kubernetes.Interface, old *v1.Pod, newStatus *v1.PodStatus) error {
func PatchPodStatus(ctx context.Context, cs kubernetes.Interface, name string, namespace string, oldStatus *v1.PodStatus, newStatus *v1.PodStatus) error {
if newStatus == nil {
return nil
}

oldData, err := json.Marshal(v1.Pod{Status: old.Status})
oldData, err := json.Marshal(v1.Pod{Status: *oldStatus})
if err != nil {
return err
}
@@ -117,7 +116,7 @@ func PatchPodStatus(ctx context.Context, cs kubernetes.Interface, old *v1.Pod, n
}
patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Pod{})
if err != nil {
return fmt.Errorf("failed to create merge patch for pod %q/%q: %v", old.Namespace, old.Name, err)
return fmt.Errorf("failed to create merge patch for pod %q/%q: %w", namespace, name, err)
}

if "{}" == string(patchBytes) {
@@ -125,7 +124,7 @@ func PatchPodStatus(ctx context.Context, cs kubernetes.Interface, old *v1.Pod, n
}

patchFn := func() error {
_, err := cs.CoreV1().Pods(old.Namespace).Patch(ctx, old.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}, "status")
_, err := cs.CoreV1().Pods(namespace).Patch(ctx, name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}, "status")
return err
}

@@ -137,23 +136,6 @@ func DeletePod(ctx context.Context, cs kubernetes.Interface, pod *v1.Pod) error
return cs.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{})
}

// ClearNominatedNodeName internally submit a patch request to API server
// to set each pods[*].Status.NominatedNodeName> to "".
func ClearNominatedNodeName(ctx context.Context, cs kubernetes.Interface, pods ...*v1.Pod) utilerrors.Aggregate {
var errs []error
for _, p := range pods {
if len(p.Status.NominatedNodeName) == 0 {
continue
}
podStatusCopy := p.Status.DeepCopy()
podStatusCopy.NominatedNodeName = ""
if err := PatchPodStatus(ctx, cs, p, podStatusCopy); err != nil {
errs = append(errs, err)
}
}
return utilerrors.NewAggregate(errs)
}
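ClearNominatedNodeName is removed here because the scheduler now clears the field through the lower-cased clearNominatedNodeName helper exercised by the new TestRemoveNominatedNodeName further up, which prefers an APICacher when one is available. That helper itself is outside the shown hunks; a purely hypothetical sketch of what it could look like, under that assumption, is:

	// Hypothetical sketch only; the real helper is not part of the shown hunks.
	func clearNominatedNodeName(ctx context.Context, cs kubernetes.Interface, apiCacher framework.APICacher, pods ...*v1.Pod) error {
		var errs []error
		for _, p := range pods {
			if len(p.Status.NominatedNodeName) == 0 {
				continue
			}
			if apiCacher != nil {
				// Assumed async path: clear the nomination through the cacher instead of a direct PATCH.
				if _, err := apiCacher.PatchPodStatus(p, nil, &framework.NominatingInfo{NominatingMode: framework.ModeOverride}); err != nil {
					errs = append(errs, err)
				}
				continue
			}
			statusCopy := p.Status.DeepCopy()
			statusCopy.NominatedNodeName = ""
			if err := PatchPodStatus(ctx, cs, p.Name, p.Namespace, &p.Status, statusCopy); err != nil {
				errs = append(errs, err)
			}
		}
		return errors.Join(errs...)
	}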

// IsScalarResourceName validates the resource for Extended, Hugepages, Native and AttachableVolume resources
func IsScalarResourceName(name v1.ResourceName) bool {
return v1helper.IsExtendedResourceName(name) || v1helper.IsHugePageResourceName(name) ||

@@ -161,63 +161,6 @@ func TestMoreImportantPod(t *testing.T) {
}
}

func TestRemoveNominatedNodeName(t *testing.T) {
tests := []struct {
name                     string
currentNominatedNodeName string
newNominatedNodeName     string
expectedPatchRequests    int
expectedPatchData        string
}{
{
name:                     "Should make patch request to clear node name",
currentNominatedNodeName: "node1",
expectedPatchRequests:    1,
expectedPatchData:        `{"status":{"nominatedNodeName":null}}`,
},
{
name:                     "Should not make patch request if nominated node is already cleared",
currentNominatedNodeName: "",
expectedPatchRequests:    0,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
_, ctx := ktesting.NewTestContext(t)
actualPatchRequests := 0
var actualPatchData string
cs := &clientsetfake.Clientset{}
cs.AddReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
actualPatchRequests++
patch := action.(clienttesting.PatchAction)
actualPatchData = string(patch.GetPatch())
// For this test, we don't care about the result of the patched pod, just that we got the expected
// patch request, so just returning &v1.Pod{} here is OK because scheduler doesn't use the response.
return true, &v1.Pod{}, nil
})

pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "foo"},
Status:     v1.PodStatus{NominatedNodeName: test.currentNominatedNodeName},
}

ctx, cancel := context.WithCancel(ctx)
defer cancel()
if err := ClearNominatedNodeName(ctx, cs, pod); err != nil {
t.Fatalf("Error calling removeNominatedNodeName: %v", err)
}

if actualPatchRequests != test.expectedPatchRequests {
t.Fatalf("Actual patch requests (%d) dos not equal expected patch requests (%d)", actualPatchRequests, test.expectedPatchRequests)
}

if test.expectedPatchRequests > 0 && actualPatchData != test.expectedPatchData {
t.Fatalf("Patch data mismatch: Actual was %v, but expected %v", actualPatchData, test.expectedPatchData)
}
})
}
}

func TestPatchPodStatus(t *testing.T) {
tests := []struct {
name string
@@ -366,7 +309,7 @@ func TestPatchPodStatus(t *testing.T) {
_, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
err = PatchPodStatus(ctx, client, &tc.pod, &tc.statusToUpdate)
err = PatchPodStatus(ctx, client, tc.pod.Name, tc.pod.Namespace, &tc.pod.Status, &tc.statusToUpdate)
if err != nil && tc.validateErr == nil {
// shouldn't be error
t.Fatal(err)
|
||||
|
||||
@@ -17,54 +17,70 @@ limitations under the License.
|
||||
package bind
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
||||
fwk "k8s.io/kube-scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
st "k8s.io/kubernetes/pkg/scheduler/testing"
|
||||
testutil "k8s.io/kubernetes/test/integration/util"
|
||||
"k8s.io/kubernetes/test/utils/ktesting"
|
||||
)
|
||||
|
||||
// TestDefaultBinder tests the binding process in the scheduler.
|
||||
func TestDefaultBinder(t *testing.T) {
|
||||
testCtx := testutil.InitTestSchedulerWithOptions(t, testutil.InitTestAPIServer(t, "", nil), 0)
|
||||
testutil.SyncSchedulerInformerFactory(testCtx)
|
||||
for _, asyncAPICallsEnabled := range []bool{true, false} {
|
||||
t.Run(fmt.Sprintf("Async API calls enabled: %v", asyncAPICallsEnabled), func(t *testing.T) {
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled)
|
||||
|
||||
// Add a node.
|
||||
node, err := testutil.CreateNode(testCtx.ClientSet, st.MakeNode().Name("testnode").Obj())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
testCtx := testutil.InitTestSchedulerWithOptions(t, testutil.InitTestAPIServer(t, "", nil), 0)
|
||||
testutil.SyncSchedulerInformerFactory(testCtx)
|
||||
if testCtx.Scheduler.APIDispatcher != nil {
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
testCtx.Scheduler.APIDispatcher.Run(logger)
|
||||
defer testCtx.Scheduler.APIDispatcher.Close()
|
||||
}
|
||||
|
||||
tests := map[string]struct {
|
||||
anotherUID bool
|
||||
wantStatusCode fwk.Code
|
||||
}{
|
||||
"same UID": {
|
||||
wantStatusCode: fwk.Success,
|
||||
},
|
||||
"different UID": {
|
||||
anotherUID: true,
|
||||
wantStatusCode: fwk.Error,
|
||||
},
|
||||
}
|
||||
for name, tc := range tests {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
pod, err := testutil.CreatePausePodWithResource(testCtx.ClientSet, "fixed-name", testCtx.NS.Name, nil)
|
||||
// Add a node.
|
||||
node, err := testutil.CreateNode(testCtx.ClientSet, st.MakeNode().Name("testnode").Obj())
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create pod: %v", err)
|
||||
}
|
||||
defer testutil.CleanupPods(testCtx.Ctx, testCtx.ClientSet, t, []*corev1.Pod{pod})
|
||||
|
||||
podCopy := pod.DeepCopy()
|
||||
if tc.anotherUID {
|
||||
podCopy.UID = "another"
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
status := testCtx.Scheduler.Profiles["default-scheduler"].RunBindPlugins(testCtx.Ctx, framework.NewCycleState(), podCopy, node.Name)
|
||||
if code := status.Code(); code != tc.wantStatusCode {
|
||||
t.Errorf("Bind returned code %s, want %s", code, tc.wantStatusCode)
|
||||
tests := map[string]struct {
|
||||
anotherUID bool
|
||||
wantStatusCode fwk.Code
|
||||
}{
|
||||
"same UID": {
|
||||
wantStatusCode: fwk.Success,
|
||||
},
|
||||
"different UID": {
|
||||
anotherUID: true,
|
||||
wantStatusCode: fwk.Error,
|
||||
},
|
||||
}
|
||||
for name, tc := range tests {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
pod, err := testutil.CreatePausePodWithResource(testCtx.ClientSet, "fixed-name", testCtx.NS.Name, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create pod: %v", err)
|
||||
}
|
||||
defer testutil.CleanupPods(testCtx.Ctx, testCtx.ClientSet, t, []*corev1.Pod{pod})
|
||||
|
||||
podCopy := pod.DeepCopy()
|
||||
if tc.anotherUID {
|
||||
podCopy.UID = "another"
|
||||
}
|
||||
|
||||
status := testCtx.Scheduler.Profiles["default-scheduler"].RunBindPlugins(testCtx.Ctx, framework.NewCycleState(), podCopy, node.Name)
|
||||
if code := status.Code(); code != tc.wantStatusCode {
|
||||
t.Errorf("Bind returned code %s, want %s", code, tc.wantStatusCode)
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -212,80 +212,89 @@ func TestUpdateNominatedNodeName(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
for _, qHintEnabled := range []bool{false, true} {
|
||||
t.Run(fmt.Sprintf("%s, with queuehint(%v)", tt.name, qHintEnabled), func(t *testing.T) {
|
||||
if !qHintEnabled {
|
||||
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, utilfeature.DefaultFeatureGate, version.MustParse("1.33"))
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerQueueingHints, false)
|
||||
for _, asyncAPICallsEnabled := range []bool{false, true} {
|
||||
if !qHintEnabled && asyncAPICallsEnabled {
|
||||
// This can't happen.
|
||||
continue
|
||||
}
|
||||
// Set the SchedulerPopFromBackoffQ feature to false, because when it's enabled, we can't be sure the pod won't be popped from the backoffQ.
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerPopFromBackoffQ, false)
|
||||
t.Run(fmt.Sprintf("%s (Queueing hints enabled: %v, Async API calls enabled: %v)", tt.name, qHintEnabled, asyncAPICallsEnabled), func(t *testing.T) {
|
||||
if !qHintEnabled {
|
||||
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, utilfeature.DefaultFeatureGate, version.MustParse("1.33"))
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerQueueingHints, false)
|
||||
} else {
|
||||
// Handle SchedulerAsyncAPICalls feature only in 1.34+.
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled)
|
||||
}
|
||||
// Set the SchedulerPopFromBackoffQ feature to false, because when it's enabled, we can't be sure the pod won't be popped from the backoffQ.
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerPopFromBackoffQ, false)
|
||||
|
||||
testCtx, teardown := schedulerutils.InitTestSchedulerForFrameworkTest(t, testContext, 0, true,
|
||||
scheduler.WithClock(fakeClock),
|
||||
// UpdateFunc needs to be called when the nominated pod is still in the backoff queue, thus small, but non 0 value.
|
||||
scheduler.WithPodInitialBackoffSeconds(int64(testBackoff.Seconds())),
|
||||
scheduler.WithPodMaxBackoffSeconds(int64(testBackoff.Seconds())),
|
||||
)
|
||||
defer teardown()
|
||||
testCtx, teardown := schedulerutils.InitTestSchedulerForFrameworkTest(t, testContext, 0, true,
|
||||
scheduler.WithClock(fakeClock),
|
||||
// UpdateFunc needs to be called when the nominated pod is still in the backoff queue, thus small, but non 0 value.
|
||||
scheduler.WithPodInitialBackoffSeconds(int64(testBackoff.Seconds())),
|
||||
scheduler.WithPodMaxBackoffSeconds(int64(testBackoff.Seconds())),
|
||||
)
|
||||
defer teardown()
|
||||
|
||||
_, err := testutils.CreateNode(testCtx.ClientSet, testNode)
|
||||
if err != nil {
|
||||
t.Fatalf("Creating node error: %v", err)
|
||||
}
|
||||
_, err := testutils.CreateNode(testCtx.ClientSet, testNode)
|
||||
if err != nil {
|
||||
t.Fatalf("Creating node error: %v", err)
|
||||
}
|
||||
|
||||
// Ensure node is present in scheduler cache.
|
||||
if err := testutils.WaitForNodesInCache(testCtx.Ctx, testCtx.Scheduler, 1); err != nil {
|
||||
t.Fatalf("Waiting for node in cache error: %v", err)
|
||||
}
|
||||
// Ensure node is present in scheduler cache.
|
||||
if err := testutils.WaitForNodesInCache(testCtx.Ctx, testCtx.Scheduler, 1); err != nil {
|
||||
t.Fatalf("Waiting for node in cache error: %v", err)
|
||||
}
|
||||
|
||||
// Create initial low-priority pod and wait until it's scheduled.
|
||||
pod, err := testutils.CreatePausePod(testCtx.ClientSet, podLow)
|
||||
if err != nil {
|
||||
t.Fatalf("Creating pod error: %v", err)
|
||||
}
|
||||
if err := testutils.WaitForPodToSchedule(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
|
||||
t.Fatalf("Pod %v was not scheduled: %v", pod.Name, err)
|
||||
}
|
||||
// Create initial low-priority pod and wait until it's scheduled.
|
||||
pod, err := testutils.CreatePausePod(testCtx.ClientSet, podLow)
|
||||
if err != nil {
|
||||
t.Fatalf("Creating pod error: %v", err)
|
||||
}
|
||||
if err := testutils.WaitForPodToSchedule(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
|
||||
t.Fatalf("Pod %v was not scheduled: %v", pod.Name, err)
|
||||
}
|
||||
|
||||
// Create mid-priority pod and wait until it becomes nominated (preempt low-priority pod) and remain uschedulable.
|
||||
pod, err = testutils.CreatePausePod(testCtx.ClientSet, podMidNominated)
|
||||
if err != nil {
|
||||
t.Fatalf("Creating pod error: %v", err)
|
||||
}
|
||||
if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
|
||||
t.Errorf("NominatedNodeName field was not set for pod %v: %v", pod.Name, err)
|
||||
}
|
||||
if err := testutils.WaitForPodUnschedulable(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
|
||||
t.Errorf("Pod %v haven't become unschedulabe: %v", pod.Name, err)
|
||||
}
|
||||
// Create mid-priority pod and wait until it becomes nominated (preempt low-priority pod) and remain uschedulable.
|
||||
pod, err = testutils.CreatePausePod(testCtx.ClientSet, podMidNominated)
|
||||
if err != nil {
|
||||
t.Fatalf("Creating pod error: %v", err)
|
||||
}
|
||||
if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
|
||||
t.Errorf("NominatedNodeName field was not set for pod %v: %v", pod.Name, err)
|
||||
}
|
||||
if err := testutils.WaitForPodUnschedulable(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
|
||||
t.Errorf("Pod %v haven't become unschedulabe: %v", pod.Name, err)
|
||||
}
|
||||
|
||||
// Remove the initial low-priority pod, which will move the nominated unschedulable pod back to the backoff queue.
|
||||
if err := testutils.DeletePod(testCtx.ClientSet, podLow.Name, podLow.Namespace); err != nil {
|
||||
t.Fatalf("Deleting pod error: %v", err)
|
||||
}
|
||||
// Remove the initial low-priority pod, which will move the nominated unschedulable pod back to the backoff queue.
|
||||
if err := testutils.DeletePod(testCtx.ClientSet, podLow.Name, podLow.Namespace); err != nil {
|
||||
t.Fatalf("Deleting pod error: %v", err)
|
||||
}
|
||||
|
||||
// Create another low-priority pods which cannot be scheduled because the mid-priority pod is nominated on the node and the node doesn't have enough resource to have two pods both.
|
||||
pod, err = testutils.CreatePausePod(testCtx.ClientSet, podLow)
|
||||
if err != nil {
|
||||
t.Fatalf("Creating pod error: %v", err)
|
||||
}
|
||||
if err := testutils.WaitForPodUnschedulable(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
|
||||
t.Fatalf("Pod %v was not scheduled: %v", pod.Name, err)
|
||||
}
|
||||
// Create another low-priority pods which cannot be scheduled because the mid-priority pod is nominated on the node and the node doesn't have enough resource to have two pods both.
|
||||
pod, err = testutils.CreatePausePod(testCtx.ClientSet, podLow)
|
||||
if err != nil {
|
||||
t.Fatalf("Creating pod error: %v", err)
|
||||
}
|
||||
if err := testutils.WaitForPodUnschedulable(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
|
||||
t.Fatalf("Pod %v was not scheduled: %v", pod.Name, err)
|
||||
}
|
||||
|
||||
// Update causing the nominated pod to be removed or to get its nominated node name removed, which should trigger scheduling of the low priority pod.
|
||||
// Note that the update has to happen since the nominated pod is still in the backoffQ to actually test updates of nominated, but not bound yet pods.
|
||||
tt.updateFunc(testCtx)
|
||||
// Update causing the nominated pod to be removed or to get its nominated node name removed, which should trigger scheduling of the low priority pod.
|
||||
// Note that the update has to happen since the nominated pod is still in the backoffQ to actually test updates of nominated, but not bound yet pods.
|
||||
tt.updateFunc(testCtx)
|
||||
|
||||
// Advance time by the 2 * maxPodBackoffSeconds to move low priority pod out of the backoff queue.
|
||||
fakeClock.Step(2 * testBackoff)
|
||||
// Advance time by the 2 * maxPodBackoffSeconds to move low priority pod out of the backoff queue.
|
||||
fakeClock.Step(2 * testBackoff)
|
||||
|
||||
// Expect the low-priority pod is notified about unnominated mid-pririty pod and gets scheduled, as it should fit this time.
|
||||
if err := testutils.WaitForPodToSchedule(testCtx.Ctx, testCtx.ClientSet, podLow); err != nil {
|
||||
t.Fatalf("Pod %v was not scheduled: %v", podLow.Name, err)
|
||||
}
|
||||
testutils.CleanupPods(testCtx.Ctx, testCtx.ClientSet, t, cleanupPods)
|
||||
})
|
||||
// Expect the low-priority pod is notified about unnominated mid-pririty pod and gets scheduled, as it should fit this time.
|
||||
if err := testutils.WaitForPodToSchedule(testCtx.Ctx, testCtx.ClientSet, podLow); err != nil {
|
||||
t.Fatalf("Pod %v was not scheduled: %v", podLow.Name, err)
|
||||
}
|
||||
testutils.CleanupPods(testCtx.Ctx, testCtx.ClientSet, t, cleanupPods)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -171,16 +171,6 @@ func TestPreemption(t *testing.T) {
|
||||
}},
|
||||
})
|
||||
|
||||
testCtx := testutils.InitTestSchedulerWithOptions(t,
|
||||
testutils.InitTestAPIServer(t, "preemption", nil),
|
||||
0,
|
||||
scheduler.WithProfiles(cfg.Profiles...),
|
||||
scheduler.WithFrameworkOutOfTreeRegistry(registry))
|
||||
testutils.SyncSchedulerInformerFactory(testCtx)
|
||||
go testCtx.Scheduler.Run(testCtx.Ctx)
|
||||
|
||||
cs := testCtx.ClientSet
|
||||
|
||||
defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
|
||||
v1.ResourceCPU: *resource.NewMilliQuantity(100, resource.DecimalSI),
|
||||
v1.ResourceMemory: *resource.NewQuantity(100, resource.DecimalSI)},
|
||||
@@ -201,9 +191,8 @@ func TestPreemption(t *testing.T) {
|
||||
initTokens: maxTokens,
|
||||
existingPods: []*v1.Pod{
|
||||
initPausePod(&testutils.PausePodConfig{
|
||||
Name: "victim-pod",
|
||||
Namespace: testCtx.NS.Name,
|
||||
Priority: &lowPriority,
|
||||
Name: "victim-pod",
|
||||
Priority: &lowPriority,
|
||||
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
|
||||
v1.ResourceCPU: *resource.NewMilliQuantity(400, resource.DecimalSI),
|
||||
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
|
||||
@@ -211,9 +200,8 @@ func TestPreemption(t *testing.T) {
|
||||
}),
|
||||
},
|
||||
pod: initPausePod(&testutils.PausePodConfig{
|
||||
Name: "preemptor-pod",
|
||||
Namespace: testCtx.NS.Name,
|
||||
Priority: &highPriority,
|
||||
Name: "preemptor-pod",
|
||||
Priority: &highPriority,
|
||||
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
|
||||
v1.ResourceCPU: *resource.NewMilliQuantity(300, resource.DecimalSI),
|
||||
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
|
||||
@@ -226,9 +214,8 @@ func TestPreemption(t *testing.T) {
|
||||
initTokens: 1,
|
||||
existingPods: []*v1.Pod{
|
||||
initPausePod(&testutils.PausePodConfig{
|
||||
Name: "victim-pod",
|
||||
Namespace: testCtx.NS.Name,
|
||||
Priority: &lowPriority,
|
||||
Name: "victim-pod",
|
||||
Priority: &lowPriority,
|
||||
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
|
||||
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
|
||||
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
|
||||
@@ -236,9 +223,8 @@ func TestPreemption(t *testing.T) {
|
||||
}),
|
||||
},
|
||||
pod: initPausePod(&testutils.PausePodConfig{
|
||||
Name: "preemptor-pod",
|
||||
Namespace: testCtx.NS.Name,
|
||||
Priority: &highPriority,
|
||||
Name: "preemptor-pod",
|
||||
Priority: &highPriority,
|
||||
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
|
||||
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
|
||||
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
|
||||
@@ -256,9 +242,8 @@ func TestPreemption(t *testing.T) {
|
||||
enablePreFilter: true,
|
||||
existingPods: []*v1.Pod{
|
||||
initPausePod(&testutils.PausePodConfig{
|
||||
Name: "victim-pod",
|
||||
Namespace: testCtx.NS.Name,
|
||||
Priority: &lowPriority,
|
||||
Name: "victim-pod",
|
||||
Priority: &lowPriority,
|
||||
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
|
||||
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
|
||||
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
|
||||
@@ -266,9 +251,8 @@ func TestPreemption(t *testing.T) {
|
||||
}),
|
||||
},
|
||||
pod: initPausePod(&testutils.PausePodConfig{
|
||||
Name: "preemptor-pod",
|
||||
Namespace: testCtx.NS.Name,
|
||||
Priority: &highPriority,
|
||||
Name: "preemptor-pod",
|
||||
Priority: &highPriority,
|
||||
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
|
||||
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
|
||||
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
|
||||
@@ -283,9 +267,8 @@ func TestPreemption(t *testing.T) {
|
||||
unresolvable: true,
|
||||
existingPods: []*v1.Pod{
|
||||
initPausePod(&testutils.PausePodConfig{
|
||||
Name: "victim-pod",
|
||||
Namespace: testCtx.NS.Name,
|
||||
Priority: &lowPriority,
|
||||
Name: "victim-pod",
|
||||
Priority: &lowPriority,
|
||||
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
|
||||
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
|
||||
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
|
||||
@@ -293,9 +276,8 @@ func TestPreemption(t *testing.T) {
|
||||
}),
|
||||
},
|
||||
pod: initPausePod(&testutils.PausePodConfig{
|
||||
Name: "preemptor-pod",
|
||||
Namespace: testCtx.NS.Name,
|
||||
Priority: &highPriority,
|
||||
Name: "preemptor-pod",
|
||||
Priority: &highPriority,
|
||||
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
|
||||
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
|
||||
v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
|
||||
@@ -308,13 +290,13 @@ func TestPreemption(t *testing.T) {
|
||||
initTokens: maxTokens,
|
||||
existingPods: []*v1.Pod{
|
||||
initPausePod(&testutils.PausePodConfig{
|
||||
Name: "pod-0", Namespace: testCtx.NS.Name,
|
||||
Name: "pod-0",
|
||||
Priority: &mediumPriority,
|
||||
Labels: map[string]string{"pod": "p0"},
|
||||
Resources: defaultPodRes,
|
||||
}),
|
||||
initPausePod(&testutils.PausePodConfig{
|
||||
Name: "pod-1", Namespace: testCtx.NS.Name,
|
||||
Name: "pod-1",
|
||||
Priority: &lowPriority,
|
||||
Labels: map[string]string{"pod": "p1"},
|
||||
Resources: defaultPodRes,
|
||||
@@ -341,7 +323,6 @@ func TestPreemption(t *testing.T) {
|
||||
// A higher priority pod with anti-affinity.
|
||||
pod: initPausePod(&testutils.PausePodConfig{
|
||||
Name: "preemptor-pod",
|
||||
Namespace: testCtx.NS.Name,
|
||||
Priority: &highPriority,
|
||||
Labels: map[string]string{"pod": "preemptor"},
|
||||
Resources: defaultPodRes,
|
||||
@@ -372,13 +353,13 @@ func TestPreemption(t *testing.T) {
|
||||
initTokens: maxTokens,
|
||||
existingPods: []*v1.Pod{
|
||||
initPausePod(&testutils.PausePodConfig{
|
||||
Name: "pod-0", Namespace: testCtx.NS.Name,
|
||||
Name: "pod-0",
|
||||
Priority: &mediumPriority,
|
||||
Labels: map[string]string{"pod": "p0"},
|
||||
Resources: defaultPodRes,
|
||||
}),
|
||||
initPausePod(&testutils.PausePodConfig{
|
||||
Name: "pod-1", Namespace: testCtx.NS.Name,
|
||||
Name: "pod-1",
|
||||
Priority: &highPriority,
|
||||
Labels: map[string]string{"pod": "p1"},
|
||||
Resources: defaultPodRes,
|
||||
@@ -405,7 +386,6 @@ func TestPreemption(t *testing.T) {
|
||||
// A higher priority pod with anti-affinity.
|
||||
pod: initPausePod(&testutils.PausePodConfig{
|
||||
Name: "preemptor-pod",
|
||||
Namespace: testCtx.NS.Name,
|
||||
Priority: &highPriority,
|
||||
Labels: map[string]string{"pod": "preemptor"},
|
||||
Resources: defaultPodRes,
|
||||
@@ -439,61 +419,77 @@ func TestPreemption(t *testing.T) {
|
||||
v1.ResourceMemory: "500",
|
||||
}
|
||||
nodeObject := st.MakeNode().Name("node1").Capacity(nodeRes).Label("node", "node1").Obj()
|
||||
if _, err := createNode(testCtx.ClientSet, nodeObject); err != nil {
|
||||
t.Fatalf("Error creating node: %v", err)
|
||||
}
|
||||
|
||||
for _, asyncPreemptionEnabled := range []bool{true, false} {
|
||||
for _, test := range tests {
|
||||
t.Run(fmt.Sprintf("%s (Async preemption enabled: %v)", test.name, asyncPreemptionEnabled), func(t *testing.T) {
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled)
|
||||
for _, asyncAPICallsEnabled := range []bool{true, false} {
|
||||
for _, test := range tests {
|
||||
t.Run(fmt.Sprintf("%s (Async preemption enabled: %v, Async API calls enabled: %v)", test.name, asyncPreemptionEnabled, asyncAPICallsEnabled), func(t *testing.T) {
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled)
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled)
|
||||
|
||||
filter.Tokens = test.initTokens
|
||||
filter.EnablePreFilter = test.enablePreFilter
|
||||
filter.Unresolvable = test.unresolvable
|
||||
pods := make([]*v1.Pod, len(test.existingPods))
|
||||
// Create and run existingPods.
|
||||
for i, p := range test.existingPods {
|
||||
pods[i], err = runPausePod(cs, p)
|
||||
if err != nil {
|
||||
t.Fatalf("Error running pause pod: %v", err)
|
||||
testCtx := testutils.InitTestSchedulerWithOptions(t,
|
||||
testutils.InitTestAPIServer(t, "preemption", nil),
|
||||
0,
|
||||
scheduler.WithProfiles(cfg.Profiles...),
|
||||
scheduler.WithFrameworkOutOfTreeRegistry(registry))
|
||||
testutils.SyncSchedulerInformerFactory(testCtx)
|
||||
go testCtx.Scheduler.Run(testCtx.Ctx)
|
||||
|
||||
if _, err := createNode(testCtx.ClientSet, nodeObject); err != nil {
|
||||
t.Fatalf("Error creating node: %v", err)
|
||||
}
|
||||
}
|
||||
// Create the "pod".
|
||||
            cs := testCtx.ClientSet

            filter.Tokens = test.initTokens
            filter.EnablePreFilter = test.enablePreFilter
            filter.Unresolvable = test.unresolvable
            pods := make([]*v1.Pod, len(test.existingPods))
            // Create and run existingPods.
            for i, p := range test.existingPods {
                p.Namespace = testCtx.NS.Name
                pods[i], err = runPausePod(cs, p)
                if err != nil {
                    t.Fatalf("Error running pause pod: %v", err)
                }
            }
            // Create the "pod".
            test.pod.Namespace = testCtx.NS.Name
            preemptor, err := createPausePod(cs, test.pod)
            if err != nil {
                t.Errorf("Error while creating high priority pod: %v", err)
            }
            // Wait for preemption of pods and make sure the other ones are not preempted.
            for i, p := range pods {
                if _, found := test.preemptedPodIndexes[i]; found {
                    if err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, wait.ForeverTestTimeout, false,
                        podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil {
                        t.Errorf("Pod %v/%v is not getting evicted.", p.Namespace, p.Name)
                    }
                    pod, err := cs.CoreV1().Pods(p.Namespace).Get(testCtx.Ctx, p.Name, metav1.GetOptions{})
                    if err != nil {
                        t.Errorf("Error %v when getting the updated status for pod %v/%v ", err, p.Namespace, p.Name)
                    }
                    _, cond := podutil.GetPodCondition(&pod.Status, v1.DisruptionTarget)
                    if cond == nil {
                        t.Errorf("Pod %q does not have the expected condition: %q", klog.KObj(pod), v1.DisruptionTarget)
                    }
                } else if p.DeletionTimestamp != nil {
                    t.Errorf("Didn't expect pod %v to get preempted.", p.Name)
                }
            }
            // Also check that the preemptor pod gets the NominatedNodeName field set.
            if len(test.preemptedPodIndexes) > 0 {
                if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, cs, preemptor); err != nil {
                    t.Errorf("NominatedNodeName field was not set for pod %v: %v", preemptor.Name, err)
                }
            }

            // Cleanup
            pods = append(pods, preemptor)
            testutils.CleanupPods(testCtx.Ctx, cs, t, pods)
        })
    }
}

@@ -809,177 +805,185 @@ func TestAsyncPreemption(t *testing.T) {
    // All test cases have the same node.
    node := st.MakeNode().Name("node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Obj()
    for _, asyncAPICallsEnabled := range []bool{true, false} {
        for _, test := range tests {
            t.Run(fmt.Sprintf("%s (Async API calls enabled: %v)", test.name, asyncAPICallsEnabled), func(t *testing.T) {
                featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled)

                // We need to use a custom preemption plugin to test async preemption behavior
                delayedPreemptionPluginName := "delay-preemption"
                // keyed by the pod name
                preemptionDoneChannels := make(map[string]chan struct{})
                defer func() {
                    for _, ch := range preemptionDoneChannels {
                        close(ch)
                    }
                }()
                registry := make(frameworkruntime.Registry)
                var preemptionPlugin *defaultpreemption.DefaultPreemption
                err := registry.Register(delayedPreemptionPluginName, func(c context.Context, r runtime.Object, fh framework.Handle) (framework.Plugin, error) {
                    p, err := frameworkruntime.FactoryAdapter(plfeature.Features{EnableAsyncPreemption: true}, defaultpreemption.New)(c, &config.DefaultPreemptionArgs{
                        // Set default values to pass the validation at the initialization, not related to the test.
                        MinCandidateNodesPercentage: 10,
                        MinCandidateNodesAbsolute:   100,
                    }, fh)
                    if err != nil {
                        return nil, fmt.Errorf("error creating default preemption plugin: %w", err)
                    }

                    var ok bool
                    preemptionPlugin, ok = p.(*defaultpreemption.DefaultPreemption)
                    if !ok {
                        return nil, fmt.Errorf("unexpected plugin type %T", p)
                    }

                    preemptPodFn := preemptionPlugin.Evaluator.PreemptPod
                    preemptionPlugin.Evaluator.PreemptPod = func(ctx context.Context, c preemption.Candidate, preemptor, victim *v1.Pod, pluginName string) error {
                        // block the preemption goroutine to complete until the test case allows it to proceed.
                        if ch, ok := preemptionDoneChannels[preemptor.Name]; ok {
                            <-ch
                        }
                        return preemptPodFn(ctx, c, preemptor, victim, pluginName)
                    }

                    return preemptionPlugin, nil
                })
                if err != nil {
                    t.Fatalf("Error registering a filter: %v", err)
                }
                cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{
                    Profiles: []configv1.KubeSchedulerProfile{{
                        SchedulerName: ptr.To(v1.DefaultSchedulerName),
                        Plugins: &configv1.Plugins{
                            MultiPoint: configv1.PluginSet{
                                Enabled: []configv1.Plugin{
                                    {Name: delayedPreemptionPluginName},
                                },
                                Disabled: []configv1.Plugin{
                                    {Name: names.DefaultPreemption},
                                },
                            },
                        },
                    }},
                })

                // It initializes the scheduler, but doesn't start.
                // We manually trigger the scheduling cycle.
                testCtx := testutils.InitTestSchedulerWithOptions(t,
                    testutils.InitTestAPIServer(t, "preemption", nil),
                    0,
                    scheduler.WithProfiles(cfg.Profiles...),
                    scheduler.WithFrameworkOutOfTreeRegistry(registry),
                    // disable backoff
                    scheduler.WithPodMaxBackoffSeconds(0),
                    scheduler.WithPodInitialBackoffSeconds(0),
                )
                testutils.SyncSchedulerInformerFactory(testCtx)
                cs := testCtx.ClientSet

                if preemptionPlugin == nil {
                    t.Fatalf("the preemption plugin should be initialized")
                }

                logger, _ := ktesting.NewTestContext(t)
                if testCtx.Scheduler.APIDispatcher != nil {
                    testCtx.Scheduler.APIDispatcher.Run(logger)
                    defer testCtx.Scheduler.APIDispatcher.Close()
                }
                testCtx.Scheduler.SchedulingQueue.Run(logger)
                defer testCtx.Scheduler.SchedulingQueue.Close()

                featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, true)

                createdPods := []*v1.Pod{}
                defer testutils.CleanupPods(testCtx.Ctx, cs, t, createdPods)

                ctx, cancel := context.WithCancel(context.Background())
                defer cancel()

                if _, err := cs.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil {
                    t.Fatalf("Failed to create an initial Node %q: %v", node.Name, err)
                }
                defer func() {
                    if err := cs.CoreV1().Nodes().Delete(ctx, node.Name, metav1.DeleteOptions{}); err != nil {
                        t.Fatalf("Failed to delete the Node %q: %v", node.Name, err)
                    }
                }()

                for _, scenario := range test.scenarios {
                    t.Logf("Running scenario: %s", scenario.name)
                    switch {
                    case scenario.createPod != nil:
                        if scenario.createPod.count == nil {
                            scenario.createPod.count = ptr.To(1)
                        }

                        for i := 0; i < *scenario.createPod.count; i++ {
                            pod, err := cs.CoreV1().Pods(testCtx.NS.Name).Create(ctx, scenario.createPod.pod, metav1.CreateOptions{})
                            if err != nil {
                                t.Fatalf("Failed to create a Pod %q: %v", pod.Name, err)
                            }
                            createdPods = append(createdPods, pod)
                        }
                    case scenario.schedulePod != nil:
                        lastFailure := ""
                        if err := wait.PollUntilContextTimeout(testCtx.Ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
                            if len(testCtx.Scheduler.SchedulingQueue.PodsInActiveQ()) == 0 {
                                lastFailure = fmt.Sprintf("Expected the pod %s to be scheduled, but no pod arrives at the activeQ", scenario.schedulePod.podName)
                                return false, nil
                            }

                            if testCtx.Scheduler.SchedulingQueue.PodsInActiveQ()[0].Name != scenario.schedulePod.podName {
                                // need to wait more because maybe the queue will get another Pod that higher priority than the current top pod.
                                lastFailure = fmt.Sprintf("The pod %s is expected to be scheduled, but the top Pod is %s", scenario.schedulePod.podName, testCtx.Scheduler.SchedulingQueue.PodsInActiveQ()[0].Name)
                                return false, nil
                            }

                            return true, nil
                        }); err != nil {
                            t.Fatal(lastFailure)
                        }

                        preemptionDoneChannels[scenario.schedulePod.podName] = make(chan struct{})
                        testCtx.Scheduler.ScheduleOne(testCtx.Ctx)
                        if scenario.schedulePod.expectSuccess {
                            if err := wait.PollUntilContextTimeout(testCtx.Ctx, 200*time.Millisecond, wait.ForeverTestTimeout, false, testutils.PodScheduled(cs, testCtx.NS.Name, scenario.schedulePod.podName)); err != nil {
                                t.Fatalf("Expected the pod %s to be scheduled", scenario.schedulePod.podName)
                            }
                        } else {
                            if !podInUnschedulablePodPool(t, testCtx.Scheduler.SchedulingQueue, scenario.schedulePod.podName) {
                                t.Fatalf("Expected the pod %s to be in the queue after the scheduling attempt", scenario.schedulePod.podName)
                            }
                        }
                    case scenario.completePreemption != "":
                        if _, ok := preemptionDoneChannels[scenario.completePreemption]; !ok {
                            t.Fatalf("The preemptor Pod %q is not running preemption", scenario.completePreemption)
                        }

                        close(preemptionDoneChannels[scenario.completePreemption])
                        delete(preemptionDoneChannels, scenario.completePreemption)
                    case scenario.podGatedInQueue != "":
                        // make sure the Pod is in the queue in the first place.
                        if !podInUnschedulablePodPool(t, testCtx.Scheduler.SchedulingQueue, scenario.podGatedInQueue) {
                            t.Fatalf("Expected the pod %s to be in the queue", scenario.podGatedInQueue)
                        }

                        // Make sure this Pod is gated by the preemption at PreEnqueue extension point
                        // by activating the Pod and see if it's still in the unsched pod pool.
                        testCtx.Scheduler.SchedulingQueue.Activate(logger, map[string]*v1.Pod{scenario.podGatedInQueue: st.MakePod().Namespace(testCtx.NS.Name).Name(scenario.podGatedInQueue).Obj()})
                        if !podInUnschedulablePodPool(t, testCtx.Scheduler.SchedulingQueue, scenario.podGatedInQueue) {
                            t.Fatalf("Expected the pod %s to be in the queue even after the activation", scenario.podGatedInQueue)
                        }
                    case scenario.podRunningPreemption != nil:
                        if err := wait.PollUntilContextTimeout(testCtx.Ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
                            return preemptionPlugin.Evaluator.IsPodRunningPreemption(createdPods[*scenario.podRunningPreemption].GetUID()), nil
                        }); err != nil {
                            t.Fatalf("Expected the pod %s to be running preemption", createdPods[*scenario.podRunningPreemption].Name)
                        }
                    }
                }
            })
        }
    }
}

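The pattern this PR applies across the integration tests above is the same each time: an existing table-driven test is wrapped in a loop over the SchedulerAsyncAPICalls feature gate so that every case runs once with asynchronous API calls enabled and once with them disabled. A minimal, self-contained sketch of that wrapping (the test name and case list below are made up for illustration; only the gate wiring mirrors the diff):

package scheduler

import (
    "fmt"
    "testing"

    utilfeature "k8s.io/apiserver/pkg/util/feature"
    featuregatetesting "k8s.io/component-base/featuregate/testing"
    "k8s.io/kubernetes/pkg/features"
)

// TestGateMatrixSketch is illustrative only and not part of the diff.
func TestGateMatrixSketch(t *testing.T) {
    tests := []struct{ name string }{
        {name: "example case"},
    }
    for _, asyncAPICallsEnabled := range []bool{true, false} {
        for _, test := range tests {
            t.Run(fmt.Sprintf("%s (Async API calls enabled: %v)", test.name, asyncAPICallsEnabled), func(t *testing.T) {
                // Flip the gate for this subtest only; it is restored automatically on cleanup.
                featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled)
                // ... exercise the scheduler with the gate in this state ...
            })
        }
    }
}
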
@@ -1680,83 +1684,86 @@ func TestNominatedNodeCleanUp(t *testing.T) {
    }

    for _, asyncPreemptionEnabled := range []bool{true, false} {
        for _, asyncAPICallsEnabled := range []bool{true, false} {
            for _, tt := range tests {
                t.Run(fmt.Sprintf("%s (Async preemption enabled: %v, Async API calls enabled: %v)", tt.name, asyncPreemptionEnabled, asyncAPICallsEnabled), func(t *testing.T) {
                    featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled)
                    featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled)

                    cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{
                        Profiles: []configv1.KubeSchedulerProfile{{
                            SchedulerName: ptr.To(v1.DefaultSchedulerName),
                            Plugins:       tt.customPlugins,
                        }},
                    })
                    testCtx := initTest(
                        t,
                        "preemption",
                        scheduler.WithProfiles(cfg.Profiles...),
                        scheduler.WithFrameworkOutOfTreeRegistry(tt.outOfTreeRegistry),
                    )

                    cs, ns := testCtx.ClientSet, testCtx.NS.Name
                    for _, node := range tt.initNodes {
                        if _, err := createNode(cs, node); err != nil {
                            t.Fatalf("Error creating initial node %v: %v", node.Name, err)
                        }
                    }

                    // Create a node with the specified capacity.
                    nodeName := "fake-node"
                    if _, err := createNode(cs, st.MakeNode().Name(nodeName).Capacity(tt.nodeCapacity).Obj()); err != nil {
                        t.Fatalf("Error creating node %v: %v", nodeName, err)
                    }

                    // Create pods and run post check if necessary.
                    for i, pods := range tt.podsToCreate {
                        for _, p := range pods {
                            p.Namespace = ns
                            if _, err := createPausePod(cs, p); err != nil {
                                t.Fatalf("Error creating pod %v: %v", p.Name, err)
                            }
                        }
                        // If necessary, run the post check function.
                        if len(tt.postChecks) > i && tt.postChecks[i] != nil {
                            for _, p := range pods {
                                if err := tt.postChecks[i](testCtx.Ctx, cs, p); err != nil {
                                    t.Fatalf("Pod %v didn't pass the postChecks[%v]: %v", p.Name, i, err)
                                }
                            }
                        }
                    }

                    // Delete the fake node if necessary.
                    if tt.deleteFakeNode {
                        if err := cs.CoreV1().Nodes().Delete(testCtx.Ctx, nodeName, *metav1.NewDeleteOptions(0)); err != nil {
                            t.Fatalf("Node %v cannot be deleted: %v", nodeName, err)
                        }
                    }

                    // Force deleting the terminating pods if necessary.
                    // This is required if we demand to delete terminating Pods physically.
                    for _, podName := range tt.podNamesToDelete {
                        if err := deletePod(cs, podName, ns); err != nil {
                            t.Fatalf("Pod %v cannot be deleted: %v", podName, err)
                        }
                    }

                    // Verify if .status.nominatedNodeName is cleared.
                    if err := wait.PollUntilContextTimeout(testCtx.Ctx, 100*time.Millisecond, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
                        pod, err := cs.CoreV1().Pods(ns).Get(ctx, "medium", metav1.GetOptions{})
                        if err != nil {
                            t.Errorf("Error getting the medium pod: %v", err)
                        }
                        if len(pod.Status.NominatedNodeName) == 0 {
                            return true, nil
                        }
                        return false, err
                    }); err != nil {
                        t.Errorf(".status.nominatedNodeName of the medium pod was not cleared: %v", err)
                    }
                })
            }
        }
    }
}

@@ -49,6 +49,7 @@ import (
    testfwk "k8s.io/kubernetes/test/integration/framework"
    testutils "k8s.io/kubernetes/test/integration/util"
    imageutils "k8s.io/kubernetes/test/utils/image"
    "k8s.io/kubernetes/test/utils/ktesting"
    "k8s.io/utils/ptr"
)

@@ -112,6 +113,12 @@ func TestSchedulingGates(t *testing.T) {
    )
    testutils.SyncSchedulerInformerFactory(testCtx)

    if testCtx.Scheduler.APIDispatcher != nil {
        logger, _ := ktesting.NewTestContext(t)
        testCtx.Scheduler.APIDispatcher.Run(logger)
        defer testCtx.Scheduler.APIDispatcher.Close()
    }

    cs, ns, ctx := testCtx.ClientSet, testCtx.NS.Name, testCtx.Ctx

    // Create node, so we can schedule pods.

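The same guard appears in every integration test that starts scheduler components by hand. The snippet below repeats the guard from the hunk above with the reasoning spelled out in comments; the comments are an interpretation added here, not text from the diff:

// The APIDispatcher field is nil unless the SchedulerAsyncAPICalls feature
// gate is enabled, so the guard keeps the test working under both gate values.
if testCtx.Scheduler.APIDispatcher != nil {
    logger, _ := ktesting.NewTestContext(t)
    // Start the dispatcher for the duration of the test; without it the
    // scheduler's asynchronously queued API calls would presumably never be sent.
    testCtx.Scheduler.APIDispatcher.Run(logger)
    defer testCtx.Scheduler.APIDispatcher.Close()
}
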
@@ -18,13 +18,17 @@ package scheduler
import (
    "context"
    "fmt"
    "testing"
    "time"

    v1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/util/wait"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    featuregatetesting "k8s.io/component-base/featuregate/testing"
    "k8s.io/klog/v2"
    fwk "k8s.io/kube-scheduler/framework"
    "k8s.io/kubernetes/pkg/features"
    "k8s.io/kubernetes/pkg/scheduler"
    "k8s.io/kubernetes/pkg/scheduler/framework"
    st "k8s.io/kubernetes/pkg/scheduler/testing"

@@ -36,6 +40,11 @@ var _ framework.EnqueueExtensions = &PermitPlugin{}
var _ framework.ReservePlugin = &ReservePlugin{}
var _ framework.EnqueueExtensions = &ReservePlugin{}

type ResettablePlugin interface {
    framework.Plugin
    Reset()
}

type ReservePlugin struct {
    name       string
    statusCode fwk.Code

@@ -78,6 +87,11 @@ func (rp *ReservePlugin) EventsToRegister(_ context.Context) ([]fwk.ClusterEvent
    }, nil
}

func (rp *ReservePlugin) Reset() {
    rp.numReserveCalled = 0
    rp.numUnreserveCalled = 0
}

type PermitPlugin struct {
    name       string
    statusCode fwk.Code

@@ -115,12 +129,16 @@ func (pp *PermitPlugin) EventsToRegister(_ context.Context) ([]fwk.ClusterEventW
    }, nil
}

func (pp *PermitPlugin) Reset() {
    pp.numPermitCalled = 0
}

func TestReScheduling(t *testing.T) {
    testContext := testutils.InitTestAPIServer(t, "permit-plugin", nil)
    tests := []struct {
        name   string
        plugin ResettablePlugin
        action func() error
        // The first time for pod scheduling, we make pod scheduled error or unschedulable on purpose.
        // This is controlled by wantFirstSchedulingError. By default, pod is unschedulable.
        wantFirstSchedulingError bool

@@ -130,10 +148,8 @@ func TestReScheduling(t *testing.T) {
        wantError bool
    }{
        {
            name:   "Rescheduling pod rejected by Permit Plugin",
            plugin: &PermitPlugin{name: "permit", statusCode: fwk.Unschedulable},
            action: func() error {
                _, err := testutils.CreateNode(testContext.ClientSet, st.MakeNode().Name("fake-node").Obj())
                return err

@@ -141,10 +157,8 @@ func TestReScheduling(t *testing.T) {
            wantScheduled: true,
        },
        {
            name:   "Rescheduling pod rejected by Permit Plugin with unrelated event",
            plugin: &PermitPlugin{name: "permit", statusCode: fwk.Unschedulable},
            action: func() error {
                _, err := testutils.CreatePausePod(testContext.ClientSet,
                    testutils.InitPausePod(&testutils.PausePodConfig{Name: "test-pod-2", Namespace: testContext.NS.Name}))

@@ -153,10 +167,8 @@ func TestReScheduling(t *testing.T) {
            wantScheduled: false,
        },
        {
            name:   "Rescheduling pod failed by Permit Plugin",
            plugin: &PermitPlugin{name: "permit", statusCode: fwk.Error},
            action: func() error {
                _, err := testutils.CreateNode(testContext.ClientSet, st.MakeNode().Name("fake-node").Obj())
                return err

@@ -165,10 +177,8 @@ func TestReScheduling(t *testing.T) {
            wantError: true,
        },
        {
            name:   "Rescheduling pod rejected by Reserve Plugin",
            plugin: &ReservePlugin{name: "reserve", statusCode: fwk.Unschedulable},
            action: func() error {
                _, err := testutils.CreateNode(testContext.ClientSet, st.MakeNode().Name("fake-node").Obj())
                return err

@@ -176,10 +186,8 @@ func TestReScheduling(t *testing.T) {
            wantScheduled: true,
        },
        {
            name:   "Rescheduling pod rejected by Reserve Plugin with unrelated event",
            plugin: &ReservePlugin{name: "reserve", statusCode: fwk.Unschedulable},
            action: func() error {
                _, err := testutils.CreatePausePod(testContext.ClientSet,
                    testutils.InitPausePod(&testutils.PausePodConfig{Name: "test-pod-2", Namespace: testContext.NS.Name}))

@@ -188,10 +196,8 @@ func TestReScheduling(t *testing.T) {
            wantScheduled: false,
        },
        {
            name:   "Rescheduling pod failed by Reserve Plugin",
            plugin: &ReservePlugin{name: "reserve", statusCode: fwk.Error},
            action: func() error {
                _, err := testutils.CreateNode(testContext.ClientSet, st.MakeNode().Name("fake-node").Obj())
                return err

@@ -201,54 +207,59 @@ func TestReScheduling(t *testing.T) {
        },
    }

    for _, asyncAPICallsEnabled := range []bool{true, false} {
        for _, test := range tests {
            t.Run(fmt.Sprintf("%s (Async API calls enabled: %v)", test.name, asyncAPICallsEnabled), func(t *testing.T) {
                featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled)

                // Create a plugin registry for testing. Register only a permit plugin.
                registry, prof := InitRegistryAndConfig(t, nil, test.plugin)
                t.Cleanup(test.plugin.Reset)

                testCtx, teardown := InitTestSchedulerForFrameworkTest(t, testContext, 2, true,
                    scheduler.WithProfiles(prof),
                    scheduler.WithFrameworkOutOfTreeRegistry(registry))
                defer teardown()

                pod, err := testutils.CreatePausePod(testCtx.ClientSet,
                    testutils.InitPausePod(&testutils.PausePodConfig{Name: "test-pod", Namespace: testCtx.NS.Name}))
                if err != nil {
                    t.Errorf("Error while creating a test pod: %v", err)
                }

                // The first time for scheduling, pod is error or unschedulable, controlled by wantFirstSchedulingError
                if test.wantFirstSchedulingError {
                    if err = wait.PollUntilContextTimeout(testCtx.Ctx, 10*time.Millisecond, 30*time.Second, false,
                        testutils.PodSchedulingError(testCtx.ClientSet, pod.Namespace, pod.Name)); err != nil {
                        t.Errorf("Expected a scheduling error, but got: %v", err)
                    }
                } else {
                    if err = testutils.WaitForPodUnschedulable(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
                        t.Errorf("Didn't expect the pod to be scheduled. error: %v", err)
                    }
                }

                if test.action != nil {
                    if err = test.action(); err != nil {
                        t.Errorf("Perform action() error: %v", err)
                    }
                }

                if test.wantScheduled {
                    if err = testutils.WaitForPodToSchedule(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
                        t.Errorf("Didn't expect the pod to be unschedulable. error: %v", err)
                    }
                } else if test.wantError {
                    if err = wait.PollUntilContextTimeout(testCtx.Ctx, 10*time.Millisecond, 30*time.Second, false,
                        testutils.PodSchedulingError(testCtx.ClientSet, pod.Namespace, pod.Name)); err != nil {
                        t.Errorf("Expected a scheduling error, but got: %v", err)
                    }
                } else {
                    if err = testutils.WaitForPodUnschedulable(testCtx.Ctx, testCtx.ClientSet, pod); err != nil {
                        t.Errorf("Didn't expect the pod to be scheduled. error: %v", err)
                    }
                }
            })
        }
    }
}

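Because each test case's plugin value is now reused across both runs of the gate matrix, TestReScheduling switches from a per-case plugins slice to a single ResettablePlugin and registers t.Cleanup(test.plugin.Reset), so call counters recorded under one gate value cannot leak into the next subtest. A minimal sketch of that reuse-and-reset idea (the CounterPlugin type and runOnce helper are hypothetical, for illustration only):

package scheduler

import "testing"

// CounterPlugin is an illustrative stand-in for the test plugins above: it
// records how often it was called and can be wiped between subtests.
type CounterPlugin struct {
    numCalled int
}

func (c *CounterPlugin) Name() string { return "counter" }

// Reset clears per-subtest state so a shared instance starts from zero.
func (c *CounterPlugin) Reset() { c.numCalled = 0 }

// runOnce shows where the reset hooks in: it runs when the subtest finishes,
// before the next feature-gate combination reuses the same plugin value.
func runOnce(t *testing.T, p *CounterPlugin) {
    t.Cleanup(p.Reset)
    p.numCalled++
    // ... drive a scheduling scenario and assert on p.numCalled ...
}
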
@@ -39,6 +39,16 @@
  - name: 5Nodes_QueueingHintsEnabled
    featureGates:
      SchedulerQueueingHints: true
      SchedulerAsyncAPICalls: false
    labels: [integration-test, short]
    params:
      initNodes: 5
      initPods: 5
      measurePods: 10
  - name: 5Nodes_AsyncAPICallsEnabled
    featureGates:
      SchedulerQueueingHints: true
      SchedulerAsyncAPICalls: true
    labels: [integration-test, short]
    params:
      initNodes: 5

@@ -77,6 +87,16 @@
  - name: 5000Nodes_50000Pods_QueueingHintsEnabled
    featureGates:
      SchedulerQueueingHints: true
      SchedulerAsyncAPICalls: false
    labels: [performance]
    params:
      initNodes: 5000
      initPods: 5000
      measurePods: 50000
  - name: 5000Nodes_50000Pods_AsyncAPICallsEnabled
    featureGates:
      SchedulerQueueingHints: true
      SchedulerAsyncAPICalls: true
    labels: [performance]
    params:
      initNodes: 5000

@@ -158,6 +178,16 @@
  - name: 5Nodes_QueueingHintsEnabled
    featureGates:
      SchedulerQueueingHints: true
      SchedulerAsyncAPICalls: false
    labels: [integration-test, short]
    params:
      initNodes: 5
      initPods: 20
      measurePods: 5
  - name: 5Nodes_AsyncAPICallsEnabled
    featureGates:
      SchedulerQueueingHints: true
      SchedulerAsyncAPICalls: true
    labels: [integration-test, short]
    params:
      initNodes: 5

@@ -174,6 +204,16 @@
  - name: 1000Nodes_QueueingHintsEnabled
    featureGates:
      SchedulerQueueingHints: true
      SchedulerAsyncAPICalls: false
    labels: [performance, short]
    params:
      initNodes: 1000
      initPods: 4000
      measurePods: 1000
  - name: 1000Nodes_AsyncAPICallsEnabled
    featureGates:
      SchedulerQueueingHints: true
      SchedulerAsyncAPICalls: true
    labels: [performance, short]
    params:
      initNodes: 1000

@@ -222,6 +262,16 @@
  - name: 5Nodes_QueueingHintsEnabled
    featureGates:
      SchedulerQueueingHints: true
      SchedulerAsyncAPICalls: false
    labels: [integration-test, short]
    params:
      initNodes: 5
      initPods: 20
      measurePods: 5
  - name: 5Nodes_AsyncAPICallsEnabled
    featureGates:
      SchedulerQueueingHints: true
      SchedulerAsyncAPICalls: true
    labels: [integration-test, short]
    params:
      initNodes: 5

@@ -248,6 +298,7 @@
    labels: [performance]
    featureGates:
      SchedulerAsyncPreemption: true
      SchedulerAsyncAPICalls: false
    params:
      initNodes: 5000
      initPods: 20000

@@ -255,6 +306,17 @@
  - name: 5000Nodes_QueueingHintsEnabled
    featureGates:
      SchedulerQueueingHints: true
      SchedulerAsyncAPICalls: false
    labels: [performance]
    threshold: 160
    params:
      initNodes: 5000
      initPods: 20000
      measurePods: 5000
  - name: 5000Nodes_AsyncAPICallsEnabled
    featureGates:
      SchedulerQueueingHints: true
      SchedulerAsyncAPICalls: true
    labels: [performance]
    threshold: 160
    params:
