feature(scheduling_queue): track events per Pods (#118438)

* feature(scheduling_queue): track events per Pod

* fix typos

* record events in one slice and make each in-flight Pod refer to it

* fix: use Pop() in test before AddUnschedulableIfNotPresent to register in-flight Pods

* eliminate MakeNextPodFuncs

* call Done inside the scheduling queue

* fix comment

* implement done() so that it does not require taking the lock itself

* fix UTs

* improve the receivedEvents implementation based on suggestions

* call DonePod when we don't call AddUnschedulableIfNotPresent

* fix UT

* use queuehint to filter out events for in-flight Pods

* fix based on suggestion from aldo

* fix based on suggestion from Wei

* rename lastEventBefore → previousEvent

* fix based on suggestion

* address comments from aldo

* fix based on the suggestion from Abdullah

* gate in-flight Pods logic by the SchedulingQueueHints feature gate
This commit is contained in:
Kensei Nakada
2023-07-18 07:53:07 +09:00
committed by GitHub
parent a776bf0462
commit c7e7eee554
7 changed files with 800 additions and 57 deletions

View File

@@ -1477,13 +1477,13 @@ func initTestPreferNominatedNode(t *testing.T, nsPrefix string, opts ...schedule
testutils.SyncSchedulerInformerFactory(testCtx)
// wraps the NextPod() method to make it appear the preemption has been done already and the nominated node has been set.
f := testCtx.Scheduler.NextPod
testCtx.Scheduler.NextPod = func() (podInfo *framework.QueuedPodInfo) {
podInfo = f()
testCtx.Scheduler.NextPod = func() (*framework.QueuedPodInfo, error) {
podInfo, _ := f()
// Scheduler.NextPod() may return nil when the scheduler is shutting down.
if podInfo != nil {
podInfo.Pod.Status.NominatedNodeName = "node-1"
}
return podInfo
return podInfo, nil
}
go testCtx.Scheduler.Run(testCtx.Ctx)
return testCtx

View File

@@ -1097,7 +1097,7 @@ func NextPodOrDie(t *testing.T, testCtx *TestContext) *schedulerframework.Queued
// NextPod() is a blocking operation. Wrap it in timeout() to avoid relying on
// default go testing timeout (10m) to abort.
if err := timeout(testCtx.Ctx, time.Second*5, func() {
podInfo = testCtx.Scheduler.NextPod()
podInfo, _ = testCtx.Scheduler.NextPod()
}); err != nil {
t.Fatalf("Timed out waiting for the Pod to be popped: %v", err)
}
@@ -1112,7 +1112,7 @@ func NextPod(t *testing.T, testCtx *TestContext) *schedulerframework.QueuedPodIn
// NextPod() is a blocking operation. Wrap it in timeout() to avoid relying on
// default go testing timeout (10m) to abort.
if err := timeout(testCtx.Ctx, time.Second*5, func() {
podInfo = testCtx.Scheduler.NextPod()
podInfo, _ = testCtx.Scheduler.NextPod()
}); err != nil {
return nil
}