feature(scheduler): simplify QueueingHint by introducing new statuses

Kensei Nakada
2023-10-19 11:02:11 +00:00
parent de054fbf94
commit cb5dc46edf
17 changed files with 360 additions and 284 deletions
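In short: the QueueImmediately and QueueAfterBackoff hints are collapsed into a single Queue hint (QueueSkip is unchanged), and plugins that are waiting on an external component can now return the new Pending status code instead of UnschedulableAndUnresolvable. The intent is that a hint only reports whether an event could make the Pod schedulable, and the queue itself decides when to retry. As a reader's aid, here is a minimal sketch, not part of this commit, of a QueueingHintFn written against the simplified hints; the import paths and the util.As helper match the plugins touched below, while the function itself and its Unschedulable check are purely illustrative.

package example

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/util"
)

// isSchedulableAfterNodeChange sketches the pattern the plugins in this
// commit follow after the simplification: return Queue both on errors and
// on changes that may make the Pod schedulable, and QueueSkip when the
// change cannot help the Pod.
func isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
	_, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
	if err != nil {
		// Unexpected object types; fall back to Queue, which the scheduler
		// treats the same as having no hint callback at all.
		return framework.Queue, err
	}
	if modifiedNode.Spec.Unschedulable {
		// This change cannot make the Pod schedulable, so skip requeueing.
		logger.V(4).Info("node is unschedulable, ignoring event", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
		return framework.QueueSkip, nil
	}
	// The change may make the Pod schedulable; requeue it.
	return framework.Queue, nil
}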

View File

@@ -315,7 +315,7 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
originalClaim, modifiedClaim, err := schedutil.As[*resourcev1alpha2.ResourceClaim](oldObj, newObj)
if err != nil {
// Shouldn't happen.
-return framework.QueueAfterBackoff, fmt.Errorf("unexpected object in isSchedulableAfterClaimChange: %w", err)
+return framework.Queue, fmt.Errorf("unexpected object in isSchedulableAfterClaimChange: %w", err)
}
usesClaim := false
@@ -339,7 +339,7 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
if originalClaim == nil {
logger.V(4).Info("claim for pod got created", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim))
-return framework.QueueImmediately, nil
+return framework.Queue, nil
}
// Modifications may or may not be relevant. If the entire
@@ -357,7 +357,7 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
}
logger.V(4).Info("status of claim for pod got updated", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim))
-return framework.QueueImmediately, nil
+return framework.Queue, nil
}
// isSchedulableAfterPodSchedulingContextChange is invoked for all
@@ -376,7 +376,7 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
oldPodScheduling, newPodScheduling, err := schedutil.As[*resourcev1alpha2.PodSchedulingContext](oldObj, newObj)
if err != nil {
// Shouldn't happen.
-return framework.QueueAfterBackoff, fmt.Errorf("unexpected object in isSchedulableAfterPodSchedulingContextChange: %w", err)
+return framework.Queue, fmt.Errorf("unexpected object in isSchedulableAfterPodSchedulingContextChange: %w", err)
}
podScheduling := newPodScheduling // Never nil because deletes are handled above.
@@ -423,7 +423,7 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
len(oldPodScheduling.Status.ResourceClaims) < len(podScheduling.Status.ResourceClaims) /* new information and not incomplete (checked above) */ {
// This definitely is new information for the scheduler. Try again immediately.
logger.V(4).Info("PodSchedulingContext for pod has all required information, schedule immediately", "pod", klog.KObj(pod))
-return framework.QueueImmediately, nil
+return framework.Queue, nil
}
// The other situation where the scheduler needs to do
@@ -448,7 +448,7 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
for _, claimStatus := range podScheduling.Status.ResourceClaims {
if sliceContains(claimStatus.UnsuitableNodes, podScheduling.Spec.SelectedNode) {
logger.V(5).Info("PodSchedulingContext has unsuitable selected node, schedule immediately", "pod", klog.KObj(pod), "selectedNode", podScheduling.Spec.SelectedNode, "podResourceName", claimStatus.Name)
-return framework.QueueImmediately, nil
+return framework.Queue, nil
}
}
}
@@ -463,7 +463,7 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
// Once we get here, all changes which are known to require special responses
// have been checked for. Whatever the change was, we don't know exactly how
-// to handle it and thus return QueueAfterBackoff. This will cause the
+// to handle it and thus return Queue. This will cause the
// scheduler to treat the event as if no event hint callback had been provided.
// Developers who want to investigate this can enable a diff at log level 6.
if loggerV := logger.V(6); loggerV.Enabled() {
@@ -471,7 +471,7 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
} else {
logger.V(5).Info("PodSchedulingContext for pod with unknown changes, maybe schedule", "pod", klog.KObj(pod))
}
-return framework.QueueAfterBackoff, nil
+return framework.Queue, nil
}
@@ -961,7 +961,7 @@ func (pl *dynamicResources) Reserve(ctx context.Context, cs *framework.CycleStat
if err := state.podSchedulingState.publish(ctx, pod, pl.clientset); err != nil {
return statusError(logger, err)
}
return statusUnschedulable(logger, "waiting for resource driver to allocate resource", "pod", klog.KObj(pod), "node", klog.ObjectRef{Name: nodeName})
return statusPending(logger, "waiting for resource driver to allocate resource", "pod", klog.KObj(pod), "node", klog.ObjectRef{Name: nodeName})
}
}
@@ -977,7 +977,7 @@ func (pl *dynamicResources) Reserve(ctx context.Context, cs *framework.CycleStat
// provisioning? On the one hand, volume provisioning is currently
// irreversible, so it better should come last. On the other hand,
// triggering both in parallel might be faster.
return statusUnschedulable(logger, "waiting for resource driver to provide information", "pod", klog.KObj(pod))
return statusPending(logger, "waiting for resource driver to provide information", "pod", klog.KObj(pod))
}
func containsNode(hay []string, needle string) bool {
@@ -1073,6 +1073,21 @@ func statusUnschedulable(logger klog.Logger, reason string, kv ...interface{}) *
return framework.NewStatus(framework.UnschedulableAndUnresolvable, reason)
}
+// statusPending ensures that there is a log message associated with the
+// line where the status originated.
+func statusPending(logger klog.Logger, reason string, kv ...interface{}) *framework.Status {
+if loggerV := logger.V(5); loggerV.Enabled() {
+helper, loggerV := loggerV.WithCallStackHelper()
+helper()
+kv = append(kv, "reason", reason)
+// nolint: logcheck // warns because it cannot check key/values
+loggerV.Info("pod waiting for external component", kv...)
+}
+// When we return Pending, we want to block the Pod at the same time.
+return framework.NewStatus(framework.Pending, reason)
+}
// statusError ensures that there is a log message associated with the
// line where the error originated.
func statusError(logger klog.Logger, err error, kv ...interface{}) *framework.Status {
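The new statusPending helper above mirrors statusUnschedulable but carries the new Pending code, which blocks the Pod until an event hint requeues it (see the "block the Pod" comment above). A tiny sketch, with assumed helper names only, of producing and checking such a status, matching the expected statuses in the test file below:

package example

import "k8s.io/kubernetes/pkg/scheduler/framework"

// waitingForDriver sketches what statusPending boils down to: a
// *framework.Status whose code is the new Pending value.
func waitingForDriver(reason string) *framework.Status {
	return framework.NewStatus(framework.Pending, reason)
}

// isPending shows how a caller can distinguish the new code via the
// existing Code() accessor on *framework.Status.
func isPending(status *framework.Status) bool {
	return status != nil && status.Code() == framework.Pending
}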

View File

@@ -346,7 +346,7 @@ func TestPlugin(t *testing.T) {
classes: []*resourcev1alpha2.ResourceClass{resourceClass},
want: want{
reserve: result{
-status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `waiting for resource driver to allocate resource`),
+status: framework.NewStatus(framework.Pending, `waiting for resource driver to allocate resource`),
added: []metav1.Object{schedulingSelectedPotential},
},
},
@@ -360,7 +360,7 @@ func TestPlugin(t *testing.T) {
classes: []*resourcev1alpha2.ResourceClass{resourceClass},
want: want{
reserve: result{
-status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `waiting for resource driver to provide information`),
+status: framework.NewStatus(framework.Pending, `waiting for resource driver to provide information`),
added: []metav1.Object{schedulingPotential},
},
},
@@ -374,7 +374,7 @@ func TestPlugin(t *testing.T) {
classes: []*resourcev1alpha2.ResourceClass{resourceClass},
want: want{
reserve: result{
-status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `waiting for resource driver to allocate resource`),
+status: framework.NewStatus(framework.Pending, `waiting for resource driver to allocate resource`),
changes: change{
scheduling: func(in *resourcev1alpha2.PodSchedulingContext) *resourcev1alpha2.PodSchedulingContext {
return st.FromPodSchedulingContexts(in).
@@ -925,7 +925,7 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) {
"queue-on-add": {
pod: podWithClaimName,
newObj: pendingImmediateClaim,
-expectedHint: framework.QueueImmediately,
+expectedHint: framework.Queue,
},
"backoff-wrong-old-object": {
pod: podWithClaimName,
@@ -953,7 +953,7 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) {
claim.Status.Allocation = &resourcev1alpha2.AllocationResult{}
return claim
}(),
-expectedHint: framework.QueueImmediately,
+expectedHint: framework.Queue,
},
}
@@ -1051,7 +1051,7 @@ func Test_isSchedulableAfterPodSchedulingContextChange(t *testing.T) {
claims: []*resourcev1alpha2.ResourceClaim{pendingDelayedClaim},
oldObj: scheduling,
newObj: schedulingInfo,
-expectedHint: framework.QueueImmediately,
+expectedHint: framework.Queue,
},
"queue-bad-selected-node": {
pod: podWithClaimTemplateInStatus,
@@ -1067,7 +1067,7 @@ func Test_isSchedulableAfterPodSchedulingContextChange(t *testing.T) {
scheduling.Status.ResourceClaims[0].UnsuitableNodes = append(scheduling.Status.ResourceClaims[0].UnsuitableNodes, scheduling.Spec.SelectedNode)
return scheduling
}(),
-expectedHint: framework.QueueImmediately,
+expectedHint: framework.Queue,
},
"skip-spec-changes": {
pod: podWithClaimTemplateInStatus,
@@ -1089,7 +1089,7 @@ func Test_isSchedulableAfterPodSchedulingContextChange(t *testing.T) {
scheduling.Finalizers = append(scheduling.Finalizers, "foo")
return scheduling
}(),
-expectedHint: framework.QueueAfterBackoff,
+expectedHint: framework.Queue,
},
}

View File

@@ -94,7 +94,7 @@ func (pl *NodeAffinity) EventsToRegister() []framework.ClusterEventWithHint {
func (pl *NodeAffinity) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
if err != nil {
-return framework.QueueAfterBackoff, err
+return framework.Queue, err
}
if pl.addedNodeSelector != nil && !pl.addedNodeSelector.Match(modifiedNode) {
@@ -105,7 +105,7 @@ func (pl *NodeAffinity) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1
requiredNodeAffinity := nodeaffinity.GetRequiredNodeAffinity(pod)
isMatched, err := requiredNodeAffinity.Match(modifiedNode)
if err != nil {
-return framework.QueueAfterBackoff, err
+return framework.Queue, err
}
if !isMatched {
logger.V(4).Info("node was created or updated, but doesn't matches with the pod's NodeAffinity", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
@@ -116,14 +116,14 @@ func (pl *NodeAffinity) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1
if originalNode != nil {
wasMatched, err = requiredNodeAffinity.Match(originalNode)
if err != nil {
-return framework.QueueAfterBackoff, err
+return framework.Queue, err
}
}
if !wasMatched {
// This modification makes this Node match with Pod's NodeAffinity.
logger.V(4).Info("node was created or updated, and matches with the pod's NodeAffinity", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
-return framework.QueueAfterBackoff, nil
+return framework.Queue, nil
}
logger.V(4).Info("node was created or updated, but it doesn't make this pod schedulable", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))

View File

@@ -1189,7 +1189,7 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
args: &config.NodeAffinityArgs{},
pod: podWithNodeAffinity.Obj(),
newObj: "not-a-node",
-expectedHint: framework.QueueAfterBackoff,
+expectedHint: framework.Queue,
expectedErr: true,
},
"backoff-wrong-old-object": {
@@ -1197,7 +1197,7 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
pod: podWithNodeAffinity.Obj(),
oldObj: "not-a-node",
newObj: st.MakeNode().Obj(),
-expectedHint: framework.QueueAfterBackoff,
+expectedHint: framework.Queue,
expectedErr: true,
},
"skip-queue-on-add": {
@@ -1210,7 +1210,7 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
args: &config.NodeAffinityArgs{},
pod: podWithNodeAffinity.Obj(),
newObj: st.MakeNode().Label("foo", "bar").Obj(),
-expectedHint: framework.QueueAfterBackoff,
+expectedHint: framework.Queue,
},
"skip-unrelated-changes": {
args: &config.NodeAffinityArgs{},
@@ -1245,7 +1245,7 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
pod: podWithNodeAffinity.Obj(),
oldObj: st.MakeNode().Obj(),
newObj: st.MakeNode().Label("foo", "bar").Obj(),
-expectedHint: framework.QueueAfterBackoff,
+expectedHint: framework.Queue,
},
"skip-queue-on-add-scheduler-enforced-node-affinity": {
args: &config.NodeAffinityArgs{
@@ -1289,7 +1289,7 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
},
pod: podWithNodeAffinity.Obj(),
newObj: st.MakeNode().Label("foo", "bar").Obj(),
-expectedHint: framework.QueueAfterBackoff,
+expectedHint: framework.Queue,
},
}

View File

@@ -120,7 +120,7 @@ func (pl *NodePorts) EventsToRegister() []framework.ClusterEventWithHint {
// See: https://github.com/kubernetes/kubernetes/issues/109437
// And, we can remove NodeUpdated event once https://github.com/kubernetes/kubernetes/issues/110175 is solved.
// We don't need the QueueingHintFn here because the scheduling of Pods will be always retried with backoff when this Event happens.
-// (the same as QueueAfterBackoff)
+// (the same as Queue)
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.Update}},
}
}
@@ -130,7 +130,7 @@ func (pl *NodePorts) EventsToRegister() []framework.ClusterEventWithHint {
func (pl *NodePorts) isSchedulableAfterPodDeleted(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
deletedPod, _, err := util.As[*v1.Pod](oldObj, nil)
if err != nil {
-return framework.QueueAfterBackoff, err
+return framework.Queue, err
}
// If the deleted pod is unscheduled, it doesn't make the target pod schedulable.
@@ -163,8 +163,8 @@ func (pl *NodePorts) isSchedulableAfterPodDeleted(logger klog.Logger, pod *v1.Po
return framework.QueueSkip, nil
}
logger.V(4).Info("the deleted pod and the target pod have any common port(s), returning QueueAfterBackoff as deleting this Pod may make the Pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(deletedPod))
return framework.QueueAfterBackoff, nil
logger.V(4).Info("the deleted pod and the target pod have any common port(s), returning Queue as deleting this Pod may make the Pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(deletedPod))
return framework.Queue, nil
}
// Filter invoked at the filter extension point.
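The comment in the NodePorts EventsToRegister hunk above notes that registering an event without a QueueingHintFn behaves the same as a hint that always returns Queue. A short sketch, not part of this commit and with a hypothetical plugin type, of both forms of registration; the ClusterEventWithHint fields follow the registration shown above, but treat the exact field name QueueingHintFn as an assumption.

package example

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

type examplePlugin struct{}

// alwaysQueue is a hint that always requeues; with the simplified statuses
// this is equivalent to registering the event with no hint at all.
func (pl *examplePlugin) alwaysQueue(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
	return framework.Queue, nil
}

func (pl *examplePlugin) EventsToRegister() []framework.ClusterEventWithHint {
	return []framework.ClusterEventWithHint{
		// No hint: every Node add/update retries the Pod (same as Queue).
		{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.Update}},
		// With a hint: the callback decides per event; here it always queues.
		{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Delete}, QueueingHintFn: pl.alwaysQueue},
	}
}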

View File

@@ -313,7 +313,7 @@ func Test_isSchedulableAfterPodDeleted(t *testing.T) {
"backoff-wrong-old-object": {
pod: podWithHostPort.Obj(),
oldObj: "not-a-pod",
-expectedHint: framework.QueueAfterBackoff,
+expectedHint: framework.Queue,
expectedErr: true,
},
"skip-queue-on-unscheduled": {
@@ -334,7 +334,7 @@ func Test_isSchedulableAfterPodDeleted(t *testing.T) {
"queue-on-released-hostport": {
pod: podWithHostPort.Obj(),
oldObj: st.MakePod().Node("fake-node").HostPort(8080).Obj(),
-expectedHint: framework.QueueAfterBackoff,
+expectedHint: framework.Queue,
},
}

View File

@@ -61,7 +61,7 @@ func (pl *NodeUnschedulable) isSchedulableAfterNodeChange(logger klog.Logger, po
originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
if err != nil {
logger.Error(err, "unexpected objects in isSchedulableAfterNodeChange", "oldObj", oldObj, "newObj", newObj)
-return framework.QueueAfterBackoff, err
+return framework.Queue, err
}
originalNodeSchedulable, modifiedNodeSchedulable := false, !modifiedNode.Spec.Unschedulable
@@ -71,7 +71,7 @@ func (pl *NodeUnschedulable) isSchedulableAfterNodeChange(logger klog.Logger, po
if !originalNodeSchedulable && modifiedNodeSchedulable {
logger.V(4).Info("node was created or updated, pod may be schedulable now", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
-return framework.QueueAfterBackoff, nil
+return framework.Queue, nil
}
logger.V(4).Info("node was created or updated, but it doesn't make this pod schedulable", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))

View File

@@ -98,7 +98,7 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
name: "backoff-wrong-new-object",
pod: &v1.Pod{},
newObj: "not-a-node",
-expectedHint: framework.QueueAfterBackoff,
+expectedHint: framework.Queue,
expectedErr: true,
},
{
@@ -110,7 +110,7 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
},
},
oldObj: "not-a-node",
-expectedHint: framework.QueueAfterBackoff,
+expectedHint: framework.Queue,
expectedErr: true,
},
{
@@ -131,7 +131,7 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
Unschedulable: false,
},
},
-expectedHint: framework.QueueAfterBackoff,
+expectedHint: framework.Queue,
},
{
name: "skip-unrelated-change",
@@ -167,7 +167,7 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
Unschedulable: true,
},
},
-expectedHint: framework.QueueAfterBackoff,
+expectedHint: framework.Queue,
},
}