diff --git a/pkg/scheduler/backend/cache/cache.go b/pkg/scheduler/backend/cache/cache.go
index 99736448115..91dc0b89e23 100644
--- a/pkg/scheduler/backend/cache/cache.go
+++ b/pkg/scheduler/backend/cache/cache.go
@@ -24,6 +24,7 @@ import (
 	"time"
 
 	v1 "k8s.io/api/core/v1"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/klog/v2"
@@ -113,7 +114,7 @@ func newNodeInfoListItem(ni *framework.NodeInfo) *nodeInfoListItem {
 func (cache *cacheImpl) moveNodeInfoToHead(logger klog.Logger, name string) {
 	ni, ok := cache.nodes[name]
 	if !ok {
-		logger.Error(nil, "No node info with given name found in the cache", "node", klog.KRef("", name))
+		utilruntime.HandleErrorWithLogger(logger, nil, "No node info with given name found in the cache", "node", klog.KRef("", name))
 		return
 	}
 	// if the node info list item is already at the head, we are done.
@@ -141,7 +142,7 @@ func (cache *cacheImpl) moveNodeInfoToHead(logger klog.Logger, name string) {
 func (cache *cacheImpl) removeNodeInfoFromList(logger klog.Logger, name string) {
 	ni, ok := cache.nodes[name]
 	if !ok {
-		logger.Error(nil, "No node info with given name found in the cache", "node", klog.KRef("", name))
+		utilruntime.HandleErrorWithLogger(logger, nil, "No node info with given name found in the cache", "node", klog.KRef("", name))
 		return
 	}
 
@@ -288,7 +289,7 @@ func (cache *cacheImpl) updateNodeInfoSnapshotList(logger klog.Logger, snapshot
 		snapshot.nodeInfoList = make([]*framework.NodeInfo, 0, cache.nodeTree.numNodes)
 		nodesList, err := cache.nodeTree.list()
 		if err != nil {
-			logger.Error(err, "Error occurred while retrieving the list of names of the nodes from node tree")
+			utilruntime.HandleErrorWithLogger(logger, err, "Error occurred while retrieving the list of names of the nodes from node tree")
 		}
 		for _, nodeName := range nodesList {
 			if nodeInfo := snapshot.nodeInfoMap[nodeName]; nodeInfo != nil {
@@ -303,7 +304,7 @@ func (cache *cacheImpl) updateNodeInfoSnapshotList(logger klog.Logger, snapshot
 					snapshot.usedPVCSet.Insert(key)
 				}
 			} else {
-				logger.Error(nil, "Node exists in nodeTree but not in NodeInfoMap, this should not happen", "node", klog.KRef("", nodeName))
+				utilruntime.HandleErrorWithLogger(logger, nil, "Node exists in nodeTree but not in NodeInfoMap, this should not happen", "node", klog.KRef("", nodeName))
 			}
 		}
 	} else {
@@ -465,7 +466,7 @@ func (cache *cacheImpl) removePod(logger klog.Logger, pod *v1.Pod) error {
 
 	n, ok := cache.nodes[pod.Spec.NodeName]
 	if !ok {
-		logger.Error(nil, "Node not found when trying to remove pod", "node", klog.KRef("", pod.Spec.NodeName), "podKey", key, "pod", klog.KObj(pod))
+		utilruntime.HandleErrorWithLogger(logger, nil, "Node not found when trying to remove pod", "node", klog.KRef("", pod.Spec.NodeName), "podKey", key, "pod", klog.KObj(pod))
 	} else {
 		if err := n.info.RemovePod(logger, pod); err != nil {
 			return err
@@ -497,7 +498,7 @@ func (cache *cacheImpl) AddPod(logger klog.Logger, pod *v1.Pod) error {
 		// When assuming, we've already added the Pod to cache,
 		// Just update here to make sure the Pod's status is up-to-date.
 		if err = cache.updatePod(logger, currState.pod, pod); err != nil {
-			logger.Error(err, "Error occurred while updating pod")
+			utilruntime.HandleErrorWithLogger(logger, err, "Error occurred while updating pod")
 		}
 		if currState.pod.Spec.NodeName != pod.Spec.NodeName {
 			// The pod was added to a different node than it was assumed to.
@@ -507,7 +508,7 @@ func (cache *cacheImpl) AddPod(logger klog.Logger, pod *v1.Pod) error {
 	case !ok:
 		// Pod was expired. We should add it back.
 		if err = cache.addPod(logger, pod, false); err != nil {
-			logger.Error(err, "Error occurred while adding pod")
+			utilruntime.HandleErrorWithLogger(logger, err, "Error occurred while adding pod")
 		}
 	default:
 		return fmt.Errorf("pod %v(%v) was already in added state", key, klog.KObj(pod))
@@ -536,8 +537,7 @@ func (cache *cacheImpl) UpdatePod(logger klog.Logger, oldPod, newPod *v1.Pod) er
 	}
 
 	if currState.pod.Spec.NodeName != newPod.Spec.NodeName {
-		logger.Error(nil, "Pod updated on a different node than previously added to", "podKey", key, "pod", klog.KObj(oldPod))
-		logger.Error(nil, "scheduler cache is corrupted and can badly affect scheduling decisions")
+		utilruntime.HandleErrorWithLogger(logger, nil, "Pod updated on a different node than previously added to. Scheduler cache is corrupted and can badly affect scheduling decisions", "podKey", key, "pod", klog.KObj(oldPod))
 		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
 	}
 	return cache.updatePod(logger, oldPod, newPod)
@@ -557,11 +557,11 @@ func (cache *cacheImpl) RemovePod(logger klog.Logger, pod *v1.Pod) error {
 		return fmt.Errorf("pod %v(%v) is not found in scheduler cache, so cannot be removed from it", key, klog.KObj(pod))
 	}
 	if currState.pod.Spec.NodeName != pod.Spec.NodeName {
-		logger.Error(nil, "Pod was added to a different node than it was assumed", "podKey", key, "pod", klog.KObj(pod), "assumedNode", klog.KRef("", pod.Spec.NodeName), "currentNode", klog.KRef("", currState.pod.Spec.NodeName))
+		utilruntime.HandleErrorWithLogger(logger, nil, "Pod was added to a different node than it was assumed", "podKey", key, "pod", klog.KObj(pod), "assumedNode", klog.KRef("", pod.Spec.NodeName), "currentNode", klog.KRef("", currState.pod.Spec.NodeName))
 		if pod.Spec.NodeName != "" {
 			// An empty NodeName is possible when the scheduler misses a Delete
 			// event and it gets the last known state from the informer cache.
- logger.Error(nil, "scheduler cache is corrupted and can badly affect scheduling decisions") + utilruntime.HandleErrorWithLogger(logger, nil, "Scheduler cache is corrupted and can badly affect scheduling decisions") klog.FlushAndExit(klog.ExitFlushTimeout, 1) } } @@ -736,7 +736,7 @@ func (cache *cacheImpl) cleanupAssumedPods(logger klog.Logger, now time.Time) { for key := range cache.assumedPods { ps, ok := cache.podStates[key] if !ok { - logger.Error(nil, "Key found in assumed set but not in podStates, potentially a logical error") + utilruntime.HandleErrorWithLogger(logger, nil, "Key found in assumed set but not in podStates, potentially a logical error") klog.FlushAndExit(klog.ExitFlushTimeout, 1) } if !ps.bindingFinished { @@ -746,7 +746,7 @@ func (cache *cacheImpl) cleanupAssumedPods(logger klog.Logger, now time.Time) { if cache.ttl != 0 && now.After(*ps.deadline) { logger.Info("Pod expired", "podKey", key, "pod", klog.KObj(ps.pod)) if err := cache.removePod(logger, ps.pod); err != nil { - logger.Error(err, "ExpirePod failed", "podKey", key, "pod", klog.KObj(ps.pod)) + utilruntime.HandleErrorWithLogger(logger, err, "ExpirePod failed", "podKey", key, "pod", klog.KObj(ps.pod)) } } } diff --git a/pkg/scheduler/backend/queue/active_queue.go b/pkg/scheduler/backend/queue/active_queue.go index 13a53590127..00fafbb900c 100644 --- a/pkg/scheduler/backend/queue/active_queue.go +++ b/pkg/scheduler/backend/queue/active_queue.go @@ -23,6 +23,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/klog/v2" fwk "k8s.io/kube-scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/backend/heap" @@ -289,7 +290,7 @@ func (aq *activeQueue) unlockedPop(logger klog.Logger) (*framework.QueuedPodInfo if _, ok := aq.inFlightPods[pInfo.Pod.UID]; ok { // Just report it as an error, but no need to stop the scheduler // because it likely doesn't cause any visible issues from the scheduling perspective. - logger.Error(nil, "the same pod is tracked in multiple places in the scheduler, and just discard it", "pod", klog.KObj(pInfo.Pod)) + utilruntime.HandleErrorWithLogger(logger, nil, "The same pod is tracked in multiple places in the scheduler, and just discard it", "pod", klog.KObj(pInfo.Pod)) // Just ignore/discard this duplicated pod and try to pop the next one. 
 		return aq.unlockedPop(logger)
 	}
diff --git a/pkg/scheduler/backend/queue/backoff_queue.go b/pkg/scheduler/backend/queue/backoff_queue.go
index 0ad7f478680..5d20f2593ce 100644
--- a/pkg/scheduler/backend/queue/backoff_queue.go
+++ b/pkg/scheduler/backend/queue/backoff_queue.go
@@ -21,6 +21,7 @@ import (
 	"time"
 
 	v1 "k8s.io/api/core/v1"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/klog/v2"
 	"k8s.io/kubernetes/pkg/scheduler/backend/heap"
 	"k8s.io/kubernetes/pkg/scheduler/framework"
@@ -270,7 +271,7 @@ func (bq *backoffQueue) popAllBackoffCompletedWithQueue(logger klog.Logger, queu
 		}
 		_, err := queue.Pop()
 		if err != nil {
-			logger.Error(err, "Unable to pop pod from backoff queue despite backoff completion", "pod", klog.KObj(pod))
+			utilruntime.HandleErrorWithLogger(logger, err, "Unable to pop pod from backoff queue despite backoff completion", "pod", klog.KObj(pod))
 			break
 		}
 		poppedPods = append(poppedPods, pInfo)
diff --git a/pkg/scheduler/backend/queue/scheduling_queue.go b/pkg/scheduler/backend/queue/scheduling_queue.go
index 5b46616d27a..73126b4cf20 100644
--- a/pkg/scheduler/backend/queue/scheduling_queue.go
+++ b/pkg/scheduler/backend/queue/scheduling_queue.go
@@ -37,6 +37,7 @@ import (
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/wait"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
@@ -456,7 +457,7 @@ func (p *PriorityQueue) isPodWorthRequeuing(logger klog.Logger, pInfo *framework
 	hintMap, ok := p.queueingHintMap[pInfo.Pod.Spec.SchedulerName]
 	if !ok {
 		// shouldn't reach here unless bug.
-		logger.Error(nil, "No QueueingHintMap is registered for this profile", "profile", pInfo.Pod.Spec.SchedulerName, "pod", klog.KObj(pInfo.Pod))
+		utilruntime.HandleErrorWithLogger(logger, nil, "No QueueingHintMap is registered for this profile", "profile", pInfo.Pod.Spec.SchedulerName, "pod", klog.KObj(pInfo.Pod))
 		return queueAfterBackoff
 	}
 
@@ -480,9 +481,9 @@ func (p *PriorityQueue) isPodWorthRequeuing(logger klog.Logger, pInfo *framework
 				// the Pod from being stuck in the unschedulable pod pool.
 				oldObjMeta, newObjMeta, asErr := util.As[klog.KMetadata](oldObj, newObj)
 				if asErr != nil {
-					logger.Error(err, "QueueingHintFn returns error", "event", event, "plugin", hintfn.PluginName, "pod", klog.KObj(pod))
+					utilruntime.HandleErrorWithLogger(logger, err, "QueueingHintFn returns error", "event", event, "plugin", hintfn.PluginName, "pod", klog.KObj(pod))
 				} else {
-					logger.Error(err, "QueueingHintFn returns error", "event", event, "plugin", hintfn.PluginName, "pod", klog.KObj(pod), "oldObj", klog.KObj(oldObjMeta), "newObj", klog.KObj(newObjMeta))
+					utilruntime.HandleErrorWithLogger(logger, err, "QueueingHintFn returns error", "event", event, "plugin", hintfn.PluginName, "pod", klog.KObj(pod), "oldObj", klog.KObj(oldObjMeta), "newObj", klog.KObj(newObjMeta))
 				}
 				hint = fwk.Queue
 			}
@@ -588,7 +589,7 @@ func (p *PriorityQueue) runPreEnqueuePlugin(ctx context.Context, logger klog.Log
 	pInfo.GatingPlugin = pl.Name()
 	pInfo.GatingPluginEvents = p.pluginToEventsMap[pInfo.GatingPlugin]
 	if s.Code() == fwk.Error {
-		logger.Error(s.AsError(), "Unexpected error running PreEnqueue plugin", "pod", klog.KObj(pod), "plugin", pl.Name())
+		utilruntime.HandleErrorWithContext(ctx, s.AsError(), "Unexpected error running PreEnqueue plugin", "pod", klog.KObj(pod), "plugin", pl.Name())
 	} else {
 		logger.V(4).Info("Status after running PreEnqueue plugin", "pod", klog.KObj(pod), "plugin", pl.Name(), "status", s)
 	}
@@ -727,7 +728,7 @@ func (p *PriorityQueue) activate(logger klog.Logger, pod *v1.Pod) bool {
 
 	if pInfo == nil {
 		// Redundant safe check. We shouldn't reach here.
-		logger.Error(nil, "Internal error: cannot obtain pInfo")
+		utilruntime.HandleErrorWithLogger(logger, nil, "Internal error: cannot obtain pInfo")
 		return false
 	}
 
@@ -751,7 +752,7 @@ func (p *PriorityQueue) determineSchedulingHintForInFlightPod(logger klog.Logger
 
 	events, err := p.activeQ.clusterEventsForPod(logger, pInfo)
 	if err != nil {
-		logger.Error(err, "Error getting cluster events for pod", "pod", klog.KObj(pInfo.Pod))
+		utilruntime.HandleErrorWithLogger(logger, err, "Error getting cluster events for pod", "pod", klog.KObj(pInfo.Pod))
 		return queueAfterBackoff
 	}
 
diff --git a/pkg/scheduler/eventhandlers.go b/pkg/scheduler/eventhandlers.go
index 6074f60f74f..6dd9cbb8787 100644
--- a/pkg/scheduler/eventhandlers.go
+++ b/pkg/scheduler/eventhandlers.go
@@ -56,7 +56,7 @@ func (sched *Scheduler) addNodeToCache(obj interface{}) {
 	logger := sched.logger
 	node, ok := obj.(*v1.Node)
 	if !ok {
-		logger.Error(nil, "Cannot convert to *v1.Node", "obj", obj)
+		utilruntime.HandleErrorWithLogger(logger, nil, "Cannot convert to *v1.Node", "obj", obj)
 		return
 	}
 
@@ -70,12 +70,12 @@ func (sched *Scheduler) updateNodeInCache(oldObj, newObj interface{}) {
 	logger := sched.logger
 	oldNode, ok := oldObj.(*v1.Node)
 	if !ok {
-		logger.Error(nil, "Cannot convert oldObj to *v1.Node", "oldObj", oldObj)
+		utilruntime.HandleErrorWithLogger(logger, nil, "Cannot convert oldObj to *v1.Node", "oldObj", oldObj)
 		return
 	}
 	newNode, ok := newObj.(*v1.Node)
 	if !ok {
-		logger.Error(nil, "Cannot convert newObj to *v1.Node", "newObj", newObj)
+		utilruntime.HandleErrorWithLogger(logger, nil, "Cannot convert newObj to *v1.Node", "newObj", newObj)
 		return
 	}
 
@@ -110,11 +110,11 @@ func (sched *Scheduler) deleteNodeFromCache(obj interface{}) {
 		var ok bool
 		node, ok = t.Obj.(*v1.Node)
 		if !ok {
-			logger.Error(nil, "Cannot convert to *v1.Node", "obj", t.Obj)
+			utilruntime.HandleErrorWithLogger(logger, nil, "Cannot convert to *v1.Node", "obj", t.Obj)
 			return
 		}
 	default:
-		logger.Error(nil, "Cannot convert to *v1.Node", "obj", t)
+		utilruntime.HandleErrorWithLogger(logger, nil, "Cannot convert to *v1.Node", "obj", t)
 		return
 	}
 
@@ -122,7 +122,7 @@ func (sched *Scheduler) deleteNodeFromCache(obj interface{}) {
 
 	logger.V(3).Info("Delete event for node", "node", klog.KObj(node))
 	if err := sched.Cache.RemoveNode(logger, node); err != nil {
-		logger.Error(err, "Scheduler cache RemoveNode failed")
+		utilruntime.HandleErrorWithLogger(logger, err, "Scheduler cache RemoveNode failed")
 	}
 }
 
@@ -202,7 +202,7 @@ func (sched *Scheduler) deletePodFromSchedulingQueue(obj interface{}) {
 	if err != nil {
 		// This shouldn't happen, because we only accept for scheduling the pods
 		// which specify a scheduler name that matches one of the profiles.
-		logger.Error(err, "Unable to get profile", "pod", klog.KObj(pod))
+		utilruntime.HandleErrorWithLogger(logger, err, "Unable to get profile", "pod", klog.KObj(pod))
 		return
 	}
 	// If a waiting pod is rejected, it indicates it's previously assumed and we're
@@ -233,13 +233,13 @@ func (sched *Scheduler) addPodToCache(obj interface{}) {
 	logger := sched.logger
 	pod, ok := obj.(*v1.Pod)
 	if !ok {
-		logger.Error(nil, "Cannot convert to *v1.Pod", "obj", obj)
+		utilruntime.HandleErrorWithLogger(logger, nil, "Cannot convert to *v1.Pod", "obj", obj)
 		return
 	}
 
 	logger.V(3).Info("Add event for scheduled pod", "pod", klog.KObj(pod))
 	if err := sched.Cache.AddPod(logger, pod); err != nil {
-		logger.Error(err, "Scheduler cache AddPod failed", "pod", klog.KObj(pod))
+		utilruntime.HandleErrorWithLogger(logger, err, "Scheduler cache AddPod failed", "pod", klog.KObj(pod))
 	}
 
 	// SchedulingQueue.AssignedPodAdded has a problem:
@@ -265,18 +265,18 @@ func (sched *Scheduler) updatePodInCache(oldObj, newObj interface{}) {
 	logger := sched.logger
 	oldPod, ok := oldObj.(*v1.Pod)
 	if !ok {
-		logger.Error(nil, "Cannot convert oldObj to *v1.Pod", "oldObj", oldObj)
+		utilruntime.HandleErrorWithLogger(logger, nil, "Cannot convert oldObj to *v1.Pod", "oldObj", oldObj)
 		return
 	}
 	newPod, ok := newObj.(*v1.Pod)
 	if !ok {
-		logger.Error(nil, "Cannot convert newObj to *v1.Pod", "newObj", newObj)
+		utilruntime.HandleErrorWithLogger(logger, nil, "Cannot convert newObj to *v1.Pod", "newObj", newObj)
 		return
 	}
 
 	logger.V(4).Info("Update event for scheduled pod", "pod", klog.KObj(oldPod))
 	if err := sched.Cache.UpdatePod(logger, oldPod, newPod); err != nil {
-		logger.Error(err, "Scheduler cache UpdatePod failed", "pod", klog.KObj(oldPod))
+		utilruntime.HandleErrorWithLogger(logger, err, "Scheduler cache UpdatePod failed", "pod", klog.KObj(oldPod))
 	}
 
 	events := framework.PodSchedulingPropertiesChange(newPod, oldPod)
@@ -318,17 +318,17 @@ func (sched *Scheduler) deletePodFromCache(obj interface{}) {
 		var ok bool
 		pod, ok = t.Obj.(*v1.Pod)
 		if !ok {
-			logger.Error(nil, "Cannot convert to *v1.Pod", "obj", t.Obj)
+			utilruntime.HandleErrorWithLogger(logger, nil, "Cannot convert to *v1.Pod", "obj", t.Obj)
 			return
 		}
 	default:
-		logger.Error(nil, "Cannot convert to *v1.Pod", "obj", t)
+		utilruntime.HandleErrorWithLogger(logger, nil, "Cannot convert to *v1.Pod", "obj", t)
 		return
 	}
 
 	logger.V(3).Info("Delete event for scheduled pod", "pod", klog.KObj(pod))
 	if err := sched.Cache.RemovePod(logger, pod); err != nil {
-		logger.Error(err, "Scheduler cache RemovePod failed", "pod", klog.KObj(pod))
+		utilruntime.HandleErrorWithLogger(logger, err, "Scheduler cache RemovePod failed", "pod", klog.KObj(pod))
 	}
 
 	sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(logger, framework.EventAssignedPodDelete, pod, nil, nil)
@@ -461,7 +461,7 @@ func addAllEventHandlers(
 				if resource == fwk.StorageClass && !utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints) {
 					sc, ok := obj.(*storagev1.StorageClass)
 					if !ok {
-						logger.Error(nil, "Cannot convert to *storagev1.StorageClass", "obj", obj)
+						utilruntime.HandleErrorWithLogger(logger, nil, "Cannot convert to *storagev1.StorageClass", "obj", obj)
 						return
 					}
 
@@ -603,7 +603,7 @@ func addAllEventHandlers(
 			// - foos.v1 (2 sections)
 			// - foo.v1.example.com (the first section should be plural)
 			if strings.Count(string(gvk), ".") < 2 {
-				logger.Error(nil, "incorrect event registration", "gvk", gvk)
+				utilruntime.HandleErrorWithLogger(logger, nil, "Incorrect event registration", "gvk", gvk)
 				continue
 			}
 			// Fall back to try dynamic informers.
diff --git a/pkg/scheduler/framework/preemption/preemption.go b/pkg/scheduler/framework/preemption/preemption.go
index 51d5aaa3510..6628674b77a 100644
--- a/pkg/scheduler/framework/preemption/preemption.go
+++ b/pkg/scheduler/framework/preemption/preemption.go
@@ -31,6 +31,7 @@ import (
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/types"
 	utilerrors "k8s.io/apimachinery/pkg/util/errors"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/sets"
 	corelisters "k8s.io/client-go/listers/core/v1"
 	policylisters "k8s.io/client-go/listers/policy/v1"
@@ -416,7 +417,7 @@ func (ev *Evaluator) SelectCandidate(ctx context.Context, candidates []Candidate
 	}
 
 	// We shouldn't reach here.
-	logger.Error(errors.New("no candidate selected"), "Should not reach here", "candidates", candidates)
+	utilruntime.HandleErrorWithContext(ctx, nil, "Unexpected case no candidate was selected", "candidates", candidates)
 	// To not break the whole flow, return the first candidate.
 	return candidates[0]
 }
@@ -450,7 +451,7 @@ func (ev *Evaluator) prepareCandidate(ctx context.Context, c Candidate, pod *v1.
 	// lets scheduler find another place for them.
 	nominatedPods := getLowerPriorityNominatedPods(logger, fh, pod, c.Name())
 	if err := util.ClearNominatedNodeName(ctx, cs, nominatedPods...); err != nil {
-		logger.Error(err, "Cannot clear 'NominatedNodeName' field")
+		utilruntime.HandleErrorWithContext(ctx, err, "Cannot clear 'NominatedNodeName' field")
 		// We do not return as this error is not critical.
 	}
 
@@ -504,7 +505,7 @@ func (ev *Evaluator) prepareCandidateAsync(c Candidate, pod *v1.Pod, pluginName
 		// lets scheduler find another place for them.
 		nominatedPods := getLowerPriorityNominatedPods(logger, ev.Handler, pod, c.Name())
 		if err := util.ClearNominatedNodeName(ctx, ev.Handler.ClientSet(), nominatedPods...); err != nil {
-			logger.Error(err, "Cannot clear 'NominatedNodeName' field from lower priority pods on the same target node", "node", c.Name())
+			utilruntime.HandleErrorWithContext(ctx, err, "Cannot clear 'NominatedNodeName' field from lower priority pods on the same target node", "node", c.Name())
 			result = metrics.GoroutineResultError
 			// We do not return as this error is not critical.
 		}
@@ -525,7 +526,7 @@ func (ev *Evaluator) prepareCandidateAsync(c Candidate, pod *v1.Pod, pluginName
 		// by all the pod removal events being ignored.
 		ev.Handler.Parallelizer().Until(ctx, len(c.Victims().Pods)-1, preemptPod, ev.PluginName)
 		if err := errCh.ReceiveError(); err != nil {
-			logger.Error(err, "Error occurred during async preemption")
+			utilruntime.HandleErrorWithContext(ctx, err, "Error occurred during async preemption")
 			result = metrics.GoroutineResultError
 		}
 
@@ -534,7 +535,7 @@ func (ev *Evaluator) prepareCandidateAsync(c Candidate, pod *v1.Pod, pluginName
 		ev.mu.Unlock()
 
 		if err := ev.PreemptPod(ctx, c, pod, c.Victims().Pods[len(c.Victims().Pods)-1], pluginName); err != nil {
-			logger.Error(err, "Error occurred during async preemption")
+			utilruntime.HandleErrorWithContext(ctx, err, "Error occurred during async preemption")
 			result = metrics.GoroutineResultError
 		}
 
@@ -603,7 +604,7 @@ func pickOneNodeForPreemption(logger klog.Logger, nodesToVictims map[string]*ext
 		// Get the earliest start time of all pods on the current node.
 		earliestStartTimeOnNode := util.GetEarliestPodStartTime(nodesToVictims[node])
 		if earliestStartTimeOnNode == nil {
-			logger.Error(errors.New("earliestStartTime is nil for node"), "Should not reach here", "node", node)
+			utilruntime.HandleErrorWithLogger(logger, nil, "Unexpected nil earliestStartTime for node", "node", node)
 			return int64(math.MinInt64)
 		}
 		// The bigger the earliestStartTimeOnNode, the higher the score.
diff --git a/pkg/scheduler/framework/types.go b/pkg/scheduler/framework/types.go
index f961ec03905..19fbb3d1f49 100644
--- a/pkg/scheduler/framework/types.go
+++ b/pkg/scheduler/framework/types.go
@@ -29,6 +29,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	utilerrors "k8s.io/apimachinery/pkg/util/errors"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/sets"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	"k8s.io/klog/v2"
@@ -841,7 +842,7 @@ func removeFromSlice(logger klog.Logger, s []*PodInfo, k string) ([]*PodInfo, *P
 	for i := range s {
 		tmpKey, err := GetPodKey(s[i].Pod)
 		if err != nil {
-			logger.Error(err, "Cannot get pod key", "pod", klog.KObj(s[i].Pod))
+			utilruntime.HandleErrorWithLogger(logger, err, "Cannot get pod key", "pod", klog.KObj(s[i].Pod))
 			continue
 		}
 		if k == tmpKey {
diff --git a/pkg/scheduler/schedule_one.go b/pkg/scheduler/schedule_one.go
index a838bc4ff8b..b793a7c8d53 100644
--- a/pkg/scheduler/schedule_one.go
+++ b/pkg/scheduler/schedule_one.go
@@ -67,7 +67,7 @@ func (sched *Scheduler) ScheduleOne(ctx context.Context) {
 	logger := klog.FromContext(ctx)
 	podInfo, err := sched.NextPod(logger)
 	if err != nil {
-		logger.Error(err, "Error while retrieving next pod from scheduling queue")
+		utilruntime.HandleErrorWithContext(ctx, err, "Error while retrieving next pod from scheduling queue")
 		return
 	}
 	// pod could be nil when schedulerQueue is closed
@@ -180,7 +180,7 @@ func (sched *Scheduler) schedulingCycle(
 			msg := status.Message()
 			fitError.Diagnosis.PostFilterMsg = msg
 			if status.Code() == fwk.Error {
-				logger.Error(nil, "Status after running PostFilter plugins for pod", "pod", klog.KObj(pod), "status", msg)
+				utilruntime.HandleErrorWithContext(ctx, nil, "Status after running PostFilter plugins for pod", "pod", klog.KObj(pod), "status", msg)
 			} else {
 				logger.V(5).Info("Status after running PostFilter plugins for pod", "pod", klog.KObj(pod), "status", msg)
 			}
@@ -213,7 +213,7 @@ func (sched *Scheduler) schedulingCycle(
 		// trigger un-reserve to clean up state associated with the reserved Pod
 		schedFramework.RunReservePluginsUnreserve(ctx, state, assumedPod, scheduleResult.SuggestedHost)
 		if forgetErr := sched.Cache.ForgetPod(logger, assumedPod); forgetErr != nil {
-			logger.Error(forgetErr, "Scheduler cache ForgetPod failed")
+			utilruntime.HandleErrorWithContext(ctx, forgetErr, "Scheduler cache ForgetPod failed")
 		}
 
 		if sts.IsRejected() {
@@ -237,7 +237,7 @@ func (sched *Scheduler) schedulingCycle(
 		// trigger un-reserve to clean up state associated with the reserved Pod
 		schedFramework.RunReservePluginsUnreserve(ctx, state, assumedPod, scheduleResult.SuggestedHost)
 		if forgetErr := sched.Cache.ForgetPod(logger, assumedPod); forgetErr != nil {
-			logger.Error(forgetErr, "Scheduler cache ForgetPod failed")
+			utilruntime.HandleErrorWithContext(ctx, forgetErr, "Scheduler cache ForgetPod failed")
 		}
 
 		if runPermitStatus.IsRejected() {
@@ -348,7 +348,7 @@ func (sched *Scheduler) handleBindingCycleError(
 	// trigger un-reserve plugins to clean up state associated with the reserved Pod
 	fwk.RunReservePluginsUnreserve(ctx, state, assumedPod, scheduleResult.SuggestedHost)
 	if forgetErr := sched.Cache.ForgetPod(logger, assumedPod); forgetErr != nil {
-		logger.Error(forgetErr, "scheduler cache ForgetPod failed")
+		utilruntime.HandleErrorWithContext(ctx, forgetErr, "scheduler cache ForgetPod failed")
 	} else {
 		// "Forget"ing an assumed Pod in binding cycle should be treated as a PodDelete event,
 		// as the assumed Pod had occupied a certain amount of resources in scheduler cache.
@@ -485,7 +485,7 @@ func (sched *Scheduler) findNodesThatFitPod(ctx context.Context, schedFramework
 	if len(pod.Status.NominatedNodeName) > 0 {
 		feasibleNodes, err := sched.evaluateNominatedNode(ctx, pod, schedFramework, state, diagnosis)
 		if err != nil {
-			logger.Error(err, "Evaluation failed on nominated node", "pod", klog.KObj(pod), "node", pod.Status.NominatedNodeName)
+			utilruntime.HandleErrorWithContext(ctx, err, "Evaluation failed on nominated node", "pod", klog.KObj(pod), "node", pod.Status.NominatedNodeName)
 		}
 		// Nominated node passes all the filters, scheduler is good to assign this node to the pod.
 		if len(feasibleNodes) != 0 {
@@ -991,7 +991,7 @@ func (sched *Scheduler) extendersBinding(logger klog.Logger, pod *v1.Pod, node s
 
 func (sched *Scheduler) finishBinding(logger klog.Logger, fwk framework.Framework, assumed *v1.Pod, targetNode string, status *fwk.Status) {
 	if finErr := sched.Cache.FinishBinding(logger, assumed); finErr != nil {
-		logger.Error(finErr, "Scheduler cache FinishBinding failed")
+		utilruntime.HandleErrorWithLogger(logger, finErr, "Scheduler cache FinishBinding failed")
 	}
 	if !status.IsSuccess() {
 		logger.V(1).Info("Failed to bind pod", "pod", klog.KObj(assumed))
@@ -1047,7 +1047,7 @@ func (sched *Scheduler) handleSchedulingFailure(ctx context.Context, fwk framewo
 		podInfo.PendingPlugins = fitError.Diagnosis.PendingPlugins
 		logger.V(2).Info("Unable to schedule pod; no fit; waiting", "pod", klog.KObj(pod), "err", errMsg)
 	} else {
-		logger.Error(err, "Error scheduling pod; retrying", "pod", klog.KObj(pod))
+		utilruntime.HandleErrorWithContext(ctx, err, "Error scheduling pod; retrying", "pod", klog.KObj(pod))
 	}
 
 	// Check if the Pod exists in informer cache.
@@ -1068,7 +1068,7 @@ func (sched *Scheduler) handleSchedulingFailure(ctx context.Context, fwk framewo
 			// and we can't fix the validation for backwards compatibility.
 			podInfo.PodInfo, _ = framework.NewPodInfo(cachedPod.DeepCopy())
 			if err := sched.SchedulingQueue.AddUnschedulableIfNotPresent(logger, podInfo, sched.SchedulingQueue.SchedulingCycle()); err != nil {
-				logger.Error(err, "Error occurred")
+				utilruntime.HandleErrorWithContext(ctx, err, "Error occurred")
 			}
 			calledDone = true
 		}
@@ -1096,7 +1096,7 @@ func (sched *Scheduler) handleSchedulingFailure(ctx context.Context, fwk framewo
 		Reason:  reason,
 		Message: errMsg,
 	}, nominatingInfo); err != nil {
-		logger.Error(err, "Error updating pod", "pod", klog.KObj(pod))
+		utilruntime.HandleErrorWithContext(ctx, err, "Error updating pod", "pod", klog.KObj(pod))
 	}
 }
diff --git a/pkg/scheduler/util/assumecache/assume_cache.go b/pkg/scheduler/util/assumecache/assume_cache.go
index 1c410caf604..889cb9efe3b 100644
--- a/pkg/scheduler/util/assumecache/assume_cache.go
+++ b/pkg/scheduler/util/assumecache/assume_cache.go
@@ -223,7 +223,7 @@ func (c *AssumeCache) add(obj interface{}) {
 
 	name, err := cache.MetaNamespaceKeyFunc(obj)
 	if err != nil {
-		c.logger.Error(&ObjectNameError{err}, "Add failed")
+		utilruntime.HandleErrorWithLogger(c.logger, &ObjectNameError{err}, "Add failed")
 		return
 	}
 
@@ -235,13 +235,13 @@ func (c *AssumeCache) add(obj interface{}) {
 	if objInfo, _ := c.getObjInfo(name); objInfo != nil {
 		newVersion, err := c.getObjVersion(name, obj)
 		if err != nil {
-			c.logger.Error(err, "Add failed: couldn't get object version")
+			utilruntime.HandleErrorWithLogger(c.logger, err, "Add failed: couldn't get object version")
 			return
 		}
 
 		storedVersion, err := c.getObjVersion(name, objInfo.latestObj)
 		if err != nil {
-			c.logger.Error(err, "Add failed: couldn't get stored object version")
+			utilruntime.HandleErrorWithLogger(c.logger, err, "Add failed: couldn't get stored object version")
 			return
 		}
 
@@ -274,7 +274,7 @@ func (c *AssumeCache) delete(obj interface{}) {
 
 	name, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
 	if err != nil {
-		c.logger.Error(&ObjectNameError{err}, "Failed to delete")
+		utilruntime.HandleErrorWithLogger(c.logger, &ObjectNameError{err}, "Failed to delete")
 		return
 	}
 
@@ -292,7 +292,7 @@ func (c *AssumeCache) delete(obj interface{}) {
 	objInfo := &objInfo{name: name}
 	err = c.store.Delete(objInfo)
 	if err != nil {
-		c.logger.Error(err, "Failed to delete", "description", c.description, "cacheKey", name)
+		utilruntime.HandleErrorWithLogger(c.logger, err, "Failed to delete", "description", c.description, "cacheKey", name)
 	}
 
 	c.pushEvent(oldObj, nil)
@@ -392,7 +392,7 @@ func (c *AssumeCache) listLocked(indexObj interface{}) []interface{} {
 	if c.indexName != "" {
 		o, err := c.store.Index(c.indexName, &objInfo{latestObj: indexObj})
 		if err != nil {
-			c.logger.Error(err, "List index error")
+			utilruntime.HandleErrorWithLogger(c.logger, err, "List index error")
 			return nil
 		}
 		objs = o
@@ -403,7 +403,7 @@ func (c *AssumeCache) listLocked(indexObj interface{}) []interface{} {
 	for _, obj := range objs {
 		objInfo, ok := obj.(*objInfo)
 		if !ok {
-			c.logger.Error(&WrongTypeError{TypeName: "objInfo", Object: obj}, "List error")
+			utilruntime.HandleErrorWithLogger(c.logger, &WrongTypeError{TypeName: "objInfo", Object: obj}, "List error")
 			continue
 		}
 		allObjs = append(allObjs, objInfo.latestObj)
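Reviewer note: a minimal, self-contained sketch of the two helpers this patch switches to, assuming a recent k8s.io/apimachinery and k8s.io/klog/v2; the node name and error value below are illustrative only, not taken from the patch. HandleErrorWithLogger takes an explicit klog.Logger, matching the cache, queue, and eventhandler call sites above, while HandleErrorWithContext derives the logger from the context, matching the schedule_one.go and preemption call sites. Unlike a plain logger.Error call, both variants also pass the error through the registered runtime error handlers.

package main

import (
	"context"
	"errors"

	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/klog/v2"
)

func main() {
	logger := klog.Background()
	ctx := klog.NewContext(context.Background(), logger)

	// Logger-based variant; a nil error is allowed, mirroring the call sites
	// above that previously passed nil to logger.Error.
	utilruntime.HandleErrorWithLogger(logger, nil, "No node info with given name found in the cache", "node", klog.KRef("", "example-node"))

	// Context-based variant; the logger is recovered from the context.
	utilruntime.HandleErrorWithContext(ctx, errors.New("example failure"), "Error occurred during async preemption")
}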