From 4bc2ad6eea72f0413c52cd7c1287cfdb40d63b90 Mon Sep 17 00:00:00 2001 From: Ed Bartosh Date: Sun, 2 Feb 2025 17:36:18 +0200 Subject: [PATCH] migrate pkg/kubelet/preemption to contextual logging --- hack/golangci-hints.yaml | 1 + hack/golangci.yaml | 1 + hack/logcheck.conf | 1 + .../admission_failure_handler_stub.go | 6 ++++-- pkg/kubelet/lifecycle/predicate.go | 21 ++++++++++++------- pkg/kubelet/preemption/preemption.go | 14 +++++++------ pkg/kubelet/preemption/preemption_test.go | 4 +++- 7 files changed, 31 insertions(+), 17 deletions(-) diff --git a/hack/golangci-hints.yaml b/hack/golangci-hints.yaml index 2aacd445a7e..1af130c19eb 100644 --- a/hack/golangci-hints.yaml +++ b/hack/golangci-hints.yaml @@ -215,6 +215,7 @@ linters: contextual k8s.io/kubernetes/pkg/kubelet/kubeletconfig/.* contextual k8s.io/kubernetes/pkg/kubelet/nodeshutdown/.* contextual k8s.io/kubernetes/pkg/kubelet/pod/.* + contextual k8s.io/kubernetes/pkg/kubelet/preemption/.* # As long as contextual logging is alpha or beta, all WithName, WithValues, # NewContext calls have to go through klog. Once it is GA, we can lift diff --git a/hack/golangci.yaml b/hack/golangci.yaml index 90a7c7bfc37..31fc452c136 100644 --- a/hack/golangci.yaml +++ b/hack/golangci.yaml @@ -229,6 +229,7 @@ linters: contextual k8s.io/kubernetes/pkg/kubelet/kubeletconfig/.* contextual k8s.io/kubernetes/pkg/kubelet/nodeshutdown/.* contextual k8s.io/kubernetes/pkg/kubelet/pod/.* + contextual k8s.io/kubernetes/pkg/kubelet/preemption/.* # As long as contextual logging is alpha or beta, all WithName, WithValues, # NewContext calls have to go through klog. Once it is GA, we can lift diff --git a/hack/logcheck.conf b/hack/logcheck.conf index bdd11b8c525..0b99be8f5c7 100644 --- a/hack/logcheck.conf +++ b/hack/logcheck.conf @@ -61,6 +61,7 @@ contextual k8s.io/kubernetes/pkg/kubelet/apis/.* contextual k8s.io/kubernetes/pkg/kubelet/kubeletconfig/.* contextual k8s.io/kubernetes/pkg/kubelet/nodeshutdown/.* contextual k8s.io/kubernetes/pkg/kubelet/pod/.* +contextual k8s.io/kubernetes/pkg/kubelet/preemption/.* # As long as contextual logging is alpha or beta, all WithName, WithValues, # NewContext calls have to go through klog. Once it is GA, we can lift diff --git a/pkg/kubelet/lifecycle/admission_failure_handler_stub.go b/pkg/kubelet/lifecycle/admission_failure_handler_stub.go index d22df740955..2dc5a1944e4 100644 --- a/pkg/kubelet/lifecycle/admission_failure_handler_stub.go +++ b/pkg/kubelet/lifecycle/admission_failure_handler_stub.go @@ -17,7 +17,9 @@ limitations under the License. package lifecycle import ( - "k8s.io/api/core/v1" + "context" + + v1 "k8s.io/api/core/v1" ) // AdmissionFailureHandlerStub is an AdmissionFailureHandler that does not perform any handling of admission failure. @@ -32,6 +34,6 @@ func NewAdmissionFailureHandlerStub() *AdmissionFailureHandlerStub { } // HandleAdmissionFailure simply passes admission rejection on, with no special handling. -func (n *AdmissionFailureHandlerStub) HandleAdmissionFailure(admitPod *v1.Pod, failureReasons []PredicateFailureReason) ([]PredicateFailureReason, error) { +func (n *AdmissionFailureHandlerStub) HandleAdmissionFailure(ctx context.Context, admitPod *v1.Pod, failureReasons []PredicateFailureReason) ([]PredicateFailureReason, error) { return failureReasons, nil } diff --git a/pkg/kubelet/lifecycle/predicate.go b/pkg/kubelet/lifecycle/predicate.go index a505faca694..de5ae81f703 100644 --- a/pkg/kubelet/lifecycle/predicate.go +++ b/pkg/kubelet/lifecycle/predicate.go @@ -17,6 +17,7 @@ limitations under the License. package lifecycle import ( + "context" "fmt" "runtime" @@ -92,7 +93,7 @@ type pluginResourceUpdateFuncType func(*schedulerframework.NodeInfo, *PodAdmitAt // AdmissionFailureHandler is an interface which defines how to deal with a failure to admit a pod. // This allows for the graceful handling of pod admission failure. type AdmissionFailureHandler interface { - HandleAdmissionFailure(admitPod *v1.Pod, failureReasons []PredicateFailureReason) ([]PredicateFailureReason, error) + HandleAdmissionFailure(ctx context.Context, admitPod *v1.Pod, failureReasons []PredicateFailureReason) ([]PredicateFailureReason, error) } type predicateAdmitHandler struct { @@ -114,6 +115,10 @@ func NewPredicateAdmitHandler(getNodeAnyWayFunc getNodeAnyWayFuncType, admission } func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult { + // TODO: pass context to Admit when migrating this component to + // contextual logging + ctx := context.TODO() + logger := klog.FromContext(ctx) node, err := w.getNodeAnyWayFunc() if err != nil { klog.ErrorS(err, "Cannot get Node info") @@ -158,7 +163,7 @@ func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult // ensure the node has enough plugin resources for that required in pods if err = w.pluginResourceUpdateFunc(nodeInfo, attrs); err != nil { message := fmt.Sprintf("Update plugin resources failed due to %v, which is unexpected.", err) - klog.InfoS("Failed to admit pod", "pod", klog.KObj(admitPod), "message", message) + logger.Info("Failed to admit pod", "pod", klog.KObj(admitPod), "message", message) return PodAdmitResult{ Admit: false, Reason: UnexpectedAdmissionError, @@ -179,11 +184,11 @@ func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult reasons := generalFilter(podWithoutMissingExtendedResources, nodeInfo) fit := len(reasons) == 0 if !fit { - reasons, err = w.admissionFailureHandler.HandleAdmissionFailure(admitPod, reasons) + reasons, err = w.admissionFailureHandler.HandleAdmissionFailure(ctx, admitPod, reasons) fit = len(reasons) == 0 && err == nil if err != nil { message := fmt.Sprintf("Unexpected error while attempting to recover from admission failure: %v", err) - klog.InfoS("Failed to admit pod, unexpected error while attempting to recover from admission failure", "pod", klog.KObj(admitPod), "err", err) + logger.Info("Failed to admit pod, unexpected error while attempting to recover from admission failure", "pod", klog.KObj(admitPod), "err", err) return PodAdmitResult{ Admit: fit, Reason: UnexpectedAdmissionError, @@ -196,7 +201,7 @@ func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult var message string if len(reasons) == 0 { message = fmt.Sprint("GeneralPredicates failed due to unknown reason, which is unexpected.") - klog.InfoS("Failed to admit pod: GeneralPredicates failed due to unknown reason, which is unexpected", "pod", klog.KObj(admitPod)) + logger.Info("Failed to admit pod: GeneralPredicates failed due to unknown reason, which is unexpected", "pod", klog.KObj(admitPod)) return PodAdmitResult{ Admit: fit, Reason: UnknownReason, @@ -209,7 +214,7 @@ func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult case *PredicateFailureError: reason = re.PredicateName message = re.Error() - klog.V(2).InfoS("Predicate failed on Pod", "pod", klog.KObj(admitPod), "err", message) + logger.V(2).Info("Predicate failed on Pod", "pod", klog.KObj(admitPod), "err", message) case *InsufficientResourceError: switch re.ResourceName { case v1.ResourceCPU: @@ -224,11 +229,11 @@ func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult reason = fmt.Sprintf("%s%s", InsufficientResourcePrefix, re.ResourceName) } message = re.Error() - klog.V(2).InfoS("Predicate failed on Pod", "pod", klog.KObj(admitPod), "err", message) + logger.V(2).Info("Predicate failed on Pod", "pod", klog.KObj(admitPod), "err", message) default: reason = UnexpectedPredicateFailureType message = fmt.Sprintf("GeneralPredicates failed due to %v, which is unexpected.", r) - klog.InfoS("Failed to admit pod", "pod", klog.KObj(admitPod), "err", message) + logger.Info("Failed to admit pod", "pod", klog.KObj(admitPod), "err", message) } return PodAdmitResult{ Admit: fit, diff --git a/pkg/kubelet/preemption/preemption.go b/pkg/kubelet/preemption/preemption.go index c60316ae3c3..322b757aaba 100644 --- a/pkg/kubelet/preemption/preemption.go +++ b/pkg/kubelet/preemption/preemption.go @@ -17,6 +17,7 @@ limitations under the License. package preemption import ( + "context" "fmt" "math" @@ -60,7 +61,7 @@ func NewCriticalPodAdmissionHandler(getPodsFunc eviction.ActivePodsFunc, killPod // HandleAdmissionFailure gracefully handles admission rejection, and, in some cases, // to allow admission of the pod despite its previous failure. -func (c *CriticalPodAdmissionHandler) HandleAdmissionFailure(admitPod *v1.Pod, failureReasons []lifecycle.PredicateFailureReason) ([]lifecycle.PredicateFailureReason, error) { +func (c *CriticalPodAdmissionHandler) HandleAdmissionFailure(ctx context.Context, admitPod *v1.Pod, failureReasons []lifecycle.PredicateFailureReason) ([]lifecycle.PredicateFailureReason, error) { if !kubetypes.IsCriticalPod(admitPod) { return failureReasons, nil } @@ -82,7 +83,7 @@ func (c *CriticalPodAdmissionHandler) HandleAdmissionFailure(admitPod *v1.Pod, f // Return only reasons that are not resource related, since critical pods cannot fail admission for resource reasons. return nonResourceReasons, nil } - err := c.evictPodsToFreeRequests(admitPod, admissionRequirementList(resourceReasons)) + err := c.evictPodsToFreeRequests(ctx, admitPod, admissionRequirementList(resourceReasons)) // if no error is returned, preemption succeeded and the pod is safe to admit. return nil, err } @@ -90,7 +91,8 @@ func (c *CriticalPodAdmissionHandler) HandleAdmissionFailure(admitPod *v1.Pod, f // evictPodsToFreeRequests takes a list of insufficient resources, and attempts to free them by evicting pods // based on requests. For example, if the only insufficient resource is 200Mb of memory, this function could // evict a pod with request=250Mb. -func (c *CriticalPodAdmissionHandler) evictPodsToFreeRequests(admitPod *v1.Pod, insufficientResources admissionRequirementList) error { +func (c *CriticalPodAdmissionHandler) evictPodsToFreeRequests(ctx context.Context, admitPod *v1.Pod, insufficientResources admissionRequirementList) error { + logger := klog.FromContext(ctx) podsToPreempt, err := getPodsToPreempt(admitPod, c.getPodsFunc(), insufficientResources) if err != nil { return fmt.Errorf("preemption: error finding a set of pods to preempt: %v", err) @@ -99,7 +101,7 @@ func (c *CriticalPodAdmissionHandler) evictPodsToFreeRequests(admitPod *v1.Pod, // record that we are evicting the pod c.recorder.Eventf(pod, v1.EventTypeWarning, events.PreemptContainer, message) // this is a blocking call and should only return when the pod and its containers are killed. - klog.V(2).InfoS("Preempting pod to free up resources", "pod", klog.KObj(pod), "podUID", pod.UID, "insufficientResources", insufficientResources.toString(), "requestingPod", klog.KObj(admitPod)) + logger.V(2).Info("Preempting pod to free up resources", "pod", klog.KObj(pod), "podUID", pod.UID, "insufficientResources", insufficientResources.toString(), "requestingPod", klog.KObj(admitPod)) err := c.killPodFunc(pod, true, nil, func(status *v1.PodStatus) { status.Phase = v1.PodFailed status.Reason = events.PreemptContainer @@ -113,7 +115,7 @@ func (c *CriticalPodAdmissionHandler) evictPodsToFreeRequests(admitPod *v1.Pod, }) }) if err != nil { - klog.ErrorS(err, "Failed to evict pod", "pod", klog.KObj(pod)) + logger.Error(err, "Failed to evict pod", "pod", klog.KObj(pod)) // In future syncPod loops, the kubelet will retry the pod deletion steps that it was stuck on. continue } @@ -122,7 +124,7 @@ func (c *CriticalPodAdmissionHandler) evictPodsToFreeRequests(admitPod *v1.Pod, } else { metrics.Preemptions.WithLabelValues("").Inc() } - klog.InfoS("Pod evicted successfully", "pod", klog.KObj(pod)) + logger.Info("Pod evicted successfully", "pod", klog.KObj(pod)) } return nil } diff --git a/pkg/kubelet/preemption/preemption_test.go b/pkg/kubelet/preemption/preemption_test.go index 297f9852e94..74591a59c63 100644 --- a/pkg/kubelet/preemption/preemption_test.go +++ b/pkg/kubelet/preemption/preemption_test.go @@ -26,6 +26,7 @@ import ( "k8s.io/client-go/tools/record" kubeapi "k8s.io/kubernetes/pkg/apis/core" "k8s.io/kubernetes/pkg/apis/scheduling" + "k8s.io/kubernetes/test/utils/ktesting" ) const ( @@ -91,6 +92,7 @@ func getTestCriticalPodAdmissionHandler(podProvider *fakePodProvider, podKiller } func TestEvictPodsToFreeRequests(t *testing.T) { + tCtx := ktesting.Init(t) type testRun struct { testName string isPodKillerWithError bool @@ -144,7 +146,7 @@ func TestEvictPodsToFreeRequests(t *testing.T) { podKiller := newFakePodKiller(r.isPodKillerWithError) criticalPodAdmissionHandler := getTestCriticalPodAdmissionHandler(podProvider, podKiller) podProvider.setPods(r.inputPods) - outErr := criticalPodAdmissionHandler.evictPodsToFreeRequests(allPods[clusterCritical], r.insufficientResources) + outErr := criticalPodAdmissionHandler.evictPodsToFreeRequests(tCtx, allPods[clusterCritical], r.insufficientResources) outputPods := podKiller.getKilledPods() if !r.expectErr && outErr != nil { t.Errorf("evictPodsToFreeRequests returned an unexpected error during the %s test. Err: %v", r.testName, outErr)