Wire contexts to Core controllers

Mike Dame
2021-04-22 14:27:59 -04:00
parent 657412713b
commit 4960d0976a
61 changed files with 842 additions and 780 deletions

@@ -349,6 +349,7 @@ type Controller struct {
// NewNodeLifecycleController returns a new taint controller.
func NewNodeLifecycleController(
+ ctx context.Context,
leaseInformer coordinformers.LeaseInformer,
podInformer coreinformers.PodInformer,
nodeInformer coreinformers.NodeInformer,
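The constructor gains a context parameter so it can hand one to NewNoExecuteTaintManager below. A minimal, self-contained sketch of the calling shape this commit moves toward, one root context shared by construction and Run; runController is a stand-in and the signal-based root context is illustrative, not part of this commit (the real constructor also takes informers, a clientset and several tuning options):

```go
package main

import (
	"context"
	"os/signal"
	"syscall"
)

// runController stands in for NewNodeLifecycleController followed by Run(ctx);
// the real constructor takes many more arguments that are not shown here.
func runController(ctx context.Context) {
	// Everything the controller starts derives from ctx and exits when it is
	// cancelled, which is the pattern this commit introduces.
	<-ctx.Done()
}

func main() {
	// One root context, cancelled on SIGINT/SIGTERM, shared by construction
	// and Run. Deriving it from process signals is just one possible choice.
	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	runController(ctx)
}
```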
@@ -484,7 +485,7 @@ func NewNodeLifecycleController(
podGetter := func(name, namespace string) (*v1.Pod, error) { return nc.podLister.Pods(namespace).Get(name) }
nodeLister := nodeInformer.Lister()
nodeGetter := func(name string) (*v1.Node, error) { return nodeLister.Get(name) }
- nc.taintManager = scheduler.NewNoExecuteTaintManager(kubeClient, podGetter, nodeGetter, nc.getPodsAssignedToNode)
+ nc.taintManager = scheduler.NewNoExecuteTaintManager(ctx, kubeClient, podGetter, nodeGetter, nc.getPodsAssignedToNode)
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: nodeutil.CreateAddNodeHandler(func(node *v1.Node) error {
nc.taintManager.NodeUpdated(nil, node)
@@ -532,18 +533,18 @@ func NewNodeLifecycleController(
}
// Run starts an asynchronous loop that monitors the status of cluster nodes.
- func (nc *Controller) Run(stopCh <-chan struct{}) {
+ func (nc *Controller) Run(ctx context.Context) {
defer utilruntime.HandleCrash()
klog.Infof("Starting node controller")
defer klog.Infof("Shutting down node controller")
if !cache.WaitForNamedCacheSync("taint", stopCh, nc.leaseInformerSynced, nc.nodeInformerSynced, nc.podInformerSynced, nc.daemonSetInformerSynced) {
if !cache.WaitForNamedCacheSync("taint", ctx.Done(), nc.leaseInformerSynced, nc.nodeInformerSynced, nc.podInformerSynced, nc.daemonSetInformerSynced) {
return
}
if nc.runTaintManager {
- go nc.taintManager.Run(stopCh)
+ go nc.taintManager.Run(ctx)
}
// Close node update queue to cleanup go routine.
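Run now takes a context instead of a stop channel, and the same context feeds WaitForNamedCacheSync (via ctx.Done()) and the taint manager. A caller that still owns only a legacy stopCh can bridge it; the helper below is a sketch, not part of the commit (apimachinery's wait package offers a comparable ContextForChannel helper):

```go
package nodelifecycle // illustrative package name, not from this commit

import "context"

// contextFromStopCh bridges a legacy stop channel to a context so existing
// callers that still own a stopCh can call the new Run(ctx context.Context).
func contextFromStopCh(stopCh <-chan struct{}) (context.Context, context.CancelFunc) {
	ctx, cancel := context.WithCancel(context.Background())
	go func() {
		defer cancel()
		select {
		case <-stopCh: // legacy shutdown signal
		case <-ctx.Done(): // already cancelled elsewhere
		}
	}()
	return ctx, cancel
}
```

A legacy caller would then do something like: ctx, cancel := contextFromStopCh(stopCh); defer cancel(); nc.Run(ctx).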
@@ -556,35 +557,35 @@ func (nc *Controller) Run(stopCh <-chan struct{}) {
// the item is flagged when got from queue: if new event come, the new item will
// be re-queued until "Done", so no more than one worker handle the same item and
// no event missed.
- go wait.Until(nc.doNodeProcessingPassWorker, time.Second, stopCh)
+ go wait.UntilWithContext(ctx, nc.doNodeProcessingPassWorker, time.Second)
}
for i := 0; i < podUpdateWorkerSize; i++ {
- go wait.Until(nc.doPodProcessingWorker, time.Second, stopCh)
+ go wait.UntilWithContext(ctx, nc.doPodProcessingWorker, time.Second)
}
if nc.runTaintManager {
// Handling taint based evictions. Because we don't want a dedicated logic in TaintManager for NC-originated
// taints and we normally don't rate limit evictions caused by taints, we need to rate limit adding taints.
- go wait.Until(nc.doNoExecuteTaintingPass, scheduler.NodeEvictionPeriod, stopCh)
+ go wait.UntilWithContext(ctx, nc.doNoExecuteTaintingPass, scheduler.NodeEvictionPeriod)
} else {
// Managing eviction of nodes:
// When we delete pods off a node, if the node was not empty at the time we then
// queue an eviction watcher. If we hit an error, retry deletion.
- go wait.Until(nc.doEvictionPass, scheduler.NodeEvictionPeriod, stopCh)
+ go wait.UntilWithContext(ctx, nc.doEvictionPass, scheduler.NodeEvictionPeriod)
}
// Incorporate the results of node health signal pushed from kubelet to master.
- go wait.Until(func() {
- if err := nc.monitorNodeHealth(); err != nil {
+ go wait.UntilWithContext(ctx, func(ctx context.Context) {
+ if err := nc.monitorNodeHealth(ctx); err != nil {
klog.Errorf("Error monitoring node health: %v", err)
}
- }, nc.nodeMonitorPeriod, stopCh)
+ }, nc.nodeMonitorPeriod)
- <-stopCh
+ <-ctx.Done()
}
- func (nc *Controller) doNodeProcessingPassWorker() {
+ func (nc *Controller) doNodeProcessingPassWorker(ctx context.Context) {
for {
obj, shutdown := nc.nodeUpdateQueue.Get()
// "nodeUpdateQueue" will be shutdown when "stopCh" closed;
@@ -593,7 +594,7 @@ func (nc *Controller) doNodeProcessingPassWorker() {
return
}
nodeName := obj.(string)
- if err := nc.doNoScheduleTaintingPass(nodeName); err != nil {
+ if err := nc.doNoScheduleTaintingPass(ctx, nodeName); err != nil {
klog.Errorf("Failed to taint NoSchedule on node <%s>, requeue it: %v", nodeName, err)
// TODO(k82cn): Add nodeName back to the queue
}
@@ -607,7 +608,7 @@ func (nc *Controller) doNodeProcessingPassWorker() {
}
}
- func (nc *Controller) doNoScheduleTaintingPass(nodeName string) error {
+ func (nc *Controller) doNoScheduleTaintingPass(ctx context.Context, nodeName string) error {
node, err := nc.nodeLister.Get(nodeName)
if err != nil {
// If node not found, just ignore it.
@@ -656,13 +657,13 @@ func (nc *Controller) doNoScheduleTaintingPass(nodeName string) error {
if len(taintsToAdd) == 0 && len(taintsToDel) == 0 {
return nil
}
- if !nodeutil.SwapNodeControllerTaint(nc.kubeClient, taintsToAdd, taintsToDel, node) {
+ if !nodeutil.SwapNodeControllerTaint(ctx, nc.kubeClient, taintsToAdd, taintsToDel, node) {
return fmt.Errorf("failed to swap taints of node %+v", node)
}
return nil
}
- func (nc *Controller) doNoExecuteTaintingPass() {
+ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
for k := range nc.zoneNoExecuteTainter {
@@ -694,7 +695,7 @@ func (nc *Controller) doNoExecuteTaintingPass() {
return true, 0
}
- result := nodeutil.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{&oppositeTaint}, node)
+ result := nodeutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{&oppositeTaint}, node)
if result {
//count the evictionsNumber
zone := utilnode.GetZoneKey(node)
@@ -706,7 +707,7 @@ func (nc *Controller) doNoExecuteTaintingPass() {
}
}
- func (nc *Controller) doEvictionPass() {
+ func (nc *Controller) doEvictionPass(ctx context.Context) {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
for k := range nc.zonePodEvictor {
@@ -724,7 +725,7 @@ func (nc *Controller) doEvictionPass() {
utilruntime.HandleError(fmt.Errorf("unable to list pods from node %q: %v", value.Value, err))
return false, 0
}
- remaining, err := nodeutil.DeletePods(nc.kubeClient, pods, nc.recorder, value.Value, nodeUID, nc.daemonSetStore)
+ remaining, err := nodeutil.DeletePods(ctx, nc.kubeClient, pods, nc.recorder, value.Value, nodeUID, nc.daemonSetStore)
if err != nil {
// We are not setting eviction status here.
// New pods will be handled by zonePodEvictor retry
@@ -752,7 +753,7 @@ func (nc *Controller) doEvictionPass() {
// monitorNodeHealth verifies node health are constantly updated by kubelet, and
// if not, post "NodeReady==ConditionUnknown".
// This function will taint nodes who are not ready or not reachable for a long period of time.
- func (nc *Controller) monitorNodeHealth() error {
+ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
// We are listing nodes from local cache as we can tolerate some small delays
// comparing to state from etcd and there is eventual consistency anyway.
nodes, err := nc.nodeLister.List(labels.Everything())
@@ -771,7 +772,7 @@ func (nc *Controller) monitorNodeHealth() error {
nc.knownNodeSet[added[i].Name] = added[i]
nc.addPodEvictorForNewZone(added[i])
if nc.runTaintManager {
- nc.markNodeAsReachable(added[i])
+ nc.markNodeAsReachable(ctx, added[i])
} else {
nc.cancelPodEviction(added[i])
}
@@ -790,12 +791,12 @@ func (nc *Controller) monitorNodeHealth() error {
var currentReadyCondition *v1.NodeCondition
node := nodes[i].DeepCopy()
if err := wait.PollImmediate(retrySleepTime, retrySleepTime*scheduler.NodeHealthUpdateRetry, func() (bool, error) {
- gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeHealth(node)
+ gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeHealth(ctx, node)
if err == nil {
return true, nil
}
name := node.Name
- node, err = nc.kubeClient.CoreV1().Nodes().Get(context.TODO(), name, metav1.GetOptions{})
+ node, err = nc.kubeClient.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{})
if err != nil {
klog.Errorf("Failed while getting a Node to retry updating node health. Probably Node %s was deleted.", name)
return false, err
@@ -825,9 +826,9 @@ func (nc *Controller) monitorNodeHealth() error {
continue
}
if nc.runTaintManager {
- nc.processTaintBaseEviction(node, &observedReadyCondition)
+ nc.processTaintBaseEviction(ctx, node, &observedReadyCondition)
} else {
- if err := nc.processNoTaintBaseEviction(node, &observedReadyCondition, gracePeriod, pods); err != nil {
+ if err := nc.processNoTaintBaseEviction(ctx, node, &observedReadyCondition, gracePeriod, pods); err != nil {
utilruntime.HandleError(fmt.Errorf("unable to evict all pods from node %v: %v; queuing for retry", node.Name, err))
}
}
@@ -839,7 +840,7 @@ func (nc *Controller) monitorNodeHealth() error {
nodeutil.RecordNodeStatusChange(nc.recorder, node, "NodeNotReady")
fallthrough
case needsRetry && observedReadyCondition.Status != v1.ConditionTrue:
- if err = nodeutil.MarkPodsNotReady(nc.kubeClient, nc.recorder, pods, node.Name); err != nil {
+ if err = nodeutil.MarkPodsNotReady(ctx, nc.kubeClient, nc.recorder, pods, node.Name); err != nil {
utilruntime.HandleError(fmt.Errorf("unable to mark all pods NotReady on node %v: %v; queuing for retry", node.Name, err))
nc.nodesToRetry.Store(node.Name, struct{}{})
continue
@@ -848,12 +849,12 @@ func (nc *Controller) monitorNodeHealth() error {
}
nc.nodesToRetry.Delete(node.Name)
}
- nc.handleDisruption(zoneToNodeConditions, nodes)
+ nc.handleDisruption(ctx, zoneToNodeConditions, nodes)
return nil
}
- func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondition *v1.NodeCondition) {
+ func (nc *Controller) processTaintBaseEviction(ctx context.Context, node *v1.Node, observedReadyCondition *v1.NodeCondition) {
decisionTimestamp := nc.now()
// Check eviction timeout against decisionTimestamp
switch observedReadyCondition.Status {
@@ -861,7 +862,7 @@ func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondi
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
if taintutils.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) {
taintToAdd := *NotReadyTaintTemplate
- if !nodeutil.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{UnreachableTaintTemplate}, node) {
+ if !nodeutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{UnreachableTaintTemplate}, node) {
klog.Errorf("Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle.")
}
} else if nc.markNodeForTainting(node, v1.ConditionFalse) {
@@ -874,7 +875,7 @@ func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondi
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
if taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) {
taintToAdd := *UnreachableTaintTemplate
- if !nodeutil.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{NotReadyTaintTemplate}, node) {
+ if !nodeutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{NotReadyTaintTemplate}, node) {
klog.Errorf("Failed to instantly swap NotReadyTaint to UnreachableTaint. Will try again in the next cycle.")
}
} else if nc.markNodeForTainting(node, v1.ConditionUnknown) {
@@ -884,7 +885,7 @@ func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondi
)
}
case v1.ConditionTrue:
- removed, err := nc.markNodeAsReachable(node)
+ removed, err := nc.markNodeAsReachable(ctx, node)
if err != nil {
klog.Errorf("Failed to remove taints from node %v. Will retry in next iteration.", node.Name)
}
@@ -894,7 +895,7 @@ func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondi
}
}
- func (nc *Controller) processNoTaintBaseEviction(node *v1.Node, observedReadyCondition *v1.NodeCondition, gracePeriod time.Duration, pods []*v1.Pod) error {
+ func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.Node, observedReadyCondition *v1.NodeCondition, gracePeriod time.Duration, pods []*v1.Pod) error {
decisionTimestamp := nc.now()
nodeHealthData := nc.nodeHealthMap.getDeepCopy(node.Name)
if nodeHealthData == nil {
@@ -904,7 +905,7 @@ func (nc *Controller) processNoTaintBaseEviction(node *v1.Node, observedReadyCon
switch observedReadyCondition.Status {
case v1.ConditionFalse:
if decisionTimestamp.After(nodeHealthData.readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
- enqueued, err := nc.evictPods(node, pods)
+ enqueued, err := nc.evictPods(ctx, node, pods)
if err != nil {
return err
}
@@ -919,7 +920,7 @@ func (nc *Controller) processNoTaintBaseEviction(node *v1.Node, observedReadyCon
}
case v1.ConditionUnknown:
if decisionTimestamp.After(nodeHealthData.probeTimestamp.Add(nc.podEvictionTimeout)) {
- enqueued, err := nc.evictPods(node, pods)
+ enqueued, err := nc.evictPods(ctx, node, pods)
if err != nil {
return err
}
@@ -953,7 +954,7 @@ func isNodeExcludedFromDisruptionChecks(node *v1.Node) bool {
// tryUpdateNodeHealth checks a given node's conditions and tries to update it. Returns grace period to
// which given node is entitled, state of current and last observed Ready Condition, and an error if it occurred.
- func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) {
+ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) {
nodeHealth := nc.nodeHealthMap.getDeepCopy(node.Name)
defer func() {
nc.nodeHealthMap.set(node.Name, nodeHealth)
@@ -1102,7 +1103,7 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
_, currentReadyCondition = nodeutil.GetNodeCondition(&node.Status, v1.NodeReady)
if !apiequality.Semantic.DeepEqual(currentReadyCondition, &observedReadyCondition) {
- if _, err := nc.kubeClient.CoreV1().Nodes().UpdateStatus(context.TODO(), node, metav1.UpdateOptions{}); err != nil {
+ if _, err := nc.kubeClient.CoreV1().Nodes().UpdateStatus(ctx, node, metav1.UpdateOptions{}); err != nil {
klog.Errorf("Error updating node %s: %v", node.Name, err)
return gracePeriod, observedReadyCondition, currentReadyCondition, err
}
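Inside tryUpdateNodeHealth, the UpdateStatus call drops context.TODO() in favor of the context threaded in from Run, so cancelling the controller also cancels in-flight requests to the API server; the same substitution recurs throughout the diff. A stripped-down sketch of the pattern, assuming only a client-go kubernetes.Interface (the function name and package are hypothetical):

```go
package nodeutil // hypothetical package, for illustration only

import (
	"context"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// updateNodeStatus shows the recurring pattern in this commit: thread the
// caller's ctx into the client call instead of context.TODO(), so cancelling
// the controller aborts the request as well.
func updateNodeStatus(ctx context.Context, client kubernetes.Interface, node *v1.Node) (*v1.Node, error) {
	return client.CoreV1().Nodes().UpdateStatus(ctx, node, metav1.UpdateOptions{})
}
```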
@@ -1119,7 +1120,7 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
return gracePeriod, observedReadyCondition, currentReadyCondition, nil
}
- func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.NodeCondition, nodes []*v1.Node) {
+ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions map[string][]*v1.NodeCondition, nodes []*v1.Node) {
newZoneStates := map[string]ZoneState{}
allAreFullyDisrupted := true
for k, v := range zoneToNodeConditions {
@@ -1163,7 +1164,7 @@ func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.Nod
klog.V(0).Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode.")
for i := range nodes {
if nc.runTaintManager {
- _, err := nc.markNodeAsReachable(nodes[i])
+ _, err := nc.markNodeAsReachable(ctx, nodes[i])
if err != nil {
klog.Errorf("Failed to remove taints from Node %v", nodes[i].Name)
}
@@ -1227,7 +1228,7 @@ func (nc *Controller) podUpdated(oldPod, newPod *v1.Pod) {
}
}
- func (nc *Controller) doPodProcessingWorker() {
+ func (nc *Controller) doPodProcessingWorker(ctx context.Context) {
for {
obj, shutdown := nc.podUpdateQueue.Get()
// "podUpdateQueue" will be shutdown when "stopCh" closed;
@@ -1237,7 +1238,7 @@ func (nc *Controller) doPodProcessingWorker() {
}
podItem := obj.(podUpdateItem)
- nc.processPod(podItem)
+ nc.processPod(ctx, podItem)
}
}
@@ -1245,7 +1246,7 @@ func (nc *Controller) doPodProcessingWorker() {
// 1. for NodeReady=true node, taint eviction for this pod will be cancelled
// 2. for NodeReady=false or unknown node, taint eviction of pod will happen and pod will be marked as not ready
// 3. if node doesn't exist in cache, it will be skipped and handled later by doEvictionPass
- func (nc *Controller) processPod(podItem podUpdateItem) {
+ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
defer nc.podUpdateQueue.Done(podItem)
pod, err := nc.podLister.Pods(podItem.namespace).Get(podItem.name)
if err != nil {
@@ -1286,7 +1287,7 @@ func (nc *Controller) processPod(podItem podUpdateItem) {
// In taint-based eviction mode, only node updates are processed by NodeLifecycleController.
// Pods are processed by TaintManager.
if !nc.runTaintManager {
- if err := nc.processNoTaintBaseEviction(node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil {
+ if err := nc.processNoTaintBaseEviction(ctx, node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil {
klog.Warningf("Unable to process pod %+v eviction from node %v: %v.", podItem, nodeName, err)
nc.podUpdateQueue.AddRateLimited(podItem)
return
@@ -1294,7 +1295,7 @@ func (nc *Controller) processPod(podItem podUpdateItem) {
}
if currentReadyCondition.Status != v1.ConditionTrue {
- if err := nodeutil.MarkPodsNotReady(nc.kubeClient, nc.recorder, pods, nodeName); err != nil {
+ if err := nodeutil.MarkPodsNotReady(ctx, nc.kubeClient, nc.recorder, pods, nodeName); err != nil {
klog.Warningf("Unable to mark pod %+v NotReady on node %v: %v.", podItem, nodeName, err)
nc.podUpdateQueue.AddRateLimited(podItem)
}
@@ -1421,14 +1422,14 @@ func (nc *Controller) cancelPodEviction(node *v1.Node) bool {
// Returns false if the node name was already enqueued.
// - deletes pods immediately if node is already marked as evicted.
// Returns false, because the node wasn't added to the queue.
- func (nc *Controller) evictPods(node *v1.Node, pods []*v1.Pod) (bool, error) {
+ func (nc *Controller) evictPods(ctx context.Context, node *v1.Node, pods []*v1.Pod) (bool, error) {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
status, ok := nc.nodeEvictionMap.getStatus(node.Name)
if ok && status == evicted {
// Node eviction already happened for this node.
// Handling immediate pod deletion.
- _, err := nodeutil.DeletePods(nc.kubeClient, pods, nc.recorder, node.Name, string(node.UID), nc.daemonSetStore)
+ _, err := nodeutil.DeletePods(ctx, nc.kubeClient, pods, nc.recorder, node.Name, string(node.UID), nc.daemonSetStore)
if err != nil {
return false, fmt.Errorf("unable to delete pods from node %q: %v", node.Name, err)
}
@@ -1458,15 +1459,15 @@ func (nc *Controller) markNodeForTainting(node *v1.Node, status v1.ConditionStat
return nc.zoneNoExecuteTainter[utilnode.GetZoneKey(node)].Add(node.Name, string(node.UID))
}
- func (nc *Controller) markNodeAsReachable(node *v1.Node) (bool, error) {
+ func (nc *Controller) markNodeAsReachable(ctx context.Context, node *v1.Node) (bool, error) {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
- err := controller.RemoveTaintOffNode(nc.kubeClient, node.Name, node, UnreachableTaintTemplate)
+ err := controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, UnreachableTaintTemplate)
if err != nil {
klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
return false, err
}
- err = controller.RemoveTaintOffNode(nc.kubeClient, node.Name, node, NotReadyTaintTemplate)
+ err = controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, NotReadyTaintTemplate)
if err != nil {
klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
return false, err