From 1214dc223c7fd39262d09e96b8045965c723dbbb Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Tue, 25 Feb 2025 08:09:18 +0000 Subject: [PATCH] kubelet: Use node addresses from informer The kubelet certificate manager was using a closure to get the node addresses, but this closure depended on a static field that was only updated during the node status update. This created a twisted dependency between the node.status reconcile loops and the certificate manager. This commit fixes this issue by using the node informer to get the node addresses directly. This ensures that the kubelet always requests a certificate with the latest node addresses. --- pkg/kubelet/kubelet.go | 3 - pkg/kubelet/kubelet_getters.go | 10 ++ pkg/kubelet/kubelet_getters_test.go | 181 ++++++++++++++++++++++++++++ pkg/kubelet/kubelet_node_status.go | 12 -- 4 files changed, 191 insertions(+), 15 deletions(-) diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 617b636d44b..2a1a3e4aa2e 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -1096,9 +1096,6 @@ type Kubelet struct { rootDirectory string podLogsDirectory string - lastObservedNodeAddressesMux sync.RWMutex - lastObservedNodeAddresses []v1.NodeAddress - // onRepeatedHeartbeatFailure is called when a heartbeat operation fails more than once. optional. onRepeatedHeartbeatFailure func() diff --git a/pkg/kubelet/kubelet_getters.go b/pkg/kubelet/kubelet_getters.go index 491ce594dd3..2928673b6c3 100644 --- a/pkg/kubelet/kubelet_getters.go +++ b/pkg/kubelet/kubelet_getters.go @@ -480,3 +480,13 @@ func (kl *Kubelet) setCachedMachineInfo(info *cadvisorapiv1.MachineInfo) { defer kl.machineInfoLock.Unlock() kl.machineInfo = info } + +// getLastStableNodeAddresses returns the last observed node addresses. +func (kl *Kubelet) getLastObservedNodeAddresses() []v1.NodeAddress { + node, err := kl.GetNode() + if err != nil || node == nil { + klog.V(4).InfoS("fail to obtain node from local cache", "node", kl.nodeName, "error", err) + return nil + } + return node.Status.Addresses +} diff --git a/pkg/kubelet/kubelet_getters_test.go b/pkg/kubelet/kubelet_getters_test.go index b66bc23d2c6..dca460fadd6 100644 --- a/pkg/kubelet/kubelet_getters_test.go +++ b/pkg/kubelet/kubelet_getters_test.go @@ -21,6 +21,10 @@ import ( "testing" "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + cloudproviderapi "k8s.io/cloud-provider/api" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" ) @@ -132,3 +136,180 @@ func TestHandlerSupportsUserNamespaces(t *testing.T) { assert.False(t, got) assert.Error(t, err) } + +func Test_getLastObservedNodeAddresses(t *testing.T) { + testCases := []struct { + name string + nodeName types.NodeName + node *v1.Node + externalCloudProvider bool + expectedAddrs []v1.NodeAddress + }{ + { + name: "node not found", + nodeName: "test-node", + node: nil, + expectedAddrs: nil, + }, + { + name: "empty addresses", + nodeName: "test-node", + node: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "test-node"}, + Status: v1.NodeStatus{Addresses: []v1.NodeAddress{}}, + }, + expectedAddrs: nil, + }, + { + name: "no taints", + nodeName: "test-node", + node: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "test-node"}, + Status: v1.NodeStatus{ + Addresses: []v1.NodeAddress{ + {Type: v1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: v1.NodeHostName, Address: "test-node"}, + }, + }, + }, + expectedAddrs: []v1.NodeAddress{ + {Type: v1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: v1.NodeHostName, Address: "test-node"}, + }, + }, + { + name: "other taints", + nodeName: "test-node", + node: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "test-node"}, + Spec: v1.NodeSpec{ + Taints: []v1.Taint{ + {Key: "other-taint", Effect: v1.TaintEffectNoSchedule}, + }, + }, + Status: v1.NodeStatus{ + Addresses: []v1.NodeAddress{ + {Type: v1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: v1.NodeHostName, Address: "test-node"}, + }, + }, + }, + expectedAddrs: []v1.NodeAddress{ + {Type: v1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: v1.NodeHostName, Address: "test-node"}, + }, + }, + { + name: "external cloud provider no taints", + nodeName: "test-node", + externalCloudProvider: true, + node: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "test-node"}, + Status: v1.NodeStatus{ + Addresses: []v1.NodeAddress{ + {Type: v1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: v1.NodeHostName, Address: "test-node"}, + }, + }, + }, + expectedAddrs: []v1.NodeAddress{ + {Type: v1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: v1.NodeHostName, Address: "test-node"}, + }, + }, + { + name: "no external cloud provider with external cloud provider taint", + nodeName: "test-node", + externalCloudProvider: false, + node: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "test-node"}, + Spec: v1.NodeSpec{ + Taints: []v1.Taint{ + {Key: cloudproviderapi.TaintExternalCloudProvider, Effect: v1.TaintEffectNoSchedule}, + }, + }, + Status: v1.NodeStatus{ + Addresses: []v1.NodeAddress{ + {Type: v1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: v1.NodeHostName, Address: "test-node"}, + }, + }, + }, + expectedAddrs: []v1.NodeAddress{ + {Type: v1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: v1.NodeHostName, Address: "test-node"}, + }, + }, + { + name: "external cloud provider taint", + nodeName: "test-node", + externalCloudProvider: true, + node: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "test-node"}, + Spec: v1.NodeSpec{ + Taints: []v1.Taint{ + {Key: cloudproviderapi.TaintExternalCloudProvider, Effect: v1.TaintEffectNoSchedule}, + }, + }, + Status: v1.NodeStatus{ + Addresses: []v1.NodeAddress{ + {Type: v1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: v1.NodeHostName, Address: "test-node"}, + }, + }, + }, + expectedAddrs: []v1.NodeAddress{ + {Type: v1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: v1.NodeHostName, Address: "test-node"}, + }}, + { + name: "external cloud provider other taint", + nodeName: "test-node", + externalCloudProvider: true, + node: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "test-node"}, + Spec: v1.NodeSpec{ + Taints: []v1.Taint{ + {Key: "other-taint", Effect: v1.TaintEffectNoSchedule}, + }, + }, + Status: v1.NodeStatus{ + Addresses: []v1.NodeAddress{ + {Type: v1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: v1.NodeHostName, Address: "test-node"}, + }, + }, + }, + expectedAddrs: []v1.NodeAddress{ + {Type: v1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: v1.NodeHostName, Address: "test-node"}, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */) + defer testKubelet.Cleanup() + kl := testKubelet.kubelet + kl.nodeName = types.NodeName(tc.nodeName) + kl.externalCloudProvider = tc.externalCloudProvider + nodeLister := testNodeLister{} + if tc.node != nil { + nodeLister.nodes = append(nodeLister.nodes, tc.node) + } + kl.nodeLister = nodeLister + addrs := kl.getLastObservedNodeAddresses() + + if len(addrs) != len(tc.expectedAddrs) { + t.Errorf("expected %d addresses, got %d", len(tc.expectedAddrs), len(addrs)) + } else { + for i := range addrs { + if addrs[i] != tc.expectedAddrs[i] { + t.Errorf("expected address %v, got %v", tc.expectedAddrs[i], addrs[i]) + } + } + } + }) + } +} diff --git a/pkg/kubelet/kubelet_node_status.go b/pkg/kubelet/kubelet_node_status.go index ebe36617f05..b712220c78b 100644 --- a/pkg/kubelet/kubelet_node_status.go +++ b/pkg/kubelet/kubelet_node_status.go @@ -644,7 +644,6 @@ func (kl *Kubelet) patchNodeStatus(originalNode, node *v1.Node) (*v1.Node, error return nil, err } kl.lastStatusReportTime = kl.clock.Now() - kl.setLastObservedNodeAddresses(updatedNode.Status.Addresses) readyIdx, readyCondition := nodeutil.GetNodeCondition(&updatedNode.Status, v1.NodeReady) if readyIdx >= 0 && readyCondition.Status == v1.ConditionTrue { @@ -721,17 +720,6 @@ func (kl *Kubelet) setNodeStatus(ctx context.Context, node *v1.Node) { } } -func (kl *Kubelet) setLastObservedNodeAddresses(addresses []v1.NodeAddress) { - kl.lastObservedNodeAddressesMux.Lock() - defer kl.lastObservedNodeAddressesMux.Unlock() - kl.lastObservedNodeAddresses = addresses -} -func (kl *Kubelet) getLastObservedNodeAddresses() []v1.NodeAddress { - kl.lastObservedNodeAddressesMux.RLock() - defer kl.lastObservedNodeAddressesMux.RUnlock() - return kl.lastObservedNodeAddresses -} - // defaultNodeStatusFuncs is a factory that generates the default set of // setNodeStatus funcs func (kl *Kubelet) defaultNodeStatusFuncs() []func(context.Context, *v1.Node) error {