CE changes for vault-31750 (#29303)

* ce changes for vault-31750

* add changelog

* make proto

* refactor naming

* clarify error message

* update changelog

* one more time

* make proto AGAIN
This commit is contained in:
Josh Black
2025-01-09 11:58:29 -08:00
committed by GitHub
parent 36d7e0c6bd
commit f625f506ed
5 changed files with 38 additions and 6 deletions

3
changelog/29303.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:change
core (enterprise): Add tracking of performance standbys by their HA node ID so that RPC connections can be more easily cleaned up when nodes are removed.
```

View File

@@ -256,8 +256,9 @@ type RaftBackend struct {
// limits.
specialPathLimits map[string]uint64
removed *atomic.Bool
removedCallback func()
removed *atomic.Bool
removedCallback func()
removedServerCleanup func(context.Context, string) (bool, error)
}
func (b *RaftBackend) IsNodeRemoved(ctx context.Context, nodeID string) (bool, error) {
@@ -284,6 +285,23 @@ func (b *RaftBackend) RemoveSelf() error {
return b.stableStore.SetUint64(removedKey, 1)
}
// SetRemovedServerCleanupFunc stores f as the callback that
// RemovedServerCleanup invokes with the ID of a removed node. Passing nil
// clears any previously stored callback. The field is written while holding
// the backend's write lock.
func (b *RaftBackend) SetRemovedServerCleanupFunc(f func(context.Context, string) (bool, error)) {
	b.l.Lock()
	defer b.l.Unlock()

	b.removedServerCleanup = f
}
// RemovedServerCleanup runs the callback stored by SetRemovedServerCleanupFunc
// for the given node ID and returns that callback's results unchanged. When no
// callback has been stored it returns (false, nil).
//
// The callback is read under the backend's read lock but invoked only after
// the lock has been released. It is supplied from outside this type, and
// calling it with b.l held would risk a deadlock if it (or anything it calls)
// re-enters a RaftBackend method that acquires b.l: sync.RWMutex does not
// support recursive read locking once a writer is waiting.
func (b *RaftBackend) RemovedServerCleanup(ctx context.Context, nodeID string) (bool, error) {
	b.l.RLock()
	cleanup := b.removedServerCleanup
	b.l.RUnlock()

	if cleanup == nil {
		return false, nil
	}
	return cleanup(ctx, nodeID)
}
// LeaderJoinInfo contains information required by a node to join itself as a
// follower to an existing raft cluster
type LeaderJoinInfo struct {

View File

@@ -666,6 +666,10 @@ func (d *Delegate) RemoveFailedServer(server *autopilot.Server) {
}
d.followerStates.Delete(string(server.ID))
_, err := d.RemovedServerCleanup(context.Background(), string(server.ID))
if err != nil {
d.logger.Error("failed to run cleanup", "error", err)
}
}()
}

View File

@@ -3761,10 +3761,10 @@ var File_sdk_plugin_pb_backend_proto protoreflect.FileDescriptor
var file_sdk_plugin_pb_backend_proto_rawDesc = []byte{
0x0a, 0x1b, 0x73, 0x64, 0x6b, 0x2f, 0x70, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x2f, 0x70, 0x62, 0x2f,
0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x02, 0x70,
0x62, 0x1a, 0x1f, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62,
0x75, 0x66, 0x2f, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x2e, 0x70, 0x72, 0x6f,
0x74, 0x6f, 0x1a, 0x1c, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f,
0x62, 0x75, 0x66, 0x2f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f,
0x62, 0x1a, 0x1c, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62,
0x75, 0x66, 0x2f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a,
0x1f, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66,
0x2f, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f,
0x1a, 0x17, 0x73, 0x64, 0x6b, 0x2f, 0x6c, 0x6f, 0x67, 0x69, 0x63, 0x61, 0x6c, 0x2f, 0x65, 0x76,
0x65, 0x6e, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x1a, 0x73, 0x64, 0x6b, 0x2f, 0x6c,
0x6f, 0x67, 0x69, 0x63, 0x61, 0x6c, 0x2f, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x2e,

View File

@@ -268,6 +268,13 @@ func (b *SystemBackend) handleRaftRemovePeerUpdate() framework.OperationFunc {
}
b.Core.raftFollowerStates.Delete(serverID)
_, err := raftBackend.RemovedServerCleanup(ctx, serverID)
if err != nil {
// log the error but don't return it - we might get an error if we can't find the node in the cache, which
// is not an error condition in this instance.
b.logger.Info("attempted to remove node from perf standby cache but it failed, which might be fine", "server ID", serverID, "error", err)
return nil, nil
}
return nil, nil
}