From 1b50c640998cc0719fcb9875116628d8c156ae0d Mon Sep 17 00:00:00 2001 From: Mike Palmiotto Date: Fri, 28 Feb 2025 09:02:38 -0500 Subject: [PATCH] identity: Perform runtime cleanup in goroutine (#29759) Previously identity cleanup was running with the context of the activation request, which would time out for large workloads, resulting in bad failure states. This PR moves the ActivationFunc call to its own goroutine/background Context, so it can proceed uninterrupted. --- vault/identity_store.go | 29 ++++++++++++------------ vault/identity_store_structs.go | 12 +++++++++- vault/identity_store_test.go | 8 +++++++ vault/identity_store_util.go | 23 +++++++++++++++---- vault/logical_system_activation_flags.go | 3 ++- 5 files changed, 55 insertions(+), 20 deletions(-) diff --git a/vault/identity_store.go b/vault/identity_store.go index 702d41cdba..9763116d7c 100644 --- a/vault/identity_store.go +++ b/vault/identity_store.go @@ -58,20 +58,21 @@ func (i *IdentityStore) resetDB() error { func NewIdentityStore(ctx context.Context, core *Core, config *logical.BackendConfig, logger log.Logger) (*IdentityStore, error) { iStore := &IdentityStore{ - view: config.StorageView, - logger: logger, - router: core.router, - redirectAddr: core.redirectAddr, - localNode: core, - namespacer: core, - metrics: core.MetricSink(), - totpPersister: core, - tokenStorer: core, - entityCreator: core, - mountLister: core, - mfaBackend: core.loginMFABackend, - aliasLocks: locksutil.CreateLocks(), - renameDuplicates: core.FeatureActivationFlags, + view: config.StorageView, + logger: logger, + router: core.router, + redirectAddr: core.redirectAddr, + localNode: core, + namespacer: core, + metrics: core.MetricSink(), + totpPersister: core, + tokenStorer: core, + entityCreator: core, + mountLister: core, + mfaBackend: core.loginMFABackend, + aliasLocks: locksutil.CreateLocks(), + renameDuplicates: core.FeatureActivationFlags, + activationErrorHandler: core, } // Create a memdb instance, which by default, operates on lower cased diff --git a/vault/identity_store_structs.go b/vault/identity_store_structs.go index fb25bda366..17316d649f 100644 --- a/vault/identity_store_structs.go +++ b/vault/identity_store_structs.go @@ -123,7 +123,11 @@ type IdentityStore struct { // renameDuplicates holds the Core reference to feature activation flags, so // we can set and query enablement of the deduplication feature. - renameDuplicates activationflags.ActivationManager + renameDuplicates activationflags.ActivationManager + activationErrorHandler Sealer + + // activateDeduplicationDone is a channel used for synchronization in testing + activateDeduplicationDone chan struct{} } type groupDiff struct { @@ -175,3 +179,9 @@ type MountLister interface { } var _ MountLister = &Core{} + +type Sealer interface { + Shutdown() error +} + +var _ Sealer = &Core{} diff --git a/vault/identity_store_test.go b/vault/identity_store_test.go index 556b04ea39..4c780d5488 100644 --- a/vault/identity_store_test.go +++ b/vault/identity_store_test.go @@ -1643,7 +1643,9 @@ func identityStoreLoadingIsDeterministic(t *testing.T, flags *determinismTestFla c.FeatureActivationFlags.ActivateInMem(activationflags.IdentityDeduplication, true) require.NoError(t, err) + c.identityStore.activateDeduplicationDone = make(chan struct{}) err := c.systemBackend.activateIdentityDeduplication(ctx, nil) + <-c.identityStore.activateDeduplicationDone require.NoError(t, err) require.IsType(t, &renameResolver{}, c.identityStore.conflictResolver) require.False(t, c.identityStore.disableLowerCasedNames) @@ -1664,6 +1666,12 @@ func identityStoreLoadingIsDeterministic(t *testing.T, flags *determinismTestFla } prevErr = err + // Try to drain the channel if it's been created (if deduplication was + // enabled), but don't block in case it wasn't! + select { + case <-c.identityStore.activateDeduplicationDone: + default: + } // Identity store should be loaded now. Check it's contents. loadedNames := []string{} diff --git a/vault/identity_store_util.go b/vault/identity_store_util.go index 7bba2d2cba..af59c0d37e 100644 --- a/vault/identity_store_util.go +++ b/vault/identity_store_util.go @@ -145,11 +145,26 @@ func (i *IdentityStore) activateDeduplication(ctx context.Context, req *logical. return fmt.Errorf("failed to reset existing identity state: %w", err) } - if err := i.loadArtifacts(ctx, i.localNode.HAState() == consts.Active); err != nil { - return fmt.Errorf("failed to activate identity deduplication: %w", err) - } + go func() { + // If we fail to load from storage, we'll end up with a broken + // IdentityStore, so we're better of just sealing and letting another node + // take over! + if err := i.loadArtifacts(ctx, i.localNode.HAState() == consts.Active); err != nil { + i.logger.Error("failed to activate identity deduplication, shutting down") + i.activationErrorHandler.Shutdown() + return + } + + // Write to the test-synchronization channel if it's been created. + // Otherwise don't block trying to write to a nil chan. + select { + case i.activateDeduplicationDone <- struct{}{}: + default: + } + + i.logger.Info("identity deduplication activated, identity store reload complete") + }() - i.logger.Info("identity deduplication activated, identity store reload complete") return nil } diff --git a/vault/logical_system_activation_flags.go b/vault/logical_system_activation_flags.go index 5f5510a829..10328a032a 100644 --- a/vault/logical_system_activation_flags.go +++ b/vault/logical_system_activation_flags.go @@ -165,7 +165,8 @@ func (b *SystemBackend) activateIdentityDeduplication(ctx context.Context, _ *lo } if err := b.idStoreBackend.ActivationFunc(ctx, nil); err != nil { - return fmt.Errorf("failed to activate identity deduplication: %w", err) + b.logger.Error("activation flag error", "error", err) } + return nil }