mirror of
https://github.com/optim-enterprises-bv/vault.git
synced 2025-11-01 11:08:10 +00:00
Reduce blocking in approle tidy by grabbing only one lock at a time. (#8418)
Revamp race test to make it aware of when tidies start/stop, and to generate dangling accessors that need cleaning up. Also run for longer but with slightly less concurrency to ensure the writes and the cleanup actually overlap.
This commit is contained in:
@@ -3,7 +3,6 @@ package approle
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/vault/sdk/framework"
|
||||
"github.com/hashicorp/vault/sdk/helper/consts"
|
||||
@@ -57,8 +56,6 @@ type backend struct {
|
||||
// secretIDListingLock is a dedicated lock for listing SecretIDAccessors
|
||||
// for all the SecretIDs issued against an approle
|
||||
secretIDListingLock sync.RWMutex
|
||||
|
||||
testTidyDelay time.Duration
|
||||
}
|
||||
|
||||
func Factory(ctx context.Context, conf *logical.BackendConfig) (logical.Backend, error) {
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/hashicorp/errwrap"
|
||||
"github.com/hashicorp/vault/sdk/framework"
|
||||
"github.com/hashicorp/vault/sdk/helper/consts"
|
||||
"github.com/hashicorp/vault/sdk/helper/locksutil"
|
||||
"github.com/hashicorp/vault/sdk/logical"
|
||||
)
|
||||
|
||||
@@ -39,9 +40,20 @@ func (b *backend) tidySecretID(ctx context.Context, req *logical.Request) (*logi
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
s := req.Storage
|
||||
go b.tidySecretIDinternal(req.Storage)
|
||||
|
||||
go func() {
|
||||
resp := &logical.Response{}
|
||||
resp.AddWarning("Tidy operation successfully started. Any information from the operation will be printed to Vault's server logs.")
|
||||
return logical.RespondWithStatusCode(resp, req, http.StatusAccepted)
|
||||
|
||||
}
|
||||
|
||||
type tidyHelperSecretIDAccessor struct {
|
||||
secretIDAccessorStorageEntry
|
||||
saltedSecretIDAccessor string
|
||||
}
|
||||
|
||||
func (b *backend) tidySecretIDinternal(s logical.Storage) {
|
||||
defer atomic.StoreUint32(b.tidySecretIDCASGuard, 0)
|
||||
|
||||
logger := b.Logger().Named("tidy")
|
||||
@@ -49,35 +61,50 @@ func (b *backend) tidySecretID(ctx context.Context, req *logical.Request) (*logi
|
||||
checkCount := 0
|
||||
|
||||
defer func() {
|
||||
if b.testTidyDelay > 0 {
|
||||
logger.Trace("done checking entries", "num_entries", checkCount)
|
||||
}
|
||||
}()
|
||||
|
||||
// Don't cancel when the original client request goes away
|
||||
ctx = context.Background()
|
||||
ctx := context.Background()
|
||||
|
||||
tidyFunc := func(secretIDPrefixToUse, accessorIDPrefixToUse string) error {
|
||||
logger.Trace("listing role HMACs", "prefix", secretIDPrefixToUse)
|
||||
|
||||
roleNameHMACs, err := s.List(ctx, secretIDPrefixToUse)
|
||||
salt, err := b.Salt(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
logger.Error("error tidying secret IDs", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
tidyFunc := func(secretIDPrefixToUse, accessorIDPrefixToUse string) error {
|
||||
logger.Trace("listing accessors", "prefix", accessorIDPrefixToUse)
|
||||
|
||||
// List all the accessors and add them all to a map
|
||||
// These hashes are the result of salting the accessor id.
|
||||
accessorHashes, err := s.List(ctx, accessorIDPrefixToUse)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
accessorMap := make(map[string]bool, len(accessorHashes))
|
||||
skipHashes := make(map[string]bool, len(accessorHashes))
|
||||
accHashesByLockID := make([][]tidyHelperSecretIDAccessor, 256)
|
||||
for _, accessorHash := range accessorHashes {
|
||||
accessorMap[accessorHash] = true
|
||||
var entry secretIDAccessorStorageEntry
|
||||
entryIndex := accessorIDPrefixToUse + accessorHash
|
||||
se, err := s.Get(ctx, entryIndex)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if se == nil {
|
||||
continue
|
||||
}
|
||||
err = se.DecodeJSON(&entry)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
time.Sleep(b.testTidyDelay)
|
||||
lockIdx := locksutil.LockIndexForKey(entry.SecretIDHMAC)
|
||||
accHashesByLockID[lockIdx] = append(accHashesByLockID[lockIdx], tidyHelperSecretIDAccessor{
|
||||
secretIDAccessorStorageEntry: entry,
|
||||
saltedSecretIDAccessor: accessorHash,
|
||||
})
|
||||
}
|
||||
|
||||
secretIDCleanupFunc := func(secretIDHMAC, roleNameHMAC, secretIDPrefixToUse string) error {
|
||||
checkCount++
|
||||
@@ -135,19 +162,20 @@ func (b *backend) tidySecretID(ctx context.Context, req *logical.Request) (*logi
|
||||
return nil
|
||||
}
|
||||
|
||||
// At this point, the secret ID is not expired and is valid. Delete
|
||||
// the corresponding accessor from the accessorMap. This will leave
|
||||
// only the dangling accessors in the map which can then be cleaned
|
||||
// up later.
|
||||
salt, err := b.Salt(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
delete(accessorMap, salt.SaltID(result.SecretIDAccessor))
|
||||
// At this point, the secret ID is not expired and is valid. Flag
|
||||
// the corresponding accessor as not needing attention.
|
||||
skipHashes[salt.SaltID(result.SecretIDAccessor)] = true
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
logger.Trace("listing role HMACs", "prefix", secretIDPrefixToUse)
|
||||
|
||||
roleNameHMACs, err := s.List(ctx, secretIDPrefixToUse)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, roleNameHMAC := range roleNameHMACs {
|
||||
logger.Trace("listing secret ID HMACs", "role_hmac", roleNameHMAC)
|
||||
secretIDHMACs, err := s.List(ctx, fmt.Sprintf("%s%s", secretIDPrefixToUse, roleNameHMAC))
|
||||
@@ -164,59 +192,55 @@ func (b *backend) tidySecretID(ctx context.Context, req *logical.Request) (*logi
|
||||
|
||||
// Accessor indexes were not getting cleaned up until 0.9.3. This is a fix
|
||||
// to clean up the dangling accessor entries.
|
||||
if len(accessorMap) > 0 {
|
||||
for _, lock := range b.secretIDLocks {
|
||||
lock.Lock()
|
||||
defer lock.Unlock()
|
||||
}
|
||||
for accessorHash, _ := range accessorMap {
|
||||
logger.Trace("found dangling accessor, verifying")
|
||||
// Ideally, locking on accessors should be performed here too
|
||||
// but for that, accessors are required in plaintext, which are
|
||||
// not available. The code above helps but it may still be
|
||||
// racy.
|
||||
// ...
|
||||
// Look up the secret again now that we have all the locks. The
|
||||
// lock is held when writing accessor/secret so if we have the
|
||||
// lock we know we're not in a
|
||||
// wrote-accessor-but-not-yet-secret case, which can be racy.
|
||||
var entry secretIDAccessorStorageEntry
|
||||
entryIndex := accessorIDPrefixToUse + accessorHash
|
||||
se, err := s.Get(ctx, entryIndex)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if se != nil {
|
||||
err = se.DecodeJSON(&entry)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// The storage entry doesn't store the role ID, so we have
|
||||
// to go about this the long way; fortunately we shouldn't
|
||||
// actually hit this very often
|
||||
var found bool
|
||||
searchloop:
|
||||
if len(accessorHashes) > len(skipHashes) {
|
||||
// There is some raciness here because we're querying secretids for
|
||||
// roles without having a lock while doing so. Because
|
||||
// accHashesByLockID was populated previously, at worst this may
|
||||
// mean that we fail to clean up something we ought to.
|
||||
var allSecretIDHMACs = make(map[string]struct{})
|
||||
for _, roleNameHMAC := range roleNameHMACs {
|
||||
secretIDHMACs, err := s.List(ctx, fmt.Sprintf("%s%s", secretIDPrefixToUse, roleNameHMAC))
|
||||
secretIDHMACs, err := s.List(ctx, secretIDPrefixToUse+roleNameHMAC)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, v := range secretIDHMACs {
|
||||
if v == entry.SecretIDHMAC {
|
||||
found = true
|
||||
logger.Trace("accessor verified, not removing")
|
||||
break searchloop
|
||||
allSecretIDHMACs[v] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
tidyEntries := func(entries []tidyHelperSecretIDAccessor) error {
|
||||
for _, entry := range entries {
|
||||
// Don't clean up accessor index entry if secretid cleanup func
|
||||
// determined that it should stay.
|
||||
if _, ok := skipHashes[entry.saltedSecretIDAccessor]; ok {
|
||||
continue
|
||||
}
|
||||
if !found {
|
||||
logger.Trace("could not verify dangling accessor, removing")
|
||||
err = s.Delete(ctx, entryIndex)
|
||||
if err != nil {
|
||||
|
||||
// Don't clean up accessor index entry if referenced in role.
|
||||
if _, ok := allSecretIDHMACs[entry.SecretIDHMAC]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := s.Delete(context.Background(), accessorIDPrefixToUse+entry.saltedSecretIDAccessor); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
for lockIdx, entries := range accHashesByLockID {
|
||||
// Ideally, locking on accessors should be performed here too
|
||||
// but for that, accessors are required in plaintext, which are
|
||||
// not available.
|
||||
// ...
|
||||
// The lock is held when writing accessor/secret so if we have
|
||||
// the lock we know we're not in a
|
||||
// wrote-accessor-but-not-yet-secret case, which can be racy.
|
||||
b.secretIDLocks[lockIdx].Lock()
|
||||
err = tidyEntries(entries)
|
||||
b.secretIDLocks[lockIdx].Unlock()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -224,7 +248,7 @@ func (b *backend) tidySecretID(ctx context.Context, req *logical.Request) (*logi
|
||||
return nil
|
||||
}
|
||||
|
||||
err := tidyFunc(secretIDPrefix, secretIDAccessorPrefix)
|
||||
err = tidyFunc(secretIDPrefix, secretIDAccessorPrefix)
|
||||
if err != nil {
|
||||
logger.Error("error tidying global secret IDs", "error", err)
|
||||
return
|
||||
@@ -234,11 +258,6 @@ func (b *backend) tidySecretID(ctx context.Context, req *logical.Request) (*logi
|
||||
logger.Error("error tidying local secret IDs", "error", err)
|
||||
return
|
||||
}
|
||||
}()
|
||||
|
||||
resp := &logical.Response{}
|
||||
resp.AddWarning("Tidy operation successfully started. Any information from the operation will be printed to Vault's server logs.")
|
||||
return logical.RespondWithStatusCode(resp, req, http.StatusAccepted)
|
||||
}
|
||||
|
||||
// pathTidySecretIDUpdate is used to delete the expired SecretID entries
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -91,8 +92,6 @@ func TestAppRole_TidyDanglingAccessors_RaceTest(t *testing.T) {
|
||||
var err error
|
||||
b, storage := createBackendWithStorage(t)
|
||||
|
||||
b.testTidyDelay = 300 * time.Millisecond
|
||||
|
||||
// Create a role
|
||||
createRole(t, b, storage, "role1", "a,b,c")
|
||||
|
||||
@@ -109,14 +108,9 @@ func TestAppRole_TidyDanglingAccessors_RaceTest(t *testing.T) {
|
||||
count := 1
|
||||
|
||||
wg := &sync.WaitGroup{}
|
||||
now := time.Now()
|
||||
started := false
|
||||
for {
|
||||
if time.Now().Sub(now) > 700*time.Millisecond {
|
||||
break
|
||||
}
|
||||
if time.Now().Sub(now) > 100*time.Millisecond && !started {
|
||||
started = true
|
||||
start := time.Now()
|
||||
for time.Now().Sub(start) < 10*time.Second {
|
||||
if time.Now().Sub(start) > 100*time.Millisecond && atomic.LoadUint32(b.tidySecretIDCASGuard) == 0 {
|
||||
_, err = b.tidySecretID(context.Background(), &logical.Request{
|
||||
Storage: storage,
|
||||
})
|
||||
@@ -137,23 +131,50 @@ func TestAppRole_TidyDanglingAccessors_RaceTest(t *testing.T) {
|
||||
t.Fatalf("err:%v resp:%#v", err, resp)
|
||||
}
|
||||
}()
|
||||
count++
|
||||
}
|
||||
|
||||
t.Logf("wrote %d entries", count)
|
||||
|
||||
wg.Wait()
|
||||
// Let tidy finish
|
||||
time.Sleep(1 * time.Second)
|
||||
|
||||
// Run tidy again
|
||||
_, err = b.tidySecretID(context.Background(), &logical.Request{
|
||||
Storage: storage,
|
||||
})
|
||||
entry, err := logical.StorageEntryJSON(
|
||||
fmt.Sprintf("accessor/invalid%d", count),
|
||||
&secretIDAccessorStorageEntry{
|
||||
SecretIDHMAC: "samplesecretidhmac",
|
||||
},
|
||||
)
|
||||
err = storage.Put(context.Background(), entry)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
count++
|
||||
time.Sleep(100 * time.Microsecond)
|
||||
}
|
||||
|
||||
logger := b.Logger().Named(t.Name())
|
||||
logger.Info("wrote entries", "count", count)
|
||||
|
||||
wg.Wait()
|
||||
// Let tidy finish
|
||||
for atomic.LoadUint32(b.tidySecretIDCASGuard) != 0 {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
|
||||
logger.Info("running tidy again")
|
||||
|
||||
// Run tidy again
|
||||
secret, err := b.tidySecretID(context.Background(), &logical.Request{
|
||||
Storage: storage,
|
||||
})
|
||||
if err != nil || len(secret.Warnings) > 0 {
|
||||
t.Fatal(err, secret.Warnings)
|
||||
}
|
||||
|
||||
// Wait for tidy to start
|
||||
for atomic.LoadUint32(b.tidySecretIDCASGuard) == 0 {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
|
||||
// Let tidy finish
|
||||
for atomic.LoadUint32(b.tidySecretIDCASGuard) != 0 {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
|
||||
accessorHashes, err := storage.List(context.Background(), "accessor/")
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user