Fix unlocked mounts read (#29091)

This PR fixes copy-paste error in the product usage code where we were
taking out the authLock to access the mount table.

While we're add it we can remove the existing lock grabbing in the
product usage goroutine in favor of a serialized startup/teardown of
censusManager and its core dependency which requires the lock. This
requires some minor test edits, so created a test helper for that.

By moving the censusManager teardown before expirationManager teardown,
we can effectively ensure the goroutine is completely stopped outside of
any expirationManager change.

We are already guaranteed serial startup, so this should free us of any
complex lock semantics.
This commit is contained in:
Mike Palmiotto
2024-12-04 13:23:55 -05:00
committed by GitHub
parent 73bf3ebc7c
commit a67e062254
6 changed files with 42 additions and 42 deletions

3
changelog/29091.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
core/metrics: Fix unlocked mounts read for usage reporting.
```

View File

@@ -2898,14 +2898,17 @@ func (c *Core) preSeal() error {
if err := c.teardownAudits(); err != nil {
result = multierror.Append(result, fmt.Errorf("error tearing down audits: %w", err))
}
if err := c.stopExpiration(); err != nil {
result = multierror.Append(result, fmt.Errorf("error stopping expiration: %w", err))
}
// Ensure that the ActivityLog and CensusManager are both completely torn
// down before stopping the ExpirationManager. This ordering is critical,
// due to a tight coupling between the ActivityLog, CensusManager, and
// ExpirationManager for product usage reporting.
c.stopActivityLog()
// Clean up census on seal
if err := c.teardownCensusManager(); err != nil {
result = multierror.Append(result, fmt.Errorf("error tearing down reporting agent: %w", err))
}
if err := c.stopExpiration(); err != nil {
result = multierror.Append(result, fmt.Errorf("error stopping expiration: %w", err))
}
if err := c.teardownCredentials(context.Background()); err != nil {
result = multierror.Append(result, fmt.Errorf("error tearing down credentials: %w", err))
}

View File

@@ -540,22 +540,15 @@ func getMeanNamespaceSecrets(mapOfNamespacesToSecrets map[string]int) int {
func (c *Core) GetSecretEngineUsageMetrics() map[string]int {
mounts := make(map[string]int)
c.authLock.RLock()
defer c.authLock.RUnlock()
// we don't grab the statelock, so this code might run during or after the seal process.
// Therefore, we need to check if c.auth is nil. If we do not, this will panic when
// run after seal.
if c.auth == nil {
return mounts
}
c.mountsLock.RLock()
defer c.mountsLock.RUnlock()
for _, entry := range c.mounts.Entries {
authType := entry.Type
if _, ok := mounts[authType]; !ok {
mounts[authType] = 1
mountType := entry.Type
if _, ok := mounts[mountType]; !ok {
mounts[mountType] = 1
} else {
mounts[authType] += 1
mounts[mountType] += 1
}
}
return mounts
@@ -568,13 +561,6 @@ func (c *Core) GetAuthMethodUsageMetrics() map[string]int {
c.authLock.RLock()
defer c.authLock.RUnlock()
// we don't grab the statelock, so this code might run during or after the seal process.
// Therefore, we need to check if c.auth is nil. If we do not, this will panic when
// run after seal.
if c.auth == nil {
return mounts
}
for _, entry := range c.auth.Entries {
authType := entry.Type
if _, ok := mounts[authType]; !ok {

View File

@@ -435,8 +435,11 @@ func (c *Core) setupExpiration(e ExpireLeaseStrategy) error {
return nil
}
// stopExpiration is used to stop the expiration manager before
// sealing the Vault.
// stopExpiration is used to stop the expiration manager before sealing Vault.
// This *must* be called after shutting down the ActivityLog and
// CensusManager to prevent Core's expirationManager reference from
// changing while being accessed by product usage reporting. This is
// an unfortunate side-effect of tight coupling between ActivityLog and Core.
func (c *Core) stopExpiration() error {
if c.expiration != nil {
if err := c.expiration.Stop(); err != nil {

View File

@@ -855,10 +855,7 @@ func TestExpiration_Restore(t *testing.T) {
}
// Stop everything
err = c.stopExpiration()
if err != nil {
t.Fatalf("err: %v", err)
}
stopExpiration(t, c)
if exp.leaseCount != 0 {
t.Fatalf("expected %v leases, got %v", 0, exp.leaseCount)
@@ -3008,6 +3005,23 @@ func registerOneLease(t *testing.T, ctx context.Context, exp *ExpirationManager)
return leaseID
}
// stopExpiration is a test helper which allows us to safely teardown the
// expiration manager. This preserves the shutdown order of Core for these few
// outlier tests that (previously) directly called [Core].stopExpiration().
func stopExpiration(t *testing.T, core *Core) {
t.Helper()
core.stopActivityLog()
err := core.teardownCensusManager()
if err != nil {
t.Fatalf("error stopping census manager: %v", err)
}
err = core.stopExpiration()
if err != nil {
t.Fatalf("error stopping expiration manager: %v", err)
}
}
func TestExpiration_MarkIrrevocable(t *testing.T) {
c, _, _ := TestCoreUnsealed(t)
exp := c.expiration
@@ -3060,10 +3074,7 @@ func TestExpiration_MarkIrrevocable(t *testing.T) {
}
// stop and restore to verify that irrevocable leases are properly loaded from storage
err = c.stopExpiration()
if err != nil {
t.Fatalf("error stopping expiration manager: %v", err)
}
stopExpiration(t, c)
err = exp.Restore(nil)
if err != nil {
@@ -3153,10 +3164,7 @@ func TestExpiration_StopClearsIrrevocableCache(t *testing.T) {
exp.markLeaseIrrevocable(ctx, le, fmt.Errorf("test irrevocable error"))
exp.pendingLock.Unlock()
err = c.stopExpiration()
if err != nil {
t.Fatalf("error stopping expiration manager: %v", err)
}
stopExpiration(t, c)
if _, ok := exp.irrevocable.Load(leaseID); ok {
t.Error("expiration manager irrevocable cache should be cleared on stop")

View File

@@ -1170,10 +1170,7 @@ func TestTokenStore_CreateLookup_ExpirationInRestoreMode(t *testing.T) {
t.Fatalf("err: %v", err)
}
err = c.stopExpiration()
if err != nil {
t.Fatal(err)
}
stopExpiration(t, c)
// Reset expiration manager to restore mode
ts.expiration.restoreModeLock.Lock()