mirror of
https://github.com/optim-enterprises-bv/vault.git
synced 2025-11-01 19:17:58 +00:00
add core state lock deadlock detection config option v2 (#18604)
* add core state lockd eadlock detection config option v2 * add changelog * split out NewTestCluster function to maintain build flag * replace long func with constant * remove line * rename file, and move where detect deadlock flag is set
This commit is contained in:
3
changelog/18604.txt
Normal file
3
changelog/18604.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
```release-note:improvement
|
||||||
|
core: add `detect_deadlocks` config to optionally detect core state deadlocks
|
||||||
|
```
|
||||||
@@ -2619,6 +2619,7 @@ func createCoreConfig(c *ServerCommand, config *server.Config, backend physical.
|
|||||||
CredentialBackends: c.CredentialBackends,
|
CredentialBackends: c.CredentialBackends,
|
||||||
LogicalBackends: c.LogicalBackends,
|
LogicalBackends: c.LogicalBackends,
|
||||||
Logger: c.logger,
|
Logger: c.logger,
|
||||||
|
DetectDeadlocks: config.DetectDeadlocks,
|
||||||
DisableSentinelTrace: config.DisableSentinelTrace,
|
DisableSentinelTrace: config.DisableSentinelTrace,
|
||||||
DisableCache: config.DisableCache,
|
DisableCache: config.DisableCache,
|
||||||
DisableMlock: config.DisableMlock,
|
DisableMlock: config.DisableMlock,
|
||||||
|
|||||||
@@ -97,6 +97,8 @@ type Config struct {
|
|||||||
LogRequestsLevel string `hcl:"-"`
|
LogRequestsLevel string `hcl:"-"`
|
||||||
LogRequestsLevelRaw interface{} `hcl:"log_requests_level"`
|
LogRequestsLevelRaw interface{} `hcl:"log_requests_level"`
|
||||||
|
|
||||||
|
DetectDeadlocks string `hcl:"detect_deadlocks"`
|
||||||
|
|
||||||
EnableResponseHeaderRaftNodeID bool `hcl:"-"`
|
EnableResponseHeaderRaftNodeID bool `hcl:"-"`
|
||||||
EnableResponseHeaderRaftNodeIDRaw interface{} `hcl:"enable_response_header_raft_node_id"`
|
EnableResponseHeaderRaftNodeIDRaw interface{} `hcl:"enable_response_header_raft_node_id"`
|
||||||
|
|
||||||
@@ -389,6 +391,11 @@ func (c *Config) Merge(c2 *Config) *Config {
|
|||||||
result.LogRequestsLevel = c2.LogRequestsLevel
|
result.LogRequestsLevel = c2.LogRequestsLevel
|
||||||
}
|
}
|
||||||
|
|
||||||
|
result.DetectDeadlocks = c.DetectDeadlocks
|
||||||
|
if c2.DetectDeadlocks != "" {
|
||||||
|
result.DetectDeadlocks = c2.DetectDeadlocks
|
||||||
|
}
|
||||||
|
|
||||||
result.EnableResponseHeaderRaftNodeID = c.EnableResponseHeaderRaftNodeID
|
result.EnableResponseHeaderRaftNodeID = c.EnableResponseHeaderRaftNodeID
|
||||||
if c2.EnableResponseHeaderRaftNodeID {
|
if c2.EnableResponseHeaderRaftNodeID {
|
||||||
result.EnableResponseHeaderRaftNodeID = c2.EnableResponseHeaderRaftNodeID
|
result.EnableResponseHeaderRaftNodeID = c2.EnableResponseHeaderRaftNodeID
|
||||||
@@ -1025,6 +1032,8 @@ func (c *Config) Sanitized() map[string]interface{} {
|
|||||||
"enable_response_header_raft_node_id": c.EnableResponseHeaderRaftNodeID,
|
"enable_response_header_raft_node_id": c.EnableResponseHeaderRaftNodeID,
|
||||||
|
|
||||||
"log_requests_level": c.LogRequestsLevel,
|
"log_requests_level": c.LogRequestsLevel,
|
||||||
|
|
||||||
|
"detect_deadlocks": c.DetectDeadlocks,
|
||||||
}
|
}
|
||||||
for k, v := range sharedResult {
|
for k, v := range sharedResult {
|
||||||
result[k] = v
|
result[k] = v
|
||||||
|
|||||||
@@ -745,6 +745,7 @@ func testConfig_Sanitized(t *testing.T) {
|
|||||||
"raw_storage_endpoint": true,
|
"raw_storage_endpoint": true,
|
||||||
"introspection_endpoint": false,
|
"introspection_endpoint": false,
|
||||||
"disable_sentinel_trace": true,
|
"disable_sentinel_trace": true,
|
||||||
|
"detect_deadlocks": "",
|
||||||
"enable_ui": true,
|
"enable_ui": true,
|
||||||
"enable_response_header_hostname": false,
|
"enable_response_header_hostname": false,
|
||||||
"enable_response_header_raft_node_id": false,
|
"enable_response_header_raft_node_id": false,
|
||||||
|
|||||||
@@ -1,21 +0,0 @@
|
|||||||
//go:build deadlock
|
|
||||||
|
|
||||||
package locking
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/sasha-s/go-deadlock"
|
|
||||||
)
|
|
||||||
|
|
||||||
// DeadlockMutex, when the build tag `deadlock` is present, behaves like a
|
|
||||||
// sync.Mutex but does periodic checking to see if outstanding locks and requests
|
|
||||||
// look like a deadlock. If it finds a deadlock candidate it will output it
|
|
||||||
// prefixed with "POTENTIAL DEADLOCK", as described at
|
|
||||||
// https://github.com/sasha-s/go-deadlock
|
|
||||||
type DeadlockMutex struct {
|
|
||||||
deadlock.Mutex
|
|
||||||
}
|
|
||||||
|
|
||||||
// DeadlockRWMutex is the RW version of DeadlockMutex.
|
|
||||||
type DeadlockRWMutex struct {
|
|
||||||
deadlock.RWMutex
|
|
||||||
}
|
|
||||||
@@ -1,19 +1,46 @@
|
|||||||
//go:build !deadlock
|
|
||||||
|
|
||||||
package locking
|
package locking
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"github.com/sasha-s/go-deadlock"
|
||||||
)
|
)
|
||||||
|
|
||||||
// DeadlockMutex is just a sync.Mutex when the build tag `deadlock` is absent.
|
// Common mutex interface to allow either built-in or imported deadlock use
|
||||||
// See its other definition in the corresponding deadlock-build-tag-constrained
|
type Mutex interface {
|
||||||
// file for more details.
|
Lock()
|
||||||
|
Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Common r/w mutex interface to allow either built-in or imported deadlock use
|
||||||
|
type RWMutex interface {
|
||||||
|
Lock()
|
||||||
|
RLock()
|
||||||
|
RLocker() sync.Locker
|
||||||
|
RUnlock()
|
||||||
|
Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeadlockMutex (used when requested via config option `detact_deadlocks`),
|
||||||
|
// behaves like a sync.Mutex but does periodic checking to see if outstanding
|
||||||
|
// locks and requests look like a deadlock. If it finds a deadlock candidate it
|
||||||
|
// will output it prefixed with "POTENTIAL DEADLOCK", as described at
|
||||||
|
// https://github.com/sasha-s/go-deadlock
|
||||||
type DeadlockMutex struct {
|
type DeadlockMutex struct {
|
||||||
sync.Mutex
|
deadlock.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
// DeadlockRWMutex is the RW version of DeadlockMutex.
|
// DeadlockRWMutex is the RW version of DeadlockMutex.
|
||||||
type DeadlockRWMutex struct {
|
type DeadlockRWMutex struct {
|
||||||
|
deadlock.RWMutex
|
||||||
|
}
|
||||||
|
|
||||||
|
// Regular sync/mutex.
|
||||||
|
type SyncMutex struct {
|
||||||
|
sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeadlockRWMutex is the RW version of SyncMutex.
|
||||||
|
type SyncRWMutex struct {
|
||||||
sync.RWMutex
|
sync.RWMutex
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ func TestSysConfigState_Sanitized(t *testing.T) {
|
|||||||
"disable_printable_check": false,
|
"disable_printable_check": false,
|
||||||
"disable_sealwrap": false,
|
"disable_sealwrap": false,
|
||||||
"raw_storage_endpoint": false,
|
"raw_storage_endpoint": false,
|
||||||
|
"detect_deadlocks": "",
|
||||||
"introspection_endpoint": false,
|
"introspection_endpoint": false,
|
||||||
"disable_sentinel_trace": false,
|
"disable_sentinel_trace": false,
|
||||||
"enable_ui": false,
|
"enable_ui": false,
|
||||||
|
|||||||
@@ -316,7 +316,7 @@ type Core struct {
|
|||||||
auditBackends map[string]audit.Factory
|
auditBackends map[string]audit.Factory
|
||||||
|
|
||||||
// stateLock protects mutable state
|
// stateLock protects mutable state
|
||||||
stateLock locking.DeadlockRWMutex
|
stateLock locking.RWMutex
|
||||||
sealed *uint32
|
sealed *uint32
|
||||||
|
|
||||||
standby bool
|
standby bool
|
||||||
@@ -732,6 +732,9 @@ type CoreConfig struct {
|
|||||||
|
|
||||||
Logger log.Logger
|
Logger log.Logger
|
||||||
|
|
||||||
|
// Use the deadlocks library to detect deadlocks
|
||||||
|
DetectDeadlocks string
|
||||||
|
|
||||||
// Disables the trace display for Sentinel checks
|
// Disables the trace display for Sentinel checks
|
||||||
DisableSentinelTrace bool
|
DisableSentinelTrace bool
|
||||||
|
|
||||||
@@ -904,6 +907,14 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
|
|||||||
conf.NumExpirationWorkers = numExpirationWorkersDefault
|
conf.NumExpirationWorkers = numExpirationWorkersDefault
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Use imported logging deadlock if requested
|
||||||
|
var stateLock locking.RWMutex
|
||||||
|
if strings.Contains(conf.DetectDeadlocks, "statelock") {
|
||||||
|
stateLock = &locking.DeadlockRWMutex{}
|
||||||
|
} else {
|
||||||
|
stateLock = &locking.SyncRWMutex{}
|
||||||
|
}
|
||||||
|
|
||||||
effectiveSDKVersion := conf.EffectiveSDKVersion
|
effectiveSDKVersion := conf.EffectiveSDKVersion
|
||||||
if effectiveSDKVersion == "" {
|
if effectiveSDKVersion == "" {
|
||||||
effectiveSDKVersion = version.GetVersion().Version
|
effectiveSDKVersion = version.GetVersion().Version
|
||||||
@@ -922,6 +933,7 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
|
|||||||
clusterListener: new(atomic.Value),
|
clusterListener: new(atomic.Value),
|
||||||
customListenerHeader: new(atomic.Value),
|
customListenerHeader: new(atomic.Value),
|
||||||
seal: conf.Seal,
|
seal: conf.Seal,
|
||||||
|
stateLock: stateLock,
|
||||||
router: NewRouter(),
|
router: NewRouter(),
|
||||||
sealed: new(uint32),
|
sealed: new(uint32),
|
||||||
sealMigrationDone: new(uint32),
|
sealMigrationDone: new(uint32),
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"reflect"
|
"reflect"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -21,6 +22,7 @@ import (
|
|||||||
"github.com/hashicorp/vault/sdk/physical"
|
"github.com/hashicorp/vault/sdk/physical"
|
||||||
"github.com/hashicorp/vault/sdk/physical/inmem"
|
"github.com/hashicorp/vault/sdk/physical/inmem"
|
||||||
"github.com/hashicorp/vault/version"
|
"github.com/hashicorp/vault/version"
|
||||||
|
"github.com/sasha-s/go-deadlock"
|
||||||
)
|
)
|
||||||
|
|
||||||
// invalidKey is used to test Unseal
|
// invalidKey is used to test Unseal
|
||||||
@@ -2836,3 +2838,51 @@ func TestCore_ServiceRegistration(t *testing.T) {
|
|||||||
t.Fatal(diff)
|
t.Fatal(diff)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDetectedDeadlock(t *testing.T) {
|
||||||
|
testCore, _, _ := TestCoreUnsealedWithConfig(t, &CoreConfig{DetectDeadlocks: "statelock"})
|
||||||
|
InduceDeadlock(t, testCore, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDefaultDeadlock(t *testing.T) {
|
||||||
|
testCore, _, _ := TestCoreUnsealed(t)
|
||||||
|
InduceDeadlock(t, testCore, 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
func RestoreDeadlockOpts() func() {
|
||||||
|
opts := deadlock.Opts
|
||||||
|
return func() {
|
||||||
|
deadlock.Opts = opts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func InduceDeadlock(t *testing.T, vaultcore *Core, expected uint32) {
|
||||||
|
defer RestoreDeadlockOpts()()
|
||||||
|
var deadlocks uint32
|
||||||
|
deadlock.Opts.OnPotentialDeadlock = func() {
|
||||||
|
atomic.AddUint32(&deadlocks, 1)
|
||||||
|
}
|
||||||
|
var mtx deadlock.Mutex
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
vaultcore.expiration.coreStateLock.Lock()
|
||||||
|
mtx.Lock()
|
||||||
|
mtx.Unlock()
|
||||||
|
vaultcore.expiration.coreStateLock.Unlock()
|
||||||
|
}()
|
||||||
|
wg.Wait()
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
mtx.Lock()
|
||||||
|
vaultcore.expiration.coreStateLock.RLock()
|
||||||
|
vaultcore.expiration.coreStateLock.RUnlock()
|
||||||
|
mtx.Unlock()
|
||||||
|
}()
|
||||||
|
wg.Wait()
|
||||||
|
if atomic.LoadUint32(&deadlocks) != expected {
|
||||||
|
t.Fatalf("expected 1 deadlock, detected %d", deadlocks)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -139,7 +139,7 @@ type ExpirationManager struct {
|
|||||||
quitCh chan struct{}
|
quitCh chan struct{}
|
||||||
|
|
||||||
// do not hold coreStateLock in any API handler code - it is already held
|
// do not hold coreStateLock in any API handler code - it is already held
|
||||||
coreStateLock *locking.DeadlockRWMutex
|
coreStateLock locking.RWMutex
|
||||||
quitContext context.Context
|
quitContext context.Context
|
||||||
leaseCheckCounter *uint32
|
leaseCheckCounter *uint32
|
||||||
|
|
||||||
@@ -350,7 +350,7 @@ func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, log
|
|||||||
restoreLocks: locksutil.CreateLocks(),
|
restoreLocks: locksutil.CreateLocks(),
|
||||||
quitCh: make(chan struct{}),
|
quitCh: make(chan struct{}),
|
||||||
|
|
||||||
coreStateLock: &c.stateLock,
|
coreStateLock: c.stateLock,
|
||||||
quitContext: c.activeContext,
|
quitContext: c.activeContext,
|
||||||
leaseCheckCounter: new(uint32),
|
leaseCheckCounter: new(uint32),
|
||||||
|
|
||||||
|
|||||||
5
vault/test_cluster_detect_deadlock.go
Normal file
5
vault/test_cluster_detect_deadlock.go
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
//go:build deadlock
|
||||||
|
|
||||||
|
package vault
|
||||||
|
|
||||||
|
const TestDeadlockDetection = "statelock"
|
||||||
5
vault/test_cluster_do_not_detect_deadlock.go
Normal file
5
vault/test_cluster_do_not_detect_deadlock.go
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
//go:build !deadlock
|
||||||
|
|
||||||
|
package vault
|
||||||
|
|
||||||
|
const TestDeadlockDetection = ""
|
||||||
@@ -212,6 +212,7 @@ func TestCoreWithSealAndUINoCleanup(t testing.T, opts *CoreConfig) *Core {
|
|||||||
conf.EnableResponseHeaderHostname = opts.EnableResponseHeaderHostname
|
conf.EnableResponseHeaderHostname = opts.EnableResponseHeaderHostname
|
||||||
conf.DisableSSCTokens = opts.DisableSSCTokens
|
conf.DisableSSCTokens = opts.DisableSSCTokens
|
||||||
conf.PluginDirectory = opts.PluginDirectory
|
conf.PluginDirectory = opts.PluginDirectory
|
||||||
|
conf.DetectDeadlocks = opts.DetectDeadlocks
|
||||||
|
|
||||||
if opts.Logger != nil {
|
if opts.Logger != nil {
|
||||||
conf.Logger = opts.Logger
|
conf.Logger = opts.Logger
|
||||||
@@ -1382,6 +1383,7 @@ func NewTestCluster(t testing.T, base *CoreConfig, opts *TestClusterOptions) *Te
|
|||||||
var baseAddr *net.TCPAddr
|
var baseAddr *net.TCPAddr
|
||||||
if opts != nil && opts.BaseListenAddress != "" {
|
if opts != nil && opts.BaseListenAddress != "" {
|
||||||
baseAddr, err = net.ResolveTCPAddr("tcp", opts.BaseListenAddress)
|
baseAddr, err = net.ResolveTCPAddr("tcp", opts.BaseListenAddress)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal("could not parse given base IP")
|
t.Fatal("could not parse given base IP")
|
||||||
}
|
}
|
||||||
@@ -1633,6 +1635,7 @@ func NewTestCluster(t testing.T, base *CoreConfig, opts *TestClusterOptions) *Te
|
|||||||
}
|
}
|
||||||
|
|
||||||
if base != nil {
|
if base != nil {
|
||||||
|
coreConfig.DetectDeadlocks = TestDeadlockDetection
|
||||||
coreConfig.RawConfig = base.RawConfig
|
coreConfig.RawConfig = base.RawConfig
|
||||||
coreConfig.DisableCache = base.DisableCache
|
coreConfig.DisableCache = base.DisableCache
|
||||||
coreConfig.EnableUI = base.EnableUI
|
coreConfig.EnableUI = base.EnableUI
|
||||||
|
|||||||
@@ -149,6 +149,11 @@ to specify where the configuration is.
|
|||||||
maximum request duration allowed before Vault cancels the request. This can
|
maximum request duration allowed before Vault cancels the request. This can
|
||||||
be overridden per listener via the `max_request_duration` value.
|
be overridden per listener via the `max_request_duration` value.
|
||||||
|
|
||||||
|
- `detect_deadlocks` `(string: "")` - Specifies the internal mutex locks that should be monitored for
|
||||||
|
potential deadlocks. Currently supported value is `statelock`, which will cause "POTENTIAL DEADLOCK:"
|
||||||
|
to be logged when an attempt at a core state lock appears to be deadlocked. Enabling this can have
|
||||||
|
a negative effect on performance due to the tracking of each lock attempt.
|
||||||
|
|
||||||
- `raw_storage_endpoint` `(bool: false)` – Enables the `sys/raw` endpoint which
|
- `raw_storage_endpoint` `(bool: false)` – Enables the `sys/raw` endpoint which
|
||||||
allows the decryption/encryption of raw data into and out of the security
|
allows the decryption/encryption of raw data into and out of the security
|
||||||
barrier. This is a highly privileged endpoint.
|
barrier. This is a highly privileged endpoint.
|
||||||
|
|||||||
Reference in New Issue
Block a user