mirror of
https://github.com/optim-enterprises-bv/vault.git
synced 2025-10-29 17:52:32 +00:00
VAULT-24736 CE changes for static secret capability behaviour toggle (#26744)
This commit is contained in:
@@ -180,7 +180,7 @@ func (updater *StaticSecretCacheUpdater) streamStaticSecretEvents(ctx context.Co
|
||||
return nil
|
||||
}
|
||||
|
||||
// preEventStreamUpdate is called after successful connection to the event system but before
|
||||
// preEventStreamUpdate is called after successful connection to the event system, but before
|
||||
// we process any events, to ensure we don't miss any updates.
|
||||
// In some cases, this will result in multiple processing of the same updates, but
|
||||
// this ensures that we don't lose any updates to secrets that might have been sent
|
||||
|
||||
@@ -19,6 +19,13 @@ import (
|
||||
"golang.org/x/exp/maps"
|
||||
)
|
||||
|
||||
type TokenCapabilityRefreshBehaviour int
|
||||
|
||||
const (
|
||||
TokenCapabilityRefreshBehaviourOptimistic TokenCapabilityRefreshBehaviour = iota
|
||||
TokenCapabilityRefreshBehaviourPessimistic
|
||||
)
|
||||
|
||||
const (
|
||||
// DefaultWorkers is the default number of workers for the worker pool.
|
||||
DefaultWorkers = 5
|
||||
@@ -37,15 +44,17 @@ type StaticSecretCapabilityManager struct {
|
||||
logger hclog.Logger
|
||||
workerPool *workerpool.WorkerPool
|
||||
staticSecretTokenCapabilityRefreshInterval time.Duration
|
||||
tokenCapabilityRefreshBehaviour TokenCapabilityRefreshBehaviour
|
||||
}
|
||||
|
||||
// StaticSecretCapabilityManagerConfig is the configuration for initializing a new
|
||||
// StaticSecretCapabilityManager.
|
||||
type StaticSecretCapabilityManagerConfig struct {
|
||||
LeaseCache *LeaseCache
|
||||
Logger hclog.Logger
|
||||
Client *api.Client
|
||||
StaticSecretTokenCapabilityRefreshInterval time.Duration
|
||||
LeaseCache *LeaseCache
|
||||
Logger hclog.Logger
|
||||
Client *api.Client
|
||||
StaticSecretTokenCapabilityRefreshInterval time.Duration
|
||||
StaticSecretTokenCapabilityRefreshBehaviour string
|
||||
}
|
||||
|
||||
// NewStaticSecretCapabilityManager creates a new instance of a StaticSecretCapabilityManager.
|
||||
@@ -70,6 +79,18 @@ func NewStaticSecretCapabilityManager(conf *StaticSecretCapabilityManagerConfig)
|
||||
conf.StaticSecretTokenCapabilityRefreshInterval = DefaultStaticSecretTokenCapabilityRefreshInterval
|
||||
}
|
||||
|
||||
behaviour := TokenCapabilityRefreshBehaviourOptimistic
|
||||
if conf.StaticSecretTokenCapabilityRefreshBehaviour != "" {
|
||||
switch conf.StaticSecretTokenCapabilityRefreshBehaviour {
|
||||
case "optimistic":
|
||||
behaviour = TokenCapabilityRefreshBehaviourOptimistic
|
||||
case "pessimistic":
|
||||
behaviour = TokenCapabilityRefreshBehaviourPessimistic
|
||||
default:
|
||||
return nil, fmt.Errorf("TokenCapabilityRefreshBehaviour must be either \"optimistic\" or \"pessimistic\"")
|
||||
}
|
||||
}
|
||||
|
||||
workerPool := workerpool.New(DefaultWorkers)
|
||||
|
||||
return &StaticSecretCapabilityManager{
|
||||
@@ -78,6 +99,7 @@ func NewStaticSecretCapabilityManager(conf *StaticSecretCapabilityManagerConfig)
|
||||
logger: conf.Logger,
|
||||
workerPool: workerPool,
|
||||
staticSecretTokenCapabilityRefreshInterval: conf.StaticSecretTokenCapabilityRefreshInterval,
|
||||
tokenCapabilityRefreshBehaviour: behaviour,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -136,9 +158,15 @@ func (sscm *StaticSecretCapabilityManager) StartRenewingCapabilities(indexToRene
|
||||
|
||||
capabilities, err := getCapabilities(indexReadablePaths, client)
|
||||
if err != nil {
|
||||
sscm.logger.Error("error when attempting to retrieve updated token capabilities", "indexToRenew.ID", indexToRenew.ID, "err", err)
|
||||
sscm.submitWorkToPoolAfterInterval(work)
|
||||
return
|
||||
sscm.logger.Warn("error when attempting to retrieve updated token capabilities", "indexToRenew.ID", indexToRenew.ID, "err", err)
|
||||
if sscm.tokenCapabilityRefreshBehaviour == TokenCapabilityRefreshBehaviourPessimistic {
|
||||
// Vault is be sealed or unreachable. If pessimistic, assume we might have
|
||||
// lost access. Set capabilities to an empty set, so they are all removed.
|
||||
capabilities = make(map[string][]string)
|
||||
} else {
|
||||
sscm.submitWorkToPoolAfterInterval(work)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
newReadablePaths := reconcileCapabilities(indexReadablePaths, capabilities)
|
||||
@@ -188,6 +216,7 @@ func (sscm *StaticSecretCapabilityManager) StartRenewingCapabilities(indexToRene
|
||||
sscm.submitWorkToPoolAfterInterval(work)
|
||||
return
|
||||
}
|
||||
sscm.logger.Debug("successfully evicted capabilities index from cache", "index.ID", indexToRenew.ID)
|
||||
// If we successfully evicted the index, no need to re-submit the work to the pool.
|
||||
return
|
||||
}
|
||||
|
||||
@@ -345,6 +345,88 @@ func TestSubmitWorkUpdatesIndexWithBadToken(t *testing.T) {
|
||||
sscm.workerPool.Stop()
|
||||
}
|
||||
|
||||
// TestSubmitWorkSealedVaultOptimistic tests that the capability manager
|
||||
// behaves as expected when
|
||||
// sscm.tokenCapabilityRefreshBehaviour == TokenCapabilityRefreshBehaviourOptimistic
|
||||
func TestSubmitWorkSealedVaultOptimistic(t *testing.T) {
|
||||
t.Parallel()
|
||||
cluster := minimal.NewTestSoloCluster(t, nil)
|
||||
client := cluster.Cores[0].Client
|
||||
|
||||
token := "not real token"
|
||||
indexId := hashStaticSecretIndex(token)
|
||||
|
||||
sscm := testNewStaticSecretCapabilityManager(t, client)
|
||||
index := &cachememdb.CapabilitiesIndex{
|
||||
ID: indexId,
|
||||
Token: token,
|
||||
ReadablePaths: map[string]struct{}{
|
||||
"foo/bar": {},
|
||||
"auth/token/lookup-self": {},
|
||||
},
|
||||
}
|
||||
err := sscm.leaseCache.db.SetCapabilitiesIndex(index)
|
||||
require.Nil(t, err)
|
||||
|
||||
// Seal the cluster
|
||||
cluster.EnsureCoresSealed(t)
|
||||
|
||||
sscm.StartRenewingCapabilities(index)
|
||||
|
||||
// Wait for the job to complete at least once...
|
||||
time.Sleep(3 * time.Second)
|
||||
|
||||
// This entry should not be evicted.
|
||||
newIndex, err := sscm.leaseCache.db.GetCapabilitiesIndex(cachememdb.IndexNameID, indexId)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, newIndex)
|
||||
|
||||
// Forcefully stop any remaining workers
|
||||
sscm.workerPool.Stop()
|
||||
}
|
||||
|
||||
// TestSubmitWorkSealedVaultPessimistic tests that the capability manager
|
||||
// behaves as expected when
|
||||
// sscm.tokenCapabilityRefreshBehaviour == TokenCapabilityRefreshBehaviourPessimistic
|
||||
func TestSubmitWorkSealedVaultPessimistic(t *testing.T) {
|
||||
t.Parallel()
|
||||
cluster := minimal.NewTestSoloCluster(t, nil)
|
||||
client := cluster.Cores[0].Client
|
||||
|
||||
token := "not real token"
|
||||
indexId := hashStaticSecretIndex(token)
|
||||
|
||||
sscm := testNewStaticSecretCapabilityManager(t, client)
|
||||
sscm.tokenCapabilityRefreshBehaviour = TokenCapabilityRefreshBehaviourPessimistic
|
||||
|
||||
index := &cachememdb.CapabilitiesIndex{
|
||||
ID: indexId,
|
||||
Token: token,
|
||||
ReadablePaths: map[string]struct{}{
|
||||
"foo/bar": {},
|
||||
"auth/token/lookup-self": {},
|
||||
},
|
||||
}
|
||||
err := sscm.leaseCache.db.SetCapabilitiesIndex(index)
|
||||
require.Nil(t, err)
|
||||
|
||||
// Seal the cluster
|
||||
cluster.EnsureCoresSealed(t)
|
||||
|
||||
sscm.StartRenewingCapabilities(index)
|
||||
|
||||
// Wait for the job to complete at least once...
|
||||
time.Sleep(3 * time.Second)
|
||||
|
||||
// This entry should be evicted.
|
||||
newIndex, err := sscm.leaseCache.db.GetCapabilitiesIndex(cachememdb.IndexNameID, indexId)
|
||||
require.Error(t, err)
|
||||
require.Nil(t, newIndex)
|
||||
|
||||
// Forcefully stop any remaining workers
|
||||
sscm.workerPool.Stop()
|
||||
}
|
||||
|
||||
// TestSubmitWorkUpdatesAllIndexes tests that an index will be correctly updated if the capabilities differ, as
|
||||
// well as the indexes related to the paths that are being checked for.
|
||||
func TestSubmitWorkUpdatesAllIndexes(t *testing.T) {
|
||||
|
||||
@@ -520,7 +520,8 @@ func (c *ProxyCommand) Run(args []string) int {
|
||||
LeaseCache: leaseCache,
|
||||
Logger: c.logger.Named("cache.staticsecretcapabilitymanager"),
|
||||
Client: client,
|
||||
StaticSecretTokenCapabilityRefreshInterval: config.Cache.StaticSecretTokenCapabilityRefreshInterval,
|
||||
StaticSecretTokenCapabilityRefreshInterval: config.Cache.StaticSecretTokenCapabilityRefreshInterval,
|
||||
StaticSecretTokenCapabilityRefreshBehaviour: config.Cache.StaticSecretTokenCapabilityRefreshBehaviour,
|
||||
})
|
||||
if err != nil {
|
||||
c.UI.Error(fmt.Sprintf("Error creating static secret capability manager: %v", err))
|
||||
|
||||
@@ -109,6 +109,7 @@ type Cache struct {
|
||||
DisableCachingDynamicSecrets bool `hcl:"disable_caching_dynamic_secrets"`
|
||||
StaticSecretTokenCapabilityRefreshIntervalRaw interface{} `hcl:"static_secret_token_capability_refresh_interval"`
|
||||
StaticSecretTokenCapabilityRefreshInterval time.Duration `hcl:"-"`
|
||||
StaticSecretTokenCapabilityRefreshBehaviour string `hcl:"static_secret_token_capability_refresh_behavior"`
|
||||
}
|
||||
|
||||
// AutoAuth is the configured authentication method and sinks
|
||||
@@ -271,6 +272,15 @@ func (c *Config) ValidateConfig() error {
|
||||
return fmt.Errorf("no auto_auth, cache, or listener block found in config")
|
||||
}
|
||||
|
||||
if c.Cache != nil && c.Cache.StaticSecretTokenCapabilityRefreshBehaviour != "" {
|
||||
switch c.Cache.StaticSecretTokenCapabilityRefreshBehaviour {
|
||||
case "pessimistic":
|
||||
case "optimistic":
|
||||
default:
|
||||
return fmt.Errorf("cache.static_secret_token_capability_refresh_behavior must be either \"optimistic\" or \"pessimistic\"")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -104,6 +104,14 @@ token can no longer access the relevant paths from the cache. The refresh interv
|
||||
is essentially the maximum period after which permission to read a KV secret is
|
||||
fully revoked for the relevant token.
|
||||
|
||||
If the capabilities have been removed, or Proxy receives a `403` response, the
|
||||
capability is removed from the token, and that token cannot be used to access the
|
||||
cache. For other kinds of errors, such as Vault being unreachable or sealed,
|
||||
the `static_secret_token_capability_refresh_behavior` config is consulted.
|
||||
If set to `optimistic` (the default), the capability will not be removed unless we
|
||||
receive a `403` or valid response without the capability. If set to `pessimistic`,
|
||||
the capability will be removed for any error, such as would occur if Vault is sealed.
|
||||
|
||||
For token refresh to work, any token that will access the cache also needs
|
||||
`update` permission for [`sys/capabilies-self`](/vault/api-docs/system/capabilities-self).
|
||||
Having `update` permission for the token lets Proxy test capabilities for the
|
||||
@@ -149,6 +157,16 @@ secrets. The refresh interval is the maximum period after which permission to
|
||||
read a cached KV secret is fully revoked. Ignored when `cache_static_secrets`
|
||||
is `false`.
|
||||
|
||||
- `static_secret_token_capability_refresh_behavior` `(string: "optimistic", optional)` -
|
||||
Sets the capability refresh behavior in the case of an error when attempting to
|
||||
refresh capabilities. In the case of a `403`, capabilities will be removed for the token
|
||||
with either option. In case of other errors, such as Vault being sealed or Vault being
|
||||
unavailable, this setting controls the behavior. If set to `optimistic` (the default),
|
||||
capabilities will be removed for only `403` errors. If set to `pessimistic`, capabilities
|
||||
will be removed for any error. This essentially allows configuring a preference between
|
||||
favoring availability (`optimistic`) or access fidelity (`pessimistic`) of cached
|
||||
static secrets. Ignored when `cache_static_secrets` is `false`.
|
||||
|
||||
### Example configuration
|
||||
|
||||
The following example Vault Proxy configuration:
|
||||
|
||||
Reference in New Issue
Block a user