VAULT-24736 CE changes for static secret capability behaviour toggle (#26744)

This commit is contained in:
Violet Hynes
2024-05-03 14:12:19 -04:00
committed by GitHub
parent 2a99b3651f
commit f2b4ca4def
6 changed files with 149 additions and 9 deletions

View File

@@ -180,7 +180,7 @@ func (updater *StaticSecretCacheUpdater) streamStaticSecretEvents(ctx context.Co
return nil
}
// preEventStreamUpdate is called after successful connection to the event system but before
// preEventStreamUpdate is called after successful connection to the event system, but before
// we process any events, to ensure we don't miss any updates.
// In some cases, this will result in multiple processing of the same updates, but
// this ensures that we don't lose any updates to secrets that might have been sent

View File

@@ -19,6 +19,13 @@ import (
"golang.org/x/exp/maps"
)
type TokenCapabilityRefreshBehaviour int
const (
TokenCapabilityRefreshBehaviourOptimistic TokenCapabilityRefreshBehaviour = iota
TokenCapabilityRefreshBehaviourPessimistic
)
const (
// DefaultWorkers is the default number of workers for the worker pool.
DefaultWorkers = 5
@@ -37,15 +44,17 @@ type StaticSecretCapabilityManager struct {
logger hclog.Logger
workerPool *workerpool.WorkerPool
staticSecretTokenCapabilityRefreshInterval time.Duration
tokenCapabilityRefreshBehaviour TokenCapabilityRefreshBehaviour
}
// StaticSecretCapabilityManagerConfig is the configuration for initializing a new
// StaticSecretCapabilityManager.
type StaticSecretCapabilityManagerConfig struct {
LeaseCache *LeaseCache
Logger hclog.Logger
Client *api.Client
StaticSecretTokenCapabilityRefreshInterval time.Duration
LeaseCache *LeaseCache
Logger hclog.Logger
Client *api.Client
StaticSecretTokenCapabilityRefreshInterval time.Duration
StaticSecretTokenCapabilityRefreshBehaviour string
}
// NewStaticSecretCapabilityManager creates a new instance of a StaticSecretCapabilityManager.
@@ -70,6 +79,18 @@ func NewStaticSecretCapabilityManager(conf *StaticSecretCapabilityManagerConfig)
conf.StaticSecretTokenCapabilityRefreshInterval = DefaultStaticSecretTokenCapabilityRefreshInterval
}
behaviour := TokenCapabilityRefreshBehaviourOptimistic
if conf.StaticSecretTokenCapabilityRefreshBehaviour != "" {
switch conf.StaticSecretTokenCapabilityRefreshBehaviour {
case "optimistic":
behaviour = TokenCapabilityRefreshBehaviourOptimistic
case "pessimistic":
behaviour = TokenCapabilityRefreshBehaviourPessimistic
default:
return nil, fmt.Errorf("TokenCapabilityRefreshBehaviour must be either \"optimistic\" or \"pessimistic\"")
}
}
workerPool := workerpool.New(DefaultWorkers)
return &StaticSecretCapabilityManager{
@@ -78,6 +99,7 @@ func NewStaticSecretCapabilityManager(conf *StaticSecretCapabilityManagerConfig)
logger: conf.Logger,
workerPool: workerPool,
staticSecretTokenCapabilityRefreshInterval: conf.StaticSecretTokenCapabilityRefreshInterval,
tokenCapabilityRefreshBehaviour: behaviour,
}, nil
}
@@ -136,9 +158,15 @@ func (sscm *StaticSecretCapabilityManager) StartRenewingCapabilities(indexToRene
capabilities, err := getCapabilities(indexReadablePaths, client)
if err != nil {
sscm.logger.Error("error when attempting to retrieve updated token capabilities", "indexToRenew.ID", indexToRenew.ID, "err", err)
sscm.submitWorkToPoolAfterInterval(work)
return
sscm.logger.Warn("error when attempting to retrieve updated token capabilities", "indexToRenew.ID", indexToRenew.ID, "err", err)
if sscm.tokenCapabilityRefreshBehaviour == TokenCapabilityRefreshBehaviourPessimistic {
// Vault is be sealed or unreachable. If pessimistic, assume we might have
// lost access. Set capabilities to an empty set, so they are all removed.
capabilities = make(map[string][]string)
} else {
sscm.submitWorkToPoolAfterInterval(work)
return
}
}
newReadablePaths := reconcileCapabilities(indexReadablePaths, capabilities)
@@ -188,6 +216,7 @@ func (sscm *StaticSecretCapabilityManager) StartRenewingCapabilities(indexToRene
sscm.submitWorkToPoolAfterInterval(work)
return
}
sscm.logger.Debug("successfully evicted capabilities index from cache", "index.ID", indexToRenew.ID)
// If we successfully evicted the index, no need to re-submit the work to the pool.
return
}

View File

@@ -345,6 +345,88 @@ func TestSubmitWorkUpdatesIndexWithBadToken(t *testing.T) {
sscm.workerPool.Stop()
}
// TestSubmitWorkSealedVaultOptimistic tests that the capability manager
// behaves as expected when
// sscm.tokenCapabilityRefreshBehaviour == TokenCapabilityRefreshBehaviourOptimistic
func TestSubmitWorkSealedVaultOptimistic(t *testing.T) {
t.Parallel()
cluster := minimal.NewTestSoloCluster(t, nil)
client := cluster.Cores[0].Client
token := "not real token"
indexId := hashStaticSecretIndex(token)
sscm := testNewStaticSecretCapabilityManager(t, client)
index := &cachememdb.CapabilitiesIndex{
ID: indexId,
Token: token,
ReadablePaths: map[string]struct{}{
"foo/bar": {},
"auth/token/lookup-self": {},
},
}
err := sscm.leaseCache.db.SetCapabilitiesIndex(index)
require.Nil(t, err)
// Seal the cluster
cluster.EnsureCoresSealed(t)
sscm.StartRenewingCapabilities(index)
// Wait for the job to complete at least once...
time.Sleep(3 * time.Second)
// This entry should not be evicted.
newIndex, err := sscm.leaseCache.db.GetCapabilitiesIndex(cachememdb.IndexNameID, indexId)
require.NoError(t, err)
require.NotNil(t, newIndex)
// Forcefully stop any remaining workers
sscm.workerPool.Stop()
}
// TestSubmitWorkSealedVaultPessimistic tests that the capability manager
// behaves as expected when
// sscm.tokenCapabilityRefreshBehaviour == TokenCapabilityRefreshBehaviourPessimistic
func TestSubmitWorkSealedVaultPessimistic(t *testing.T) {
t.Parallel()
cluster := minimal.NewTestSoloCluster(t, nil)
client := cluster.Cores[0].Client
token := "not real token"
indexId := hashStaticSecretIndex(token)
sscm := testNewStaticSecretCapabilityManager(t, client)
sscm.tokenCapabilityRefreshBehaviour = TokenCapabilityRefreshBehaviourPessimistic
index := &cachememdb.CapabilitiesIndex{
ID: indexId,
Token: token,
ReadablePaths: map[string]struct{}{
"foo/bar": {},
"auth/token/lookup-self": {},
},
}
err := sscm.leaseCache.db.SetCapabilitiesIndex(index)
require.Nil(t, err)
// Seal the cluster
cluster.EnsureCoresSealed(t)
sscm.StartRenewingCapabilities(index)
// Wait for the job to complete at least once...
time.Sleep(3 * time.Second)
// This entry should be evicted.
newIndex, err := sscm.leaseCache.db.GetCapabilitiesIndex(cachememdb.IndexNameID, indexId)
require.Error(t, err)
require.Nil(t, newIndex)
// Forcefully stop any remaining workers
sscm.workerPool.Stop()
}
// TestSubmitWorkUpdatesAllIndexes tests that an index will be correctly updated if the capabilities differ, as
// well as the indexes related to the paths that are being checked for.
func TestSubmitWorkUpdatesAllIndexes(t *testing.T) {

View File

@@ -520,7 +520,8 @@ func (c *ProxyCommand) Run(args []string) int {
LeaseCache: leaseCache,
Logger: c.logger.Named("cache.staticsecretcapabilitymanager"),
Client: client,
StaticSecretTokenCapabilityRefreshInterval: config.Cache.StaticSecretTokenCapabilityRefreshInterval,
StaticSecretTokenCapabilityRefreshInterval: config.Cache.StaticSecretTokenCapabilityRefreshInterval,
StaticSecretTokenCapabilityRefreshBehaviour: config.Cache.StaticSecretTokenCapabilityRefreshBehaviour,
})
if err != nil {
c.UI.Error(fmt.Sprintf("Error creating static secret capability manager: %v", err))

View File

@@ -109,6 +109,7 @@ type Cache struct {
DisableCachingDynamicSecrets bool `hcl:"disable_caching_dynamic_secrets"`
StaticSecretTokenCapabilityRefreshIntervalRaw interface{} `hcl:"static_secret_token_capability_refresh_interval"`
StaticSecretTokenCapabilityRefreshInterval time.Duration `hcl:"-"`
StaticSecretTokenCapabilityRefreshBehaviour string `hcl:"static_secret_token_capability_refresh_behavior"`
}
// AutoAuth is the configured authentication method and sinks
@@ -271,6 +272,15 @@ func (c *Config) ValidateConfig() error {
return fmt.Errorf("no auto_auth, cache, or listener block found in config")
}
if c.Cache != nil && c.Cache.StaticSecretTokenCapabilityRefreshBehaviour != "" {
switch c.Cache.StaticSecretTokenCapabilityRefreshBehaviour {
case "pessimistic":
case "optimistic":
default:
return fmt.Errorf("cache.static_secret_token_capability_refresh_behavior must be either \"optimistic\" or \"pessimistic\"")
}
}
return nil
}

View File

@@ -104,6 +104,14 @@ token can no longer access the relevant paths from the cache. The refresh interv
is essentially the maximum period after which permission to read a KV secret is
fully revoked for the relevant token.
If the capabilities have been removed, or Proxy receives a `403` response, the
capability is removed from the token, and that token cannot be used to access the
cache. For other kinds of errors, such as Vault being unreachable or sealed,
the `static_secret_token_capability_refresh_behavior` config is consulted.
If set to `optimistic` (the default), the capability will not be removed unless we
receive a `403` or valid response without the capability. If set to `pessimistic`,
the capability will be removed for any error, such as would occur if Vault is sealed.
For token refresh to work, any token that will access the cache also needs
`update` permission for [`sys/capabilies-self`](/vault/api-docs/system/capabilities-self).
Having `update` permission for the token lets Proxy test capabilities for the
@@ -149,6 +157,16 @@ secrets. The refresh interval is the maximum period after which permission to
read a cached KV secret is fully revoked. Ignored when `cache_static_secrets`
is `false`.
- `static_secret_token_capability_refresh_behavior` `(string: "optimistic", optional)` -
Sets the capability refresh behavior in the case of an error when attempting to
refresh capabilities. In the case of a `403`, capabilities will be removed for the token
with either option. In case of other errors, such as Vault being sealed or Vault being
unavailable, this setting controls the behavior. If set to `optimistic` (the default),
capabilities will be removed for only `403` errors. If set to `pessimistic`, capabilities
will be removed for any error. This essentially allows configuring a preference between
favoring availability (`optimistic`) or access fidelity (`pessimistic`) of cached
static secrets. Ignored when `cache_static_secrets` is `false`.
### Example configuration
The following example Vault Proxy configuration: