mirror of
				https://github.com/optim-enterprises-bv/vault.git
				synced 2025-10-30 18:17:55 +00:00 
			
		
		
		
	VAULT-24736 CE changes for static secret capability behaviour toggle (#26744)
This commit is contained in:
		| @@ -180,7 +180,7 @@ func (updater *StaticSecretCacheUpdater) streamStaticSecretEvents(ctx context.Co | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // preEventStreamUpdate is called after successful connection to the event system but before | ||||
| // preEventStreamUpdate is called after successful connection to the event system, but before | ||||
| // we process any events, to ensure we don't miss any updates. | ||||
| // In some cases, this will result in multiple processing of the same updates, but | ||||
| // this ensures that we don't lose any updates to secrets that might have been sent | ||||
|   | ||||
| @@ -19,6 +19,13 @@ import ( | ||||
| 	"golang.org/x/exp/maps" | ||||
| ) | ||||
|  | ||||
| type TokenCapabilityRefreshBehaviour int | ||||
|  | ||||
| const ( | ||||
| 	TokenCapabilityRefreshBehaviourOptimistic TokenCapabilityRefreshBehaviour = iota | ||||
| 	TokenCapabilityRefreshBehaviourPessimistic | ||||
| ) | ||||
|  | ||||
| const ( | ||||
| 	// DefaultWorkers is the default number of workers for the worker pool. | ||||
| 	DefaultWorkers = 5 | ||||
| @@ -37,6 +44,7 @@ type StaticSecretCapabilityManager struct { | ||||
| 	logger                                     hclog.Logger | ||||
| 	workerPool                                 *workerpool.WorkerPool | ||||
| 	staticSecretTokenCapabilityRefreshInterval time.Duration | ||||
| 	tokenCapabilityRefreshBehaviour            TokenCapabilityRefreshBehaviour | ||||
| } | ||||
|  | ||||
| // StaticSecretCapabilityManagerConfig is the configuration for initializing a new | ||||
| @@ -46,6 +54,7 @@ type StaticSecretCapabilityManagerConfig struct { | ||||
| 	Logger                                      hclog.Logger | ||||
| 	Client                                      *api.Client | ||||
| 	StaticSecretTokenCapabilityRefreshInterval  time.Duration | ||||
| 	StaticSecretTokenCapabilityRefreshBehaviour string | ||||
| } | ||||
|  | ||||
| // NewStaticSecretCapabilityManager creates a new instance of a StaticSecretCapabilityManager. | ||||
| @@ -70,6 +79,18 @@ func NewStaticSecretCapabilityManager(conf *StaticSecretCapabilityManagerConfig) | ||||
| 		conf.StaticSecretTokenCapabilityRefreshInterval = DefaultStaticSecretTokenCapabilityRefreshInterval | ||||
| 	} | ||||
|  | ||||
| 	behaviour := TokenCapabilityRefreshBehaviourOptimistic | ||||
| 	if conf.StaticSecretTokenCapabilityRefreshBehaviour != "" { | ||||
| 		switch conf.StaticSecretTokenCapabilityRefreshBehaviour { | ||||
| 		case "optimistic": | ||||
| 			behaviour = TokenCapabilityRefreshBehaviourOptimistic | ||||
| 		case "pessimistic": | ||||
| 			behaviour = TokenCapabilityRefreshBehaviourPessimistic | ||||
| 		default: | ||||
| 			return nil, fmt.Errorf("TokenCapabilityRefreshBehaviour must be either \"optimistic\" or \"pessimistic\"") | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	workerPool := workerpool.New(DefaultWorkers) | ||||
|  | ||||
| 	return &StaticSecretCapabilityManager{ | ||||
| @@ -78,6 +99,7 @@ func NewStaticSecretCapabilityManager(conf *StaticSecretCapabilityManagerConfig) | ||||
| 		logger:     conf.Logger, | ||||
| 		workerPool: workerPool, | ||||
| 		staticSecretTokenCapabilityRefreshInterval: conf.StaticSecretTokenCapabilityRefreshInterval, | ||||
| 		tokenCapabilityRefreshBehaviour:            behaviour, | ||||
| 	}, nil | ||||
| } | ||||
|  | ||||
| @@ -136,10 +158,16 @@ func (sscm *StaticSecretCapabilityManager) StartRenewingCapabilities(indexToRene | ||||
|  | ||||
| 		capabilities, err := getCapabilities(indexReadablePaths, client) | ||||
| 		if err != nil { | ||||
| 			sscm.logger.Error("error when attempting to retrieve updated token capabilities", "indexToRenew.ID", indexToRenew.ID, "err", err) | ||||
| 			sscm.logger.Warn("error when attempting to retrieve updated token capabilities", "indexToRenew.ID", indexToRenew.ID, "err", err) | ||||
| 			if sscm.tokenCapabilityRefreshBehaviour == TokenCapabilityRefreshBehaviourPessimistic { | ||||
| 				// Vault is be sealed or unreachable. If pessimistic, assume we might have | ||||
| 				// lost access. Set capabilities to an empty set, so they are all removed. | ||||
| 				capabilities = make(map[string][]string) | ||||
| 			} else { | ||||
| 				sscm.submitWorkToPoolAfterInterval(work) | ||||
| 				return | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		newReadablePaths := reconcileCapabilities(indexReadablePaths, capabilities) | ||||
| 		if maps.Equal(indexReadablePathsMap, newReadablePaths) { | ||||
| @@ -188,6 +216,7 @@ func (sscm *StaticSecretCapabilityManager) StartRenewingCapabilities(indexToRene | ||||
| 				sscm.submitWorkToPoolAfterInterval(work) | ||||
| 				return | ||||
| 			} | ||||
| 			sscm.logger.Debug("successfully evicted capabilities index from cache", "index.ID", indexToRenew.ID) | ||||
| 			// If we successfully evicted the index, no need to re-submit the work to the pool. | ||||
| 			return | ||||
| 		} | ||||
|   | ||||
| @@ -345,6 +345,88 @@ func TestSubmitWorkUpdatesIndexWithBadToken(t *testing.T) { | ||||
| 	sscm.workerPool.Stop() | ||||
| } | ||||
|  | ||||
| // TestSubmitWorkSealedVaultOptimistic tests that the capability manager | ||||
| // behaves as expected when | ||||
| // sscm.tokenCapabilityRefreshBehaviour == TokenCapabilityRefreshBehaviourOptimistic | ||||
| func TestSubmitWorkSealedVaultOptimistic(t *testing.T) { | ||||
| 	t.Parallel() | ||||
| 	cluster := minimal.NewTestSoloCluster(t, nil) | ||||
| 	client := cluster.Cores[0].Client | ||||
|  | ||||
| 	token := "not real token" | ||||
| 	indexId := hashStaticSecretIndex(token) | ||||
|  | ||||
| 	sscm := testNewStaticSecretCapabilityManager(t, client) | ||||
| 	index := &cachememdb.CapabilitiesIndex{ | ||||
| 		ID:    indexId, | ||||
| 		Token: token, | ||||
| 		ReadablePaths: map[string]struct{}{ | ||||
| 			"foo/bar":                {}, | ||||
| 			"auth/token/lookup-self": {}, | ||||
| 		}, | ||||
| 	} | ||||
| 	err := sscm.leaseCache.db.SetCapabilitiesIndex(index) | ||||
| 	require.Nil(t, err) | ||||
|  | ||||
| 	// Seal the cluster | ||||
| 	cluster.EnsureCoresSealed(t) | ||||
|  | ||||
| 	sscm.StartRenewingCapabilities(index) | ||||
|  | ||||
| 	// Wait for the job to complete at least once... | ||||
| 	time.Sleep(3 * time.Second) | ||||
|  | ||||
| 	// This entry should not be evicted. | ||||
| 	newIndex, err := sscm.leaseCache.db.GetCapabilitiesIndex(cachememdb.IndexNameID, indexId) | ||||
| 	require.NoError(t, err) | ||||
| 	require.NotNil(t, newIndex) | ||||
|  | ||||
| 	// Forcefully stop any remaining workers | ||||
| 	sscm.workerPool.Stop() | ||||
| } | ||||
|  | ||||
| // TestSubmitWorkSealedVaultPessimistic tests that the capability manager | ||||
| // behaves as expected when | ||||
| // sscm.tokenCapabilityRefreshBehaviour == TokenCapabilityRefreshBehaviourPessimistic | ||||
| func TestSubmitWorkSealedVaultPessimistic(t *testing.T) { | ||||
| 	t.Parallel() | ||||
| 	cluster := minimal.NewTestSoloCluster(t, nil) | ||||
| 	client := cluster.Cores[0].Client | ||||
|  | ||||
| 	token := "not real token" | ||||
| 	indexId := hashStaticSecretIndex(token) | ||||
|  | ||||
| 	sscm := testNewStaticSecretCapabilityManager(t, client) | ||||
| 	sscm.tokenCapabilityRefreshBehaviour = TokenCapabilityRefreshBehaviourPessimistic | ||||
|  | ||||
| 	index := &cachememdb.CapabilitiesIndex{ | ||||
| 		ID:    indexId, | ||||
| 		Token: token, | ||||
| 		ReadablePaths: map[string]struct{}{ | ||||
| 			"foo/bar":                {}, | ||||
| 			"auth/token/lookup-self": {}, | ||||
| 		}, | ||||
| 	} | ||||
| 	err := sscm.leaseCache.db.SetCapabilitiesIndex(index) | ||||
| 	require.Nil(t, err) | ||||
|  | ||||
| 	// Seal the cluster | ||||
| 	cluster.EnsureCoresSealed(t) | ||||
|  | ||||
| 	sscm.StartRenewingCapabilities(index) | ||||
|  | ||||
| 	// Wait for the job to complete at least once... | ||||
| 	time.Sleep(3 * time.Second) | ||||
|  | ||||
| 	// This entry should be evicted. | ||||
| 	newIndex, err := sscm.leaseCache.db.GetCapabilitiesIndex(cachememdb.IndexNameID, indexId) | ||||
| 	require.Error(t, err) | ||||
| 	require.Nil(t, newIndex) | ||||
|  | ||||
| 	// Forcefully stop any remaining workers | ||||
| 	sscm.workerPool.Stop() | ||||
| } | ||||
|  | ||||
| // TestSubmitWorkUpdatesAllIndexes tests that an index will be correctly updated if the capabilities differ, as | ||||
| // well as the indexes related to the paths that are being checked for. | ||||
| func TestSubmitWorkUpdatesAllIndexes(t *testing.T) { | ||||
|   | ||||
| @@ -521,6 +521,7 @@ func (c *ProxyCommand) Run(args []string) int { | ||||
| 				Logger:     c.logger.Named("cache.staticsecretcapabilitymanager"), | ||||
| 				Client:     client, | ||||
| 				StaticSecretTokenCapabilityRefreshInterval:  config.Cache.StaticSecretTokenCapabilityRefreshInterval, | ||||
| 				StaticSecretTokenCapabilityRefreshBehaviour: config.Cache.StaticSecretTokenCapabilityRefreshBehaviour, | ||||
| 			}) | ||||
| 			if err != nil { | ||||
| 				c.UI.Error(fmt.Sprintf("Error creating static secret capability manager: %v", err)) | ||||
|   | ||||
| @@ -109,6 +109,7 @@ type Cache struct { | ||||
| 	DisableCachingDynamicSecrets                  bool                            `hcl:"disable_caching_dynamic_secrets"` | ||||
| 	StaticSecretTokenCapabilityRefreshIntervalRaw interface{}                     `hcl:"static_secret_token_capability_refresh_interval"` | ||||
| 	StaticSecretTokenCapabilityRefreshInterval    time.Duration                   `hcl:"-"` | ||||
| 	StaticSecretTokenCapabilityRefreshBehaviour   string                          `hcl:"static_secret_token_capability_refresh_behavior"` | ||||
| } | ||||
|  | ||||
| // AutoAuth is the configured authentication method and sinks | ||||
| @@ -271,6 +272,15 @@ func (c *Config) ValidateConfig() error { | ||||
| 		return fmt.Errorf("no auto_auth, cache, or listener block found in config") | ||||
| 	} | ||||
|  | ||||
| 	if c.Cache != nil && c.Cache.StaticSecretTokenCapabilityRefreshBehaviour != "" { | ||||
| 		switch c.Cache.StaticSecretTokenCapabilityRefreshBehaviour { | ||||
| 		case "pessimistic": | ||||
| 		case "optimistic": | ||||
| 		default: | ||||
| 			return fmt.Errorf("cache.static_secret_token_capability_refresh_behavior must be either \"optimistic\" or \"pessimistic\"") | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -104,6 +104,14 @@ token can no longer access the relevant paths from the cache. The refresh interv | ||||
| is essentially the maximum period after which permission to read a KV secret is | ||||
| fully revoked for the relevant token. | ||||
|  | ||||
| If the capabilities have been removed, or Proxy receives a `403` response, the | ||||
| capability is removed from the token, and that token cannot be used to access the | ||||
| cache. For other kinds of errors, such as Vault being unreachable or sealed, | ||||
| the `static_secret_token_capability_refresh_behavior` config is consulted. | ||||
| If set to `optimistic` (the default), the capability will not be removed unless we | ||||
| receive a `403` or valid response without the capability. If set to `pessimistic`, | ||||
| the capability will be removed for any error, such as would occur if Vault is sealed. | ||||
|  | ||||
| For token refresh to work, any token that will access the cache also needs | ||||
| `update` permission for [`sys/capabilies-self`](/vault/api-docs/system/capabilities-self). | ||||
| Having `update` permission for the token lets Proxy test capabilities for the | ||||
| @@ -149,6 +157,16 @@ secrets. The refresh interval is the maximum period after which permission to | ||||
| read a cached KV secret is fully revoked. Ignored when `cache_static_secrets` | ||||
| is `false`. | ||||
|  | ||||
| - `static_secret_token_capability_refresh_behavior` `(string: "optimistic", optional)` - | ||||
| Sets the capability refresh behavior in the case of an error when attempting to | ||||
| refresh capabilities. In the case of a `403`, capabilities will be removed for the token | ||||
| with either option. In case of other errors, such as Vault being sealed or Vault being | ||||
| unavailable, this setting controls the behavior. If set to `optimistic` (the default), | ||||
| capabilities will be removed for only `403` errors. If set to `pessimistic`, capabilities | ||||
| will be removed for any error. This essentially allows configuring a preference between | ||||
| favoring availability (`optimistic`) or access fidelity (`pessimistic`) of cached | ||||
| static secrets. Ignored when `cache_static_secrets` is `false`. | ||||
|  | ||||
| ### Example configuration | ||||
|  | ||||
| The following example Vault Proxy configuration: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Violet Hynes
					Violet Hynes