mirror of
https://github.com/optim-enterprises-bv/vault.git
synced 2025-10-29 17:52:32 +00:00
VAULT-25341 Address issue where having no permissions to renew caused auto-auth to attempt to renew with no backoff (#26844)
* VAULT-25341 Address issue where having no permissions to renew caused Agent and Proxy auth to attempt to renew with no backoff * Fiddle with go.mod changes that shouldn't have happened * VAULT-25341 small cleanup and extra test * VAULT-25341 backoff only in error case * VAULT-25341 godocs * VAULT-25342 changelog * Update command/agent_test.go Co-authored-by: divyaac <divya.chandrasekaran@hashicorp.com> * VAULT-25341 rename file audit --------- Co-authored-by: divyaac <divya.chandrasekaran@hashicorp.com>
This commit is contained in:
@@ -6,6 +6,7 @@ package api
|
||||
import (
|
||||
"errors"
|
||||
"math/rand"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -289,12 +290,18 @@ func (r *LifetimeWatcher) doRenewWithOptions(tokenMode bool, nonRenewable bool,
|
||||
switch {
|
||||
case nonRenewable || r.renewBehavior == RenewBehaviorRenewDisabled:
|
||||
// Can't or won't renew, just keep the same expiration so we exit
|
||||
// when it's reauthentication time
|
||||
// when it's re-authentication time
|
||||
remainingLeaseDuration = fallbackLeaseDuration
|
||||
|
||||
default:
|
||||
// Renew the token
|
||||
renewal, err = renew(credString, r.increment)
|
||||
if err != nil && strings.Contains(err.Error(), "permission denied") {
|
||||
// We can't renew since the token doesn't have permission to. Fall back
|
||||
// to the code path for non-renewable tokens.
|
||||
nonRenewable = true
|
||||
continue
|
||||
}
|
||||
if err != nil || renewal == nil || (tokenMode && renewal.Auth == nil) {
|
||||
if r.renewBehavior == RenewBehaviorErrorOnErrors {
|
||||
if err != nil {
|
||||
|
||||
@@ -177,6 +177,20 @@ func TestLifetimeWatcher(t *testing.T) {
|
||||
expectError: nil,
|
||||
expectRenewal: true,
|
||||
},
|
||||
{
|
||||
maxTestTime: time.Second,
|
||||
name: "permission_denied_error",
|
||||
leaseDurationSeconds: 60,
|
||||
incrementSeconds: 10,
|
||||
// This should cause the lifetime watcher to behave just
|
||||
// like a non-renewable secret, i.e. wait until its lifetime
|
||||
// then be done.
|
||||
renew: func(_ string, _ int) (*Secret, error) {
|
||||
return nil, fmt.Errorf("permission denied")
|
||||
},
|
||||
expectError: nil,
|
||||
expectRenewal: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
@@ -204,7 +218,9 @@ func TestLifetimeWatcher(t *testing.T) {
|
||||
for {
|
||||
select {
|
||||
case <-time.After(tc.maxTestTime):
|
||||
t.Fatalf("renewal didn't happen")
|
||||
if tc.expectRenewal || tc.expectError != nil {
|
||||
t.Fatalf("expected error or renewal, and neither happened")
|
||||
}
|
||||
case r := <-v.RenewCh():
|
||||
if !tc.expectRenewal {
|
||||
t.Fatal("expected no renewals")
|
||||
|
||||
3
changelog/26844.txt
Normal file
3
changelog/26844.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
```release-note:bug
|
||||
auto-auth: Addressed issue where having no permissions to renew a renewable token caused auto-auth to attempt to renew constantly with no backoff
|
||||
```
|
||||
@@ -3179,6 +3179,143 @@ vault {
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgent_TokenRenewal tests that LifeTimeWatcher does not make
|
||||
// many renewal attempts if the token's policy does not allow for it to renew
|
||||
// itself. Prior to a bug fix in the PR that added this test, this would have resulted
|
||||
// in hundreds of token renewal requests with no backoff.
|
||||
func TestAgent_TokenRenewal(t *testing.T) {
|
||||
logger := logging.NewVaultLogger(hclog.Trace)
|
||||
cluster := vault.NewTestCluster(t, nil, &vault.TestClusterOptions{
|
||||
HandlerFunc: vaulthttp.Handler,
|
||||
})
|
||||
cluster.Start()
|
||||
defer cluster.Cleanup()
|
||||
|
||||
serverClient := cluster.Cores[0].Client
|
||||
|
||||
auditLogFileName := makeTempFile(t, "audit-log", "")
|
||||
err := serverClient.Sys().EnableAuditWithOptions("file-audit-for-TestAgent_TokenRenewal", &api.EnableAuditOptions{
|
||||
Type: "file",
|
||||
Options: map[string]string{
|
||||
"file_path": auditLogFileName,
|
||||
},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Unset the environment variable so that agent picks up the right test
|
||||
// cluster address
|
||||
defer os.Setenv(api.EnvVaultAddress, os.Getenv(api.EnvVaultAddress))
|
||||
os.Unsetenv(api.EnvVaultAddress)
|
||||
|
||||
policyName := "less-than-default"
|
||||
// Has a subset of the default policy's permissions
|
||||
// Specifically removing renew-self.
|
||||
err = serverClient.Sys().PutPolicy(policyName, `
|
||||
path "auth/token/lookup-self" {
|
||||
capabilities = ["read"]
|
||||
}
|
||||
|
||||
# Allow tokens to revoke themselves
|
||||
path "auth/token/revoke-self" {
|
||||
capabilities = ["update"]
|
||||
}
|
||||
|
||||
# Allow a token to look up its own capabilities on a path
|
||||
path "sys/capabilities-self" {
|
||||
capabilities = ["update"]
|
||||
}
|
||||
`)
|
||||
require.NoError(t, err)
|
||||
|
||||
renewable := true
|
||||
// Make the token renewable but give it no permissions
|
||||
// (e.g. the permission to renew itself)
|
||||
tokenCreateRequest := &api.TokenCreateRequest{
|
||||
Policies: []string{policyName},
|
||||
TTL: "10s",
|
||||
Renewable: &renewable,
|
||||
NoDefaultPolicy: true,
|
||||
}
|
||||
|
||||
secret, err := serverClient.Auth().Token().CreateOrphan(tokenCreateRequest)
|
||||
require.NoError(t, err)
|
||||
lowPermissionToken := secret.Auth.ClientToken
|
||||
|
||||
tokenFileName := makeTempFile(t, "token-file", lowPermissionToken)
|
||||
|
||||
sinkFileName := makeTempFile(t, "sink-file", "")
|
||||
|
||||
autoAuthConfig := fmt.Sprintf(`
|
||||
auto_auth {
|
||||
method {
|
||||
type = "token_file"
|
||||
config = {
|
||||
token_file_path = "%s"
|
||||
}
|
||||
}
|
||||
|
||||
sink "file" {
|
||||
config = {
|
||||
path = "%s"
|
||||
}
|
||||
}
|
||||
}`, tokenFileName, sinkFileName)
|
||||
|
||||
config := fmt.Sprintf(`
|
||||
vault {
|
||||
address = "%s"
|
||||
tls_skip_verify = true
|
||||
}
|
||||
|
||||
log_level = "trace"
|
||||
|
||||
%s
|
||||
`, serverClient.Address(), autoAuthConfig)
|
||||
configPath := makeTempFile(t, "config.hcl", config)
|
||||
|
||||
// Start the agent
|
||||
ui, cmd := testAgentCommand(t, logger)
|
||||
|
||||
cmd.startedCh = make(chan struct{})
|
||||
|
||||
wg := &sync.WaitGroup{}
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
cmd.Run([]string{"-config", configPath})
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-cmd.startedCh:
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Errorf("timeout")
|
||||
t.Errorf("stdout: %s", ui.OutputWriter.String())
|
||||
t.Errorf("stderr: %s", ui.ErrorWriter.String())
|
||||
}
|
||||
|
||||
// Sleep, to allow the renewal/auth process to work and ensure that it doesn't
|
||||
// go crazy with renewals.
|
||||
time.Sleep(30 * time.Second)
|
||||
|
||||
fileBytes, err := os.ReadFile(auditLogFileName)
|
||||
require.NoError(t, err)
|
||||
stringAudit := string(fileBytes)
|
||||
|
||||
// This is a bit of an imperfect way to test things, but we want to make sure
|
||||
// that a token like this doesn't keep triggering retries.
|
||||
// Due to the fact this is an auto-auth specific thing, unit tests for the
|
||||
// LifetimeWatcher wouldn't be sufficient here.
|
||||
// Prior to the fix made in the same PR this test was added, it would trigger many, many
|
||||
// retries (hundreds to thousands in less than a minute).
|
||||
// We really want to make sure that doesn't happen.
|
||||
numberOfRenewSelves := strings.Count(stringAudit, "auth/token/renew-self")
|
||||
// We actually expect ~6, but I added some buffer for CI weirdness. It can also vary
|
||||
// due to the grace added/removed from the sleep in LifetimeWatcher too.
|
||||
if numberOfRenewSelves > 10 {
|
||||
t.Fatalf("did too many renews -- Vault received %d renew-self requests", numberOfRenewSelves)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgent_Logging_ConsulTemplate attempts to ensure two things about Vault Agent logs:
|
||||
// 1. When -log-format command line arg is set to JSON, it is honored as the output format
|
||||
// for messages generated from within the consul-template library.
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
@@ -478,10 +479,9 @@ func (ah *AuthHandler) Run(ctx context.Context, am AuthMethod) error {
|
||||
}
|
||||
|
||||
metrics.IncrCounter([]string{ah.metricsSignifier, "auth", "success"}, 1)
|
||||
// We don't want to trigger the renewal process for tokens with
|
||||
// unlimited TTL, such as the root token.
|
||||
if leaseDuration == 0 && isTokenFileMethod {
|
||||
ah.logger.Info("not starting token renewal process, as token has unlimited TTL")
|
||||
// We don't want to trigger the renewal process for the root token
|
||||
if isRootToken(leaseDuration, isTokenFileMethod, secret) {
|
||||
ah.logger.Info("not starting token renewal process, as token is root token")
|
||||
} else {
|
||||
ah.logger.Info("starting renewal process")
|
||||
go watcher.Renew()
|
||||
@@ -496,11 +496,31 @@ func (ah *AuthHandler) Run(ctx context.Context, am AuthMethod) error {
|
||||
break LifetimeWatcherLoop
|
||||
|
||||
case err := <-watcher.DoneCh():
|
||||
ah.logger.Info("lifetime watcher done channel triggered")
|
||||
ah.logger.Info("lifetime watcher done channel triggered, re-authenticating")
|
||||
if err != nil {
|
||||
ah.logger.Error("error renewing token", "error", err, "backoff", backoffCfg)
|
||||
metrics.IncrCounter([]string{ah.metricsSignifier, "auth", "failure"}, 1)
|
||||
ah.logger.Error("error renewing token", "error", err)
|
||||
|
||||
// Add some exponential backoff so that if auth is successful
|
||||
// but the watcher errors, we won't go into an immediate
|
||||
// aggressive retry loop.
|
||||
// This might be quite a small sleep, since if we have a successful
|
||||
// auth, we reset the backoff. Still, some backoff is important, and
|
||||
// ensuring we follow the normal flow is important:
|
||||
// auth -> try to renew
|
||||
if !backoffSleep(ctx, backoffCfg) {
|
||||
// We're at max retries. Return an error.
|
||||
return fmt.Errorf("exceeded max retries failing to renew auth token")
|
||||
}
|
||||
}
|
||||
|
||||
// If the lease duration is 0, wait a second before re-authenticating
|
||||
// so that we don't go into a loop, as the LifetimeWatcher will immediately
|
||||
// return for tokens like this.
|
||||
if leaseDuration == 0 {
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
|
||||
break LifetimeWatcherLoop
|
||||
|
||||
case <-watcher.RenewCh():
|
||||
@@ -523,6 +543,24 @@ func (ah *AuthHandler) Run(ctx context.Context, am AuthMethod) error {
|
||||
}
|
||||
}
|
||||
|
||||
// isRootToken checks if the secret in the argument is the root token
|
||||
// This is determinable without leaseDuration and isTokenFileMethod,
|
||||
// but those make it easier to rule out other tokens cheaply.
|
||||
func isRootToken(leaseDuration int, isTokenFileMethod bool, secret *api.Secret) bool {
|
||||
// This check is cheaper than the others, so we do this first.
|
||||
if leaseDuration == 0 && isTokenFileMethod && !secret.Renewable {
|
||||
if secret != nil {
|
||||
policies, err := secret.TokenPolicies()
|
||||
if err == nil {
|
||||
if len(policies) == 1 && policies[0] == "root" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// autoAuthBackoff tracks exponential backoff state.
|
||||
type autoAuthBackoff struct {
|
||||
backoff *backoff.Backoff
|
||||
|
||||
Reference in New Issue
Block a user