mirror of
https://github.com/optim-enterprises-bv/vault.git
synced 2025-11-01 02:57:59 +00:00
VAULT-25341 Address issue where having no permissions to renew caused auto-auth to attempt to renew with no backoff (#26844)
* VAULT-25341 Address issue where having no permissions to renew caused Agent and Proxy auth to attempt to renew with no backoff * Fiddle with go.mod changes that shouldn't have happened * VAULT-25341 small cleanup and extra test * VAULT-25341 backoff only in error case * VAULT-25341 godocs * VAULT-25342 changelog * Update command/agent_test.go Co-authored-by: divyaac <divya.chandrasekaran@hashicorp.com> * VAULT-25341 rename file audit --------- Co-authored-by: divyaac <divya.chandrasekaran@hashicorp.com>
This commit is contained in:
@@ -6,6 +6,7 @@ package api
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -289,12 +290,18 @@ func (r *LifetimeWatcher) doRenewWithOptions(tokenMode bool, nonRenewable bool,
|
|||||||
switch {
|
switch {
|
||||||
case nonRenewable || r.renewBehavior == RenewBehaviorRenewDisabled:
|
case nonRenewable || r.renewBehavior == RenewBehaviorRenewDisabled:
|
||||||
// Can't or won't renew, just keep the same expiration so we exit
|
// Can't or won't renew, just keep the same expiration so we exit
|
||||||
// when it's reauthentication time
|
// when it's re-authentication time
|
||||||
remainingLeaseDuration = fallbackLeaseDuration
|
remainingLeaseDuration = fallbackLeaseDuration
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// Renew the token
|
// Renew the token
|
||||||
renewal, err = renew(credString, r.increment)
|
renewal, err = renew(credString, r.increment)
|
||||||
|
if err != nil && strings.Contains(err.Error(), "permission denied") {
|
||||||
|
// We can't renew since the token doesn't have permission to. Fall back
|
||||||
|
// to the code path for non-renewable tokens.
|
||||||
|
nonRenewable = true
|
||||||
|
continue
|
||||||
|
}
|
||||||
if err != nil || renewal == nil || (tokenMode && renewal.Auth == nil) {
|
if err != nil || renewal == nil || (tokenMode && renewal.Auth == nil) {
|
||||||
if r.renewBehavior == RenewBehaviorErrorOnErrors {
|
if r.renewBehavior == RenewBehaviorErrorOnErrors {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -177,6 +177,20 @@ func TestLifetimeWatcher(t *testing.T) {
|
|||||||
expectError: nil,
|
expectError: nil,
|
||||||
expectRenewal: true,
|
expectRenewal: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
maxTestTime: time.Second,
|
||||||
|
name: "permission_denied_error",
|
||||||
|
leaseDurationSeconds: 60,
|
||||||
|
incrementSeconds: 10,
|
||||||
|
// This should cause the lifetime watcher to behave just
|
||||||
|
// like a non-renewable secret, i.e. wait until its lifetime
|
||||||
|
// then be done.
|
||||||
|
renew: func(_ string, _ int) (*Secret, error) {
|
||||||
|
return nil, fmt.Errorf("permission denied")
|
||||||
|
},
|
||||||
|
expectError: nil,
|
||||||
|
expectRenewal: false,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range cases {
|
for _, tc := range cases {
|
||||||
@@ -204,7 +218,9 @@ func TestLifetimeWatcher(t *testing.T) {
|
|||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-time.After(tc.maxTestTime):
|
case <-time.After(tc.maxTestTime):
|
||||||
t.Fatalf("renewal didn't happen")
|
if tc.expectRenewal || tc.expectError != nil {
|
||||||
|
t.Fatalf("expected error or renewal, and neither happened")
|
||||||
|
}
|
||||||
case r := <-v.RenewCh():
|
case r := <-v.RenewCh():
|
||||||
if !tc.expectRenewal {
|
if !tc.expectRenewal {
|
||||||
t.Fatal("expected no renewals")
|
t.Fatal("expected no renewals")
|
||||||
|
|||||||
3
changelog/26844.txt
Normal file
3
changelog/26844.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
```release-note:bug
|
||||||
|
auto-auth: Addressed issue where having no permissions to renew a renewable token caused auto-auth to attempt to renew constantly with no backoff
|
||||||
|
```
|
||||||
@@ -3179,6 +3179,143 @@ vault {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestAgent_TokenRenewal tests that LifeTimeWatcher does not make
|
||||||
|
// many renewal attempts if the token's policy does not allow for it to renew
|
||||||
|
// itself. Prior to a bug fix in the PR that added this test, this would have resulted
|
||||||
|
// in hundreds of token renewal requests with no backoff.
|
||||||
|
func TestAgent_TokenRenewal(t *testing.T) {
|
||||||
|
logger := logging.NewVaultLogger(hclog.Trace)
|
||||||
|
cluster := vault.NewTestCluster(t, nil, &vault.TestClusterOptions{
|
||||||
|
HandlerFunc: vaulthttp.Handler,
|
||||||
|
})
|
||||||
|
cluster.Start()
|
||||||
|
defer cluster.Cleanup()
|
||||||
|
|
||||||
|
serverClient := cluster.Cores[0].Client
|
||||||
|
|
||||||
|
auditLogFileName := makeTempFile(t, "audit-log", "")
|
||||||
|
err := serverClient.Sys().EnableAuditWithOptions("file-audit-for-TestAgent_TokenRenewal", &api.EnableAuditOptions{
|
||||||
|
Type: "file",
|
||||||
|
Options: map[string]string{
|
||||||
|
"file_path": auditLogFileName,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Unset the environment variable so that agent picks up the right test
|
||||||
|
// cluster address
|
||||||
|
defer os.Setenv(api.EnvVaultAddress, os.Getenv(api.EnvVaultAddress))
|
||||||
|
os.Unsetenv(api.EnvVaultAddress)
|
||||||
|
|
||||||
|
policyName := "less-than-default"
|
||||||
|
// Has a subset of the default policy's permissions
|
||||||
|
// Specifically removing renew-self.
|
||||||
|
err = serverClient.Sys().PutPolicy(policyName, `
|
||||||
|
path "auth/token/lookup-self" {
|
||||||
|
capabilities = ["read"]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Allow tokens to revoke themselves
|
||||||
|
path "auth/token/revoke-self" {
|
||||||
|
capabilities = ["update"]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Allow a token to look up its own capabilities on a path
|
||||||
|
path "sys/capabilities-self" {
|
||||||
|
capabilities = ["update"]
|
||||||
|
}
|
||||||
|
`)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
renewable := true
|
||||||
|
// Make the token renewable but give it no permissions
|
||||||
|
// (e.g. the permission to renew itself)
|
||||||
|
tokenCreateRequest := &api.TokenCreateRequest{
|
||||||
|
Policies: []string{policyName},
|
||||||
|
TTL: "10s",
|
||||||
|
Renewable: &renewable,
|
||||||
|
NoDefaultPolicy: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
secret, err := serverClient.Auth().Token().CreateOrphan(tokenCreateRequest)
|
||||||
|
require.NoError(t, err)
|
||||||
|
lowPermissionToken := secret.Auth.ClientToken
|
||||||
|
|
||||||
|
tokenFileName := makeTempFile(t, "token-file", lowPermissionToken)
|
||||||
|
|
||||||
|
sinkFileName := makeTempFile(t, "sink-file", "")
|
||||||
|
|
||||||
|
autoAuthConfig := fmt.Sprintf(`
|
||||||
|
auto_auth {
|
||||||
|
method {
|
||||||
|
type = "token_file"
|
||||||
|
config = {
|
||||||
|
token_file_path = "%s"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sink "file" {
|
||||||
|
config = {
|
||||||
|
path = "%s"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}`, tokenFileName, sinkFileName)
|
||||||
|
|
||||||
|
config := fmt.Sprintf(`
|
||||||
|
vault {
|
||||||
|
address = "%s"
|
||||||
|
tls_skip_verify = true
|
||||||
|
}
|
||||||
|
|
||||||
|
log_level = "trace"
|
||||||
|
|
||||||
|
%s
|
||||||
|
`, serverClient.Address(), autoAuthConfig)
|
||||||
|
configPath := makeTempFile(t, "config.hcl", config)
|
||||||
|
|
||||||
|
// Start the agent
|
||||||
|
ui, cmd := testAgentCommand(t, logger)
|
||||||
|
|
||||||
|
cmd.startedCh = make(chan struct{})
|
||||||
|
|
||||||
|
wg := &sync.WaitGroup{}
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
cmd.Run([]string{"-config", configPath})
|
||||||
|
wg.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-cmd.startedCh:
|
||||||
|
case <-time.After(5 * time.Second):
|
||||||
|
t.Errorf("timeout")
|
||||||
|
t.Errorf("stdout: %s", ui.OutputWriter.String())
|
||||||
|
t.Errorf("stderr: %s", ui.ErrorWriter.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sleep, to allow the renewal/auth process to work and ensure that it doesn't
|
||||||
|
// go crazy with renewals.
|
||||||
|
time.Sleep(30 * time.Second)
|
||||||
|
|
||||||
|
fileBytes, err := os.ReadFile(auditLogFileName)
|
||||||
|
require.NoError(t, err)
|
||||||
|
stringAudit := string(fileBytes)
|
||||||
|
|
||||||
|
// This is a bit of an imperfect way to test things, but we want to make sure
|
||||||
|
// that a token like this doesn't keep triggering retries.
|
||||||
|
// Due to the fact this is an auto-auth specific thing, unit tests for the
|
||||||
|
// LifetimeWatcher wouldn't be sufficient here.
|
||||||
|
// Prior to the fix made in the same PR this test was added, it would trigger many, many
|
||||||
|
// retries (hundreds to thousands in less than a minute).
|
||||||
|
// We really want to make sure that doesn't happen.
|
||||||
|
numberOfRenewSelves := strings.Count(stringAudit, "auth/token/renew-self")
|
||||||
|
// We actually expect ~6, but I added some buffer for CI weirdness. It can also vary
|
||||||
|
// due to the grace added/removed from the sleep in LifetimeWatcher too.
|
||||||
|
if numberOfRenewSelves > 10 {
|
||||||
|
t.Fatalf("did too many renews -- Vault received %d renew-self requests", numberOfRenewSelves)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TestAgent_Logging_ConsulTemplate attempts to ensure two things about Vault Agent logs:
|
// TestAgent_Logging_ConsulTemplate attempts to ensure two things about Vault Agent logs:
|
||||||
// 1. When -log-format command line arg is set to JSON, it is honored as the output format
|
// 1. When -log-format command line arg is set to JSON, it is honored as the output format
|
||||||
// for messages generated from within the consul-template library.
|
// for messages generated from within the consul-template library.
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"net/http"
|
"net/http"
|
||||||
@@ -478,10 +479,9 @@ func (ah *AuthHandler) Run(ctx context.Context, am AuthMethod) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
metrics.IncrCounter([]string{ah.metricsSignifier, "auth", "success"}, 1)
|
metrics.IncrCounter([]string{ah.metricsSignifier, "auth", "success"}, 1)
|
||||||
// We don't want to trigger the renewal process for tokens with
|
// We don't want to trigger the renewal process for the root token
|
||||||
// unlimited TTL, such as the root token.
|
if isRootToken(leaseDuration, isTokenFileMethod, secret) {
|
||||||
if leaseDuration == 0 && isTokenFileMethod {
|
ah.logger.Info("not starting token renewal process, as token is root token")
|
||||||
ah.logger.Info("not starting token renewal process, as token has unlimited TTL")
|
|
||||||
} else {
|
} else {
|
||||||
ah.logger.Info("starting renewal process")
|
ah.logger.Info("starting renewal process")
|
||||||
go watcher.Renew()
|
go watcher.Renew()
|
||||||
@@ -496,11 +496,31 @@ func (ah *AuthHandler) Run(ctx context.Context, am AuthMethod) error {
|
|||||||
break LifetimeWatcherLoop
|
break LifetimeWatcherLoop
|
||||||
|
|
||||||
case err := <-watcher.DoneCh():
|
case err := <-watcher.DoneCh():
|
||||||
ah.logger.Info("lifetime watcher done channel triggered")
|
ah.logger.Info("lifetime watcher done channel triggered, re-authenticating")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
ah.logger.Error("error renewing token", "error", err, "backoff", backoffCfg)
|
||||||
metrics.IncrCounter([]string{ah.metricsSignifier, "auth", "failure"}, 1)
|
metrics.IncrCounter([]string{ah.metricsSignifier, "auth", "failure"}, 1)
|
||||||
ah.logger.Error("error renewing token", "error", err)
|
|
||||||
|
// Add some exponential backoff so that if auth is successful
|
||||||
|
// but the watcher errors, we won't go into an immediate
|
||||||
|
// aggressive retry loop.
|
||||||
|
// This might be quite a small sleep, since if we have a successful
|
||||||
|
// auth, we reset the backoff. Still, some backoff is important, and
|
||||||
|
// ensuring we follow the normal flow is important:
|
||||||
|
// auth -> try to renew
|
||||||
|
if !backoffSleep(ctx, backoffCfg) {
|
||||||
|
// We're at max retries. Return an error.
|
||||||
|
return fmt.Errorf("exceeded max retries failing to renew auth token")
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the lease duration is 0, wait a second before re-authenticating
|
||||||
|
// so that we don't go into a loop, as the LifetimeWatcher will immediately
|
||||||
|
// return for tokens like this.
|
||||||
|
if leaseDuration == 0 {
|
||||||
|
time.Sleep(1 * time.Second)
|
||||||
|
}
|
||||||
|
|
||||||
break LifetimeWatcherLoop
|
break LifetimeWatcherLoop
|
||||||
|
|
||||||
case <-watcher.RenewCh():
|
case <-watcher.RenewCh():
|
||||||
@@ -523,6 +543,24 @@ func (ah *AuthHandler) Run(ctx context.Context, am AuthMethod) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isRootToken checks if the secret in the argument is the root token
|
||||||
|
// This is determinable without leaseDuration and isTokenFileMethod,
|
||||||
|
// but those make it easier to rule out other tokens cheaply.
|
||||||
|
func isRootToken(leaseDuration int, isTokenFileMethod bool, secret *api.Secret) bool {
|
||||||
|
// This check is cheaper than the others, so we do this first.
|
||||||
|
if leaseDuration == 0 && isTokenFileMethod && !secret.Renewable {
|
||||||
|
if secret != nil {
|
||||||
|
policies, err := secret.TokenPolicies()
|
||||||
|
if err == nil {
|
||||||
|
if len(policies) == 1 && policies[0] == "root" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// autoAuthBackoff tracks exponential backoff state.
|
// autoAuthBackoff tracks exponential backoff state.
|
||||||
type autoAuthBackoff struct {
|
type autoAuthBackoff struct {
|
||||||
backoff *backoff.Backoff
|
backoff *backoff.Backoff
|
||||||
|
|||||||
Reference in New Issue
Block a user