Added exponential backoff (#25497)

* Applied Patch

* Added changelog

* Edited changelog

* Added constants to be shared

* Edited changelog verbiage

* Removed copy and paste error

* Moved the constants

* Fixed static checks
This commit is contained in:
divyaac
2024-02-20 11:42:59 -08:00
committed by GitHub
parent 3850d41a87
commit 3132592c19
6 changed files with 57 additions and 12 deletions

3
changelog/25497.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
agent: Fixes a high Vault load issue by restarting the Consul template server after an exponential backoff instead of immediately.
```

View File

@@ -7,6 +7,7 @@ import (
"context"
"fmt"
"io"
"math"
"os"
"sort"
"sync"
@@ -19,6 +20,8 @@ import (
"github.com/hashicorp/vault/command/agent/config"
"github.com/hashicorp/vault/command/agent/internal/ctmanager"
"github.com/hashicorp/vault/helper/useragent"
"github.com/hashicorp/vault/sdk/helper/backoff"
"github.com/hashicorp/vault/sdk/helper/consts"
"github.com/hashicorp/vault/sdk/helper/pointerutil"
"golang.org/x/exp/slices"
)
@@ -167,6 +170,10 @@ func (s *Server) Run(ctx context.Context, incomingVaultToken chan string) error
// capture the errors related to restarting the child process
restartChildProcessErrCh := make(chan error)
// create exponential backoff object to calculate backoff time before restarting a failed
// consul template server
restartBackoff := backoff.NewBackoff(math.MaxInt, consts.DefaultMinBackoff, consts.DefaultMaxBackoff)
for {
select {
case <-ctx.Done():
@@ -216,6 +223,17 @@ func (s *Server) Run(ctx context.Context, incomingVaultToken chan string) error
return fmt.Errorf("template server: %w", err)
}
// Calculate the amount of time to backoff using exponential backoff
sleep, err := restartBackoff.Next()
if err != nil {
s.logger.Error("template server: reached maximum number restart attempts")
restartBackoff.Reset()
}
// Sleep for the calculated backoff time then attempt to create a new runner
s.logger.Warn(fmt.Sprintf("template server restart: retry attempt after %s", sleep))
time.Sleep(sleep)
s.runner, err = manager.NewRunner(runnerConfig, true)
if err != nil {
return fmt.Errorf("template server failed to create: %w", err)

View File

@@ -13,6 +13,8 @@ import (
"errors"
"fmt"
"io"
"math"
"time"
ctconfig "github.com/hashicorp/consul-template/config"
"github.com/hashicorp/consul-template/manager"
@@ -20,6 +22,8 @@ import (
"github.com/hashicorp/vault/command/agent/config"
"github.com/hashicorp/vault/command/agent/internal/ctmanager"
"github.com/hashicorp/vault/helper/useragent"
"github.com/hashicorp/vault/sdk/helper/backoff"
"github.com/hashicorp/vault/sdk/helper/consts"
"github.com/hashicorp/vault/sdk/helper/pointerutil"
"go.uber.org/atomic"
)
@@ -143,6 +147,10 @@ func (ts *Server) Run(ctx context.Context, incoming chan string, templates []*ct
}
ts.lookupMap = lookupMap
// Create backoff object to calculate backoff time before restarting a failed
// consul template server
restartBackoff := backoff.NewBackoff(math.MaxInt, consts.DefaultMinBackoff, consts.DefaultMaxBackoff)
for {
select {
case <-ctx.Done():
@@ -191,6 +199,17 @@ func (ts *Server) Run(ctx context.Context, incoming chan string, templates []*ct
return fmt.Errorf("template server: %w", err)
}
// Calculate the amount of time to backoff using exponential backoff
sleep, err := restartBackoff.Next()
if err != nil {
ts.logger.Error("template server: reached maximum number of restart attempts")
restartBackoff.Reset()
}
// Sleep for the calculated backoff time then attempt to create a new runner
ts.logger.Warn(fmt.Sprintf("template server restart: retry attempt after %s", sleep))
time.Sleep(sleep)
ts.runner, err = manager.NewRunner(runnerConfig, false)
if err != nil {
return fmt.Errorf("template server failed to create: %w", err)

View File

@@ -16,14 +16,10 @@ import (
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/vault/api"
"github.com/hashicorp/vault/sdk/helper/backoff"
"github.com/hashicorp/vault/sdk/helper/consts"
"github.com/hashicorp/vault/sdk/helper/jsonutil"
)
const (
defaultMinBackoff = 1 * time.Second
defaultMaxBackoff = 5 * time.Minute
)
// AuthMethod is the interface that auto-auth methods implement for the agent/proxy
// to use.
type AuthMethod interface {
@@ -132,10 +128,10 @@ func (ah *AuthHandler) Run(ctx context.Context, am AuthMethod) error {
}
if ah.minBackoff <= 0 {
ah.minBackoff = defaultMinBackoff
ah.minBackoff = consts.DefaultMinBackoff
}
if ah.maxBackoff <= 0 {
ah.maxBackoff = defaultMaxBackoff
ah.maxBackoff = consts.DefaultMaxBackoff
}
if ah.minBackoff > ah.maxBackoff {
return errors.New("auth handler: min_backoff cannot be greater than max_backoff")
@@ -510,11 +506,11 @@ type autoAuthBackoff struct {
func newAutoAuthBackoff(min, max time.Duration, exitErr bool) *autoAuthBackoff {
if max <= 0 {
max = defaultMaxBackoff
max = consts.DefaultMaxBackoff
}
if min <= 0 {
min = defaultMinBackoff
min = consts.DefaultMinBackoff
}
retries := math.MaxInt

View File

@@ -13,6 +13,7 @@ import (
"github.com/hashicorp/vault/api"
"github.com/hashicorp/vault/builtin/credential/userpass"
vaulthttp "github.com/hashicorp/vault/http"
"github.com/hashicorp/vault/sdk/helper/consts"
"github.com/hashicorp/vault/sdk/helper/logging"
"github.com/hashicorp/vault/sdk/logical"
"github.com/hashicorp/vault/vault"
@@ -110,10 +111,10 @@ consumption:
func TestAgentBackoff(t *testing.T) {
max := 1024 * time.Second
backoff := newAutoAuthBackoff(defaultMinBackoff, max, false)
backoff := newAutoAuthBackoff(consts.DefaultMinBackoff, max, false)
// Test initial value
if backoff.backoff.Current() > defaultMinBackoff || backoff.backoff.Current() < defaultMinBackoff*3/4 {
if backoff.backoff.Current() > consts.DefaultMinBackoff || backoff.backoff.Current() < consts.DefaultMinBackoff*3/4 {
t.Fatalf("expected 1s initial backoff, got: %v", backoff.backoff.Current())
}
@@ -141,7 +142,7 @@ func TestAgentBackoff(t *testing.T) {
// Test reset
backoff.backoff.Reset()
if backoff.backoff.Current() > defaultMinBackoff || backoff.backoff.Current() < defaultMinBackoff*3/4 {
if backoff.backoff.Current() > consts.DefaultMinBackoff || backoff.backoff.Current() < consts.DefaultMinBackoff*3/4 {
t.Fatalf("expected 1s backoff after reset, got: %v", backoff.backoff.Current())
}
}

View File

@@ -3,6 +3,8 @@
package consts
import "time"
// AgentPathCacheClear is the path that the agent will use as its cache-clear
// endpoint.
const AgentPathCacheClear = "/agent/v1/cache-clear"
@@ -13,3 +15,9 @@ const AgentPathMetrics = "/agent/v1/metrics"
// AgentPathQuit is the path that the agent will use to trigger stopping it.
const AgentPathQuit = "/agent/v1/quit"
// Default backoff bounds used by agent and proxy.
const (
	// DefaultMinBackoff is the default minimum backoff duration for agent and proxy.
	DefaultMinBackoff = time.Second

	// DefaultMaxBackoff is the default maximum backoff duration for agent and proxy.
	DefaultMaxBackoff = 5 * time.Minute
)