mirror of
https://github.com/optim-enterprises-bv/vault.git
synced 2025-11-03 03:58:01 +00:00
backport of commit dbfaa6f81a (#23286)
Co-authored-by: Steven Clark <steven.clark@hashicorp.com>
This commit is contained in:
committed by
GitHub
parent
b4d07277a6
commit
0943b82368
@@ -10,6 +10,7 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/hashicorp/vault/sdk/helper/consts"
|
||||||
"github.com/hashicorp/vault/sdk/logical"
|
"github.com/hashicorp/vault/sdk/logical"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -42,6 +43,7 @@ type ChallengeValidation struct {
|
|||||||
type ChallengeQueueEntry struct {
|
type ChallengeQueueEntry struct {
|
||||||
Identifier string
|
Identifier string
|
||||||
RetryAfter time.Time
|
RetryAfter time.Time
|
||||||
|
NumRetries int // Track if we are spinning on a corrupted challenge
|
||||||
}
|
}
|
||||||
|
|
||||||
type ACMEChallengeEngine struct {
|
type ACMEChallengeEngine struct {
|
||||||
@@ -97,7 +99,7 @@ func (ace *ACMEChallengeEngine) Run(b *backend, state *acmeState, sc *storageCon
|
|||||||
b.Logger().Error("failed loading existing ACME challenge validations:", "err", err)
|
b.Logger().Error("failed loading existing ACME challenge validations:", "err", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
for true {
|
for {
|
||||||
// err == nil on shutdown.
|
// err == nil on shutdown.
|
||||||
b.Logger().Debug("Starting ACME challenge validation engine")
|
b.Logger().Debug("Starting ACME challenge validation engine")
|
||||||
err := ace._run(b, state)
|
err := ace._run(b, state)
|
||||||
@@ -119,7 +121,7 @@ func (ace *ACMEChallengeEngine) _run(b *backend, state *acmeState) error {
|
|||||||
// We want at most a certain number of workers operating to verify
|
// We want at most a certain number of workers operating to verify
|
||||||
// challenges.
|
// challenges.
|
||||||
var finishedWorkersChannels []chan bool
|
var finishedWorkersChannels []chan bool
|
||||||
for true {
|
for {
|
||||||
// Wait until we've got more work to do.
|
// Wait until we've got more work to do.
|
||||||
select {
|
select {
|
||||||
case <-ace.Closing:
|
case <-ace.Closing:
|
||||||
@@ -201,12 +203,17 @@ func (ace *ACMEChallengeEngine) _run(b *backend, state *acmeState) error {
|
|||||||
// looping through the queue until we hit a repeat.
|
// looping through the queue until we hit a repeat.
|
||||||
firstIdentifier = ""
|
firstIdentifier = ""
|
||||||
|
|
||||||
|
// If we are no longer the active node, break out
|
||||||
|
if b.System().ReplicationState().HasState(consts.ReplicationDRSecondary | consts.ReplicationPerformanceStandby) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
// Here, we got a piece of work that is ready to check; create a
|
// Here, we got a piece of work that is ready to check; create a
|
||||||
// channel and a new go routine and run it. Note that this still
|
// channel and a new go routine and run it. Note that this still
|
||||||
// could have a RetryAfter date we're not aware of (e.g., if the
|
// could have a RetryAfter date we're not aware of (e.g., if the
|
||||||
// cluster restarted as we do not read the entries there).
|
// cluster restarted as we do not read the entries there).
|
||||||
channel := make(chan bool, 1)
|
channel := make(chan bool, 1)
|
||||||
go ace.VerifyChallenge(runnerSC, task.Identifier, channel, config)
|
go ace.VerifyChallenge(runnerSC, task.Identifier, task.NumRetries, channel, config)
|
||||||
finishedWorkersChannels = append(finishedWorkersChannels, channel)
|
finishedWorkersChannels = append(finishedWorkersChannels, channel)
|
||||||
startedWork = true
|
startedWork = true
|
||||||
}
|
}
|
||||||
@@ -305,8 +312,9 @@ func (ace *ACMEChallengeEngine) AcceptChallenge(sc *storageContext, account stri
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ace *ACMEChallengeEngine) VerifyChallenge(runnerSc *storageContext, id string, finished chan bool, config *acmeConfigEntry) {
|
func (ace *ACMEChallengeEngine) VerifyChallenge(runnerSc *storageContext, id string, validationQueueRetries int, finished chan bool, config *acmeConfigEntry) {
|
||||||
sc, _ /* cancel func */ := runnerSc.WithFreshTimeout(MaxChallengeTimeout)
|
sc, cancel := runnerSc.WithFreshTimeout(MaxChallengeTimeout)
|
||||||
|
defer cancel()
|
||||||
runnerSc.Backend.Logger().Debug("Starting verification of challenge", "id", id)
|
runnerSc.Backend.Logger().Debug("Starting verification of challenge", "id", id)
|
||||||
|
|
||||||
if retry, retryAfter, err := ace._verifyChallenge(sc, id, config); err != nil {
|
if retry, retryAfter, err := ace._verifyChallenge(sc, id, config); err != nil {
|
||||||
@@ -316,11 +324,28 @@ func (ace *ACMEChallengeEngine) VerifyChallenge(runnerSc *storageContext, id str
|
|||||||
sc.Backend.Logger().Error(fmt.Sprintf("ACME validation failed for %v: %v", id, err))
|
sc.Backend.Logger().Error(fmt.Sprintf("ACME validation failed for %v: %v", id, err))
|
||||||
|
|
||||||
if retry {
|
if retry {
|
||||||
|
validationQueueRetries++
|
||||||
|
|
||||||
|
// The retry logic within _verifyChallenge is dependent on being able to read and decode
|
||||||
|
// the ACME challenge entries. If we encounter such failures we would retry forever, so
|
||||||
|
// we have a secondary check here to see if we are consistently looping within the validation
|
||||||
|
// queue that is larger than the normal retry attempts we would allow.
|
||||||
|
if validationQueueRetries > MaxRetryAttempts*2 {
|
||||||
|
sc.Backend.Logger().Warn("reached max error attempts within challenge queue: %v, giving up", id)
|
||||||
|
_, _, err = ace._verifyChallengeCleanup(sc, nil, id)
|
||||||
|
if err != nil {
|
||||||
|
sc.Backend.Logger().Warn("Failed cleaning up challenge entry: %v", err)
|
||||||
|
}
|
||||||
|
finished <- true
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
ace.ValidationLock.Lock()
|
ace.ValidationLock.Lock()
|
||||||
defer ace.ValidationLock.Unlock()
|
defer ace.ValidationLock.Unlock()
|
||||||
ace.Validations.PushBack(&ChallengeQueueEntry{
|
ace.Validations.PushBack(&ChallengeQueueEntry{
|
||||||
Identifier: id,
|
Identifier: id,
|
||||||
RetryAfter: retryAfter,
|
RetryAfter: retryAfter,
|
||||||
|
NumRetries: validationQueueRetries,
|
||||||
})
|
})
|
||||||
|
|
||||||
// Let the validator know there's a pending challenge.
|
// Let the validator know there's a pending challenge.
|
||||||
@@ -343,23 +368,23 @@ func (ace *ACMEChallengeEngine) VerifyChallenge(runnerSc *storageContext, id str
|
|||||||
|
|
||||||
func (ace *ACMEChallengeEngine) _verifyChallenge(sc *storageContext, id string, config *acmeConfigEntry) (bool, time.Time, error) {
|
func (ace *ACMEChallengeEngine) _verifyChallenge(sc *storageContext, id string, config *acmeConfigEntry) (bool, time.Time, error) {
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
|
backoffTime := now.Add(1 * time.Second)
|
||||||
path := acmeValidationPrefix + id
|
path := acmeValidationPrefix + id
|
||||||
challengeEntry, err := sc.Storage.Get(sc.Context, path)
|
challengeEntry, err := sc.Storage.Get(sc.Context, path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return true, now, fmt.Errorf("error loading challenge %v: %w", id, err)
|
return true, backoffTime, fmt.Errorf("error loading challenge %v: %w", id, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if challengeEntry == nil {
|
if challengeEntry == nil {
|
||||||
// Something must've successfully cleaned up our storage entry from
|
// Something must've successfully cleaned up our storage entry from
|
||||||
// under us. Assume we don't need to rerun, else the client will
|
// under us. Assume we don't need to rerun, else the client will
|
||||||
// trigger us to re-run.
|
// trigger us to re-run.
|
||||||
err = nil
|
return ace._verifyChallengeCleanup(sc, nil, id)
|
||||||
return ace._verifyChallengeCleanup(sc, err, id)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var cv *ChallengeValidation
|
var cv *ChallengeValidation
|
||||||
if err := challengeEntry.DecodeJSON(&cv); err != nil {
|
if err := challengeEntry.DecodeJSON(&cv); err != nil {
|
||||||
return true, now, fmt.Errorf("error decoding challenge %v: %w", id, err)
|
return true, backoffTime, fmt.Errorf("error decoding challenge %v: %w", id, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if now.Before(cv.RetryAfter) {
|
if now.Before(cv.RetryAfter) {
|
||||||
@@ -369,7 +394,7 @@ func (ace *ACMEChallengeEngine) _verifyChallenge(sc *storageContext, id string,
|
|||||||
authzPath := getAuthorizationPath(cv.Account, cv.Authorization)
|
authzPath := getAuthorizationPath(cv.Account, cv.Authorization)
|
||||||
authz, err := loadAuthorizationAtPath(sc, authzPath)
|
authz, err := loadAuthorizationAtPath(sc, authzPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return true, now, fmt.Errorf("error loading authorization %v/%v for challenge %v: %w", cv.Account, cv.Authorization, id, err)
|
return true, backoffTime, fmt.Errorf("error loading authorization %v/%v for challenge %v: %w", cv.Account, cv.Authorization, id, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if authz.Status != ACMEAuthorizationPending {
|
if authz.Status != ACMEAuthorizationPending {
|
||||||
@@ -527,7 +552,7 @@ func (ace *ACMEChallengeEngine) _verifyChallengeCleanup(sc *storageContext, err
|
|||||||
|
|
||||||
// Remove our ChallengeValidation entry only.
|
// Remove our ChallengeValidation entry only.
|
||||||
if deleteErr := sc.Storage.Delete(sc.Context, acmeValidationPrefix+id); deleteErr != nil {
|
if deleteErr := sc.Storage.Delete(sc.Context, acmeValidationPrefix+id); deleteErr != nil {
|
||||||
return true, now.Add(-1 * time.Second), fmt.Errorf("error deleting challenge %v (error prior to cleanup, if any: %v): %w", id, err, deleteErr)
|
return true, now.Add(1 * time.Second), fmt.Errorf("error deleting challenge %v (error prior to cleanup, if any: %v): %w", id, err, deleteErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import (
|
|||||||
|
|
||||||
"github.com/hashicorp/go-secure-stdlib/nonceutil"
|
"github.com/hashicorp/go-secure-stdlib/nonceutil"
|
||||||
"github.com/hashicorp/vault/sdk/framework"
|
"github.com/hashicorp/vault/sdk/framework"
|
||||||
|
"github.com/hashicorp/vault/sdk/helper/consts"
|
||||||
"github.com/hashicorp/vault/sdk/logical"
|
"github.com/hashicorp/vault/sdk/logical"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -76,6 +77,13 @@ func (a *acmeState) Initialize(b *backend, sc *storageContext) error {
|
|||||||
return fmt.Errorf("error initializing ACME engine: %w", err)
|
return fmt.Errorf("error initializing ACME engine: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if b.System().ReplicationState().HasState(consts.ReplicationDRSecondary | consts.ReplicationPerformanceStandby) {
|
||||||
|
// It is assumed, that if the node does become the active node later
|
||||||
|
// the plugin is re-initialized, so this is safe. It also spares the node
|
||||||
|
// from loading the existing queue into memory for no reason.
|
||||||
|
b.Logger().Debug("Not on an active node, skipping starting ACME challenge validation engine")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
// Kick off our ACME challenge validation engine.
|
// Kick off our ACME challenge validation engine.
|
||||||
go a.validator.Run(b, a, sc)
|
go a.validator.Run(b, a, sc)
|
||||||
|
|
||||||
|
|||||||
@@ -64,9 +64,10 @@ func (ts *TestServer) setupRunner(domain string, network string) {
|
|||||||
ContainerName: "bind9-dns-" + strings.ReplaceAll(domain, ".", "-"),
|
ContainerName: "bind9-dns-" + strings.ReplaceAll(domain, ".", "-"),
|
||||||
NetworkName: network,
|
NetworkName: network,
|
||||||
Ports: []string{"53/udp"},
|
Ports: []string{"53/udp"},
|
||||||
LogConsumer: func(s string) {
|
// DNS container logging was disabled to reduce content within CI logs.
|
||||||
ts.log.Info(s)
|
//LogConsumer: func(s string) {
|
||||||
},
|
// ts.log.Info(s)
|
||||||
|
//},
|
||||||
})
|
})
|
||||||
require.NoError(ts.t, err)
|
require.NoError(ts.t, err)
|
||||||
}
|
}
|
||||||
|
|||||||
3
changelog/23278.txt
Normal file
3
changelog/23278.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
```release-note:bug
|
||||||
|
secrets/pki: Stop processing in-flight ACME verifications when an active node steps down
|
||||||
|
```
|
||||||
Reference in New Issue
Block a user