mirror of
https://github.com/optim-enterprises-bv/vault.git
synced 2025-10-30 18:17:55 +00:00
Modularize Run Command (#11573)
* initial refactoring of unseal step in run
* remove waitgroup
* remove waitgroup
* backup work
* backup
* backup
* completely modularize run and move into diagnose
* add diagnose errors for incorrect number of unseal keys
* comment tests back in
* backup
* first subspan
* finished subspanning but running into error with timeouts
* remove runtime checks
* meeting updates
* remove telemetry block
* roy comment
* subspans for seal finalization and wrapping diagnose latency checks
* fix storage latency test errors
* review comments
* use random uuid for latency checks instead of static id
This commit is contained in:
@@ -4,18 +4,26 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/pkg/ioutils"
|
||||
"github.com/hashicorp/consul/api"
|
||||
log "github.com/hashicorp/go-hclog"
|
||||
uuid "github.com/hashicorp/go-uuid"
|
||||
"github.com/hashicorp/vault/helper/metricsutil"
|
||||
"github.com/hashicorp/vault/internalshared/configutil"
|
||||
"github.com/hashicorp/vault/internalshared/listenerutil"
|
||||
"github.com/hashicorp/vault/internalshared/reloadutil"
|
||||
physconsul "github.com/hashicorp/vault/physical/consul"
|
||||
"github.com/hashicorp/vault/sdk/physical"
|
||||
"github.com/hashicorp/vault/sdk/version"
|
||||
sr "github.com/hashicorp/vault/serviceregistration"
|
||||
srconsul "github.com/hashicorp/vault/serviceregistration/consul"
|
||||
"github.com/hashicorp/vault/vault"
|
||||
"github.com/hashicorp/vault/vault/diagnose"
|
||||
"github.com/mitchellh/cli"
|
||||
"github.com/posener/complete"
|
||||
@@ -23,6 +31,10 @@ import (
|
||||
|
||||
const OperatorDiagnoseEnableEnv = "VAULT_DIAGNOSE"
|
||||
|
||||
const CoreUninitializedErr = "diagnose cannot attempt this step because core could not be initialized"
|
||||
const BackendUninitializedErr = "diagnose cannot attempt this step because backend could not be initialized"
|
||||
const CoreConfigUninitializedErr = "diagnose cannot attempt this step because core config could not be set"
|
||||
|
||||
var (
|
||||
_ cli.Command = (*OperatorDiagnoseCommand)(nil)
|
||||
_ cli.CommandAutocomplete = (*OperatorDiagnoseCommand)(nil)
|
||||
@@ -37,11 +49,12 @@ type OperatorDiagnoseCommand struct {
|
||||
flagConfigs []string
|
||||
cleanupGuard sync.Once
|
||||
|
||||
reloadFuncsLock *sync.RWMutex
|
||||
reloadFuncs *map[string][]reloadutil.ReloadFunc
|
||||
startedCh chan struct{} // for tests
|
||||
reloadedCh chan struct{} // for tests
|
||||
skipEndEnd bool // for tests
|
||||
reloadFuncsLock *sync.RWMutex
|
||||
reloadFuncs *map[string][]reloadutil.ReloadFunc
|
||||
ServiceRegistrations map[string]sr.Factory
|
||||
startedCh chan struct{} // for tests
|
||||
reloadedCh chan struct{} // for tests
|
||||
skipEndEnd bool // for tests
|
||||
}
|
||||
|
||||
func (c *OperatorDiagnoseCommand) Synopsis() string {
|
||||
@@ -203,18 +216,230 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
|
||||
} else {
|
||||
diagnose.SpotOk(ctx, "parse-config", "")
|
||||
}
|
||||
// Check Listener Information
|
||||
// TODO: Run Diagnose checks on the actual net.Listeners
|
||||
|
||||
if err := diagnose.Test(ctx, "init-listeners", func(ctx context.Context) error {
|
||||
var metricSink *metricsutil.ClusterMetricSink
|
||||
var metricsHelper *metricsutil.MetricsHelper
|
||||
|
||||
var backend *physical.Backend
|
||||
diagnose.Test(ctx, "storage", func(ctx context.Context) error {
|
||||
diagnose.Test(ctx, "create-storage-backend", func(ctx context.Context) error {
|
||||
|
||||
b, err := server.setupStorage(config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
backend = &b
|
||||
return nil
|
||||
})
|
||||
|
||||
if config.Storage == nil {
|
||||
return fmt.Errorf("no storage stanza found in config")
|
||||
}
|
||||
|
||||
if config.Storage != nil && config.Storage.Type == storageTypeConsul {
|
||||
diagnose.Test(ctx, "test-storage-tls-consul", func(ctx context.Context) error {
|
||||
err = physconsul.SetupSecureTLS(api.DefaultConfig(), config.Storage.Config, server.logger, true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
diagnose.Test(ctx, "test-consul-direct-access-storage", func(ctx context.Context) error {
|
||||
dirAccess := diagnose.ConsulDirectAccess(config.Storage.Config)
|
||||
if dirAccess != "" {
|
||||
diagnose.Warn(ctx, dirAccess)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// Attempt to use storage backend
|
||||
if !c.skipEndEnd {
|
||||
diagnose.Test(ctx, "test-access-storage", diagnose.WithTimeout(30*time.Second, func(ctx context.Context) error {
|
||||
maxDurationCrudOperation := "write"
|
||||
maxDuration := time.Duration(0)
|
||||
uuidSuffix, err := uuid.GenerateUUID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
uuid := "diagnose/latency/" + uuidSuffix
|
||||
dur, err := diagnose.EndToEndLatencyCheckWrite(ctx, uuid, *backend)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
maxDuration = dur
|
||||
dur, err = diagnose.EndToEndLatencyCheckRead(ctx, uuid, *backend)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if dur > maxDuration {
|
||||
maxDuration = dur
|
||||
maxDurationCrudOperation = "read"
|
||||
}
|
||||
dur, err = diagnose.EndToEndLatencyCheckDelete(ctx, uuid, *backend)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if dur > maxDuration {
|
||||
maxDuration = dur
|
||||
maxDurationCrudOperation = "delete"
|
||||
}
|
||||
|
||||
if maxDuration > time.Duration(0) {
|
||||
diagnose.Warn(ctx, diagnose.LatencyWarning+fmt.Sprintf("duration: %s, ", maxDuration)+fmt.Sprintf("operation: %s", maxDurationCrudOperation))
|
||||
}
|
||||
return nil
|
||||
}))
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
var configSR sr.ServiceRegistration
|
||||
diagnose.Test(ctx, "service-discovery", func(ctx context.Context) error {
|
||||
if config.ServiceRegistration == nil || config.ServiceRegistration.Config == nil {
|
||||
return fmt.Errorf("No service registration config")
|
||||
}
|
||||
srConfig := config.ServiceRegistration.Config
|
||||
|
||||
diagnose.Test(ctx, "test-serviceregistration-tls-consul", func(ctx context.Context) error {
|
||||
// SetupSecureTLS for service discovery uses the same cert and key to set up physical
|
||||
// storage. See the consul package in physical for details.
|
||||
err = srconsul.SetupSecureTLS(api.DefaultConfig(), srConfig, server.logger, true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if config.ServiceRegistration != nil && config.ServiceRegistration.Type == "consul" {
|
||||
diagnose.Test(ctx, "test-consul-direct-access-service-discovery", func(ctx context.Context) error {
|
||||
dirAccess := diagnose.ConsulDirectAccess(config.ServiceRegistration.Config)
|
||||
if dirAccess != "" {
|
||||
diagnose.Warn(ctx, dirAccess)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
sealcontext, sealspan := diagnose.StartSpan(ctx, "create-seal")
|
||||
var seals []vault.Seal
|
||||
var sealConfigError error
|
||||
barrierSeal, barrierWrapper, unwrapSeal, seals, sealConfigError, err := setSeal(server, config, make([]string, 0), make(map[string]string))
|
||||
// Check error here
|
||||
if err != nil {
|
||||
diagnose.Fail(sealcontext, err.Error())
|
||||
goto SEALFAIL
|
||||
}
|
||||
if sealConfigError != nil {
|
||||
diagnose.Fail(sealcontext, "seal could not be configured: seals may already be initialized")
|
||||
goto SEALFAIL
|
||||
}
|
||||
|
||||
if seals != nil {
|
||||
for _, seal := range seals {
|
||||
// Ensure that the seal finalizer is called, even if using verify-only
|
||||
defer func(seal *vault.Seal) {
|
||||
sealType := (*seal).BarrierType()
|
||||
finalizeSealContext, finalizeSealSpan := diagnose.StartSpan(ctx, "finalize-seal-"+sealType)
|
||||
err = (*seal).Finalize(finalizeSealContext)
|
||||
if err != nil {
|
||||
diagnose.Fail(finalizeSealContext, "error finalizing seal")
|
||||
finalizeSealSpan.End()
|
||||
}
|
||||
finalizeSealSpan.End()
|
||||
}(&seal)
|
||||
}
|
||||
}
|
||||
|
||||
if barrierSeal == nil {
|
||||
diagnose.Fail(sealcontext, "could not create barrier seal! Most likely proper Seal configuration information was not set, but no error was generated")
|
||||
}
|
||||
|
||||
SEALFAIL:
|
||||
sealspan.End()
|
||||
var coreConfig vault.CoreConfig
|
||||
if err := diagnose.Test(ctx, "setup-core", func(ctx context.Context) error {
|
||||
var secureRandomReader io.Reader
|
||||
// prepare a secure random reader for core
|
||||
secureRandomReader, err = configutil.CreateSecureRandomReaderFunc(config.SharedConfig, barrierWrapper)
|
||||
if err != nil {
|
||||
return diagnose.SpotError(ctx, "init-randreader", err)
|
||||
}
|
||||
diagnose.SpotOk(ctx, "init-randreader", "")
|
||||
|
||||
if backend == nil {
|
||||
return fmt.Errorf(BackendUninitializedErr)
|
||||
}
|
||||
coreConfig = createCoreConfig(server, config, *backend, configSR, barrierSeal, unwrapSeal, metricsHelper, metricSink, secureRandomReader)
|
||||
return nil
|
||||
}); err != nil {
|
||||
diagnose.Error(ctx, err)
|
||||
}
|
||||
|
||||
var disableClustering bool
|
||||
diagnose.Test(ctx, "setup-ha-storage", func(ctx context.Context) error {
|
||||
if backend == nil {
|
||||
return fmt.Errorf(BackendUninitializedErr)
|
||||
}
|
||||
diagnose.Test(ctx, "create-ha-storage-backend", func(ctx context.Context) error {
|
||||
// Initialize the separate HA storage backend, if it exists
|
||||
disableClustering, err = initHaBackend(server, config, &coreConfig, *backend)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
})
|
||||
diagnose.Test(ctx, "test-consul-direct-access-storage", func(ctx context.Context) error {
|
||||
dirAccess := diagnose.ConsulDirectAccess(config.HAStorage.Config)
|
||||
if dirAccess != "" {
|
||||
diagnose.Warn(ctx, dirAccess)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if config.HAStorage != nil && config.HAStorage.Type == storageTypeConsul {
|
||||
diagnose.Test(ctx, "test-storage-tls-consul", func(ctx context.Context) error {
|
||||
err = physconsul.SetupSecureTLS(api.DefaultConfig(), config.HAStorage.Config, server.logger, true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
// Determine the redirect address from environment variables
|
||||
err = determineRedirectAddr(server, &coreConfig, config)
|
||||
if err != nil {
|
||||
return diagnose.SpotError(ctx, "determine-redirect", err)
|
||||
}
|
||||
diagnose.SpotOk(ctx, "determine-redirect", "")
|
||||
|
||||
err = findClusterAddress(server, &coreConfig, config, disableClustering)
|
||||
if err != nil {
|
||||
return diagnose.SpotError(ctx, "find-cluster-addr", err)
|
||||
}
|
||||
diagnose.SpotOk(ctx, "find-cluster-addr", "")
|
||||
|
||||
var lns []listenerutil.Listener
|
||||
diagnose.Test(ctx, "init-listeners", func(ctx context.Context) error {
|
||||
disableClustering := config.HAStorage.DisableClustering
|
||||
infoKeys := make([]string, 0, 10)
|
||||
info := make(map[string]string)
|
||||
status, lns, _, errMsg := server.InitListeners(config, disableClustering, &infoKeys, &info)
|
||||
var listeners []listenerutil.Listener
|
||||
var status int
|
||||
diagnose.Test(ctx, "create-listeners", func(ctx context.Context) error {
|
||||
status, listeners, _, err = server.InitListeners(config, disableClustering, &infoKeys, &info)
|
||||
if status != 0 {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if status != 0 {
|
||||
return errMsg
|
||||
}
|
||||
lns = listeners
|
||||
|
||||
// Make sure we close all listeners from this point on
|
||||
listenerCloseFunc := func() {
|
||||
@@ -225,96 +450,39 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
|
||||
|
||||
defer c.cleanupGuard.Do(listenerCloseFunc)
|
||||
|
||||
sanitizedListeners := make([]listenerutil.Listener, 0, len(config.Listeners))
|
||||
for _, ln := range lns {
|
||||
if ln.Config.TLSDisable {
|
||||
diagnose.Warn(ctx, "TLS is disabled in a Listener config stanza.")
|
||||
continue
|
||||
}
|
||||
if ln.Config.TLSDisableClientCerts {
|
||||
diagnose.Warn(ctx, "TLS for a listener is turned on without requiring client certs.")
|
||||
}
|
||||
diagnose.Test(ctx, "check-listener-tls", func(ctx context.Context) error {
|
||||
sanitizedListeners := make([]listenerutil.Listener, 0, len(config.Listeners))
|
||||
for _, ln := range lns {
|
||||
if ln.Config.TLSDisable {
|
||||
diagnose.Warn(ctx, "TLS is disabled in a Listener config stanza.")
|
||||
continue
|
||||
}
|
||||
if ln.Config.TLSDisableClientCerts {
|
||||
diagnose.Warn(ctx, "TLS for a listener is turned on without requiring client certs.")
|
||||
}
|
||||
|
||||
// Check ciphersuite and load ca/cert/key files
|
||||
// TODO: TLSConfig returns a reloadFunc and a TLSConfig. We can use this to
|
||||
// perform an active probe.
|
||||
_, _, err := listenerutil.TLSConfig(ln.Config, make(map[string]string), c.UI)
|
||||
// Check ciphersuite and load ca/cert/key files
|
||||
// TODO: TLSConfig returns a reloadFunc and a TLSConfig. We can use this to
|
||||
// perform an active probe.
|
||||
_, _, err := listenerutil.TLSConfig(ln.Config, make(map[string]string), c.UI)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sanitizedListeners = append(sanitizedListeners, listenerutil.Listener{
|
||||
Listener: ln.Listener,
|
||||
Config: ln.Config,
|
||||
})
|
||||
}
|
||||
err = diagnose.ListenerChecks(sanitizedListeners)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sanitizedListeners = append(sanitizedListeners, listenerutil.Listener{
|
||||
Listener: ln.Listener,
|
||||
Config: ln.Config,
|
||||
})
|
||||
}
|
||||
return diagnose.ListenerChecks(sanitizedListeners)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Errors in these items could stop Vault from starting but are not yet covered:
|
||||
// TODO: logging configuration
|
||||
// TODO: SetupTelemetry
|
||||
if err := diagnose.Test(ctx, "storage", func(ctx context.Context) error {
|
||||
b, err := server.setupStorage(config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dirAccess := diagnose.ConsulDirectAccess(config.HAStorage.Config)
|
||||
if dirAccess != "" {
|
||||
diagnose.Warn(ctx, dirAccess)
|
||||
}
|
||||
|
||||
if config.Storage != nil && config.Storage.Type == storageTypeConsul {
|
||||
err = physconsul.SetupSecureTLS(api.DefaultConfig(), config.Storage.Config, server.logger, true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dirAccess := diagnose.ConsulDirectAccess(config.Storage.Config)
|
||||
if dirAccess != "" {
|
||||
diagnose.Warn(ctx, dirAccess)
|
||||
}
|
||||
}
|
||||
|
||||
if config.HAStorage != nil && config.HAStorage.Type == storageTypeConsul {
|
||||
err = physconsul.SetupSecureTLS(api.DefaultConfig(), config.HAStorage.Config, server.logger, true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Attempt to use storage backend
|
||||
if !c.skipEndEnd {
|
||||
err = diagnose.StorageEndToEndLatencyCheck(ctx, b)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
diagnose.Test(ctx, "service-discovery", func(ctx context.Context) error {
|
||||
srConfig := config.ServiceRegistration.Config
|
||||
// Initialize the Service Discovery, if there is one
|
||||
if config.ServiceRegistration != nil && config.ServiceRegistration.Type == "consul" {
|
||||
// setupStorage populates the srConfig, so no nil checks are necessary.
|
||||
dirAccess := diagnose.ConsulDirectAccess(config.ServiceRegistration.Config)
|
||||
if dirAccess != "" {
|
||||
diagnose.Warn(ctx, dirAccess)
|
||||
}
|
||||
|
||||
// SetupSecureTLS for service discovery uses the same cert and key to set up physical
|
||||
// storage. See the consul package in physical for details.
|
||||
return srconsul.SetupSecureTLS(api.DefaultConfig(), srConfig, server.logger, true)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
return nil
|
||||
})
|
||||
|
||||
// TODO: Diagnose logging configuration
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user