Diagnose Language Pass (#11909)

* save

* save

* save

* first round of the diagnose language pass

* capitalization

* first round of feedback

* fix bug in advise

* a few more nouns to verbs
This commit is contained in:
Hridoy Roy
2021-07-11 15:44:19 -07:00
committed by GitHub
parent f796bc9f4d
commit 46afacb013
16 changed files with 332 additions and 264 deletions

View File

@@ -37,9 +37,7 @@ import (
const OperatorDiagnoseEnableEnv = "VAULT_DIAGNOSE"
const CoreUninitializedErr = "diagnose cannot attempt this step because core could not be initialized"
const BackendUninitializedErr = "diagnose cannot attempt this step because backend could not be initialized"
const CoreConfigUninitializedErr = "diagnose cannot attempt this step because core config could not be set"
const CoreConfigUninitializedErr = "Diagnose cannot attempt this step because core config could not be set."
var (
_ cli.Command = (*OperatorDiagnoseCommand)(nil)
@@ -175,7 +173,7 @@ func (c *OperatorDiagnoseCommand) RunWithParsedFlags() int {
if c.flagFormat == "json" {
resultsJS, err := json.MarshalIndent(results, "", " ")
if err != nil {
fmt.Fprintf(os.Stderr, "error marshalling results: %v", err)
fmt.Fprintf(os.Stderr, "Error marshalling results: %v.", err)
return 4
}
c.UI.Output(string(resultsJS))
@@ -224,7 +222,7 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
reloadFuncsLock: new(sync.RWMutex),
}
ctx, span := diagnose.StartSpan(ctx, "initialization")
ctx, span := diagnose.StartSpan(ctx, "Vault Diagnose")
defer span.End()
// OS Specific checks
@@ -232,16 +230,17 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
var config *cserver.Config
diagnose.Test(ctx, "Parse configuration", func(ctx context.Context) (err error) {
diagnose.Test(ctx, "Parse Configuration", func(ctx context.Context) (err error) {
server.flagConfigs = c.flagConfigs
var configErrors []configutil.ConfigError
config, configErrors, err = server.parseConfig()
if err != nil {
return err
return fmt.Errorf("Could not parse configuration: %w.", err)
}
for _, ce := range configErrors {
diagnose.Warn(ctx, ce.String())
diagnose.Warn(ctx, diagnose.CapitalizeFirstLetter(ce.String())+".")
}
diagnose.Success(ctx, "Vault configuration syntax is ok.")
return nil
})
if config == nil {
@@ -252,20 +251,22 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
var metricsHelper *metricsutil.MetricsHelper
var backend *physical.Backend
diagnose.Test(ctx, "storage", func(ctx context.Context) error {
diagnose.Test(ctx, "Check Storage", func(ctx context.Context) error {
// Ensure that there is a storage stanza
if config.Storage == nil {
return fmt.Errorf("no storage stanza found in config")
diagnose.Advise(ctx, "To learn how to specify a storage backend, see the Vault server configuration documentation.")
return fmt.Errorf("No storage stanza in Vault server configuration.")
}
diagnose.Test(ctx, "create-storage-backend", func(ctx context.Context) error {
diagnose.Test(ctx, "Create Storage Backend", func(ctx context.Context) error {
b, err := server.setupStorage(config)
if err != nil {
return err
}
if b == nil {
return fmt.Errorf("storage backend was initialized to nil")
diagnose.Advise(ctx, "To learn how to specify a storage backend, see the Vault server configuration documentation.")
return fmt.Errorf("Storage backend could not be initialized.")
}
backend = &b
return nil
@@ -283,20 +284,16 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
if path == "" {
path, ok := config.Storage.Config["path"]
if !ok {
diagnose.SpotError(ctx, "raft folder permission checks", fmt.Errorf("storage file path is required"))
diagnose.SpotError(ctx, "Check Raft Folder Permissions", fmt.Errorf("Storage folder path is required."))
}
diagnose.RaftFileChecks(ctx, path)
}
if backend != nil {
diagnose.RaftStorageQuorum(ctx, (*backend).(*raft.RaftBackend))
} else {
diagnose.SpotError(ctx, "raft quorum", fmt.Errorf("could not determine quorum status without initialized backend"))
}
diagnose.RaftStorageQuorum(ctx, (*backend).(*raft.RaftBackend))
}
// Consul storage checks
if config.Storage != nil && config.Storage.Type == storageTypeConsul {
diagnose.Test(ctx, "test-storage-tls-consul", func(ctx context.Context) error {
diagnose.Test(ctx, "Check Consul TLS", func(ctx context.Context) error {
err := physconsul.SetupSecureTLS(ctx, api.DefaultConfig(), config.Storage.Config, server.logger, true)
if err != nil {
return err
@@ -304,18 +301,21 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
return nil
})
diagnose.Test(ctx, "test-consul-direct-access-storage", func(ctx context.Context) error {
diagnose.Test(ctx, "Check Consul Direct Storage Access", func(ctx context.Context) error {
dirAccess := diagnose.ConsulDirectAccess(config.Storage.Config)
if dirAccess != "" {
diagnose.Warn(ctx, dirAccess)
}
if dirAccess == diagnose.DirAccessErr {
diagnose.Advise(ctx, diagnose.DirAccessAdvice)
}
return nil
})
}
// Attempt to use storage backend
if !c.skipEndEnd && config.Storage.Type != storageTypeRaft {
diagnose.Test(ctx, "test-access-storage", diagnose.WithTimeout(30*time.Second, func(ctx context.Context) error {
diagnose.Test(ctx, "Check Storage Access", diagnose.WithTimeout(30*time.Second, func(ctx context.Context) error {
maxDurationCrudOperation := "write"
maxDuration := time.Duration(0)
uuidSuffix, err := uuid.GenerateUUID()
@@ -346,7 +346,7 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
}
if maxDuration > time.Duration(0) {
diagnose.Warn(ctx, diagnose.LatencyWarning+fmt.Sprintf("duration: %s, ", maxDuration)+fmt.Sprintf("operation: %s", maxDurationCrudOperation))
diagnose.Warn(ctx, diagnose.LatencyWarning+fmt.Sprintf("duration: %s, operation: %s", maxDuration, maxDurationCrudOperation))
}
return nil
}))
@@ -360,14 +360,14 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
}
var configSR sr.ServiceRegistration
diagnose.Test(ctx, "service-discovery", func(ctx context.Context) error {
diagnose.Test(ctx, "Check Service Discovery", func(ctx context.Context) error {
if config.ServiceRegistration == nil || config.ServiceRegistration.Config == nil {
diagnose.Skipped(ctx, "no service registration configured")
diagnose.Skipped(ctx, "No service registration configured.")
return nil
}
srConfig := config.ServiceRegistration.Config
diagnose.Test(ctx, "test-serviceregistration-tls-consul", func(ctx context.Context) error {
diagnose.Test(ctx, "Check Consul Service Discovery TLS", func(ctx context.Context) error {
// SetupSecureTLS for service discovery uses the same cert and key to set up physical
// storage. See the consul package in physical for details.
err := srconsul.SetupSecureTLS(ctx, api.DefaultConfig(), srConfig, server.logger, true)
@@ -378,64 +378,21 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
})
if config.ServiceRegistration != nil && config.ServiceRegistration.Type == "consul" {
diagnose.Test(ctx, "test-consul-direct-access-service-discovery", func(ctx context.Context) error {
diagnose.Test(ctx, "Check Consul Direct Service Discovery", func(ctx context.Context) error {
dirAccess := diagnose.ConsulDirectAccess(config.ServiceRegistration.Config)
if dirAccess != "" {
diagnose.Warn(ctx, dirAccess)
}
if dirAccess == diagnose.DirAccessErr {
diagnose.Advise(ctx, diagnose.DirAccessAdvice)
}
return nil
})
}
return nil
})
diagnose.Test(ctx, "seal-transit-tls-checks", func(ctx context.Context) error {
var checkSealTransit bool
for _, seal := range config.Seals {
if seal.Type == "transit" {
checkSealTransit = true
tlsSkipVerify, _ := seal.Config["tls_skip_verify"]
if tlsSkipVerify == "true" {
diagnose.Warn(ctx, "TLS verification is Skipped! Using this option is highly discouraged and decreases the security of data transmissions to and from the Vault server.")
return nil
}
// Checking tls_client_cert and tls_client_key
tlsClientCert, ok := seal.Config["tls_client_cert"]
if !ok {
diagnose.Warn(ctx, "Missing tls_client_cert in the config")
return nil
}
tlsClientKey, ok := seal.Config["tls_client_key"]
if !ok {
diagnose.Warn(ctx, "Missing tls_client_key in the config")
return nil
}
_, err := diagnose.TLSFileChecks(tlsClientCert, tlsClientKey)
if err != nil {
return fmt.Errorf("TLS file check failed for tls_client_cert and tls_client_key with the following error: %w", err)
}
// checking tls_ca_cert
tlsCACert, ok := seal.Config["tls_ca_cert"]
if !ok {
diagnose.Warn(ctx, "Mising tls_ca_cert in the config")
return nil
}
_, err = diagnose.TLSCAFileCheck(tlsCACert)
if err != nil {
return fmt.Errorf("TLS file check failed for tls_ca_cert with the following error: %w", err)
}
}
}
if !checkSealTransit {
diagnose.Skipped(ctx, "No transit seal found!")
}
return nil
})
sealcontext, sealspan := diagnose.StartSpan(ctx, "create-seal")
sealcontext, sealspan := diagnose.StartSpan(ctx, "Create Vault Server Configuration Seals")
var seals []vault.Seal
var sealConfigError error
@@ -443,11 +400,12 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
// Check error here
if err != nil {
diagnose.Fail(sealcontext, err.Error())
diagnose.Advise(ctx, "For assistance with the seal stanza, see the Vault configuration documentation.")
diagnose.Fail(sealcontext, fmt.Sprintf("Seal creation resulted in the following error: %s.", err.Error()))
goto SEALFAIL
}
if sealConfigError != nil {
diagnose.Fail(sealcontext, "seal could not be configured: seals may already be initialized")
diagnose.Fail(sealcontext, "Seal could not be configured: seals may already be initialized.")
goto SEALFAIL
}
@@ -455,11 +413,12 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
for _, seal := range seals {
// Ensure that the seal finalizer is called, even if using verify-only
defer func(seal *vault.Seal) {
sealType := (*seal).BarrierType()
finalizeSealContext, finalizeSealSpan := diagnose.StartSpan(ctx, "finalize-seal-"+sealType)
sealType := diagnose.CapitalizeFirstLetter((*seal).BarrierType())
finalizeSealContext, finalizeSealSpan := diagnose.StartSpan(ctx, "Finalize "+sealType+" Seal")
err = (*seal).Finalize(finalizeSealContext)
if err != nil {
diagnose.Fail(finalizeSealContext, "error finalizing seal")
diagnose.Fail(finalizeSealContext, "Error finalizing seal.")
diagnose.Advise(finalizeSealContext, "This likely means that the barrier is still in use; therefore, finalizing the seal timed out.")
finalizeSealSpan.End()
}
finalizeSealSpan.End()
@@ -468,27 +427,80 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
}
if barrierSeal == nil {
diagnose.Fail(sealcontext, "could not create barrier seal! Most likely proper Seal configuration information was not set, but no error was generated")
diagnose.Fail(sealcontext, "Could not create barrier seal. No error was generated, but it is likely that the seal stanza is misconfigured. For guidance, see Vault's configuration documentation on the seal stanza.")
}
SEALFAIL:
sealspan.End()
diagnose.Test(ctx, "Check Transit Seal TLS", func(ctx context.Context) error {
var checkSealTransit bool
for _, seal := range config.Seals {
if seal.Type == "transit" {
checkSealTransit = true
tlsSkipVerify, _ := seal.Config["tls_skip_verify"]
if tlsSkipVerify == "true" {
diagnose.Warn(ctx, "TLS verification is skipped. This is highly discouraged and decreases the security of data transmissions to and from the Vault server.")
return nil
}
// Checking tls_client_cert and tls_client_key
tlsClientCert, ok := seal.Config["tls_client_cert"]
if !ok {
diagnose.Warn(ctx, "Missing tls_client_cert in the seal configuration.")
return nil
}
tlsClientKey, ok := seal.Config["tls_client_key"]
if !ok {
diagnose.Warn(ctx, "Missing tls_client_key in the seal configuration.")
return nil
}
_, err := diagnose.TLSFileChecks(tlsClientCert, tlsClientKey)
if err != nil {
return fmt.Errorf("The TLS certificate and key configured through the tls_client_cert and tls_client_key fields of the transit seal configuration are invalid: %w.", err)
}
// checking tls_ca_cert
tlsCACert, ok := seal.Config["tls_ca_cert"]
if !ok {
diagnose.Warn(ctx, "Missing tls_ca_cert in the seal configuration.")
return nil
}
warnings, err := diagnose.TLSCAFileCheck(tlsCACert)
if len(warnings) != 0 {
for _, warning := range warnings {
diagnose.Warn(ctx, warning)
}
}
if err != nil {
return fmt.Errorf("The TLS CA certificate configured through the tls_ca_cert field of the transit seal configuration is invalid: %w.", err)
}
}
}
if !checkSealTransit {
diagnose.Skipped(ctx, "No transit seal found in seal configuration.")
}
return nil
})
var coreConfig vault.CoreConfig
diagnose.Test(ctx, "setup-core", func(ctx context.Context) error {
diagnose.Test(ctx, "Create Core Configuration", func(ctx context.Context) error {
var secureRandomReader io.Reader
// prepare a secure random reader for core
randReaderTestName := "Initialize Randomness for Core"
secureRandomReader, err = configutil.CreateSecureRandomReaderFunc(config.SharedConfig, barrierWrapper)
if err != nil {
return diagnose.SpotError(ctx, "init-randreader", err)
return diagnose.SpotError(ctx, randReaderTestName, fmt.Errorf("Could not initialize randomness for core: %w.", err))
}
diagnose.SpotOk(ctx, "init-randreader", "")
diagnose.SpotOk(ctx, randReaderTestName, "")
coreConfig = createCoreConfig(server, config, *backend, configSR, barrierSeal, unwrapSeal, metricsHelper, metricSink, secureRandomReader)
return nil
})
var disableClustering bool
diagnose.Test(ctx, "setup-ha-storage", func(ctx context.Context) error {
diagnose.Test(ctx, "create-ha-storage-backend", func(ctx context.Context) error {
diagnose.Test(ctx, "HA Storage", func(ctx context.Context) error {
diagnose.Test(ctx, "Create HA Storage Backend", func(ctx context.Context) error {
// Initialize the separate HA storage backend, if it exists
disableClustering, err = initHaBackend(server, config, &coreConfig, *backend)
if err != nil {
@@ -497,19 +509,22 @@ SEALFAIL:
return nil
})
diagnose.Test(ctx, "test-consul-direct-access-storage", func(ctx context.Context) error {
diagnose.Test(ctx, "Check HA Consul Direct Storage Access", func(ctx context.Context) error {
if config.HAStorage == nil {
diagnose.Skipped(ctx, "no HA storage configured")
diagnose.Skipped(ctx, "No HA storage stanza is configured.")
} else {
dirAccess := diagnose.ConsulDirectAccess(config.HAStorage.Config)
if dirAccess != "" {
diagnose.Warn(ctx, dirAccess)
}
if dirAccess == diagnose.DirAccessErr {
diagnose.Advise(ctx, diagnose.DirAccessAdvice)
}
}
return nil
})
if config.HAStorage != nil && config.HAStorage.Type == storageTypeConsul {
diagnose.Test(ctx, "test-ha-storage-tls-consul", func(ctx context.Context) error {
diagnose.Test(ctx, "Check Consul TLS", func(ctx context.Context) error {
err = physconsul.SetupSecureTLS(ctx, api.DefaultConfig(), config.HAStorage.Config, server.logger, true)
if err != nil {
return err
@@ -523,22 +538,23 @@ SEALFAIL:
// Determine the redirect address from environment variables
err = determineRedirectAddr(server, &coreConfig, config)
if err != nil {
return diagnose.SpotError(ctx, "determine-redirect", err)
return diagnose.SpotError(ctx, "Determine Redirect Address", fmt.Errorf("Redirect Address could not be determined: %w.", err))
}
diagnose.SpotOk(ctx, "determine-redirect", "")
diagnose.SpotOk(ctx, "Determine Redirect Address", "")
err = findClusterAddress(server, &coreConfig, config, disableClustering)
if err != nil {
return diagnose.SpotError(ctx, "find-cluster-addr", err)
diagnose.Advise(ctx, "Please check that the API and Cluster addresses are different, and that the API, Cluster and Redirect addresses have both a host and port.")
return diagnose.SpotError(ctx, "Check Cluster Address", fmt.Errorf("Cluster Address could not be determined or was invalid: %w.", err))
}
diagnose.SpotOk(ctx, "find-cluster-addr", "")
diagnose.SpotOk(ctx, "Check Cluster Address", "Cluster address is logically valid and can be found.")
var vaultCore *vault.Core
// Run all the checks that are utilized when initializing a core object
// without actually calling core.Init. These are in the init-core section
// as they are runtime checks.
diagnose.Test(ctx, "init-core", func(ctx context.Context) error {
diagnose.Test(ctx, "Check Core Creation", func(ctx context.Context) error {
var newCoreError error
if coreConfig.RawConfig == nil {
return fmt.Errorf(CoreConfigUninitializedErr)
@@ -546,11 +562,10 @@ SEALFAIL:
core, newCoreError := vault.CreateCore(&coreConfig)
if newCoreError != nil {
if vault.IsFatalError(newCoreError) {
return fmt.Errorf("Error initializing core: %s", newCoreError)
return fmt.Errorf("Error initializing core: %s.", newCoreError)
}
diagnose.Warn(ctx, wrapAtLength(
"WARNING! A non-fatal error occurred during initialization. Please "+
"check the logs for more information."))
"A non-fatal error occurred during initialization. Please check the logs for more information."))
} else {
vaultCore = core
}
@@ -558,10 +573,10 @@ SEALFAIL:
})
if vaultCore == nil {
return fmt.Errorf("Diagnose could not initialize the vault core from the vault server configuration.")
return fmt.Errorf("Diagnose could not initialize the Vault core from the Vault server configuration.")
}
licenseCtx, licenseSpan := diagnose.StartSpan(ctx, "autoloaded license")
licenseCtx, licenseSpan := diagnose.StartSpan(ctx, "Check For Autoloaded License")
// If we are not in enterprise, return from the check
if !constants.IsEnterprise {
diagnose.Skipped(licenseCtx, "License check will not run on OSS Vault.")
@@ -579,7 +594,7 @@ SEALFAIL:
licenseSpan.End()
var lns []listenerutil.Listener
diagnose.Test(ctx, "init-listeners", func(ctx context.Context) error {
diagnose.Test(ctx, "Start Listeners", func(ctx context.Context) error {
disableClustering := config.HAStorage != nil && config.HAStorage.DisableClustering
infoKeys := make([]string, 0, 10)
info := make(map[string]string)
@@ -588,7 +603,7 @@ SEALFAIL:
diagnose.ListenerChecks(ctx, config.Listeners)
diagnose.Test(ctx, "create-listeners", func(ctx context.Context) error {
diagnose.Test(ctx, "Create Listeners", func(ctx context.Context) error {
status, listeners, _, err = server.InitListeners(config, disableClustering, &infoKeys, &info)
if status != 0 {
return err
@@ -614,18 +629,18 @@ SEALFAIL:
// The unseal diagnose check will simply attempt to use the barrier to encrypt and
// decrypt a mock value. It will not call runUnseal.
diagnose.Test(ctx, "unseal", diagnose.WithTimeout(30*time.Second, func(ctx context.Context) error {
diagnose.Test(ctx, "Check Barrier Encryption", diagnose.WithTimeout(30*time.Second, func(ctx context.Context) error {
if barrierWrapper == nil {
return fmt.Errorf("Diagnose could not create a barrier seal object")
return fmt.Errorf("Diagnose could not create a barrier seal object.")
}
barrierUUID, err := uuid.GenerateUUID()
if err != nil {
return fmt.Errorf("Diagnose could not create unique UUID for unsealing")
return fmt.Errorf("Diagnose could not create unique UUID for unsealing.")
}
barrierEncValue := "diagnose-" + barrierUUID
ciphertext, err := barrierWrapper.Encrypt(ctx, []byte(barrierEncValue), nil)
if err != nil {
return fmt.Errorf("Error encrypting with seal barrier: %w", err)
return fmt.Errorf("Error encrypting with seal barrier: %w.", err)
}
plaintext, err := barrierWrapper.Decrypt(ctx, ciphertext, nil)
if err != nil {
@@ -633,18 +648,20 @@ SEALFAIL:
}
if string(plaintext) != barrierEncValue {
return fmt.Errorf("barrier returned incorrect decrypted value for mock data")
return fmt.Errorf("Barrier returned incorrect decrypted value for mock data.")
}
return nil
}))
// The following block contains static checks that are run during the
// startHttpServers portion of server run. In other words, they are static
// checks during resource creation.
diagnose.Test(ctx, "start-servers", func(ctx context.Context) error {
// checks during resource creation. Currently there is nothing important in this
// diagnose check. For now it is a placeholder for any checks that will be done
// before server run.
diagnose.Test(ctx, "Check Server Before Runtime", func(ctx context.Context) error {
for _, ln := range lns {
if ln.Config == nil {
return fmt.Errorf("Found nil listener config after parsing")
return fmt.Errorf("Found no listener config after parsing the Vault configuration.")
}
}
return nil