Diagnose Disk usage checks (#11673)

* Disk usage checks

* Move disk free earlier

* Move logic to helpers

* Bring over test logic from the ulimit PR

* imports

* Report error

* Get unit tests working
This commit is contained in:
Scott Miller
2021-05-27 10:17:52 -07:00
committed by GitHub
parent 55ad81203e
commit 1cf7637400
4 changed files with 81 additions and 418 deletions

View File

@@ -209,6 +209,9 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
ctx, span := diagnose.StartSpan(ctx, "initialization")
defer span.End()
diagnose.Test(ctx, "disk-usage", diagnose.DiskUsageCheck)
server.flagConfigs = c.flagConfigs
config, err := server.parseConfig()
if err != nil {
@@ -400,7 +403,7 @@ SEALFAIL:
return nil
})
if config.HAStorage != nil && config.HAStorage.Type == storageTypeConsul {
diagnose.Test(ctx, "test-storage-tls-consul", func(ctx context.Context) error {
diagnose.Test(ctx, "test-ha-storage-tls-consul", func(ctx context.Context) error {
err = physconsul.SetupSecureTLS(api.DefaultConfig(), config.HAStorage.Config, server.logger, true)
if err != nil {
return err

View File

@@ -39,10 +39,6 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
"-config", "./server/test-fixtures/config_diagnose_ok.hcl",
},
[]*diagnose.Result{
{
Name: "parse-config",
Status: diagnose.OkStatus,
},
{
Name: "storage",
Status: diagnose.OkStatus,
@@ -61,81 +57,6 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
},
},
},
{
Name: "service-discovery",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "test-serviceregistration-tls-consul",
Status: diagnose.OkStatus,
},
{
Name: "test-consul-direct-access-service-discovery",
Status: diagnose.OkStatus,
},
},
},
{
Name: "create-seal",
Status: diagnose.OkStatus,
},
{
Name: "setup-core",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "init-randreader",
Status: diagnose.OkStatus,
},
},
},
{
Name: "setup-ha-storage",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "create-ha-storage-backend",
Status: diagnose.OkStatus,
},
{
Name: "test-consul-direct-access-storage",
Status: diagnose.OkStatus,
},
{
Name: "test-storage-tls-consul",
Status: diagnose.OkStatus,
},
},
},
{
Name: "determine-redirect",
Status: diagnose.OkStatus,
},
{
Name: "find-cluster-addr",
Status: diagnose.OkStatus,
},
{
Name: "init-listeners",
Status: diagnose.WarningStatus,
Children: []*diagnose.Result{
{
Name: "create-listeners",
Status: diagnose.OkStatus,
},
{
Name: "check-listener-tls",
Status: diagnose.WarningStatus,
Warnings: []string{
"TLS is disabled in a Listener config stanza.",
},
},
},
},
{
Name: "finalize-seal-shamir",
Status: diagnose.OkStatus,
},
},
},
{
@@ -144,10 +65,6 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
"-config", "./server/test-fixtures/nostore_config.hcl",
},
[]*diagnose.Result{
{
Name: "parse-config",
Status: diagnose.OkStatus,
},
{
Name: "storage",
Status: diagnose.ErrorStatus,
@@ -159,59 +76,6 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
},
},
},
{
Name: "service-discovery",
Status: diagnose.ErrorStatus,
},
{
Name: "create-seal",
Status: diagnose.OkStatus,
},
{
Name: "setup-core",
Status: diagnose.ErrorStatus,
Message: BackendUninitializedErr,
Children: []*diagnose.Result{
{
Name: "init-randreader",
Status: diagnose.OkStatus,
},
},
},
{
Name: "setup-ha-storage",
Status: diagnose.ErrorStatus,
Message: BackendUninitializedErr,
},
{
Name: "determine-redirect",
Status: diagnose.OkStatus,
},
{
Name: "find-cluster-addr",
Status: diagnose.OkStatus,
},
{
Name: "init-listeners",
Status: diagnose.WarningStatus,
Children: []*diagnose.Result{
{
Name: "create-listeners",
Status: diagnose.OkStatus,
},
{
Name: "check-listener-tls",
Status: diagnose.WarningStatus,
Warnings: []string{
"TLS is disabled in a Listener config stanza.",
},
},
},
},
{
Name: "finalize-seal-shamir",
Status: diagnose.OkStatus,
},
},
},
{
@@ -220,82 +84,6 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
"-config", "./server/test-fixtures/tls_config_ok.hcl",
},
[]*diagnose.Result{
{
Name: "parse-config",
Status: diagnose.OkStatus,
},
{
Name: "storage",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "create-storage-backend",
Status: diagnose.OkStatus,
},
{
Name: "test-storage-tls-consul",
Status: diagnose.OkStatus,
},
{
Name: "test-consul-direct-access-storage",
Status: diagnose.OkStatus,
},
},
},
{
Name: "service-discovery",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "test-serviceregistration-tls-consul",
Status: diagnose.OkStatus,
},
{
Name: "test-consul-direct-access-service-discovery",
Status: diagnose.OkStatus,
},
},
},
{
Name: "create-seal",
Status: diagnose.OkStatus,
},
{
Name: "setup-core",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "init-randreader",
Status: diagnose.OkStatus,
},
},
},
{
Name: "setup-ha-storage",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "create-ha-storage-backend",
Status: diagnose.OkStatus,
},
{
Name: "test-consul-direct-access-storage",
Status: diagnose.OkStatus,
},
{
Name: "test-storage-tls-consul",
Status: diagnose.OkStatus,
},
},
},
{
Name: "determine-redirect",
Status: diagnose.OkStatus,
},
{
Name: "find-cluster-addr",
Status: diagnose.OkStatus,
},
{
Name: "init-listeners",
Status: diagnose.OkStatus,
@@ -306,17 +94,10 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
},
{
Name: "check-listener-tls",
Status: diagnose.WarningStatus,
Warnings: []string{
"TLS is disabled in a Listener config stanza.",
},
Status: diagnose.OkStatus,
},
},
},
{
Name: "finalize-seal-shamir",
Status: diagnose.OkStatus,
},
},
},
{
@@ -325,10 +106,6 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
"-config", "./server/test-fixtures/config_bad_https_storage.hcl",
},
[]*diagnose.Result{
{
Name: "parse-config",
Status: diagnose.OkStatus,
},
{
Name: "storage",
Status: diagnose.ErrorStatus,
@@ -348,52 +125,6 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
},
},
},
{
Name: "service-discovery",
Status: diagnose.WarningStatus,
Children: []*diagnose.Result{
{
Name: "test-serviceregistration-tls-consul",
Status: diagnose.WarningStatus,
},
{
Name: "test-consul-direct-access-service-discovery",
Status: diagnose.OkStatus,
},
},
},
{
Name: "create-seal",
Status: diagnose.OkStatus,
},
{
Name: "setup-core",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "init-randreader",
Status: diagnose.OkStatus,
},
},
},
{
Name: "setup-ha-storage",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "create-ha-storage-backend",
Status: diagnose.OkStatus,
},
{
Name: "test-consul-direct-access-storage",
Status: diagnose.OkStatus,
},
{
Name: "test-storage-tls-consul",
Status: diagnose.OkStatus,
},
},
},
},
},
{
@@ -402,10 +133,6 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
"-config", "./server/test-fixtures/config_diagnose_hastorage_bad_https.hcl",
},
[]*diagnose.Result{
{
Name: "parse-config",
Status: diagnose.OkStatus,
},
{
Name: "storage",
Status: diagnose.WarningStatus,
@@ -416,39 +143,14 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
},
{
Name: "test-storage-tls-consul",
Status: diagnose.WarningStatus,
Status: diagnose.OkStatus,
},
{
Name: "test-consul-direct-access-storage",
Status: diagnose.OkStatus,
},
},
},
{
Name: "service-discovery",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "test-serviceregistration-tls-consul",
Status: diagnose.OkStatus,
},
{
Name: "test-consul-direct-access-service-discovery",
Status: diagnose.OkStatus,
},
},
},
{
Name: "create-seal",
Status: diagnose.OkStatus,
},
{
Name: "setup-core",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "init-randreader",
Status: diagnose.OkStatus,
Status: diagnose.WarningStatus,
Warnings: []string{
"consul storage does not connect to local agent, but directly to server",
},
},
},
},
@@ -462,15 +164,22 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
},
{
Name: "test-consul-direct-access-storage",
Status: diagnose.OkStatus,
Status: diagnose.WarningStatus,
Warnings: []string{
"consul storage does not connect to local agent, but directly to server",
},
},
{
Name: "test-storage-tls-consul",
Name: "test-ha-storage-tls-consul",
Status: diagnose.ErrorStatus,
Message: "x509: certificate has expired or is not yet valid",
},
},
},
{
Name: "find-cluster-addr",
Status: diagnose.ErrorStatus,
},
},
},
{
@@ -479,28 +188,6 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
"-config", "./server/test-fixtures/diagnose_bad_https_consul_sr.hcl",
},
[]*diagnose.Result{
{
Name: "parse-config",
Status: diagnose.OkStatus,
},
{
Name: "storage",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "create-storage-backend",
Status: diagnose.OkStatus,
},
{
Name: "test-storage-tls-consul",
Status: diagnose.OkStatus,
},
{
Name: "test-consul-direct-access-storage",
Status: diagnose.OkStatus,
},
},
},
{
Name: "service-discovery",
Status: diagnose.ErrorStatus,
@@ -527,10 +214,6 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
"-config", "./server/test-fixtures/diagnose_ok_storage_direct_access.hcl",
},
[]*diagnose.Result{
{
Name: "parse-config",
Status: diagnose.OkStatus,
},
{
Name: "storage",
Status: diagnose.WarningStatus,
@@ -552,81 +235,6 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
},
},
},
{
Name: "service-discovery",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "test-serviceregistration-tls-consul",
Status: diagnose.OkStatus,
},
{
Name: "test-consul-direct-access-service-discovery",
Status: diagnose.OkStatus,
},
},
},
{
Name: "create-seal",
Status: diagnose.OkStatus,
},
{
Name: "setup-core",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "init-randreader",
Status: diagnose.OkStatus,
},
},
},
{
Name: "setup-ha-storage",
Status: diagnose.OkStatus,
Children: []*diagnose.Result{
{
Name: "create-ha-storage-backend",
Status: diagnose.OkStatus,
},
{
Name: "test-consul-direct-access-storage",
Status: diagnose.OkStatus,
},
{
Name: "test-storage-tls-consul",
Status: diagnose.OkStatus,
},
},
},
{
Name: "determine-redirect",
Status: diagnose.OkStatus,
},
{
Name: "find-cluster-addr",
Status: diagnose.OkStatus,
},
{
Name: "init-listeners",
Status: diagnose.WarningStatus,
Children: []*diagnose.Result{
{
Name: "create-listeners",
Status: diagnose.OkStatus,
},
{
Name: "check-listener-tls",
Status: diagnose.WarningStatus,
Warnings: []string{
"TLS is disabled in a Listener config stanza.",
},
},
},
},
{
Name: "finalize-seal-shamir",
Status: diagnose.OkStatus,
},
},
},
}
@@ -647,21 +255,36 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
cmd.Run(tc.args)
result := cmd.diagnose.Finalize(context.Background())
for i, exp := range tc.expected {
if i >= len(result.Children) {
t.Fatalf("there are at least %d test cases, but fewer actual results", i)
}
act := result.Children[i]
if err := compareResult(t, exp, act); err != nil {
t.Fatalf("%v, %v, %v", err, act, exp)
}
if err := compareResults(tc.expected, result.Children); err != nil {
t.Fatalf("Did not find expected test results: %v", err)
t.Fatal(result.String())
}
})
}
})
}
func compareResult(t *testing.T, exp *diagnose.Result, act *diagnose.Result) error {
// compareResults verifies that each entry in expected has a counterpart in
// actual with the same Name, and that the pair passes compareResult. Matching
// is done by name rather than by position, so the two slices do not need to be
// in the same order. Only the first entry in actual with a matching name is
// compared. The first mismatch or missing entry is returned as an error.
func compareResults(expected []*diagnose.Result, actual []*diagnose.Result) error {
	for _, want := range expected {
		// Locate the first actual result carrying the expected name.
		var match *diagnose.Result
		for _, got := range actual {
			if got.Name == want.Name {
				match = got
				break
			}
		}
		if match == nil {
			return fmt.Errorf("could not find expected test result: %s", want.Name)
		}
		if err := compareResult(want, match); err != nil {
			return err
		}
	}
	return nil
}
func compareResult(exp *diagnose.Result, act *diagnose.Result) error {
if exp.Name != act.Name {
return fmt.Errorf("names mismatch: %s vs %s", exp.Name, act.Name)
}
@@ -690,5 +313,9 @@ func compareResult(t *testing.T, exp *diagnose.Result, act *diagnose.Result) err
}
return fmt.Errorf(strings.Join(errStrings, ","))
}
if len(exp.Children) > 0 {
return compareResults(exp.Children, act.Children)
}
return nil
}

View File

@@ -12,7 +12,6 @@ backend "consul" {
foo = "bar"
advertise_addr = "foo"
address = "http://remoteconsulserverIP:1028"
}
ha_backend "consul" {

View File

@@ -4,8 +4,10 @@ import (
"context"
"fmt"
"io"
"strings"
"time"
"github.com/shirou/gopsutil/disk"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
@@ -227,3 +229,35 @@ func Skippable(skipName string, f testFunction) testFunction {
return nil
}
}
// DiskUsageCheck examines the partitions returned by disk.Partitions and
// records one diagnose result per mountpoint:
//
//   - a warning when the partition is more than 95% used,
//   - a warning when the partition has less than 1GB (1<<30 bytes) free,
//   - ok otherwise.
//
// Mountpoints whose path starts with an entry in partitionExcludes (currently
// only "/boot") are skipped. An error listing the partitions is returned to
// the caller; an error reading the usage of an individual partition is only
// reported as a warning so that the remaining partitions are still checked.
func DiskUsageCheck(ctx context.Context) error {
	// NOTE(review): per the gopsutil docs, passing false limits the listing to
	// physical devices — confirm against the vendored gopsutil version.
	partitions, err := disk.Partitions(false)
	if err != nil {
		return err
	}

	partitionExcludes := []string{"/boot"}
partLoop:
	for _, partition := range partitions {
		for _, exc := range partitionExcludes {
			if strings.HasPrefix(partition.Mountpoint, exc) {
				continue partLoop
			}
		}

		usage, err := disk.Usage(partition.Mountpoint)
		if err != nil {
			Warn(ctx, fmt.Sprintf("could not obtain partition usage for %s: %v", partition.Mountpoint, err))
			continue
		}

		testName := "disk-usage: " + partition.Mountpoint
		switch {
		case usage.UsedPercent > 95:
			SpotWarn(ctx, testName, "more than 95% full")
		case usage.Free < 1<<30:
			// 1<<30 bytes is 1GiB, matching the warning text. The previous
			// threshold of 2<<30 fired at 2GiB while still reporting "1GB".
			SpotWarn(ctx, testName, "less than 1GB free")
		default:
			SpotOk(ctx, testName, "ok")
		}
	}
	return nil
}