Move the disk usage check into diagnose.OSChecks and add a "skipped"
spot-check result.

Review notes on this revision of the patch:
  * os_common.go: the build constraint is "!openbsd !arm" so that, together
    with os_openbsd_arm.go ("openbsd,arm"), exactly one diskUsage is compiled
    on every target. The free-space threshold is 1<<30 so that it matches the
    "less than 1GB free" message.
  * os_openbsd_arm.go: imports context, passes ctx to SpotSkipped, returns nil.
  * OSChecks (unix and windows) reports a diskUsage error through SpotError
    instead of discarding it.
  * Leading whitespace and blank context lines were reconstructed from the
    hunk line counts; verify with "git apply --check". The index lines still
    carry the blob ids of the earlier revision.

diff --git a/command/operator_diagnose.go b/command/operator_diagnose.go
index 4f9619b034..99f8c4d516 100644
--- a/command/operator_diagnose.go
+++ b/command/operator_diagnose.go
@@ -211,9 +211,7 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
 	defer span.End()
 
 	// OS Specific checks
-	// Check open file count
 	diagnose.OSChecks(ctx)
-	diagnose.Test(ctx, "disk-usage", diagnose.DiskUsageCheck)
 
 	server.flagConfigs = c.flagConfigs
 	config, err := server.parseConfig()
diff --git a/command/operator_diagnose_test.go b/command/operator_diagnose_test.go
index 9233bfd287..10da07ea28 100644
--- a/command/operator_diagnose_test.go
+++ b/command/operator_diagnose_test.go
@@ -40,9 +40,20 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
 			},
 			[]*diagnose.Result{
 				{
-					Name:   "open file limits",
+					Name:   "operating system",
 					Status: diagnose.OkStatus,
+					Children: []*diagnose.Result{
+						{
+							Name:   "open file limits",
+							Status: diagnose.OkStatus,
+						},
+						{
+							Name:   "disk usage",
+							Status: diagnose.OkStatus,
+						},
+					},
 				},
+
 				{
 					Name:   "parse-config",
 					Status: diagnose.OkStatus,
@@ -331,7 +342,7 @@ func compareResult(exp *diagnose.Result, act *diagnose.Result) error {
 			return fmt.Errorf("section %s, warning message not found: %s in %s", exp.Name, exp.Warnings[j], act.Warnings[j])
 		}
 	}
-	if len(exp.Children) != len(act.Children) {
+	if len(exp.Children) > len(act.Children) {
 		errStrings := []string{}
 		for _, c := range act.Children {
 			errStrings = append(errStrings, fmt.Sprintf("%+v", c))
diff --git a/vault/diagnose/helpers.go b/vault/diagnose/helpers.go
index 68237e6dfd..ee62398aef 100644
--- a/vault/diagnose/helpers.go
+++ b/vault/diagnose/helpers.go
@@ -4,10 +4,8 @@ import (
 	"context"
 	"fmt"
 	"io"
-	"strings"
 	"time"
 
-	"github.com/shirou/gopsutil/disk"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/codes"
 	sdktrace "go.opentelemetry.io/otel/sdk/trace"
@@ -15,15 +13,16 @@ import (
 )
 
 const (
-	warningEventName        = "warning"
-	skippedEventName        = "skipped"
-	actionKey               = "actionKey"
-	spotCheckOkEventName    = "spot-check-ok"
-	spotCheckWarnEventName  = "spot-check-warn"
-	spotCheckErrorEventName = "spot-check-error"
-	errorMessageKey         = attribute.Key("error.message")
-	nameKey                 = attribute.Key("name")
-	messageKey              = attribute.Key("message")
+	warningEventName          = "warning"
+	skippedEventName          = "skipped"
+	actionKey                 = "actionKey"
+	spotCheckOkEventName      = "spot-check-ok"
+	spotCheckWarnEventName    = "spot-check-warn"
+	spotCheckErrorEventName   = "spot-check-error"
+	spotCheckSkippedEventName = "spot-check-skipped"
+	errorMessageKey           = attribute.Key("error.message")
+	nameKey                   = attribute.Key("name")
+	messageKey                = attribute.Key("message")
 )
 
 var (
@@ -162,6 +161,11 @@ func SpotError(ctx context.Context, checkName string, err error, options ...trac
 	return err
 }
 
+// SpotSkipped adds a Skipped result without adding a new Span.
+func SpotSkipped(ctx context.Context, checkName, message string, options ...trace.EventOption) {
+	addSpotCheckResult(ctx, spotCheckSkippedEventName, checkName, message, options...)
+}
+
 func addSpotCheckResult(ctx context.Context, eventName, checkName, message string, options ...trace.EventOption) {
 	span := trace.SpanFromContext(ctx)
 	attrs := append(options, trace.WithAttributes(nameKey.String(checkName)))
@@ -229,35 +233,3 @@ func Skippable(skipName string, f testFunction) testFunction {
 		return nil
 	}
 }
-
-func DiskUsageCheck(ctx context.Context) error {
-	partitions, err := disk.Partitions(false)
-	if err != nil {
-		return err
-	}
-
-	partitionExcludes := []string{"/boot"}
-partLoop:
-	for _, partition := range partitions {
-		for _, exc := range partitionExcludes {
-			if strings.HasPrefix(partition.Mountpoint, exc) {
-				continue partLoop
-			}
-		}
-		usage, err := disk.Usage(partition.Mountpoint)
-		testName := "disk-usage: " + partition.Mountpoint
-		if err != nil {
-			Warn(ctx, fmt.Sprintf("could not obtain partition usage for %s: %v", partition.Mountpoint, err))
-		} else {
-			if usage.UsedPercent > 95 {
-				SpotWarn(ctx, testName, "more than 95% full")
-			} else if usage.Free < 2<<30 {
-				SpotWarn(ctx, testName, "less than 1GB free")
-			} else {
-				SpotOk(ctx, testName, "ok")
-			}
-		}
-
-	}
-	return nil
-}
diff --git a/vault/diagnose/os_common.go b/vault/diagnose/os_common.go
new file mode 100644
index 0000000000..2e2dc4efea
--- /dev/null
+++ b/vault/diagnose/os_common.go
@@ -0,0 +1,51 @@
+// Built on every target except openbsd/arm, which os_openbsd_arm.go covers.
+// The space in the constraint below is an OR: "not openbsd, or not arm".
+
+// +build !openbsd !arm
+
+package diagnose
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"github.com/shirou/gopsutil/disk"
+)
+
+// diskUsage adds one "disk usage" spot check to ctx for every partition
+// returned by disk.Partitions, skipping mount points under /boot. A partition
+// whose usage cannot be read is reported through Warn and does not fail the
+// check; an error is returned only when the partition list cannot be read.
+func diskUsage(ctx context.Context) error {
+	partitions, err := disk.Partitions(false)
+	if err != nil {
+		return err
+	}
+
+	partitionExcludes := []string{"/boot"}
+partLoop:
+	for _, partition := range partitions {
+		for _, exc := range partitionExcludes {
+			if strings.HasPrefix(partition.Mountpoint, exc) {
+				continue partLoop
+			}
+		}
+		usage, err := disk.Usage(partition.Mountpoint)
+		if err != nil {
+			Warn(ctx, fmt.Sprintf("could not obtain partition usage for %s: %v", partition.Mountpoint, err))
+			continue
+		}
+
+		testName := "disk usage"
+		switch {
+		case usage.UsedPercent > 95:
+			SpotWarn(ctx, testName, partition.Mountpoint+" more than 95% full")
+		case usage.Free < 1<<30: // 1 GiB, the limit named in the message
+			SpotWarn(ctx, testName, partition.Mountpoint+" less than 1GB free")
+		default:
+			SpotOk(ctx, testName, partition.Mountpoint+" ok")
+		}
+	}
+	return nil
+}
diff --git a/vault/diagnose/os_openbsd_arm.go b/vault/diagnose/os_openbsd_arm.go
new file mode 100644
index 0000000000..df8fcf3e74
--- /dev/null
+++ b/vault/diagnose/os_openbsd_arm.go
@@ -0,0 +1,12 @@
+// +build openbsd,arm
+
+package diagnose
+
+import "context"
+
+// diskUsage records the disk usage check as skipped; this build does not
+// query partitions on openbsd/arm. It always returns nil.
+func diskUsage(ctx context.Context) error {
+	SpotSkipped(ctx, "disk usage", "unsupported on this platform")
+	return nil
+}
diff --git a/vault/diagnose/os_unix.go b/vault/diagnose/os_unix.go
index 578ee8303e..9943dfa2ee 100644
--- a/vault/diagnose/os_unix.go
+++ b/vault/diagnose/os_unix.go
@@ -9,6 +9,9 @@ import (
 )
 
 func OSChecks(ctx context.Context) {
+	ctx, span := StartSpan(ctx, "operating system")
+	defer span.End()
+
 	var limit unix.Rlimit
 	if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &limit); err != nil {
 		SpotError(ctx, "open file limits", fmt.Errorf("could not determine open file limit: %w", err))
@@ -23,4 +26,10 @@ func OSChecks(ctx context.Context) {
 			SpotOk(ctx, "open file limits", fmt.Sprintf("set to %d", min))
 		}
 	}
+
+	// diskUsage returns an error only when the partition list cannot be
+	// read; report it through SpotError instead of discarding it.
+	if err := diskUsage(ctx); err != nil {
+		SpotError(ctx, "disk usage", err)
+	}
 }
diff --git a/vault/diagnose/os_windows.go b/vault/diagnose/os_windows.go
index 3f89feb5ca..8e0b4b8b47 100644
--- a/vault/diagnose/os_windows.go
+++ b/vault/diagnose/os_windows.go
@@ -7,5 +7,12 @@ import (
 )
 
 func OSChecks(ctx context.Context) {
-	// None so far
+	ctx, span := StartSpan(ctx, "operating system")
+	defer span.End()
+
+	// diskUsage returns an error only when the partition list cannot be
+	// read; report it through SpotError instead of discarding it.
+	if err := diskUsage(ctx); err != nil {
+		SpotError(ctx, "disk usage", err)
+	}
 }
diff --git a/vault/diagnose/output.go b/vault/diagnose/output.go
index 8ebfdbf505..b4890afa20 100644
--- a/vault/diagnose/output.go
+++ b/vault/diagnose/output.go
@@ -266,7 +266,28 @@ func (t *TelemetryCollector) getOrBuildResult(id trace.SpanID) *Result {
 							Time:    e.Time,
 						})
 				}
+			case spotCheckSkippedEventName:
+				var checkName string
+				var message string
+				for _, a := range e.Attributes {
+					switch a.Key {
+					case nameKey:
+						checkName = a.Value.AsString()
+					case messageKey:
+						message = a.Value.AsString()
+					}
+				}
+				if checkName != "" {
+					r.Children = append(r.Children,
+						&Result{
+							Name:    checkName,
+							Status:  SkippedStatus,
+							Message: message,
+							Time:    e.Time,
+						})
+				}
 			}
+
 		}
 		switch s.StatusCode() {
 		case codes.Unset: