VAULT-12732: Add Heap Profiling Option to Vault Server Command Line (#27033)

* Add pprof output option to CLI

* Add to docs

* Add changelog

* Update website/content/docs/commands/server.mdx

Co-authored-by: Sarah Chavis <62406755+schavis@users.noreply.github.com>

* some changes

* Update changelog/27033.txt

Co-authored-by: Josh Black <raskchanky@gmail.com>

---------

Co-authored-by: Sarah Chavis <62406755+schavis@users.noreply.github.com>
Co-authored-by: Josh Black <raskchanky@gmail.com>
This commit is contained in:
Luis (LT) Carbonell
2024-09-16 11:00:08 -04:00
committed by GitHub
parent a439428855
commit 352bbeb06c
3 changed files with 66 additions and 1 deletions

3
changelog/27033.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:improvement
command/server: Add support for dumping pprof files during startup using CLI option `pprof-dump-dir`
```

View File

@@ -119,6 +119,7 @@ type ServerCommand struct {
flagConfigs []string
flagRecovery bool
flagExperiments []string
flagCLIDump string
flagDev bool
flagDevTLS bool
flagDevTLSCertDir string
@@ -221,6 +222,13 @@ func (c *ServerCommand) Flags() *FlagSets {
"Valid experiments are: " + strings.Join(experiments.ValidExperiments(), ", "),
})
f.StringVar(&StringVar{
Name: "pprof-dump-dir",
Target: &c.flagCLIDump,
Completion: complete.PredictDirs("*"),
Usage: "Directory where generated profiles are created. If left unset, files are not generated.",
})
f = set.NewFlagSet("Dev Options")
f.BoolVar(&BoolVar{
@@ -1593,6 +1601,11 @@ func (c *ServerCommand) Run(args []string) int {
coreShutdownDoneCh = core.ShutdownDone()
}
cliDumpCh := make(chan struct{})
if c.flagCLIDump != "" {
go func() { cliDumpCh <- struct{}{} }()
}
// Wait for shutdown
shutdownTriggered := false
retCode := 0
@@ -1707,7 +1720,6 @@ func (c *ServerCommand) Run(args []string) int {
// Notify systemd that the server has completed reloading config
c.notifySystemd(systemd.SdNotifyReady)
case <-c.SigUSR2Ch:
logWriter := c.logger.StandardWriter(&hclog.StandardLoggerOptions{})
pprof.Lookup("goroutine").WriteTo(logWriter, 2)
@@ -1759,6 +1771,51 @@ func (c *ServerCommand) Run(args []string) int {
}
c.logger.Info(fmt.Sprintf("Wrote pprof files to: %s", pprofPath))
case <-cliDumpCh:
path := c.flagCLIDump
if _, err := os.Stat(path); err != nil && !errors.Is(err, os.ErrNotExist) {
c.logger.Error("Checking cli dump path failed", "error", err)
continue
}
pprofPath := filepath.Join(path, "vault-pprof")
err := os.MkdirAll(pprofPath, os.ModePerm)
if err != nil {
c.logger.Error("Could not create temporary directory for pprof", "error", err)
continue
}
dumps := []string{"goroutine", "heap", "allocs", "threadcreate", "profile"}
for _, dump := range dumps {
pFile, err := os.Create(filepath.Join(pprofPath, dump))
if err != nil {
c.logger.Error("error creating pprof file", "name", dump, "error", err)
break
}
if dump != "profile" {
err = pprof.Lookup(dump).WriteTo(pFile, 0)
if err != nil {
c.logger.Error("error generating pprof data", "name", dump, "error", err)
pFile.Close()
break
}
} else {
// CPU profiles must run for some duration to collect samples, so we
// profile for just one second to keep the time spent blocking here short.
if err := pprof.StartCPUProfile(pFile); err != nil {
c.logger.Error("could not start CPU profile: ", err)
pFile.Close()
break
}
time.Sleep(time.Second * 1)
pprof.StopCPUProfile()
}
pFile.Close()
}
c.logger.Info(fmt.Sprintf("Wrote startup pprof files to: %s", pprofPath))
}
}
// Notify systemd that the server is shutting down

View File

@@ -92,6 +92,11 @@ flags](/vault/docs/commands) included on all commands.
`VAULT_EXPERIMENTS` environment variable as a comma-separated list, or via the
[`experiments`](/vault/docs/configuration#experiments) config key.
- `-pprof-dump-dir` `(string: "")` - Directory where the generated profiles are
created. When set, Vault creates a `vault-pprof` subdirectory in the given
directory during startup and writes the profiles there. Vault does not
generate profiles when `-pprof-dump-dir` is unset. Use `-pprof-dump-dir`
temporarily during debugging sessions. Do not use `-pprof-dump-dir` in
regular production processes.
- `VAULT_ALLOW_PENDING_REMOVAL_MOUNTS` `(bool: false)` - (environment variable)
Allow Vault to be started with builtin engines which have the `Pending Removal`
deprecation state. This is a temporary stopgap in place in order to perform an