Add Reindex In Progress Metric (#23160)

* Add a telemetry metric to track if a reindex is in progress or not

* changelog

* Add other reindex related metrics

* cleanup types

* Add docs for these metrics

* check for nil values
This commit is contained in:
Luis (LT) Carbonell
2023-09-22 10:53:26 -04:00
committed by GitHub
parent 68dd82c902
commit c93137d9a3
7 changed files with 57 additions and 0 deletions

3
changelog/23160.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:improvement
replication: Add re-index status metric to telemetry
```

View File

@@ -116,6 +116,29 @@ func (c *Core) metricsLoop(stopCh chan struct{}) {
c.metricSink.SetGaugeWithLabels([]string{"core", "replication", "dr", "secondary"}, 0, nil)
}
if haState == consts.Active {
reindexState := c.ReindexStage()
if reindexState != nil {
c.metricSink.SetGaugeWithLabels([]string{"core", "replication", "reindex_stage"}, float32(*reindexState), nil)
} else {
c.metricSink.SetGaugeWithLabels([]string{"core", "replication", "reindex_stage"}, 0, nil)
}
buildProgress := c.BuildProgress()
if buildProgress != nil {
c.metricSink.SetGaugeWithLabels([]string{"core", "replication", "build_progress"}, float32(*buildProgress), nil)
} else {
c.metricSink.SetGaugeWithLabels([]string{"core", "replication", "build_progress"}, 0, nil)
}
buildTotal := c.BuildTotal()
if buildTotal != nil {
c.metricSink.SetGaugeWithLabels([]string{"core", "replication", "build_total"}, float32(*buildTotal), nil)
} else {
c.metricSink.SetGaugeWithLabels([]string{"core", "replication", "build_total"}, 0, nil)
}
}
// If we're using a raft backend, emit raft metrics
if rb, ok := c.underlyingPhysical.(*raft.RaftBackend); ok {
rb.CollectMetrics(c.MetricSink())

View File

@@ -76,6 +76,10 @@ func (c *Core) UndoLogsEnabled() bool { return false }
// UndoLogsPersisted is a stub: it always reports false with a nil error.
func (c *Core) UndoLogsPersisted() (bool, error) {
	return false, nil
}
// PersistUndoLogs is a stub: it performs no work and always returns nil.
func (c *Core) PersistUndoLogs() error {
	return nil
}
// ReindexStage is a stub: it has no reindex stage to report and always
// returns nil. metricsLoop publishes 0 for the
// core.replication.reindex_stage gauge when the result is nil.
func (c *Core) ReindexStage() *uint32 {
	return nil
}
// BuildProgress is a stub: it has no build progress to report and always
// returns nil. metricsLoop publishes 0 for the
// core.replication.build_progress gauge when the result is nil.
func (c *Core) BuildProgress() *uint32 {
	return nil
}
// BuildTotal is a stub: it has no build total to report and always returns
// nil. metricsLoop publishes 0 for the core.replication.build_total gauge
// when the result is nil.
func (c *Core) BuildTotal() *uint32 {
	return nil
}
// teardownReplicationResolverHandler is a stub with an empty body.
func (c *Core) teardownReplicationResolverHandler() {
	// Intentionally empty.
}
// createSecondaries is a stub with an empty body; both arguments are ignored.
func createSecondaries(_ *Core, _ *CoreConfig) {
	// Intentionally empty.
}

View File

@@ -210,6 +210,12 @@ alphabetic order by name.
@include 'telemetry-metrics/vault/core/replication/write_undo_logs.mdx'
@include 'telemetry-metrics/vault/core/replication/build_progress.mdx'
@include 'telemetry-metrics/vault/core/replication/build_total.mdx'
@include 'telemetry-metrics/vault/core/replication/reindex_stage.mdx'
@include 'telemetry-metrics/vault/core/seal_internal.mdx'
@include 'telemetry-metrics/vault/core/seal_with_request.mdx'

View File

@@ -0,0 +1,5 @@
### vault.core.replication.build_progress ((#vault-core-replication-build_progress))
Metric type | Value | Description
----------- | ------- | -----------
gauge | keys | Number of keys that have been inserted into the new tree

View File

@@ -0,0 +1,5 @@
### vault.core.replication.build_total ((#vault-core-replication-build_total))
Metric type | Value | Description
----------- | ------- | -----------
gauge | keys | Total number of keys that have to be inserted into the new tree

View File

@@ -0,0 +1,11 @@
### vault.core.replication.reindex_stage ((#vault-core-replication-reindex_stage))
Metric type | Value | Description
----------- | ------- | -----------
gauge | stage | Current stage of the reindexing process
- A value of `4` indicates the reindex process is committing any differences between the newly created tree and the old tree.
- A value of `3` indicates the reindex process is replaying WALs to ensure no updates were missed while scanning and building.
- A value of `2` indicates the reindex process is currently building a new merkle tree based on the values for the keys obtained in the scanning stage.
- A value of `1` indicates the reindex process is currently creating a list of all known storage keys.
- A value of `0` indicates that a reindex is not in progress.