Skip to content

Commit

Permalink
Merge pull request #5841 from hashicorp/f-raft-snapshot-metrics
Browse files Browse the repository at this point in the history
Raft and state store indexes as metrics
  • Loading branch information
preetapan committed Jun 19, 2019
2 parents e029478 + aba8d42 commit c135f03
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 0 deletions.
25 changes: 25 additions & 0 deletions nomad/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"sync/atomic"
"time"

"github.com/armon/go-metrics"
"github.com/hashicorp/consul/agent/consul/autopilot"
consulapi "github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/lib"
Expand Down Expand Up @@ -410,6 +411,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI) (*Server, error)
// Emit metrics
go s.heartbeatStats()

// Emit raft and state store metrics
go s.EmitRaftStats(10*time.Second, s.shutdownCh)

// Start enterprise background workers
s.startEnterpriseBackground()

Expand Down Expand Up @@ -1450,6 +1454,27 @@ func (s *Server) Stats() map[string]map[string]string {
return stats
}

// EmitRaftStats periodically exports gauges for the raft last index, the raft
// applied index, and the latest index of the server's state store. It blocks
// until stopCh is closed or receives a value, so callers should run it in its
// own goroutine.
//
// period is the delay between the end of one emission and the start of the
// next. A non-positive period fires immediately on every pass.
//
// If the state store index cannot be read, a warning is logged and only the
// "state.snapshotIndex" gauge is skipped for that pass.
//
// NOTE(review): the indexes are cast to float32, which represents integers
// exactly only up to 2^24; larger index values will be rounded. Confirm this
// is acceptable for consumers of these gauges.
func (s *Server) EmitRaftStats(period time.Duration, stopCh <-chan struct{}) {
	// Reuse a single timer rather than calling time.After on every pass: this
	// avoids allocating a new timer per iteration and lets us release the
	// pending timer promptly on shutdown.
	timer := time.NewTimer(period)
	defer timer.Stop()

	for {
		select {
		case <-timer.C:
			lastIndex := s.raft.LastIndex()
			metrics.SetGauge([]string{"raft", "lastIndex"}, float32(lastIndex))

			appliedIndex := s.raft.AppliedIndex()
			metrics.SetGauge([]string{"raft", "appliedIndex"}, float32(appliedIndex))

			stateStoreSnapshotIndex, err := s.State().LatestIndex()
			if err != nil {
				s.logger.Warn("Unable to read snapshot index from statestore, metric will not be emitted", "error", err)
			} else {
				metrics.SetGauge([]string{"state", "snapshotIndex"}, float32(stateStoreSnapshotIndex))
			}

			// timer.C was just drained above, so Reset is safe here. Re-arming
			// after the work keeps the original cadence of "period after each
			// emission completes".
			timer.Reset(period)
		case <-stopCh:
			return
		}
	}
}

// Region returns the region of the server
func (s *Server) Region() string {
return s.config.Region
Expand Down
18 changes: 18 additions & 0 deletions website/source/docs/telemetry/index.html.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,18 @@ when retrieving metrics using the above described signals.
<td>Raft transactions / `interval`</td>
<td>Counter</td>
</tr>
<tr>
<td>`nomad.raft.lastIndex`</td>
<td>Index of the <a href="https://godoc.org/github.com/hashicorp/raft#Raft.LastIndex">last log in stable storage</a></td>
<td>Sequence number</td>
<td>Gauge</td>
</tr>
<tr>
<td>`nomad.raft.appliedIndex`</td>
<td>Index of the <a href="https://godoc.org/github.com/hashicorp/raft#Raft.AppliedIndex">last applied log</a></td>
<td>Sequence number</td>
<td>Gauge</td>
</tr>
<tr>
<td>`nomad.raft.replication.appendEntries`</td>
<td>Raft transaction commit time</td>
Expand Down Expand Up @@ -167,6 +179,12 @@ when retrieving metrics using the above described signals.
<td>ms / Plan Evaluation</td>
<td>Timer</td>
</tr>
<tr>
<td>`nomad.state.snapshotIndex`</td>
<td>Latest index in the server's in-memory state store</td>
<td>Sequence number</td>
<td>Gauge</td>
</tr>
<tr>
<td>`nomad.worker.invoke_scheduler.<type>`</td>
<td>Time to run the scheduler of the given type</td>
Expand Down

0 comments on commit c135f03

Please sign in to comment.