diff --git a/nomad/server.go b/nomad/server.go
index 612be5da65d4..2df2dca6a2a8 100644
--- a/nomad/server.go
+++ b/nomad/server.go
@@ -15,6 +15,7 @@ import (
 	"sync/atomic"
 	"time"
 
+	"github.com/armon/go-metrics"
 	"github.com/hashicorp/consul/agent/consul/autopilot"
 	consulapi "github.com/hashicorp/consul/api"
 	"github.com/hashicorp/consul/lib"
@@ -410,6 +411,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI) (*Server, error)
 	// Emit metrics
 	go s.heartbeatStats()
 
+	// Emit raft and state store metrics
+	go s.EmitRaftStats(10*time.Second, s.shutdownCh)
+
 	// Start enterprise background workers
 	s.startEnterpriseBackground()
 
@@ -1450,6 +1454,40 @@ func (s *Server) Stats() map[string]map[string]string {
 	return stats
 }
 
+// EmitRaftStats periodically exports gauges for the raft last index, the raft
+// applied index and the state store's latest index. It waits period between
+// emissions and only returns once stopCh is closed or delivers a value, so
+// callers run it in its own goroutine.
+//
+// The indexes are converted to float32 for SetGauge, so values above 2^24 are
+// reported with reduced precision.
+func (s *Server) EmitRaftStats(period time.Duration, stopCh <-chan struct{}) {
+	// Reuse one timer rather than allocating a new one with time.After on
+	// every iteration; the deferred Stop releases it as soon as we return.
+	timer := time.NewTimer(period)
+	defer timer.Stop()
+
+	for {
+		select {
+		case <-timer.C:
+			metrics.SetGauge([]string{"raft", "lastIndex"}, float32(s.raft.LastIndex()))
+			metrics.SetGauge([]string{"raft", "appliedIndex"}, float32(s.raft.AppliedIndex()))
+
+			stateStoreSnapshotIndex, err := s.State().LatestIndex()
+			if err != nil {
+				s.logger.Warn("Unable to read snapshot index from statestore, metric will not be emitted", "error", err)
+			} else {
+				metrics.SetGauge([]string{"state", "snapshotIndex"}, float32(stateStoreSnapshotIndex))
+			}
+
+			// The timer channel was just drained, so Reset is safe here.
+			timer.Reset(period)
+		case <-stopCh:
+			return
+		}
+	}
+}
+
 // Region returns the region of the server
 func (s *Server) Region() string {
 	return s.config.Region
diff --git a/website/source/docs/telemetry/index.html.md b/website/source/docs/telemetry/index.html.md
index e51f21e10d7b..3d33c27f2201 100644
--- a/website/source/docs/telemetry/index.html.md
+++ b/website/source/docs/telemetry/index.html.md
@@ -109,6 +109,18 @@ when retrieving metrics using the above described signals.
Raft transactions / `interval` Counter + + `nomad.raft.lastIndex` + Index of the last log in stable storage + Sequence number + Gauge + + + `nomad.raft.appliedIndex` + Index of the last applied log + Sequence number + Gauge + `nomad.raft.replication.appendEntries` Raft transaction commit time @@ -167,6 +179,12 @@ when retrieving metrics using the above described signals. ms / Plan Evaluation Timer + + `nomad.state.snapshotIndex` + Latest index in the server's in-memory state store + Sequence number + Gauge + `nomad.worker.invoke_scheduler.` Time to run the scheduler of the given type