Raft and state store indexes as metrics #5841

Merged: 5 commits, Jun 19, 2019
Changes from 3 commits
25 changes: 25 additions & 0 deletions nomad/server.go
@@ -15,6 +15,7 @@ import (
"sync/atomic"
"time"

"github.com/armon/go-metrics"
"github.com/hashicorp/consul/agent/consul/autopilot"
consulapi "github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/lib"
@@ -410,6 +411,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI) (*Server, error)
// Emit metrics
go s.heartbeatStats()

// Emit raft and state store metrics
go s.EmitRaftStats(time.Second, s.shutdownCh)
@notnoop (Contributor) commented on Jun 18, 2019:

I see how this matches the other stats periods, but I suspect that this is too frequent. Statsd agents commonly flush data only every few seconds (e.g. 10 seconds for datadog [1] and statsite [2]), and with gauges, all values in the flush interval are dropped except for the last one [3].

Nothing to change now, but I'm raising awareness of potentially wasteful operations here; we can investigate/act later.

[1] https://docs.datadoghq.com/developers/dogstatsd/#how-it-works
[2] http://statsite.github.io/statsite/
[3] https://github.com/statsd/statsd/blob/master/docs/metric_types.md#gauges
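To make the gauge behavior above concrete, here is a toy sketch (not any real statsd agent or the go-metrics library; every name is invented for illustration) of last-write-wins aggregation within a flush interval: a gauge emitted every second into a 10-second flush window contributes only its final sample.

```go
package main

import "fmt"

// toyGaugeSink models statsd gauge flush semantics: samples overwrite each
// other until a flush, so only the last value per interval survives.
type toyGaugeSink struct {
	gauges map[string]float32
}

// SetGauge overwrites any previous sample for the key: last write wins.
func (t *toyGaugeSink) SetGauge(key string, v float32) {
	t.gauges[key] = v
}

// Flush reports the surviving samples and starts a new interval.
func (t *toyGaugeSink) Flush() map[string]float32 {
	out := t.gauges
	t.gauges = map[string]float32{}
	return out
}

func main() {
	sink := &toyGaugeSink{gauges: map[string]float32{}}
	// Ten one-second emissions land inside a single 10s flush window...
	for i := 1; i <= 10; i++ {
		sink.SetGauge("raft.lastIndex", float32(i))
	}
	// ...but only the last value survives the flush.
	fmt.Println(sink.Flush()) // map[raft.lastIndex:10]
}
```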

Contributor Author replied:
Would it be more reasonable to use 10 seconds here? The one potentially expensive call is state.LatestIndex, which scans multiple memdb tables to find the max index. I'd rather fix this now and be more conservative about emitting stats.
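For context on why that call can be costly, here is a rough, hypothetical sketch of a LatestIndex-style scan over a go-memdb index table. The table name, index name, and entry type are assumptions for illustration only, not Nomad's actual state store code; the point is that the cost grows with the number of rows walked per emission.

```go
package statesketch

import (
	"fmt"

	memdb "github.com/hashicorp/go-memdb"
)

// indexEntry is a hypothetical row type mapping a table name to its last
// modify index.
type indexEntry struct {
	Key   string
	Value uint64
}

// latestIndex walks every row of an assumed "index" table and returns the
// maximum value seen.
func latestIndex(db *memdb.MemDB) (uint64, error) {
	txn := db.Txn(false) // read-only transaction
	defer txn.Abort()

	iter, err := txn.Get("index", "id")
	if err != nil {
		return 0, err
	}

	var max uint64
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		entry, ok := raw.(*indexEntry)
		if !ok {
			return 0, fmt.Errorf("unexpected type %T in index table", raw)
		}
		if entry.Value > max {
			max = entry.Value
		}
	}
	return max, nil
}
```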

Contributor replied:
Yeah, I'd go with 10s or 5s, and then we can reexamine other stats periods.


// Start enterprise background workers
s.startEnterpriseBackground()

@@ -1450,6 +1454,27 @@ func (s *Server) Stats() map[string]map[string]string {
return stats
}

// EmitRaftStats is used to export metrics about raft indexes and state store snapshot index
func (s *Server) EmitRaftStats(period time.Duration, stopCh <-chan struct{}) {
for {
select {
case <-time.After(period):
Contributor commented:

Very nit-picky: time.Ticker is slightly nicer to use in loops, since it reuses the channel and timer instead of allocating a new one on every iteration.
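For illustration, a minimal standalone sketch of the suggested pattern (not part of this diff; runEvery and the emit callback are hypothetical names): the periodic work is driven by one reusable time.Ticker rather than a fresh timer from time.After each iteration.

```go
package metricsloop

import "time"

// runEvery calls emit once per period until stopCh is closed, reusing a
// single time.Ticker and its channel across iterations.
func runEvery(period time.Duration, stopCh <-chan struct{}, emit func()) {
	ticker := time.NewTicker(period)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			emit()
		case <-stopCh:
			return
		}
	}
}
```

The emit loop below could then, hypothetically, be launched as `go runEvery(period, s.shutdownCh, emitFn)` with the gauge updates moved into the callback.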

lastIndex := s.raft.LastIndex()
metrics.SetGauge([]string{"raft", "lastIndex"}, float32(lastIndex))
appliedIndex := s.raft.AppliedIndex()
metrics.SetGauge([]string{"raft", "appliedIndex"}, float32(appliedIndex))
stateStoreSnapshotIndex, err := s.State().LatestIndex()
if err != nil {
s.logger.Warn("Unable to read snapshot index from statestore, metric will not be emitted", "error", err)
Contributor commented:
Curious: what conditions would result in an error getting the latest index? Also, is it meant to be recoverable? I'd be concerned about spurious logging if it happens somewhat frequently.

Contributor Author replied:
This should be rare and indicates state store corruption (either the index table is missing or contains invalid data). A lot of Nomad operations would be broken if it gets to that state, so it's a good thing to warn about here.

} else {
metrics.SetGauge([]string{"state", "snapshotIndex"}, float32(stateStoreSnapshotIndex))
}
case <-stopCh:
return
}
}
}

// Region returns the region of the server
func (s *Server) Region() string {
return s.config.Region
18 changes: 18 additions & 0 deletions website/source/docs/telemetry/index.html.md
@@ -109,6 +109,18 @@ when retrieving metrics using the above described signals.
<td>Raft transactions / `interval`</td>
<td>Counter</td>
</tr>
Contributor Author commented:
@langmartin @schmichael Can I get a review on these docs before I merge?

<tr>
<td>`nomad.raft.lastIndex`</td>
<td>Index of the last log</td>
Contributor commented:
It's the index of the last log or snapshot; I assume there are cases where the snapshot can lead the logs. Since a snapshot is just a view of the logs, this may not be a meaningful distinction.

<td>Sequence number</td>
<td>Gauge</td>
</tr>
<tr>
<td>`nomad.raft.appliedIndex`</td>
<td>Index of the last applied log</td>
Contributor commented:
Is it worth adding some context and/or the caveat from https://godoc.org/github.com/hashicorp/raft#Raft.AppliedIndex?

This is generally lagging behind the last index, especially for indexes that are persisted but have not yet been considered committed by the leader. NOTE - this reflects the last index that was sent to the application's FSM over the apply channel but DOES NOT mean that the application's FSM has yet consumed it and applied it to its internal state. Thus, the application's state may lag behind this index.

<td>Sequence number</td>
<td>Gauge</td>
</tr>
<tr>
<td>`nomad.raft.replication.appendEntries`</td>
<td>Raft transaction commit time</td>
@@ -167,6 +179,12 @@ when retrieving metrics using the above described signals.
<td>ms / Plan Evaluation</td>
<td>Timer</td>
</tr>
<tr>
<td>`nomad.state.snapshotIndex`</td>
<td>Latest index in the server's in memory state store</td>
<td>Sequence number</td>
<td>Gauge</td>
</tr>
<tr>
<td>`nomad.worker.invoke_scheduler.<type>`</td>
<td>Time to run the scheduler of the given type</td>