Skip to content

Commit

Permalink
etcdserver/api/rafthttp: add "etcd_network_raft_send_total_duration_s…
Browse files Browse the repository at this point in the history
…econds" metric

Currently, only the v2 metrics ("stats.FollowerStats") track Raft message
send latencies. Add a Prometheus histogram to track Raft messages for
writes, since heartbeats are probed (see etcd-io#10022)
and snapshots are already being tracked via etcd-io#9997.

```
etcd_network_raft_send_total_duration_seconds_bucket{To="7339c4e5e833c029",Type="MsgProp",le="0.0001"} 1
etcd_network_raft_send_total_duration_seconds_bucket{To="7339c4e5e833c029",Type="MsgProp",le="0.0002"} 1
etcd_network_raft_send_total_duration_seconds_bucket{To="729934363faa4a24",Type="MsgApp",le="0.0001"} 9
etcd_network_raft_send_total_duration_seconds_bucket{To="729934363faa4a24",Type="MsgApp",le="0.0002"} 9
etcd_network_raft_send_total_duration_seconds_bucket{To="7339c4e5e833c029",Type="MsgAppResp",le="0.0001"} 8
etcd_network_raft_send_total_duration_seconds_bucket{To="7339c4e5e833c029",Type="MsgAppResp",le="0.0002"} 8
```

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
  • Loading branch information
gyuho committed Aug 29, 2018
1 parent dcb4d76 commit e07f1a2
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
14 changes: 14 additions & 0 deletions etcdserver/api/rafthttp/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,19 @@ var (
[]string{"From"},
)

// raftSendSeconds is a histogram vector exported as
// "etcd_network_raft_send_total_duration_seconds". Each observation is
// labeled with the Raft message "Type" and the "To" peer ID.
raftSendSeconds = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "etcd",
Subsystem: "network",
Name: "raft_send_total_duration_seconds",
Help: "Total latency distributions of Raft message sends",

// 16 exponential buckets: the lowest upper bound is 0.0001 sec (0.1 ms)
// and each following upper bound doubles (factor 2), so the highest
// upper bound is 0.0001 sec * 2^15 == 3.2768 sec.
Buckets: prometheus.ExponentialBuckets(0.0001, 2, 16),
},
[]string{"Type", "To"},
)

rttSec = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "etcd",
Subsystem: "network",
Expand Down Expand Up @@ -162,5 +175,6 @@ func init() {
prometheus.MustRegister(snapshotReceiveFailures)
prometheus.MustRegister(snapshotReceiveSeconds)

prometheus.MustRegister(raftSendSeconds)
prometheus.MustRegister(rttSec)
}
10 changes: 10 additions & 0 deletions etcdserver/api/rafthttp/stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,10 @@ func (cw *streamWriter) run() {
heartbeatc, msgc = nil, nil

case m := <-msgc:
start := time.Now()
err := enc.encode(&m)
if err == nil {
took := time.Since(start)
unflushed += m.Size()

if len(msgc) == 0 || batched > streamBufSize/2 {
Expand All @@ -214,6 +216,14 @@ func (cw *streamWriter) run() {
batched++
}

// snapshot sends are tracked via separate metrics https://github.com/etcd-io/etcd/pull/9997
// heartbeats are tracked via prober https://github.com/etcd-io/etcd/pull/10022
// TODO: track other messages?
if m.Type == raftpb.MsgProp ||
m.Type == raftpb.MsgApp ||
m.Type == raftpb.MsgAppResp {
raftSendSeconds.WithLabelValues(m.Type.String(), types.ID(m.To).String()).Observe(took.Seconds())
}
continue
}

Expand Down

0 comments on commit e07f1a2

Please sign in to comment.