From 043489ed970f4a6e9c14afbda760c885a90fc6c3 Mon Sep 17 00:00:00 2001 From: Frederik Thuysbaert Date: Mon, 6 May 2024 11:52:24 +0200 Subject: [PATCH] feat(inputs.redis): Add latency percentiles metric (#15293) --- plugins/inputs/redis/README.md | 17 ++++++++++- plugins/inputs/redis/redis.go | 47 +++++++++++++++++++++++++++++- plugins/inputs/redis/redis_test.go | 20 +++++++++++++ 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/plugins/inputs/redis/README.md b/plugins/inputs/redis/README.md index 35e8b3528e07e..358a2d0e847f8 100644 --- a/plugins/inputs/redis/README.md +++ b/plugins/inputs/redis/README.md @@ -157,6 +157,12 @@ and the elapsed time since the last rdb save (rdb\_last\_save\_time\_elapsed). - usec(int, mircoseconds) - usec_per_call(float, microseconds) +- redis_latency_percentiles_usec + - fields: + - p50(float, microseconds) + - p99(float, microseconds) + - p99.9(float, microseconds) + - redis_replication - tags: - replication_role @@ -184,7 +190,10 @@ and the elapsed time since the last rdb save (rdb\_last\_save\_time\_elapsed). 
- The redis_keyspace measurement has an additional database tag: - database -- The redis_cmdstat measurement has an additional tag: +- The redis_cmdstat measurement has an additional command tag: + - command + +- The redis_latency_percentiles_usec measurement has an additional command tag: - command ## Example Output @@ -228,6 +237,12 @@ redis_command: redis_cmdstat,command=publish,host=host,port=6379,replication_role=master,server=localhost calls=569514i,failed_calls=0i,rejected_calls=0i,usec=9916334i,usec_per_call=17.41 1559227136000000000 ``` +redis_latency_percentiles_usec: + +```text +redis_latency_percentiles_usec,command=zadd,host=host,port=6379,replication_role=master,server=localhost p50=9.023,p99=28.031,p99.9=43.007 1559227136000000000 +``` + redis_error: ```text diff --git a/plugins/inputs/redis/redis.go b/plugins/inputs/redis/redis.go index b19cdd8a611a3..960495631318b 100644 --- a/plugins/inputs/redis/redis.go +++ b/plugins/inputs/redis/redis.go @@ -415,6 +415,11 @@ func gatherInfoOutput( gatherCommandstateLine(name, kline, acc, tags) continue } + if section == "Latencystats" { + kline := strings.TrimSpace(parts[1]) + gatherLatencystatsLine(name, kline, acc, tags) + continue + } if section == "Replication" && replicationSlaveMetricPrefix.MatchString(name) { kline := strings.TrimSpace(parts[1]) gatherReplicationLine(name, kline, acc, tags) @@ -515,7 +520,7 @@ func gatherKeyspaceLine( // // cmdstat_publish:calls=33791,usec=208789,usec_per_call=6.18 // -// Tag: cmdstat=publish; Fields: calls=33791i,usec=208789i,usec_per_call=6.18 +// Tag: command=publish; Fields: calls=33791i,usec=208789i,usec_per_call=6.18 func gatherCommandstateLine( name string, line string, @@ -557,6 +562,46 @@ func gatherCommandstateLine( acc.AddFields("redis_cmdstat", fields, tags) } +// Parse the special latency_percentiles_usec lines. 
+// Example:
+//
+//	latency_percentiles_usec_zadd:p50=9.023,p99=28.031,p99.9=43.007
+//
+// Tag: command=zadd; Fields: p50=9.023,p99=28.031,p99.9=43.007
+func gatherLatencystatsLine(
+	name string,
+	line string,
+	acc telegraf.Accumulator,
+	globalTags map[string]string,
+) {
+	if !strings.HasPrefix(name, "latency_percentiles_usec_") {
+		return
+	}
+	fields := make(map[string]interface{})
+	// Build a fresh tag set so adding the command tag leaves globalTags untouched.
+	tags := make(map[string]string, len(globalTags)+1)
+	for k, v := range globalTags {
+		tags[k] = v
+	}
+	tags["command"] = strings.TrimPrefix(name, "latency_percentiles_usec_")
+	parts := strings.Split(line, ",")
+	for _, part := range parts {
+		kv := strings.Split(part, "=")
+		if len(kv) != 2 {
+			continue
+		}
+		// Only these percentile keys become fields; unparsable values are skipped.
+		switch kv[0] {
+		case "p50", "p99", "p99.9":
+			fval, err := strconv.ParseFloat(kv[1], 64)
+			if err == nil {
+				fields[kv[0]] = fval
+			}
+		}
+	}
+	acc.AddFields("redis_latency_percentiles_usec", fields, tags)
+}
+
 // Parse the special Replication line
 // Example:
 //
diff --git a/plugins/inputs/redis/redis_test.go b/plugins/inputs/redis/redis_test.go
index e5e588fd76e23..7e381412ffd16 100644
--- a/plugins/inputs/redis/redis_test.go
+++ b/plugins/inputs/redis/redis_test.go
@@ -258,6 +258,22 @@ func TestRedis_ParseMetrics(t *testing.T) {
 	}
 	acc.AssertContainsTaggedFields(t, "redis_cmdstat", cmdstatPublishFields, cmdstatPublishTags)
 
+	latencyZaddTags := map[string]string{"host": "redis.net", "replication_role": "master", "command": "zadd"}
+	latencyZaddFields := map[string]interface{}{
+		"p50":   float64(9.023),
+		"p99":   float64(28.031),
+		"p99.9": float64(43.007),
+	}
+	acc.AssertContainsTaggedFields(t, "redis_latency_percentiles_usec", latencyZaddFields, latencyZaddTags)
+
+	latencyHgetallTags := map[string]string{"host": "redis.net", "replication_role": "master", "command": "hgetall"}
+	latencyHgetallFields := map[string]interface{}{
+		"p50":   float64(11.007),
+		"p99":   float64(34.047),
+		"p99.9": float64(66.047),
+	}
+	acc.AssertContainsTaggedFields(t, "redis_latency_percentiles_usec",
latencyHgetallFields, latencyHgetallTags) + replicationTags := map[string]string{ "host": "redis.net", "replication_role": "slave", @@ -541,6 +557,10 @@ errorstat_NOSCRIPT:count=4 errorstat_WRONGPASS:count=2 errorstat_WRONGTYPE:count=30 +# Latencystats +latency_percentiles_usec_zadd:p50=9.023,p99=28.031,p99.9=43.007 +latency_percentiles_usec_hgetall:p50=11.007,p99=34.047,p99.9=66.047 + # Keyspace db0:keys=2,expires=0,avg_ttl=0