From 196625b917106f2cba65f3d446fa30f829da21a5 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Thu, 7 Jun 2018 10:10:23 -0700 Subject: [PATCH 1/6] integration: promote db size metrics to "etcd" Signed-off-by: Gyuho Lee --- integration/metrics_test.go | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/integration/metrics_test.go b/integration/metrics_test.go index 3dccd220af1..ca50ef67e65 100644 --- a/integration/metrics_test.go +++ b/integration/metrics_test.go @@ -40,8 +40,16 @@ func TestMetricDbSizeBoot(t *testing.T) { } } -// TestMetricDbSizeDefrag checks that the db size metric is set after defrag. func TestMetricDbSizeDefrag(t *testing.T) { + testMetricDbSizeDefrag(t, "etcd") +} + +func TestMetricDbSizeDefragDebugging(t *testing.T) { + testMetricDbSizeDefrag(t, "etcd_debugging") +} + +// testMetricDbSizeDefrag checks that the db size metric is set after defrag. +func testMetricDbSizeDefrag(t *testing.T, name string) { defer testutil.AfterTest(t) clus := NewClusterV3(t, &ClusterConfig{Size: 1}) defer clus.Terminate(t) @@ -63,7 +71,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { time.Sleep(500 * time.Millisecond) expected := numPuts * len(putreq.Value) - beforeDefrag, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_bytes") + beforeDefrag, err := clus.Members[0].Metric(name + "_mvcc_db_total_size_in_bytes") if err != nil { t.Fatal(err) } @@ -74,7 +82,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { if bv < expected { t.Fatalf("expected db size greater than %d, got %d", expected, bv) } - beforeDefragInUse, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_use_in_bytes") + beforeDefragInUse, err := clus.Members[0].Metric("etcd_mvcc_db_total_size_in_use_in_bytes") if err != nil { t.Fatal(err) } @@ -98,7 +106,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { } time.Sleep(500 * time.Millisecond) - afterCompactionInUse, err := 
clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_use_in_bytes") + afterCompactionInUse, err := clus.Members[0].Metric("etcd_mvcc_db_total_size_in_use_in_bytes") if err != nil { t.Fatal(err) } @@ -113,7 +121,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { // defrag should give freed space back to fs mc.Defragment(context.TODO(), &pb.DefragmentRequest{}) - afterDefrag, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_bytes") + afterDefrag, err := clus.Members[0].Metric(name + "_mvcc_db_total_size_in_bytes") if err != nil { t.Fatal(err) } @@ -125,7 +133,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { t.Fatalf("expected less than %d, got %d after defrag", bv, av) } - afterDefragInUse, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_use_in_bytes") + afterDefragInUse, err := clus.Members[0].Metric("etcd_mvcc_db_total_size_in_use_in_bytes") if err != nil { t.Fatal(err) } From cf196fc4d8e88f8a99eb11858243660346ea12da Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Thu, 7 Jun 2018 10:11:06 -0700 Subject: [PATCH 2/6] Documentation/op-guide: promote db size metric in grafana.json Signed-off-by: Gyuho Lee --- Documentation/op-guide/grafana.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/op-guide/grafana.json b/Documentation/op-guide/grafana.json index 45bfc651362..c405fdfee53 100644 --- a/Documentation/op-guide/grafana.json +++ b/Documentation/op-guide/grafana.json @@ -341,7 +341,7 @@ "steppedLine": false, "targets": [ { - "expr": "etcd_debugging_mvcc_db_total_size_in_bytes{job=\"$cluster\"}", + "expr": "etcd_mvcc_db_total_size_in_bytes{job=\"$cluster\"}", "hide": false, "interval": "", "intervalFactor": 2, From 21130d5fb646c99408b992c30b313f5ec9af22b8 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Thu, 7 Jun 2018 10:11:58 -0700 Subject: [PATCH 3/6] mvcc: promote db size metrics to "etcd" Signed-off-by: Gyuho Lee --- mvcc/metrics.go | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/mvcc/metrics.go b/mvcc/metrics.go index f80c70c8563..c7ea0ce4c6e 100644 --- a/mvcc/metrics.go +++ b/mvcc/metrics.go @@ -146,7 +146,7 @@ var ( }) dbTotalSize = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ - Namespace: "etcd_debugging", + Namespace: "etcd", Subsystem: "mvcc", Name: "db_total_size_in_bytes", Help: "Total size of the underlying database physically allocated in bytes.", @@ -159,10 +159,10 @@ var ( ) // overridden by mvcc initialization reportDbTotalSizeInBytesMu sync.RWMutex - reportDbTotalSizeInBytes func() float64 = func() float64 { return 0 } + reportDbTotalSizeInBytes = func() float64 { return 0 } dbTotalSizeInUse = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ - Namespace: "etcd_debugging", + Namespace: "etcd", Subsystem: "mvcc", Name: "db_total_size_in_use_in_bytes", Help: "Total size of the underlying database logically in use in bytes.", From f2db05a86901daac2a1a057ff465618b0d0a402e Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Thu, 7 Jun 2018 10:22:51 -0700 Subject: [PATCH 4/6] mvcc: server db size with "etcd_debugging" namespace for backward compatibility Signed-off-by: Gyuho Lee --- mvcc/kvstore.go | 3 +++ mvcc/metrics.go | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/mvcc/kvstore.go b/mvcc/kvstore.go index a445f6a4a1a..9c7f5c3ad85 100644 --- a/mvcc/kvstore.go +++ b/mvcc/kvstore.go @@ -323,6 +323,9 @@ func (s *store) restore() error { reportDbTotalSizeInBytesMu.Lock() reportDbTotalSizeInBytes = func() float64 { return float64(b.Size()) } reportDbTotalSizeInBytesMu.Unlock() + reportDbTotalSizeInBytesDebuggingMu.Lock() + reportDbTotalSizeInBytesDebugging = func() float64 { return float64(b.Size()) } + reportDbTotalSizeInBytesDebuggingMu.Unlock() reportDbTotalSizeInUseInBytesMu.Lock() reportDbTotalSizeInUseInBytes = func() float64 { return float64(b.SizeInUse()) } reportDbTotalSizeInUseInBytesMu.Unlock() diff --git a/mvcc/metrics.go b/mvcc/metrics.go index c7ea0ce4c6e..9163cc7c66d 100644 --- 
a/mvcc/metrics.go +++ b/mvcc/metrics.go @@ -161,6 +161,23 @@ var ( reportDbTotalSizeInBytesMu sync.RWMutex reportDbTotalSizeInBytes = func() float64 { return 0 } + // TODO: remove this in v3.5 + dbTotalSizeDebugging = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Namespace: "etcd_debugging", + Subsystem: "mvcc", + Name: "db_total_size_in_bytes", + Help: "Total size of the underlying database physically allocated in bytes.", + }, + func() float64 { + reportDbTotalSizeInBytesDebuggingMu.RLock() + defer reportDbTotalSizeInBytesDebuggingMu.RUnlock() + return reportDbTotalSizeInBytesDebugging() + }, + ) + // overridden by mvcc initialization + reportDbTotalSizeInBytesDebuggingMu sync.RWMutex + reportDbTotalSizeInBytesDebugging = func() float64 { return 0 } + dbTotalSizeInUse = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ Namespace: "etcd", Subsystem: "mvcc", @@ -218,6 +235,7 @@ func init() { prometheus.MustRegister(dbCompactionTotalMs) prometheus.MustRegister(dbCompactionKeysCounter) prometheus.MustRegister(dbTotalSize) + prometheus.MustRegister(dbTotalSizeDebugging) prometheus.MustRegister(dbTotalSizeInUse) prometheus.MustRegister(hashSec) prometheus.MustRegister(hashRevSec) From 009d05ae4fc6ff8d32c5fee4b03d3e5b78df3de3 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Thu, 7 Jun 2018 10:26:45 -0700 Subject: [PATCH 5/6] Documentation/op-guide: highlight db size metrics change Signed-off-by: Gyuho Lee --- Documentation/op-guide/maintenance.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/op-guide/maintenance.md b/Documentation/op-guide/maintenance.md index 294d8c7d61e..7e85a11cf43 100644 --- a/Documentation/op-guide/maintenance.md +++ b/Documentation/op-guide/maintenance.md @@ -149,7 +149,9 @@ $ ETCDCTL_API=3 etcdctl put newkey 123 OK ``` -The metric `etcd_debugging_mvcc_db_total_size_in_use_in_bytes` indicates the actual database usage after a history compaction, while `etcd_debugging_mvcc_db_total_size_in_bytes` shows the database 
size including free space waiting for defragmentation. The latter increases only when the former is close to it, meaning when both of these metrics are close to the quota, a history compaction is required to avoid triggering the space quota. +The metric `etcd_mvcc_db_total_size_in_use_in_bytes` indicates the actual database usage after a history compaction, while `etcd_debugging_mvcc_db_total_size_in_bytes` shows the database size including free space waiting for defragmentation. The latter increases only when the former is close to it, meaning when both of these metrics are close to the quota, a history compaction is required to avoid triggering the space quota. + +`etcd_debugging_mvcc_db_total_size_in_bytes` is renamed to `etcd_mvcc_db_total_size_in_bytes` from v3.4. ## Snapshot backup From a8d7d5ad72281f9f565170ea0db57480604cbd93 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Thu, 7 Jun 2018 10:33:02 -0700 Subject: [PATCH 6/6] Documentation/upgrades: highlight db metrics change Signed-off-by: Gyuho Lee --- Documentation/upgrades/upgrade_3_4.md | 13 +++++++++++++ Documentation/upgrades/upgrade_3_5.md | 11 +++++++++++ 2 files changed, 24 insertions(+) diff --git a/Documentation/upgrades/upgrade_3_4.md b/Documentation/upgrades/upgrade_3_4.md index fc5b3fb292a..130f87ff05f 100644 --- a/Documentation/upgrades/upgrade_3_4.md +++ b/Documentation/upgrades/upgrade_3_4.md @@ -47,6 +47,19 @@ OK +etcd --peer-trusted-ca-file ca-peer.crt ``` +#### Promote `etcd_debugging_mvcc_db_total_size_in_bytes` Prometheus metrics + +v3.4 promotes `etcd_debugging_mvcc_db_total_size_in_bytes` Prometheus metrics to `etcd_mvcc_db_total_size_in_bytes`, in order to encourage etcd storage monitoring. + +`etcd_debugging_mvcc_db_total_size_in_bytes` is still served in v3.4 for backward compatibility. It will be completely deprecated in v3.5. 
+ +```diff +-etcd_debugging_mvcc_db_total_size_in_bytes ++etcd_mvcc_db_total_size_in_bytes +``` + +Note that `etcd_debugging_*` namespace metrics have been marked as experimental. As we improve the monitoring guide, we will promote more metrics. + #### Deprecating `etcd --log-output` flag (now `--log-outputs`) Rename [`etcd --log-output` to `--log-outputs`](https://github.com/coreos/etcd/pull/9624) to support multiple log outputs. **`etcd --logger=capnslog` does not support multiple log outputs.** diff --git a/Documentation/upgrades/upgrade_3_5.md b/Documentation/upgrades/upgrade_3_5.md index 9f0d1eeebd8..6d9a70a15ba 100644 --- a/Documentation/upgrades/upgrade_3_5.md +++ b/Documentation/upgrades/upgrade_3_5.md @@ -14,6 +14,17 @@ Before [starting an upgrade](#upgrade-procedure), read through the rest of this Highlighted breaking changes in 3.5. +#### Deprecate `etcd_debugging_mvcc_db_total_size_in_bytes` Prometheus metrics + +v3.4 promoted `etcd_debugging_mvcc_db_total_size_in_bytes` Prometheus metrics to `etcd_mvcc_db_total_size_in_bytes`, in order to encourage etcd storage monitoring. And v3.5 completely deprecates `etcd_debugging_mvcc_db_total_size_in_bytes`. + +```diff +-etcd_debugging_mvcc_db_total_size_in_bytes ++etcd_mvcc_db_total_size_in_bytes +``` + +Note that `etcd_debugging_*` namespace metrics have been marked as experimental. As we improve the monitoring guide, we will promote more metrics. + #### Deprecated in `etcd --logger capnslog` v3.4 defaults to `--logger=zap` in order to support multiple log outputs and structured logging.