diff --git a/docs/generated/http/full.md b/docs/generated/http/full.md index 9aabb983f615..a1c6e50d3a02 100644 --- a/docs/generated/http/full.md +++ b/docs/generated/http/full.md @@ -1971,23 +1971,22 @@ Support status: [reserved](#support-status) | Field | Type | Label | Description | Support status | | ----- | ---- | ----- | ----------- | -------------- | -| stats | [EngineStatsInfo](#cockroach.server.serverpb.EngineStatsResponse-cockroach.server.serverpb.EngineStatsInfo) | repeated | | [reserved](#support-status) | +| stats_by_store_id | [EngineStatsResponse.StatsByStoreIdEntry](#cockroach.server.serverpb.EngineStatsResponse-cockroach.server.serverpb.EngineStatsResponse.StatsByStoreIdEntry) | repeated | maps store IDs to pretty-printed stats about the store's LSM. | [reserved](#support-status) | - -#### EngineStatsInfo + +#### EngineStatsResponse.StatsByStoreIdEntry | Field | Type | Label | Description | Support status | | ----- | ---- | ----- | ----------- | -------------- | -| store_id | [int32](#cockroach.server.serverpb.EngineStatsResponse-int32) | | | [reserved](#support-status) | -| tickers_and_histograms | [cockroach.storage.enginepb.TickersAndHistograms](#cockroach.server.serverpb.EngineStatsResponse-cockroach.storage.enginepb.TickersAndHistograms) | | | [reserved](#support-status) | -| engine_type | [cockroach.storage.enginepb.EngineType](#cockroach.server.serverpb.EngineStatsResponse-cockroach.storage.enginepb.EngineType) | | | [reserved](#support-status) | +| key | [int32](#cockroach.server.serverpb.EngineStatsResponse-int32) | | | | +| value | [string](#cockroach.server.serverpb.EngineStatsResponse-string) | | | | diff --git a/pkg/cli/BUILD.bazel b/pkg/cli/BUILD.bazel index f95e1befa20b..013a777a14cd 100644 --- a/pkg/cli/BUILD.bazel +++ b/pkg/cli/BUILD.bazel @@ -142,6 +142,7 @@ go_library( "//pkg/security/username", "//pkg/server", "//pkg/server/authserver", + "//pkg/server/debug", "//pkg/server/pgurl", "//pkg/server/profiler", "//pkg/server/serverctl", diff --git a/pkg/cli/testdata/zip/partial1 b/pkg/cli/testdata/zip/partial1 index 7503b94ee995..b523d1e483fa 100644 --- a/pkg/cli/testdata/zip/partial1 +++ b/pkg/cli/testdata/zip/partial1 @@ -114,10 +114,10 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null [node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done [node 1] requesting data for debug/nodes/1/details... received response... writing JSON output: debug/nodes/1/details.json... done [node 1] requesting data for debug/nodes/1/gossip... received response... writing JSON output: debug/nodes/1/gossip.json... done -[node 1] requesting data for debug/nodes/1/enginestats... received response... writing JSON output: debug/nodes/1/enginestats.json... done [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting stacks with labels... received response... writing binary output: debug/nodes/1/stacks_with_labels.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done +[node 1] requesting engine stats... received response... writing binary output: debug/nodes/1/lsm.txt... done [node 1] requesting heap profile list... received response... [node 1] requesting heap profile list: last request failed: rpc error: ... [node 1] requesting heap profile list: creating error output: debug/nodes/1/heapprof.err.txt... done @@ -208,9 +208,6 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null [node 2] requesting data for debug/nodes/2/gossip... received response... [node 2] requesting data for debug/nodes/2/gossip: last request failed: rpc error: ... [node 2] requesting data for debug/nodes/2/gossip: creating error output: debug/nodes/2/gossip.json.err.txt... done -[node 2] requesting data for debug/nodes/2/enginestats... received response... -[node 2] requesting data for debug/nodes/2/enginestats: last request failed: rpc error: ... -[node 2] requesting data for debug/nodes/2/enginestats: creating error output: debug/nodes/2/enginestats.json.err.txt... done [node 2] requesting stacks... received response... [node 2] requesting stacks: last request failed: rpc error: ... [node 2] requesting stacks: creating error output: debug/nodes/2/stacks.txt.err.txt... done @@ -220,6 +217,9 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null [node 2] requesting heap profile... received response... [node 2] requesting heap profile: last request failed: rpc error: ... [node 2] requesting heap profile: creating error output: debug/nodes/2/heap.pprof.err.txt... done +[node 2] requesting engine stats... received response... +[node 2] requesting engine stats: last request failed: rpc error: ... +[node 2] requesting engine stats: creating error output: debug/nodes/2/lsm.txt.err.txt... done [node 2] requesting heap profile list... received response... [node 2] requesting heap profile list: last request failed: rpc error: ... [node 2] requesting heap profile list: creating error output: debug/nodes/2/heapprof.err.txt... done @@ -262,10 +262,10 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null [node 3] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/3/crdb_internal.node_txn_stats.txt... done [node 3] requesting data for debug/nodes/3/details... received response... writing JSON output: debug/nodes/3/details.json... done [node 3] requesting data for debug/nodes/3/gossip... received response... writing JSON output: debug/nodes/3/gossip.json... done -[node 3] requesting data for debug/nodes/3/enginestats... received response... writing JSON output: debug/nodes/3/enginestats.json... done [node 3] requesting stacks... received response... writing binary output: debug/nodes/3/stacks.txt... done [node 3] requesting stacks with labels... received response... writing binary output: debug/nodes/3/stacks_with_labels.txt... done [node 3] requesting heap profile... received response... writing binary output: debug/nodes/3/heap.pprof... done +[node 3] requesting engine stats... received response... writing binary output: debug/nodes/3/lsm.txt... done [node 3] requesting heap profile list... received response... [node 3] requesting heap profile list: last request failed: rpc error: ... [node 3] requesting heap profile list: creating error output: debug/nodes/3/heapprof.err.txt... done diff --git a/pkg/cli/testdata/zip/partial1_excluded b/pkg/cli/testdata/zip/partial1_excluded index a91fcd59770c..b215a4a39ed5 100644 --- a/pkg/cli/testdata/zip/partial1_excluded +++ b/pkg/cli/testdata/zip/partial1_excluded @@ -114,10 +114,10 @@ debug zip /dev/null --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0 [node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done [node 1] requesting data for debug/nodes/1/details... received response... writing JSON output: debug/nodes/1/details.json... done [node 1] requesting data for debug/nodes/1/gossip... received response... writing JSON output: debug/nodes/1/gossip.json... done -[node 1] requesting data for debug/nodes/1/enginestats... received response... writing JSON output: debug/nodes/1/enginestats.json... done [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting stacks with labels... received response... writing binary output: debug/nodes/1/stacks_with_labels.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done +[node 1] requesting engine stats... received response... writing binary output: debug/nodes/1/lsm.txt... done [node 1] requesting heap profile list... received response... [node 1] requesting heap profile list: last request failed: rpc error: ... [node 1] requesting heap profile list: creating error output: debug/nodes/1/heapprof.err.txt... done @@ -159,10 +159,10 @@ debug zip /dev/null --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0 [node 3] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/3/crdb_internal.node_txn_stats.txt... done [node 3] requesting data for debug/nodes/3/details... received response... writing JSON output: debug/nodes/3/details.json... done [node 3] requesting data for debug/nodes/3/gossip... received response... writing JSON output: debug/nodes/3/gossip.json... done -[node 3] requesting data for debug/nodes/3/enginestats... received response... writing JSON output: debug/nodes/3/enginestats.json... done [node 3] requesting stacks... received response... writing binary output: debug/nodes/3/stacks.txt... done [node 3] requesting stacks with labels... received response... writing binary output: debug/nodes/3/stacks_with_labels.txt... done [node 3] requesting heap profile... received response... writing binary output: debug/nodes/3/heap.pprof... done +[node 3] requesting engine stats... received response... writing binary output: debug/nodes/3/lsm.txt... done [node 3] requesting heap profile list... received response... [node 3] requesting heap profile list: last request failed: rpc error: ... [node 3] requesting heap profile list: creating error output: debug/nodes/3/heapprof.err.txt... done diff --git a/pkg/cli/testdata/zip/partial2 b/pkg/cli/testdata/zip/partial2 index d681708ae07d..cfd005a52ab6 100644 --- a/pkg/cli/testdata/zip/partial2 +++ b/pkg/cli/testdata/zip/partial2 @@ -114,10 +114,10 @@ debug zip --concurrency=1 --cpu-profile-duration=0 /dev/null [node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done [node 1] requesting data for debug/nodes/1/details... received response... writing JSON output: debug/nodes/1/details.json... done [node 1] requesting data for debug/nodes/1/gossip... received response... writing JSON output: debug/nodes/1/gossip.json... done -[node 1] requesting data for debug/nodes/1/enginestats... received response... writing JSON output: debug/nodes/1/enginestats.json... done [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting stacks with labels... received response... writing binary output: debug/nodes/1/stacks_with_labels.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done +[node 1] requesting engine stats... received response... writing binary output: debug/nodes/1/lsm.txt... done [node 1] requesting heap profile list... received response... [node 1] requesting heap profile list: last request failed: rpc error: ... [node 1] requesting heap profile list: creating error output: debug/nodes/1/heapprof.err.txt... done @@ -158,10 +158,10 @@ debug zip --concurrency=1 --cpu-profile-duration=0 /dev/null [node 3] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/3/crdb_internal.node_txn_stats.txt... done [node 3] requesting data for debug/nodes/3/details... received response... writing JSON output: debug/nodes/3/details.json... done [node 3] requesting data for debug/nodes/3/gossip... received response... writing JSON output: debug/nodes/3/gossip.json... done -[node 3] requesting data for debug/nodes/3/enginestats... received response... writing JSON output: debug/nodes/3/enginestats.json... done [node 3] requesting stacks... received response... writing binary output: debug/nodes/3/stacks.txt... done [node 3] requesting stacks with labels... received response... writing binary output: debug/nodes/3/stacks_with_labels.txt... done [node 3] requesting heap profile... received response... writing binary output: debug/nodes/3/heap.pprof... done +[node 3] requesting engine stats... received response... writing binary output: debug/nodes/3/lsm.txt... done [node 3] requesting heap profile list... received response... [node 3] requesting heap profile list: last request failed: rpc error: ... [node 3] requesting heap profile list: creating error output: debug/nodes/3/heapprof.err.txt... done diff --git a/pkg/cli/testdata/zip/testzip b/pkg/cli/testdata/zip/testzip index 43bc769e4327..badcb676bd19 100644 --- a/pkg/cli/testdata/zip/testzip +++ b/pkg/cli/testdata/zip/testzip @@ -117,10 +117,10 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null [node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done [node 1] requesting data for debug/nodes/1/details... received response... writing JSON output: debug/nodes/1/details.json... done [node 1] requesting data for debug/nodes/1/gossip... received response... writing JSON output: debug/nodes/1/gossip.json... done -[node 1] requesting data for debug/nodes/1/enginestats... received response... writing JSON output: debug/nodes/1/enginestats.json... done [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting stacks with labels... received response... writing binary output: debug/nodes/1/stacks_with_labels.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done +[node 1] requesting engine stats... received response... writing binary output: debug/nodes/1/lsm.txt... done [node 1] requesting heap profile list... received response... done [node ?] ? heap profiles found [node 1] requesting goroutine dump list... received response... done diff --git a/pkg/cli/testdata/zip/testzip_concurrent b/pkg/cli/testdata/zip/testzip_concurrent index bd6e40c6e27a..e91f6305d766 100644 --- a/pkg/cli/testdata/zip/testzip_concurrent +++ b/pkg/cli/testdata/zip/testzip_concurrent @@ -278,14 +278,14 @@ zip [node 1] requesting data for debug/nodes/1/details: done [node 1] requesting data for debug/nodes/1/details: received response... [node 1] requesting data for debug/nodes/1/details: writing JSON output: debug/nodes/1/details.json... -[node 1] requesting data for debug/nodes/1/enginestats... -[node 1] requesting data for debug/nodes/1/enginestats: done -[node 1] requesting data for debug/nodes/1/enginestats: received response... -[node 1] requesting data for debug/nodes/1/enginestats: writing JSON output: debug/nodes/1/enginestats.json... [node 1] requesting data for debug/nodes/1/gossip... [node 1] requesting data for debug/nodes/1/gossip: done [node 1] requesting data for debug/nodes/1/gossip: received response... [node 1] requesting data for debug/nodes/1/gossip: writing JSON output: debug/nodes/1/gossip.json... +[node 1] requesting engine stats... +[node 1] requesting engine stats: done +[node 1] requesting engine stats: received response... +[node 1] requesting engine stats: writing binary output: debug/nodes/1/lsm.txt... [node 1] requesting goroutine dump list... [node 1] requesting goroutine dump list: creating error output: debug/nodes/1/goroutines.err.txt... [node 1] requesting goroutine dump list: done @@ -399,14 +399,14 @@ zip [node 2] requesting data for debug/nodes/2/details: done [node 2] requesting data for debug/nodes/2/details: received response... [node 2] requesting data for debug/nodes/2/details: writing JSON output: debug/nodes/2/details.json... -[node 2] requesting data for debug/nodes/2/enginestats... -[node 2] requesting data for debug/nodes/2/enginestats: done -[node 2] requesting data for debug/nodes/2/enginestats: received response... -[node 2] requesting data for debug/nodes/2/enginestats: writing JSON output: debug/nodes/2/enginestats.json... [node 2] requesting data for debug/nodes/2/gossip... [node 2] requesting data for debug/nodes/2/gossip: done [node 2] requesting data for debug/nodes/2/gossip: received response... [node 2] requesting data for debug/nodes/2/gossip: writing JSON output: debug/nodes/2/gossip.json... +[node 2] requesting engine stats... +[node 2] requesting engine stats: done +[node 2] requesting engine stats: received response... +[node 2] requesting engine stats: writing binary output: debug/nodes/2/lsm.txt... [node 2] requesting goroutine dump list... [node 2] requesting goroutine dump list: creating error output: debug/nodes/2/goroutines.err.txt... [node 2] requesting goroutine dump list: done @@ -520,14 +520,14 @@ zip [node 3] requesting data for debug/nodes/3/details: done [node 3] requesting data for debug/nodes/3/details: received response... [node 3] requesting data for debug/nodes/3/details: writing JSON output: debug/nodes/3/details.json... -[node 3] requesting data for debug/nodes/3/enginestats... -[node 3] requesting data for debug/nodes/3/enginestats: done -[node 3] requesting data for debug/nodes/3/enginestats: received response... -[node 3] requesting data for debug/nodes/3/enginestats: writing JSON output: debug/nodes/3/enginestats.json... [node 3] requesting data for debug/nodes/3/gossip... [node 3] requesting data for debug/nodes/3/gossip: done [node 3] requesting data for debug/nodes/3/gossip: received response... [node 3] requesting data for debug/nodes/3/gossip: writing JSON output: debug/nodes/3/gossip.json... +[node 3] requesting engine stats... +[node 3] requesting engine stats: done +[node 3] requesting engine stats: received response... +[node 3] requesting engine stats: writing binary output: debug/nodes/3/lsm.txt... [node 3] requesting goroutine dump list... [node 3] requesting goroutine dump list: creating error output: debug/nodes/3/goroutines.err.txt... [node 3] requesting goroutine dump list: done diff --git a/pkg/cli/testdata/zip/testzip_exclude_goroutine_stacks b/pkg/cli/testdata/zip/testzip_exclude_goroutine_stacks index 1f917cc69ce1..0fdf44914143 100644 --- a/pkg/cli/testdata/zip/testzip_exclude_goroutine_stacks +++ b/pkg/cli/testdata/zip/testzip_exclude_goroutine_stacks @@ -117,9 +117,9 @@ debug zip --concurrency=1 --cpu-profile-duration=1s --include-goroutine-stacks=f [node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done [node 1] requesting data for debug/nodes/1/details... received response... writing JSON output: debug/nodes/1/details.json... done [node 1] requesting data for debug/nodes/1/gossip... received response... writing JSON output: debug/nodes/1/gossip.json... done -[node 1] requesting data for debug/nodes/1/enginestats... received response... writing JSON output: debug/nodes/1/enginestats.json... done [node 1] Skipping fetching goroutine stacks. Enable via the --include-goroutine-stacks flag. [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done +[node 1] requesting engine stats... received response... writing binary output: debug/nodes/1/lsm.txt... done [node 1] requesting heap profile list... received response... done [node ?] ? heap profiles found [node 1] requesting goroutine dump list... received response... done diff --git a/pkg/cli/testdata/zip/testzip_exclude_range_info b/pkg/cli/testdata/zip/testzip_exclude_range_info index 85c15511dee9..5453e2c9230e 100644 --- a/pkg/cli/testdata/zip/testzip_exclude_range_info +++ b/pkg/cli/testdata/zip/testzip_exclude_range_info @@ -113,10 +113,10 @@ debug zip --concurrency=1 --cpu-profile-duration=1s --include-range-info=false / [node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done [node 1] requesting data for debug/nodes/1/details... received response... writing JSON output: debug/nodes/1/details.json... done [node 1] requesting data for debug/nodes/1/gossip... received response... writing JSON output: debug/nodes/1/gossip.json... done -[node 1] requesting data for debug/nodes/1/enginestats... received response... writing JSON output: debug/nodes/1/enginestats.json... done [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting stacks with labels... received response... writing binary output: debug/nodes/1/stacks_with_labels.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done +[node 1] requesting engine stats... received response... writing binary output: debug/nodes/1/lsm.txt... done [node 1] requesting heap profile list... received response... done [node ?] ? heap profiles found [node 1] requesting goroutine dump list... received response... done diff --git a/pkg/cli/testdata/zip/testzip_external_process_virtualization b/pkg/cli/testdata/zip/testzip_external_process_virtualization index 541bcd498eea..4ab199e9edb9 100644 --- a/pkg/cli/testdata/zip/testzip_external_process_virtualization +++ b/pkg/cli/testdata/zip/testzip_external_process_virtualization @@ -137,12 +137,12 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null [node 1] requesting data for debug/nodes/1/gossip... received response... [node 1] requesting data for debug/nodes/1/gossip: last request failed: rpc error: ... [node 1] requesting data for debug/nodes/1/gossip: creating error output: debug/nodes/1/gossip.json.err.txt... done -[node 1] requesting data for debug/nodes/1/enginestats... received response... -[node 1] requesting data for debug/nodes/1/enginestats: last request failed: rpc error: ... -[node 1] requesting data for debug/nodes/1/enginestats: creating error output: debug/nodes/1/enginestats.json.err.txt... done [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting stacks with labels... received response... writing binary output: debug/nodes/1/stacks_with_labels.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done +[node 1] requesting engine stats... received response... +[node 1] requesting engine stats: last request failed: rpc error: ... +[node 1] requesting engine stats: creating error output: debug/nodes/1/lsm.txt.err.txt... done [node 1] requesting heap profile list... received response... done [node ?] ? heap profiles found [node 1] requesting goroutine dump list... received response... done diff --git a/pkg/cli/testdata/zip/testzip_include_goroutine_stacks b/pkg/cli/testdata/zip/testzip_include_goroutine_stacks index 43bc769e4327..badcb676bd19 100644 --- a/pkg/cli/testdata/zip/testzip_include_goroutine_stacks +++ b/pkg/cli/testdata/zip/testzip_include_goroutine_stacks @@ -117,10 +117,10 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null [node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done [node 1] requesting data for debug/nodes/1/details... received response... writing JSON output: debug/nodes/1/details.json... done [node 1] requesting data for debug/nodes/1/gossip... received response... writing JSON output: debug/nodes/1/gossip.json... done -[node 1] requesting data for debug/nodes/1/enginestats... received response... writing JSON output: debug/nodes/1/enginestats.json... done [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting stacks with labels... received response... writing binary output: debug/nodes/1/stacks_with_labels.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done +[node 1] requesting engine stats... received response... writing binary output: debug/nodes/1/lsm.txt... done [node 1] requesting heap profile list... received response... done [node ?] ? heap profiles found [node 1] requesting goroutine dump list... received response... done diff --git a/pkg/cli/testdata/zip/testzip_include_range_info b/pkg/cli/testdata/zip/testzip_include_range_info index d6b78f525294..3539f7b36c48 100644 --- a/pkg/cli/testdata/zip/testzip_include_range_info +++ b/pkg/cli/testdata/zip/testzip_include_range_info @@ -117,10 +117,10 @@ debug zip --concurrency=1 --cpu-profile-duration=1s --include-range-info /dev/nu [node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done [node 1] requesting data for debug/nodes/1/details... received response... writing JSON output: debug/nodes/1/details.json... done [node 1] requesting data for debug/nodes/1/gossip... received response... writing JSON output: debug/nodes/1/gossip.json... done -[node 1] requesting data for debug/nodes/1/enginestats... received response... writing JSON output: debug/nodes/1/enginestats.json... done [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting stacks with labels... received response... writing binary output: debug/nodes/1/stacks_with_labels.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done +[node 1] requesting engine stats... received response... writing binary output: debug/nodes/1/lsm.txt... done [node 1] requesting heap profile list... received response... done [node ?] ? heap profiles found [node 1] requesting goroutine dump list... received response... done diff --git a/pkg/cli/testdata/zip/testzip_redacted b/pkg/cli/testdata/zip/testzip_redacted index 830bbc30e147..dd3c4860f818 100644 --- a/pkg/cli/testdata/zip/testzip_redacted +++ b/pkg/cli/testdata/zip/testzip_redacted @@ -117,10 +117,10 @@ debug zip --concurrency=1 --cpu-profile-duration=1s --redact /dev/null [node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done [node 1] requesting data for debug/nodes/1/details... received response... writing JSON output: debug/nodes/1/details.json... done [node 1] requesting data for debug/nodes/1/gossip... received response... writing JSON output: debug/nodes/1/gossip.json... done -[node 1] requesting data for debug/nodes/1/enginestats... received response... writing JSON output: debug/nodes/1/enginestats.json... done [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting stacks with labels... received response... writing binary output: debug/nodes/1/stacks_with_labels.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done +[node 1] requesting engine stats... received response... writing binary output: debug/nodes/1/lsm.txt... done [node 1] requesting heap profile list... received response... done [node ?] ? heap profiles found [node 1] requesting goroutine dump list... received response... done diff --git a/pkg/cli/testdata/zip/testzip_shared_process_virtualization b/pkg/cli/testdata/zip/testzip_shared_process_virtualization index a6140072310b..189e943ed901 100644 --- a/pkg/cli/testdata/zip/testzip_shared_process_virtualization +++ b/pkg/cli/testdata/zip/testzip_shared_process_virtualization @@ -117,10 +117,10 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null [node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done [node 1] requesting data for debug/nodes/1/details... received response... writing JSON output: debug/nodes/1/details.json... done [node 1] requesting data for debug/nodes/1/gossip... received response... writing JSON output: debug/nodes/1/gossip.json... done -[node 1] requesting data for debug/nodes/1/enginestats... received response... writing JSON output: debug/nodes/1/enginestats.json... done [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting stacks with labels... received response... writing binary output: debug/nodes/1/stacks_with_labels.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done +[node 1] requesting engine stats... received response... writing binary output: debug/nodes/1/lsm.txt... done [node 1] requesting heap profile list... received response... done [node ?] ? heap profiles found [node 1] requesting goroutine dump list... received response... done @@ -268,12 +268,12 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null [node 1] requesting data for debug/cluster/test-tenant/nodes/1/gossip... received response... [node 1] requesting data for debug/cluster/test-tenant/nodes/1/gossip: last request failed: rpc error: ... [node 1] requesting data for debug/cluster/test-tenant/nodes/1/gossip: creating error output: debug/cluster/test-tenant/nodes/1/gossip.json.err.txt... done -[node 1] requesting data for debug/cluster/test-tenant/nodes/1/enginestats... received response... -[node 1] requesting data for debug/cluster/test-tenant/nodes/1/enginestats: last request failed: rpc error: ... -[node 1] requesting data for debug/cluster/test-tenant/nodes/1/enginestats: creating error output: debug/cluster/test-tenant/nodes/1/enginestats.json.err.txt... done [node 1] requesting stacks... received response... writing binary output: debug/cluster/test-tenant/nodes/1/stacks.txt... done [node 1] requesting stacks with labels... received response... writing binary output: debug/cluster/test-tenant/nodes/1/stacks_with_labels.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/cluster/test-tenant/nodes/1/heap.pprof... done +[node 1] requesting engine stats... received response... +[node 1] requesting engine stats: last request failed: rpc error: ... +[node 1] requesting engine stats: creating error output: debug/cluster/test-tenant/nodes/1/lsm.txt.err.txt... done [node 1] requesting heap profile list... received response... [node 1] requesting heap profile list: last request failed: rpc error: ... [node 1] requesting heap profile list: creating error output: debug/cluster/test-tenant/nodes/1/heapprof.err.txt... done diff --git a/pkg/cli/testdata/zip/testzip_shared_process_virtualization_with_default_tenant b/pkg/cli/testdata/zip/testzip_shared_process_virtualization_with_default_tenant index a6140072310b..189e943ed901 100644 --- a/pkg/cli/testdata/zip/testzip_shared_process_virtualization_with_default_tenant +++ b/pkg/cli/testdata/zip/testzip_shared_process_virtualization_with_default_tenant @@ -117,10 +117,10 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null [node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done [node 1] requesting data for debug/nodes/1/details... received response... writing JSON output: debug/nodes/1/details.json... done [node 1] requesting data for debug/nodes/1/gossip... received response... writing JSON output: debug/nodes/1/gossip.json... done -[node 1] requesting data for debug/nodes/1/enginestats... received response... writing JSON output: debug/nodes/1/enginestats.json... done [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting stacks with labels... received response... writing binary output: debug/nodes/1/stacks_with_labels.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done +[node 1] requesting engine stats... received response... writing binary output: debug/nodes/1/lsm.txt... done [node 1] requesting heap profile list... received response... done [node ?] ? heap profiles found [node 1] requesting goroutine dump list... received response... done @@ -268,12 +268,12 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null [node 1] requesting data for debug/cluster/test-tenant/nodes/1/gossip... received response... [node 1] requesting data for debug/cluster/test-tenant/nodes/1/gossip: last request failed: rpc error: ... [node 1] requesting data for debug/cluster/test-tenant/nodes/1/gossip: creating error output: debug/cluster/test-tenant/nodes/1/gossip.json.err.txt... done -[node 1] requesting data for debug/cluster/test-tenant/nodes/1/enginestats... received response... -[node 1] requesting data for debug/cluster/test-tenant/nodes/1/enginestats: last request failed: rpc error: ... -[node 1] requesting data for debug/cluster/test-tenant/nodes/1/enginestats: creating error output: debug/cluster/test-tenant/nodes/1/enginestats.json.err.txt... done [node 1] requesting stacks... received response... writing binary output: debug/cluster/test-tenant/nodes/1/stacks.txt... done [node 1] requesting stacks with labels... received response... writing binary output: debug/cluster/test-tenant/nodes/1/stacks_with_labels.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/cluster/test-tenant/nodes/1/heap.pprof... done +[node 1] requesting engine stats... received response... +[node 1] requesting engine stats: last request failed: rpc error: ... +[node 1] requesting engine stats: creating error output: debug/cluster/test-tenant/nodes/1/lsm.txt.err.txt... done [node 1] requesting heap profile list... received response... [node 1] requesting heap profile list: last request failed: rpc error: ... [node 1] requesting heap profile list: creating error output: debug/cluster/test-tenant/nodes/1/heapprof.err.txt... done diff --git a/pkg/cli/zip_per_node.go b/pkg/cli/zip_per_node.go index d58ecf2fc5ec..fdb72d2fb776 100644 --- a/pkg/cli/zip_per_node.go +++ b/pkg/cli/zip_per_node.go @@ -26,6 +26,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/rpc" + "github.com/cockroachdb/cockroach/pkg/server/debug" "github.com/cockroachdb/cockroach/pkg/server/serverpb" "github.com/cockroachdb/cockroach/pkg/server/status/statuspb" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -54,12 +55,6 @@ func makePerNodeZipRequests(prefix, id string, status serverpb.StatusClient) []z }, pathName: prefix + "/gossip", }, - { - fn: func(ctx context.Context) (interface{}, error) { - return status.EngineStats(ctx, &serverpb.EngineStatsRequest{NodeId: id}) - }, - pathName: prefix + "/enginestats", - }, } } @@ -378,6 +373,21 @@ func (zc *debugZipContext) collectPerNodeData( return err } + // Collect storage engine metrics using the same format as the /debug/lsm route. + var lsmStats string + s = nodePrinter.start("requesting engine stats") + requestErr = zc.runZipFn(ctx, s, + func(ctx context.Context) error { + resp, err := zc.status.EngineStats(ctx, &serverpb.EngineStatsRequest{NodeId: id}) + if err == nil { + lsmStats = debug.FormatLSMStats(resp.StatsByStoreId) + } + return err + }) + if err := zc.z.createRawOrError(s, prefix+"/lsm.txt", []byte(lsmStats), requestErr); err != nil { + return err + } + // Collect all relevant heap profiles. if err := zc.collectFileList(ctx, nodePrinter, id, prefix, serverpb.FileType_HEAP); err != nil { return err diff --git a/pkg/server/debug/BUILD.bazel b/pkg/server/debug/BUILD.bazel index f071957b01f4..491e39ba3c2c 100644 --- a/pkg/server/debug/BUILD.bazel +++ b/pkg/server/debug/BUILD.bazel @@ -16,7 +16,6 @@ go_library( "//pkg/base/serverident", "//pkg/kv/kvserver", "//pkg/kv/kvserver/closedts/sidetransport", - "//pkg/kv/kvserver/kvstorage", "//pkg/multitenant/tenantcapabilities", "//pkg/roachpb", "//pkg/server/debug/goroutineui", diff --git a/pkg/server/debug/server.go b/pkg/server/debug/server.go index 9bfd31b06d10..76c583783fe3 100644 --- a/pkg/server/debug/server.go +++ b/pkg/server/debug/server.go @@ -11,19 +11,18 @@ package debug import ( - "context" "expvar" "fmt" "io" "net/http" "net/http/pprof" "strconv" + "strings" "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/base/serverident" "github.com/cockroachdb/cockroach/pkg/kv/kvserver" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/sidetransport" - "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvstorage" "github.com/cockroachdb/cockroach/pkg/multitenant/tenantcapabilities" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/server/debug/goroutineui" @@ -221,6 +220,30 @@ func (ds *Server) RegisterWorkloadCollector(stores *kvserver.Stores) error { return nil } +// GetLSMStats creates a mapping between store IDs and LSM stats for all of the +// provided storage engines. +func GetLSMStats(engines []storage.Engine) (map[roachpb.StoreID]string, error) { + stats := make(map[roachpb.StoreID]string) + for _, eng := range engines { + storeID, err := eng.GetStoreID() + if err != nil { + return nil, err + } + stats[roachpb.StoreID(storeID)] = eng.GetMetrics().String() + } + + return stats, nil +} + +// FormatLSMStats combines LSM stats from multiple stores into a single string. +func FormatLSMStats(stats map[roachpb.StoreID]string) string { + var sb strings.Builder + for storeID, stat := range stats { + sb.WriteString(fmt.Sprintf("Store %d:\n%s\n\n", storeID, stat)) + } + return sb.String() +} + // RegisterEngines setups up debug engine endpoints for the known storage engines. func (ds *Server) RegisterEngines(specs []base.StoreSpec, engines []storage.Engine) error { if len(specs) != len(engines) { @@ -228,21 +251,12 @@ func (ds *Server) RegisterEngines(specs []base.StoreSpec, engines []storage.Engi return errors.New("number of store specs must match number of engines") } - storeIDs := make([]roachpb.StoreIdent, len(engines)) - for i := range engines { - id, err := kvstorage.ReadStoreIdent(context.Background(), engines[i]) - if err != nil { - return err - } - storeIDs[i] = id - } - ds.mux.HandleFunc("/debug/lsm", func(w http.ResponseWriter, req *http.Request) { - for i := range engines { - fmt.Fprintf(w, "Store %d:\n", storeIDs[i].StoreID) - _, _ = io.WriteString(w, engines[i].GetMetrics().String()) - fmt.Fprintln(w) + stats, err := GetLSMStats(engines) + if err != nil { + fmt.Fprintf(w, "error retrieving LSM stats: %v", err) } + fmt.Fprint(w, FormatLSMStats(stats)) }) for i := 0; i < len(specs); i++ { @@ -251,8 +265,13 @@ func (ds *Server) RegisterEngines(specs []base.StoreSpec, engines []storage.Engi continue } + storeID, err := engines[i].GetStoreID() + if err != nil { + return err + } + dir := specs[i].Path - ds.mux.HandleFunc(fmt.Sprintf("/debug/lsm-viz/%d", storeIDs[i].StoreID), + ds.mux.HandleFunc(fmt.Sprintf("/debug/lsm-viz/%d", storeID), func(w http.ResponseWriter, req *http.Request) { if err := analyzeLSM(dir, w); err != nil { fmt.Fprintf(w, "error analyzing LSM at %s: %v", dir, err) diff --git a/pkg/server/server.go b/pkg/server/server.go index 4ece740c6a23..725372a4356f 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -1006,6 +1006,7 @@ func NewServer(cfg Config, stopper *stop.Stopper) (serverctl.ServerStartupInterf storePool, rpcContext, node.stores, + &engines, stopper, sessionRegistry, closedSessionCache, diff --git a/pkg/server/serverpb/status.proto b/pkg/server/serverpb/status.proto index 27cd1a42ccd0..7cddd3f1e269 100644 --- a/pkg/server/serverpb/status.proto +++ b/pkg/server/serverpb/status.proto @@ -599,16 +599,6 @@ message GossipRequest { bool redact = 2; } -message EngineStatsInfo { - int32 store_id = 1 [ - (gogoproto.customname) = "StoreID", - (gogoproto.casttype) = - "github.com/cockroachdb/cockroach/pkg/roachpb.StoreID" - ]; - cockroach.storage.enginepb.TickersAndHistograms tickers_and_histograms = 2; - cockroach.storage.enginepb.EngineType engine_type = 3; -} - message EngineStatsRequest { // node_id is a string so that "local" can be used to specify that no // forwarding is necessary. @@ -616,7 +606,11 @@ message EngineStatsRequest { } message EngineStatsResponse { - repeated EngineStatsInfo stats = 1 [ (gogoproto.nullable) = false ]; + // maps store IDs to pretty-printed stats about the store's LSM. + map stats_by_store_id = 2 [ + (gogoproto.castkey) = "github.com/cockroachdb/cockroach/pkg/roachpb.StoreID", + (gogoproto.nullable) = false + ]; } message DownloadSpanRequest { diff --git a/pkg/server/status.go b/pkg/server/status.go index 2bd59cbe8ae7..7370fad95018 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -52,6 +52,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/security/username" "github.com/cockroachdb/cockroach/pkg/server/apiconstants" "github.com/cockroachdb/cockroach/pkg/server/authserver" + "github.com/cockroachdb/cockroach/pkg/server/debug" "github.com/cockroachdb/cockroach/pkg/server/diagnostics/diagnosticspb" "github.com/cockroachdb/cockroach/pkg/server/privchecker" "github.com/cockroachdb/cockroach/pkg/server/serverpb" @@ -505,6 +506,7 @@ type systemStatusServer struct { gossip *gossip.Gossip storePool *storepool.StorePool stores *kvserver.Stores + engines *Engines nodeLiveness *liveness.NodeLiveness spanConfigReporter spanconfig.Reporter rangeStatsFetcher *rangestats.Fetcher @@ -622,6 +624,7 @@ func newSystemStatusServer( storePool *storepool.StorePool, rpcCtx *rpc.Context, stores *kvserver.Stores, + engines *Engines, stopper *stop.Stopper, sessionRegistry *sql.SessionRegistry, closedSessionCache *sql.ClosedSessionCache, @@ -657,6 +660,7 @@ func newSystemStatusServer( gossip: gossip, storePool: storePool, stores: stores, + engines: engines, nodeLiveness: nodeLiveness, spanConfigReporter: spanConfigReporter, rangeStatsFetcher: rangeStatsFetcher, @@ -786,21 +790,14 @@ func (s *systemStatusServer) EngineStats( return status.EngineStats(ctx, req) } - resp := new(serverpb.EngineStatsResponse) - err = s.stores.VisitStores(func(store *kvserver.Store) error { - engineStatsInfo := serverpb.EngineStatsInfo{ - StoreID: store.Ident.StoreID, - TickersAndHistograms: nil, - EngineType: store.TODOEngine().Type(), - } - - resp.Stats = append(resp.Stats, engineStatsInfo) - return nil - }) + stats, err := debug.GetLSMStats(*s.engines) if err != nil { return nil, srverrors.ServerError(ctx, err) } - return resp, nil + + return &serverpb.EngineStatsResponse{ + StatsByStoreId: stats, + }, nil } // Allocator returns simulated allocator info for the ranges on the given node. diff --git a/pkg/server/storage_api/BUILD.bazel b/pkg/server/storage_api/BUILD.bazel index 504939bd3581..fab6a613820f 100644 --- a/pkg/server/storage_api/BUILD.bazel +++ b/pkg/server/storage_api/BUILD.bazel @@ -48,11 +48,11 @@ go_test( "//pkg/security/username", "//pkg/server", "//pkg/server/apiconstants", + "//pkg/server/debug", "//pkg/server/decommissioning", "//pkg/server/serverpb", "//pkg/server/srvtestutils", "//pkg/server/status/statuspb", - "//pkg/storage/enginepb", "//pkg/testutils", "//pkg/testutils/serverutils", "//pkg/testutils/skip", diff --git a/pkg/server/storage_api/engine_test.go b/pkg/server/storage_api/engine_test.go index 47ccc88619b9..df70a9104026 100644 --- a/pkg/server/storage_api/engine_test.go +++ b/pkg/server/storage_api/engine_test.go @@ -12,12 +12,13 @@ package storage_api_test import ( "context" + "regexp" "testing" "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/server/debug" "github.com/cockroachdb/cockroach/pkg/server/serverpb" "github.com/cockroachdb/cockroach/pkg/server/srvtestutils" - "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/util/leaktest" @@ -55,41 +56,9 @@ func TestStatusEngineStatsJson(t *testing.T) { return srvtestutils.GetStatusJSONProto(s, "enginestats/local", &engineStats) }) - if len(engineStats.Stats) != 1 { - t.Fatal(errors.Errorf("expected one engine stats, got: %v", engineStats)) - } - - if engineStats.Stats[0].EngineType == enginepb.EngineTypePebble || - engineStats.Stats[0].EngineType == enginepb.EngineTypeDefault { - // Pebble does not have RocksDB style TickersAnd Histogram. - return - } - - tickers := engineStats.Stats[0].TickersAndHistograms.Tickers - if len(tickers) == 0 { - t.Fatal(errors.Errorf("expected non-empty tickers list, got: %v", tickers)) - } - allTickersZero := true - for _, ticker := range tickers { - if ticker != 0 { - allTickersZero = false - } - } - if allTickersZero { - t.Fatal(errors.Errorf("expected some tickers nonzero, got: %v", tickers)) - } - - histograms := engineStats.Stats[0].TickersAndHistograms.Histograms - if len(histograms) == 0 { - t.Fatal(errors.Errorf("expected non-empty histograms list, got: %v", histograms)) - } - allHistogramsZero := true - for _, histogram := range histograms { - if histogram.Max == 0 { - allHistogramsZero = false - } - } - if allHistogramsZero { - t.Fatal(errors.Errorf("expected some histograms nonzero, got: %v", histograms)) + formattedStats := debug.FormatLSMStats(engineStats.StatsByStoreId) + re := regexp.MustCompile(`^(Store \d+:(.|\n)+?)+$`) + if !re.MatchString(formattedStats) { + t.Fatal(errors.Errorf("expected engine metrics to be correctly formatted, got:\n %s", formattedStats)) } }