Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cortex_ingester_tsdb_data_replay_duration_seconds metric #5477

Merged
merged 2 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## master / unreleased
* [ENHANCEMENT] Store Gateway: Added `-store-gateway.enabled-tenants` and `-store-gateway.disabled-tenants` to explicitly enable or disable store-gateway for specific tenants. #5638
* [FEATURE] Ingester: Add new per-tenant metric `cortex_ingester_tsdb_data_replay_duration_seconds`. #5477

## 1.16.0 in progress

Expand Down
7 changes: 7 additions & 0 deletions pkg/ingester/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ type tsdbMetrics struct {
tsdbSnapshotReplayErrorTotal *prometheus.Desc
tsdbOOOHistogram *prometheus.Desc
tsdbMmapChunksTotal *prometheus.Desc
tsdbDataTotalReplayDuration *prometheus.Desc

tsdbExemplarsTotal *prometheus.Desc
tsdbExemplarsInStorage *prometheus.Desc
Expand Down Expand Up @@ -394,6 +395,10 @@ func newTSDBMetrics(r prometheus.Registerer) *tsdbMetrics {
"cortex_ingester_tsdb_chunk_write_queue_operations_total",
"Number of currently tsdb chunk write queues.",
[]string{"user", "operation"}, nil),
tsdbDataTotalReplayDuration: prometheus.NewDesc(
"cortex_ingester_tsdb_data_replay_duration_seconds",
"Time taken to replay the tsdb data on disk.",
[]string{"user"}, nil),
tsdbLoadedBlocks: prometheus.NewDesc(
"cortex_ingester_tsdb_blocks_loaded",
"Number of currently loaded data blocks",
Expand Down Expand Up @@ -516,6 +521,7 @@ func (sm *tsdbMetrics) Describe(out chan<- *prometheus.Desc) {
out <- sm.tsdbChunksRemovedTotal
out <- sm.tsdbMmapChunkCorruptionTotal
out <- sm.tsdbChunkwriteQueueOperationsTotal
out <- sm.tsdbDataTotalReplayDuration
out <- sm.tsdbLoadedBlocks
out <- sm.tsdbSymbolTableSize
out <- sm.tsdbReloads
Expand Down Expand Up @@ -571,6 +577,7 @@ func (sm *tsdbMetrics) Collect(out chan<- prometheus.Metric) {
data.SendSumOfCountersPerUser(out, sm.tsdbChunksRemovedTotal, "prometheus_tsdb_head_chunks_removed_total")
data.SendSumOfCounters(out, sm.tsdbMmapChunkCorruptionTotal, "prometheus_tsdb_mmap_chunk_corruptions_total")
data.SendSumOfCountersPerUserWithLabels(out, sm.tsdbChunkwriteQueueOperationsTotal, "prometheus_tsdb_chunk_write_queue_operations_total", "operation")
data.SendSumOfGaugesPerUser(out, sm.tsdbDataTotalReplayDuration, "prometheus_tsdb_data_replay_duration_seconds")
data.SendSumOfGauges(out, sm.tsdbLoadedBlocks, "prometheus_tsdb_blocks_loaded")
data.SendSumOfGaugesPerUser(out, sm.tsdbSymbolTableSize, "prometheus_tsdb_symbol_table_size_bytes")
data.SendSumOfCounters(out, sm.tsdbReloads, "prometheus_tsdb_reloads_total")
Expand Down
21 changes: 19 additions & 2 deletions pkg/ingester/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,10 +227,16 @@ func TestTSDBMetrics(t *testing.T) {
cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds{user="user2"} 1234
cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds{user="user3"} 1234

# HELP cortex_ingester_tsdb_data_replay_duration_seconds Time taken to replay the tsdb data on disk.
# TYPE cortex_ingester_tsdb_data_replay_duration_seconds gauge
cortex_ingester_tsdb_data_replay_duration_seconds{user="user1"} 12345
cortex_ingester_tsdb_data_replay_duration_seconds{user="user2"} 12345
cortex_ingester_tsdb_data_replay_duration_seconds{user="user3"} 12345

# HELP cortex_ingester_tsdb_exemplar_out_of_order_exemplars_total Total number of out of order exemplar ingestion failed attempts.
# TYPE cortex_ingester_tsdb_exemplar_out_of_order_exemplars_total counter
cortex_ingester_tsdb_exemplar_out_of_order_exemplars_total 9

# HELP cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage Number of TSDB series with exemplars currently in storage.
# TYPE cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage gauge
cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{user="user1"} 1
Expand Down Expand Up @@ -458,10 +464,15 @@ func TestTSDBMetricsWithRemoval(t *testing.T) {
cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds{user="user1"} 1234
cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds{user="user2"} 1234

# HELP cortex_ingester_tsdb_data_replay_duration_seconds Time taken to replay the tsdb data on disk.
# TYPE cortex_ingester_tsdb_data_replay_duration_seconds gauge
cortex_ingester_tsdb_data_replay_duration_seconds{user="user1"} 12345
cortex_ingester_tsdb_data_replay_duration_seconds{user="user2"} 12345

# HELP cortex_ingester_tsdb_exemplar_out_of_order_exemplars_total Total number of out of order exemplar ingestion failed attempts.
# TYPE cortex_ingester_tsdb_exemplar_out_of_order_exemplars_total counter
cortex_ingester_tsdb_exemplar_out_of_order_exemplars_total 9

# HELP cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage Number of TSDB series with exemplars currently in storage.
# TYPE cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage gauge
cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{user="user1"} 1
Expand Down Expand Up @@ -696,6 +707,12 @@ func populateTSDBMetrics(base float64) *prometheus.Registry {
})
mmapChunksTotal.Add(104)

dataTotalReplayDuration := promauto.With(r).NewGauge(prometheus.GaugeOpts{
Name: "prometheus_tsdb_data_replay_duration_seconds",
Help: "Time taken to replay the data on disk.",
})
dataTotalReplayDuration.Set(12345)

loadedBlocks := promauto.With(r).NewGauge(prometheus.GaugeOpts{
Name: "prometheus_tsdb_blocks_loaded",
Help: "Number of currently loaded data blocks",
Expand Down