Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TSDB metrics update, metadata cache metrics #85

Merged
merged 3 commits into from
Jun 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cortex-mixin/alerts/blocks.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@
alert: 'CortexStoreGatewayHasNotSyncTheBucket',
'for': '5m',
expr: |||
(time() - cortex_storegateway_blocks_last_successful_sync_timestamp_seconds{%s} > 60 * 30)
(time() - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway",%s} > 60 * 30)
and
cortex_storegateway_blocks_last_successful_sync_timestamp_seconds{%s} > 0
cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway",%s} > 0
||| % [$.namespace_matcher(''), $.namespace_matcher('')],
labels: {
severity: 'critical',
Expand Down
31 changes: 29 additions & 2 deletions cortex-mixin/dashboards/dashboard-utils.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
{ yaxes: $.yaxes('percentunit') },
)
.addPanel(
$.panel('Op: ObjectSize') +
$.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', '{%s,component="%s",operation="objectsize"}' % [$.namespaceMatcher(), component]),
$.panel('Op: Attributes') +
$.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', '{%s,component="%s",operation="attributes"}' % [$.namespaceMatcher(), component]),
)
.addPanel(
$.panel('Op: Exists') +
Expand All @@ -202,4 +202,31 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panel('Op: Delete') +
$.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', '{%s,component="%s",operation="delete"}' % [$.namespaceMatcher(), component]),
),

thanosMemcachedCache(title, jobName, component, cacheName)::
super.row(title)
.addPanel(
$.panel('QPS') +
$.queryPanel('sum by(operation) (rate(thanos_memcached_operations_total{%s,component="%s",name="%s"}[$__interval]))' % [$.jobMatcher(jobName), component, cacheName], '{{operation}}') +
$.stack +
{ yaxes: $.yaxes('ops') },
)
.addPanel(
$.panel('Latency (getmulti)') +
$.latencyPanel('thanos_memcached_operation_duration_seconds', '{%s,operation="getmulti",component="%s",name="%s"}' % [$.jobMatcher(jobName), component, cacheName])
)
.addPanel(
$.panel('Hit ratio') +
$.queryPanel('sum(rate(thanos_cache_memcached_hits_total{%s,component="%s",name="%s"}[$__interval])) / sum(rate(thanos_cache_memcached_requests_total{%s,component="%s",name="%s"}[$__interval]))' %
[
$.jobMatcher(jobName),
component,
cacheName,
$.jobMatcher(jobName),
component,
cacheName,
], 'items') +
{ yaxes: $.yaxes('percentunit') },
),

}
4 changes: 2 additions & 2 deletions cortex-mixin/dashboards/object-store.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
.addRow(
$.row('')
.addPanel(
$.panel('Op: ObjectSize') +
$.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', '{%s,operation="objectsize"}' % $.namespaceMatcher()),
$.panel('Op: Attributes') +
$.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', '{%s,operation="attributes"}' % $.namespaceMatcher()),
)
.addPanel(
$.panel('Op: Upload') +
Expand Down
22 changes: 11 additions & 11 deletions cortex-mixin/dashboards/queries.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -129,18 +129,18 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('Store-gateway - Blocks')
.addPanel(
$.panel('Blocks queried / sec') +
$.queryPanel('sum(rate(cortex_storegateway_bucket_store_series_blocks_queried_sum{%s}[$__interval]))' % $.jobMatcher($._config.job_names.store_gateway), 'blocks') +
$.queryPanel('sum(rate(cortex_bucket_store_series_blocks_queried_sum{component="store-gateway",%s}[$__interval]))' % $.jobMatcher($._config.job_names.store_gateway), 'blocks') +
{ yaxes: $.yaxes('ops') },
)
.addPanel(
$.panel('Data fetched / sec') +
$.queryPanel('sum by(data_type) (rate(cortex_storegateway_bucket_store_series_data_fetched_sum{%s}[$__interval]))' % $.jobMatcher($._config.job_names.store_gateway), '{{data_type}}') +
$.queryPanel('sum by(data_type) (rate(cortex_bucket_store_series_data_fetched_sum{component="store-gateway",%s}[$__interval]))' % $.jobMatcher($._config.job_names.store_gateway), '{{data_type}}') +
$.stack +
{ yaxes: $.yaxes('ops') },
)
.addPanel(
$.panel('Data touched / sec') +
$.queryPanel('sum by(data_type) (rate(cortex_storegateway_bucket_store_series_data_touched_sum{%s}[$__interval]))' % $.jobMatcher($._config.job_names.store_gateway), '{{data_type}}') +
$.queryPanel('sum by(data_type) (rate(cortex_bucket_store_series_data_touched_sum{component="store-gateway",%s}[$__interval]))' % $.jobMatcher($._config.job_names.store_gateway), '{{data_type}}') +
$.stack +
{ yaxes: $.yaxes('ops') },
)
Expand All @@ -150,36 +150,36 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('')
.addPanel(
$.panel('Series fetch duration (per request)') +
$.latencyPanel('cortex_storegateway_bucket_store_series_get_all_duration_seconds', '{%s}' % $.jobMatcher($._config.job_names.store_gateway)),
$.latencyPanel('cortex_bucket_store_series_get_all_duration_seconds', '{component="store-gateway",%s}' % $.jobMatcher($._config.job_names.store_gateway)),
)
.addPanel(
$.panel('Series merge duration (per request)') +
$.latencyPanel('cortex_storegateway_bucket_store_series_merge_duration_seconds', '{%s}' % $.jobMatcher($._config.job_names.store_gateway)),
$.latencyPanel('cortex_bucket_store_series_merge_duration_seconds', '{component="store-gateway",%s}' % $.jobMatcher($._config.job_names.store_gateway)),
)
.addPanel(
$.panel('Series returned (per request)') +
$.queryPanel('sum(rate(cortex_storegateway_bucket_store_series_result_series_sum{%s}[$__interval])) / sum(rate(cortex_storegateway_bucket_store_series_result_series_count{%s}[$__interval]))' % [$.jobMatcher($._config.job_names.store_gateway), $.jobMatcher($._config.job_names.store_gateway)], 'avg series returned'),
$.queryPanel('sum(rate(cortex_bucket_store_series_result_series_sum{component="store-gateway",%s}[$__interval])) / sum(rate(cortex_bucket_store_series_result_series_count{component="store-gateway",%s}[$__interval]))' % [$.jobMatcher($._config.job_names.store_gateway), $.jobMatcher($._config.job_names.store_gateway)], 'avg series returned'),
)
)
.addRowIf(
std.setMember('tsdb', $._config.storage_engine),
$.row('')
.addPanel(
$.panel('Blocks currently loaded') +
$.queryPanel('cortex_storegateway_bucket_store_blocks_loaded{%s}' % $.jobMatcher($._config.job_names.store_gateway), '{{instance}}')
$.queryPanel('cortex_bucket_store_blocks_loaded{component="store-gateway",%s}' % $.jobMatcher($._config.job_names.store_gateway), '{{instance}}')
)
.addPanel(
$.successFailurePanel(
'Blocks loaded / sec',
'sum(rate(cortex_storegateway_bucket_store_block_loads_total{%s}[$__interval])) - sum(rate(cortex_storegateway_bucket_store_block_load_failures_total{%s}[$__interval]))' % [$.jobMatcher($._config.job_names.store_gateway), $.jobMatcher($._config.job_names.store_gateway)],
'sum(rate(cortex_storegateway_bucket_store_block_load_failures_total{%s}[$__interval]))' % $.jobMatcher($._config.job_names.store_gateway),
'sum(rate(cortex_bucket_store_block_loads_total{component="store-gateway",%s}[$__interval])) - sum(rate(cortex_bucket_store_block_load_failures_total{component="store-gateway",%s}[$__interval]))' % [$.jobMatcher($._config.job_names.store_gateway), $.jobMatcher($._config.job_names.store_gateway)],
'sum(rate(cortex_bucket_store_block_load_failures_total{component="store-gateway",%s}[$__interval]))' % $.jobMatcher($._config.job_names.store_gateway),
)
)
.addPanel(
$.successFailurePanel(
'Blocks dropped / sec',
'sum(rate(cortex_storegateway_bucket_store_block_drops_total{%s}[$__interval])) - sum(rate(cortex_storegateway_bucket_store_block_drop_failures_total{%s}[$__interval]))' % [$.jobMatcher($._config.job_names.store_gateway), $.jobMatcher($._config.job_names.store_gateway)],
'sum(rate(cortex_storegateway_bucket_store_block_drop_failures_total{%s}[$__interval]))' % $.jobMatcher($._config.job_names.store_gateway),
'sum(rate(cortex_bucket_store_block_drops_total{component="store-gateway",%s}[$__interval])) - sum(rate(cortex_bucket_store_block_drop_failures_total{component="store-gateway",%s}[$__interval]))' % [$.jobMatcher($._config.job_names.store_gateway), $.jobMatcher($._config.job_names.store_gateway)],
'sum(rate(cortex_bucket_store_block_drop_failures_total{component="store-gateway",%s}[$__interval]))' % $.jobMatcher($._config.job_names.store_gateway),
)
)
),
Expand Down
33 changes: 13 additions & 20 deletions cortex-mixin/dashboards/reads.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -97,41 +97,34 @@ local utils = import 'mixin-utils/utils.libsonnet';
)
.addRowIf(
std.setMember('tsdb', $._config.storage_engine),
$.row('Memcached - Blocks Storage - Index header')
$.row('Memcached Blocks Storage Index header (Store-gateway)')
.addPanel(
$.panel('QPS') +
$.queryPanel('sum by(operation) (rate(cortex_storegateway_blocks_index_cache_memcached_operation_duration_seconds_count{%s}[$__interval]))' % $.jobMatcher($._config.job_names.store_gateway), '{{operation}}') +
$.queryPanel('sum by(operation) (rate(thanos_memcached_operations_total{component="store-gateway",name="index-cache", %s}[$__interval]))' % $.jobMatcher($._config.job_names.store_gateway), '{{operation}}') +
$.stack +
{ yaxes: $.yaxes('ops') },
)
.addPanel(
$.panel('Latency (getmulti)') +
$.latencyPanel('cortex_storegateway_blocks_index_cache_memcached_operation_duration_seconds', '{%s,operation="getmulti"}' % $.jobMatcher($._config.job_names.store_gateway))
$.latencyPanel('thanos_memcached_operation_duration_seconds', '{%s,operation="getmulti",component="store-gateway",name="index-cache"}' % $.jobMatcher($._config.job_names.store_gateway))
)
.addPanel(
$.panel('Hit ratio') +
$.queryPanel('sum by(item_type) (rate(cortex_storegateway_blocks_index_cache_hits_total{%s}[$__interval])) / sum by(item_type) (rate(cortex_storegateway_blocks_index_cache_requests_total{%s}[$__interval]))' % [$.jobMatcher($._config.job_names.store_gateway), $.jobMatcher($._config.job_names.store_gateway)], '{{item_type}}') +
$.queryPanel('sum by(item_type) (rate(thanos_store_index_cache_hits_total{component="store-gateway",%s}[$__interval])) / sum by(item_type) (rate(thanos_store_index_cache_requests_total{component="store-gateway",%s}[$__interval]))' % [$.jobMatcher($._config.job_names.store_gateway), $.jobMatcher($._config.job_names.store_gateway)], '{{item_type}}') +
{ yaxes: $.yaxes('percentunit') },
)
)
.addRowIf(
std.setMember('tsdb', $._config.storage_engine),
$.row('Memcached - Blocks Storage - Chunks')
.addPanel(
$.panel('QPS') +
$.queryPanel('sum by(operation) (rate(cortex_storegateway_thanos_memcached_operations_total{%s,name="chunks-cache"}[$__interval]))' % $.jobMatcher($._config.job_names.store_gateway), '{{operation}}') +
$.stack +
{ yaxes: $.yaxes('ops') },
)
.addPanel(
$.panel('Latency (getmulti)') +
$.latencyPanel('cortex_storegateway_thanos_memcached_operation_duration_seconds', '{%s,operation="getmulti",name="chunks-cache"}' % $.jobMatcher($._config.job_names.store_gateway))
)
.addPanel(
$.panel('Hit ratio') +
$.queryPanel('sum(rate(cortex_storegateway_thanos_cache_memcached_hits_total{%s,name="chunks-cache"}[$__interval])) / sum(rate(cortex_storegateway_thanos_cache_memcached_requests_total{%s,name="chunks-cache"}[$__interval]))' % [$.jobMatcher($._config.job_names.store_gateway), $.jobMatcher($._config.job_names.store_gateway)], 'chunks') +
{ yaxes: $.yaxes('percentunit') },
)
$.thanosMemcachedCache('Memcached – Blocks Storage – Chunks (Store-gateway)', $._config.job_names.store_gateway, 'store-gateway', 'chunks-cache')
)
.addRowIf(
std.setMember('tsdb', $._config.storage_engine),
$.thanosMemcachedCache('Memcached – Blocks Storage – Metadada (Store-gateway)', $._config.job_names.store_gateway, 'store-gateway', 'metadata-cache')
)
.addRowIf(
std.setMember('tsdb', $._config.storage_engine),
$.thanosMemcachedCache('Memcached – Blocks Storage – Metadada (Querier)', $._config.job_names.querier, 'querier', 'metadata-cache')
)
.addRowIf(
std.setMember('chunks', $._config.storage_engine) &&
Expand Down