Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't prefix Thanos index-cache metrics #2627

Merged
merged 10 commits into from
Jun 1, 2020
40 changes: 40 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,46 @@

* [CHANGE] Query Frontend now uses Round Robin to choose a tenant queue to service next. #2553
* [CHANGE] `-promql.lookback-delta` is now deprecated and has been replaced by `-querier.lookback-delta` along with `lookback_delta` entry under `querier` in the config file. `-promql.lookback-delta` will be removed in v1.4.0. #2604
* [CHANGE] Experimental TSDB: Renamed index-cache metrics to use original metric names from Thanos, as Cortex is not aggregating them in any way: #2627
pstibrany marked this conversation as resolved.
Show resolved Hide resolved
* `cortex_<service>_blocks_index_cache_items_evicted_total` => `thanos_store_index_cache_items_evicted_total{name="index-cache"}`
* `cortex_<service>_blocks_index_cache_items_added_total` => `thanos_store_index_cache_items_added_total{name="index-cache"}`
* `cortex_<service>_blocks_index_cache_requests_total` => `thanos_store_index_cache_requests_total{name="index-cache"}`
* `cortex_<service>_blocks_index_cache_items_overflowed_total` => `thanos_store_index_cache_items_overflowed_total{name="index-cache"}`
* `cortex_<service>_blocks_index_cache_hits_total` => `thanos_store_index_cache_hits_total{name="index-cache"}`
* `cortex_<service>_blocks_index_cache_items` => `thanos_store_index_cache_items{name="index-cache"}`
* `cortex_<service>_blocks_index_cache_items_size_bytes` => `thanos_store_index_cache_items_size_bytes{name="index-cache"}`
* `cortex_<service>_blocks_index_cache_total_size_bytes` => `thanos_store_index_cache_total_size_bytes{name="index-cache"}`
* `cortex_<service>_blocks_index_cache_memcached_operations_total` => `thanos_memcached_operations_total{name="index-cache"}`
* `cortex_<service>_blocks_index_cache_memcached_operation_failures_total` => `thanos_memcached_operation_failures_total{name="index-cache"}`
* `cortex_<service>_blocks_index_cache_memcached_operation_duration_seconds` => `thanos_memcached_operation_duration_seconds{name="index-cache"}`
* `cortex_<service>_blocks_index_cache_memcached_operation_skipped_total` => `thanos_memcached_operation_skipped_total{name="index-cache"}`
* [CHANGE] Experimental TSDB: Renamed metrics in bucket stores: #2627
* `cortex_<service>_blocks_meta_syncs_total` => `cortex_blocks_meta_syncs_total{component="<service>"}`
* `cortex_<service>_blocks_meta_sync_failures_total` => `cortex_blocks_meta_sync_failures_total{component="<service>"}`
* `cortex_<service>_blocks_meta_sync_duration_seconds` => `cortex_blocks_meta_sync_duration_seconds{component="<service>"}`
* `cortex_<service>_blocks_meta_sync_consistency_delay_seconds` => `cortex_blocks_meta_sync_consistency_delay_seconds{component="<service>"}`
* `cortex_<service>_blocks_meta_synced` => `cortex_blocks_meta_synced{component="<service>"}`
* `cortex_<service>_bucket_store_block_loads_total` => `cortex_bucket_store_block_loads_total{component="<service>"}`
* `cortex_<service>_bucket_store_block_load_failures_total` => `cortex_bucket_store_block_load_failures_total{component="<service>"}`
* `cortex_<service>_bucket_store_block_drops_total` => `cortex_bucket_store_block_drops_total{component="<service>"}`
* `cortex_<service>_bucket_store_block_drop_failures_total` => `cortex_bucket_store_block_drop_failures_total{component="<service>"}`
* `cortex_<service>_bucket_store_blocks_loaded` => `cortex_bucket_store_blocks_loaded{component="<service>"}`
* `cortex_<service>_bucket_store_series_data_touched` => `cortex_bucket_store_series_data_touched{component="<service>"}`
* `cortex_<service>_bucket_store_series_data_fetched` => `cortex_bucket_store_series_data_fetched{component="<service>"}`
* `cortex_<service>_bucket_store_series_data_size_touched_bytes` => `cortex_bucket_store_series_data_size_touched_bytes{component="<service>"}`
* `cortex_<service>_bucket_store_series_data_size_fetched_bytes` => `cortex_bucket_store_series_data_size_fetched_bytes{component="<service>"}`
* `cortex_<service>_bucket_store_series_blocks_queried` => `cortex_bucket_store_series_blocks_queried{component="<service>"}`
* `cortex_<service>_bucket_store_series_get_all_duration_seconds` => `cortex_bucket_store_series_get_all_duration_seconds{component="<service>"}`
* `cortex_<service>_bucket_store_series_merge_duration_seconds` => `cortex_bucket_store_series_merge_duration_seconds{component="<service>"}`
* `cortex_<service>_bucket_store_series_refetches_total` => `cortex_bucket_store_series_refetches_total{component="<service>"}`
* `cortex_<service>_bucket_store_series_result_series` => `cortex_bucket_store_series_result_series{component="<service>"}`
* `cortex_<service>_bucket_store_cached_postings_compressions_total` => `cortex_bucket_store_cached_postings_compressions_total{component="<service>"}`
* `cortex_<service>_bucket_store_cached_postings_compression_errors_total` => `cortex_bucket_store_cached_postings_compression_errors_total{component="<service>"}`
* `cortex_<service>_bucket_store_cached_postings_compression_time_seconds` => `cortex_bucket_store_cached_postings_compression_time_seconds{component="<service>"}`
* `cortex_<service>_bucket_store_cached_postings_original_size_bytes_total` => `cortex_bucket_store_cached_postings_original_size_bytes_total{component="<service>"}`
* `cortex_<service>_bucket_store_cached_postings_compressed_size_bytes_total` => `cortex_bucket_store_cached_postings_compressed_size_bytes_total{component="<service>"}`
* `cortex_<service>_blocks_sync_seconds` => `cortex_bucket_stores_blocks_sync_seconds{component="<service>"}`
* `cortex_<service>_blocks_last_successful_sync_timestamp_seconds` => `cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="<service>"}`
* [FEATURE] TLS config options added for GRPC clients in Querier (Query-frontend client & Ingester client), Ruler, Store Gateway, as well as HTTP client in Config store client. #2502
* [FEATURE] The flag `-frontend.max-cache-freshness` is now supported within the limits overrides, to specify per-tenant max cache freshness values. The corresponding YAML config parameter has been changed from `results_cache.max_freshness` to `limits_config.max_cache_freshness`. The legacy YAML config parameter (`results_cache.max_freshness`) will continue to be supported till Cortex release `v1.4.0`. #2609
* [ENHANCEMENT] Experimental TSDB: added the following metrics to the ingester: #2580 #2583 #2589
Expand Down
35 changes: 35 additions & 0 deletions integration/e2e/composite_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,25 @@ func (s *CompositeHTTPService) WaitSumMetrics(isExpected func(sums ...float64) b
return fmt.Errorf("unable to find metrics %s with expected values. Last values: %v", metricNames, sums)
}

// WaitSumMetricWithLabels repeatedly sums metricName (restricted to series
// matching expectedLabels) across all services, for as long as the retry
// backoff is ongoing, until isExpected accepts the sum.
//
// It returns nil as soon as isExpected returns true, the error from
// SumMetricWithLabels if the sum cannot be computed, or an error carrying the
// last observed sum once the backoff is no longer ongoing.
func (s *CompositeHTTPService) WaitSumMetricWithLabels(isExpected func(sums float64) bool, metricName string, expectedLabels map[string]string) error {
	lastSum := 0.0

	for s.retryBackoff.Reset(); s.retryBackoff.Ongoing(); {
		// Assign with "=" (not ":=") so the outer lastSum is updated; a short
		// variable declaration here would shadow it and the error below would
		// always report 0 instead of the value actually observed.
		var err error
		lastSum, err = s.SumMetricWithLabels(metricName, expectedLabels)
		if err != nil {
			return err
		}

		if isExpected(lastSum) {
			return nil
		}

		s.retryBackoff.Wait()
	}

	return fmt.Errorf("unable to find metric %s with labels %v with expected value. Last value: %v", metricName, expectedLabels, lastSum)
}

// SumMetrics returns the sum of the values of each given metric names.
func (s *CompositeHTTPService) SumMetrics(metricNames ...string) ([]float64, error) {
sums := make([]float64, len(metricNames))
Expand All @@ -81,3 +100,19 @@ func (s *CompositeHTTPService) SumMetrics(metricNames ...string) ([]float64, err

return sums, nil
}

// SumMetricWithLabels adds up, over every service in the composite, that
// service's sum of metricName restricted to series matching expectedLabels.
// The first error reported by a service stops the computation and is
// returned together with a zero sum.
func (s *CompositeHTTPService) SumMetricWithLabels(metricName string, expectedLabels map[string]string) (float64, error) {
	var total float64

	for _, svc := range s.services {
		partial, err := svc.SumMetricWithLabels(metricName, expectedLabels)
		if err != nil {
			return 0, err
		}

		total += partial
	}

	return total, nil
}
85 changes: 58 additions & 27 deletions integration/e2e/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (

"github.com/go-kit/kit/log"
"github.com/pkg/errors"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt"
"github.com/thanos-io/thanos/pkg/runutil"

Expand Down Expand Up @@ -575,44 +576,74 @@ func (s *HTTPService) SumMetrics(metricNames ...string) ([]float64, error) {
// wait continues. If no such matching metric can be found or wait times out, function returns error.
func (s *HTTPService) WaitForMetricWithLabels(okFn func(v float64) bool, metricName string, expectedLabels map[string]string) error {
for s.retryBackoff.Reset(); s.retryBackoff.Ongoing(); {
metrics, err := s.Metrics()
ms, err := s.getMetricsMatchingLabels(metricName, expectedLabels)
if err != nil {
return err
}

var tp expfmt.TextParser
families, err := tp.TextToMetricFamilies(strings.NewReader(metrics))
if err != nil {
return err
for _, m := range ms {
if okFn(getValue(m)) {
return nil
}
}

mf, ok := families[metricName]
if !ok {
return errors.Errorf("metric %s not found in %s metric page", metricName, s.name)
}
s.retryBackoff.Wait()
}

for _, m := range mf.GetMetric() {
// check if some metric has all required labels
metricLabels := map[string]string{}
for _, lp := range m.GetLabel() {
metricLabels[lp.GetName()] = lp.GetValue()
}
return fmt.Errorf("unable to find metric %s with labels %v with expected value", metricName, expectedLabels)
}

matches := true
for k, v := range expectedLabels {
if mv, ok := metricLabels[k]; !ok || mv != v {
matches = false
break
}
}
// Returns sum of all metrics matching given labels.
func (s *HTTPService) SumMetricWithLabels(metricName string, expectedLabels map[string]string) (float64, error) {
sum := 0.0
ms, err := s.getMetricsMatchingLabels(metricName, expectedLabels)
if err != nil {
return 0, err
}

if matches && okFn(getValue(m)) {
return nil
for _, m := range ms {
sum += getValue(m)
}
return sum, nil
}

func (s *HTTPService) getMetricsMatchingLabels(metricName string, expectedLabels map[string]string) ([]*dto.Metric, error) {
metrics, err := s.Metrics()
if err != nil {
return nil, err
}

var tp expfmt.TextParser
families, err := tp.TextToMetricFamilies(strings.NewReader(metrics))
if err != nil {
return nil, err
}

mf, ok := families[metricName]
if !ok {
return nil, errors.Errorf("metric %s not found in %s metric page", metricName, s.name)
}

result := []*dto.Metric(nil)

for _, m := range mf.GetMetric() {
// check if some metric has all required labels
metricLabels := map[string]string{}
for _, lp := range m.GetLabel() {
metricLabels[lp.GetName()] = lp.GetValue()
}

matches := true
for k, v := range expectedLabels {
if mv, ok := metricLabels[k]; !ok || mv != v {
matches = false
break
}
}

s.retryBackoff.Wait()
if matches {
result = append(result, m)
}
}

return fmt.Errorf("unable to find metric %s with labels %v with expected value", metricName, expectedLabels)
return result, nil
}
Loading