Skip to content

Commit

Permalink
Create cortex_reduced_resolution_histogram_samples_total metric (#6182)
Browse files Browse the repository at this point in the history

* Create new metric to count how many native histogram samples had their resolution reduced

Signed-off-by: alanprot <alanprot@gmail.com>

* changelog

Signed-off-by: alanprot <alanprot@gmail.com>

---------

Signed-off-by: alanprot <alanprot@gmail.com>
  • Loading branch information
alanprot authored Aug 30, 2024
1 parent 2f4e1fd commit 39a168d
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

* [ENHANCEMENT] Ruler: Add new ruler metric `cortex_ruler_rule_groups_in_store` that is the total rule groups per tenant in store, which can be used to compare with `cortex_prometheus_rule_group_rules` to count the number of rule groups that are not loaded by a ruler. #5869
* [ENHANCEMENT] Ruler: Add query statistics metrics when --ruler.query-stats-enabled=true. #6173
* [ENHANCEMENT] Distributor: Add new `cortex_reduced_resolution_histogram_samples_total` metric to track the number of histogram samples whose resolution was reduced. #6182

## 1.18.0 in progress

Expand Down
33 changes: 27 additions & 6 deletions pkg/util/validation/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,10 @@ const (
)

// ValidateMetrics bundles the Prometheus counter vectors that the validation
// code in this package increments.
type ValidateMetrics struct {
DiscardedSamples *prometheus.CounterVec
DiscardedExemplars *prometheus.CounterVec
DiscardedMetadata *prometheus.CounterVec
DiscardedSamples *prometheus.CounterVec
DiscardedExemplars *prometheus.CounterVec
DiscardedMetadata *prometheus.CounterVec
// HistogramSamplesReducedResolution counts, per user, the histogram samples
// that had their resolution reduced.
HistogramSamplesReducedResolution *prometheus.CounterVec
}

func registerCollector(r prometheus.Registerer, c prometheus.Collector) {
Expand Down Expand Up @@ -111,10 +112,19 @@ func NewValidateMetrics(r prometheus.Registerer) *ValidateMetrics {
[]string{discardReasonLabel, "user"},
)
registerCollector(r, discardedMetadata)
histogramSamplesReducedResolution := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "cortex_reduced_resolution_histogram_samples_total",
Help: "The total number of histogram samples that had the resolution reduced.",
},
[]string{"user"},
)
registerCollector(r, histogramSamplesReducedResolution)
m := &ValidateMetrics{
DiscardedSamples: discardedSamples,
DiscardedExemplars: discardedExemplars,
DiscardedMetadata: discardedMetadata,
DiscardedSamples: discardedSamples,
DiscardedExemplars: discardedExemplars,
DiscardedMetadata: discardedMetadata,
HistogramSamplesReducedResolution: histogramSamplesReducedResolution,
}

return m
Expand Down Expand Up @@ -286,13 +296,17 @@ func ValidateNativeHistogram(validateMetrics *ValidateMetrics, limits *Limits, u
return cortexpb.Histogram{}, newHistogramBucketLimitExceededError(ls, limits.MaxNativeHistogramBuckets)
}
fh := cortexpb.FloatHistogramProtoToFloatHistogram(histogramSample)
oBuckets := len(fh.PositiveBuckets) + len(fh.NegativeBuckets)
for len(fh.PositiveBuckets)+len(fh.NegativeBuckets) > limits.MaxNativeHistogramBuckets {
if fh.Schema <= histogram.ExponentialSchemaMin {
validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramBucketCountLimitExceeded, userID).Inc()
return cortexpb.Histogram{}, newHistogramBucketLimitExceededError(ls, limits.MaxNativeHistogramBuckets)
}
fh = fh.ReduceResolution(fh.Schema - 1)
}
if oBuckets != len(fh.PositiveBuckets)+len(fh.NegativeBuckets) {
validateMetrics.HistogramSamplesReducedResolution.WithLabelValues(userID).Inc()
}
// If resolution reduced, convert new float histogram to protobuf type again.
return cortexpb.FloatHistogramToHistogramProto(histogramSample.TimestampMs, fh), nil
}
Expand All @@ -308,13 +322,17 @@ func ValidateNativeHistogram(validateMetrics *ValidateMetrics, limits *Limits, u
return cortexpb.Histogram{}, newHistogramBucketLimitExceededError(ls, limits.MaxNativeHistogramBuckets)
}
h := cortexpb.HistogramProtoToHistogram(histogramSample)
oBuckets := len(h.PositiveBuckets) + len(h.NegativeBuckets)
for len(h.PositiveBuckets)+len(h.NegativeBuckets) > limits.MaxNativeHistogramBuckets {
if h.Schema <= histogram.ExponentialSchemaMin {
validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramBucketCountLimitExceeded, userID).Inc()
return cortexpb.Histogram{}, newHistogramBucketLimitExceededError(ls, limits.MaxNativeHistogramBuckets)
}
h = h.ReduceResolution(h.Schema - 1)
}
if oBuckets != len(h.PositiveBuckets)+len(h.NegativeBuckets) {
validateMetrics.HistogramSamplesReducedResolution.WithLabelValues(userID).Inc()
}
// If resolution reduced, convert new histogram to protobuf type again.
return cortexpb.HistogramToHistogramProto(histogramSample.TimestampMs, h), nil
}
Expand All @@ -331,4 +349,7 @@ func DeletePerUserValidationMetrics(validateMetrics *ValidateMetrics, userID str
if err := util.DeleteMatchingLabels(validateMetrics.DiscardedMetadata, filter); err != nil {
level.Warn(log).Log("msg", "failed to remove cortex_discarded_metadata_total metric for user", "user", userID, "err", err)
}
if err := util.DeleteMatchingLabels(validateMetrics.HistogramSamplesReducedResolution, filter); err != nil {
level.Warn(log).Log("msg", "failed to remove cortex_reduced_resolution_histogram_samples_total metric for user", "user", userID, "err", err)
}
}
9 changes: 9 additions & 0 deletions pkg/util/validation/validate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ func TestValidateNativeHistogram(t *testing.T) {
for _, tc := range []struct {
name string
bucketLimit int
resolutionReduced bool
histogram cortexpb.Histogram
expectedHistogram cortexpb.Histogram
expectedErr error
Expand Down Expand Up @@ -341,12 +342,14 @@ func TestValidateNativeHistogram(t *testing.T) {
bucketLimit: 6,
histogram: cortexpb.HistogramToHistogramProto(0, h.Copy()),
expectedHistogram: cortexpb.HistogramToHistogramProto(0, h.Copy().ReduceResolution(0)),
resolutionReduced: true,
},
{
name: "exceed limit and reduce resolution for 1 level, float histogram",
bucketLimit: 6,
histogram: cortexpb.FloatHistogramToHistogramProto(0, fh.Copy()),
expectedHistogram: cortexpb.FloatHistogramToHistogramProto(0, fh.Copy().ReduceResolution(0)),
resolutionReduced: true,
},
{
name: "exceed limit and reduce resolution for 2 levels, histogram",
Expand Down Expand Up @@ -394,7 +397,13 @@ func TestValidateNativeHistogram(t *testing.T) {
if tc.expectedErr != nil {
require.Equal(t, tc.expectedErr, actualErr)
require.Equal(t, float64(1), testutil.ToFloat64(validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramBucketCountLimitExceeded, userID)))
// Should never increment if error was returned
require.Equal(t, float64(0), testutil.ToFloat64(validateMetrics.HistogramSamplesReducedResolution.WithLabelValues(userID)))

} else {
if tc.resolutionReduced {
require.Equal(t, float64(1), testutil.ToFloat64(validateMetrics.HistogramSamplesReducedResolution.WithLabelValues(userID)))
}
require.NoError(t, actualErr)
require.Equal(t, tc.expectedHistogram, actualHistogram)
}
Expand Down

0 comments on commit 39a168d

Please sign in to comment.