diff --git a/CHANGELOG.md b/CHANGELOG.md index 32d8505b9f..d84738f710 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ * [CHANGE] Querier: Deprecate and enable by default `querier.ingester-metadata-streaming` flag. #6147 * [CHANGE] QueryFrontend/QueryScheduler: Deprecate `-querier.max-outstanding-requests-per-tenant` and `-query-scheduler.max-outstanding-requests-per-tenant` flags. Use frontend.max-outstanding-requests-per-tenant instead. #6146 * [CHANGE] Ingesters: Enable 'snappy-block' compression on ingester clients by default. #6148 +* [CHANGE] Ruler: Scheduling `ruler.evaluation-delay-duration` to be deprecated. Use the highest value between `ruler.evaluation-delay-duration` and `ruler.query-offset` #6149 * [FEATURE] Ingester/Distributor: Experimental: Enable native histogram ingestion via `-blocks-storage.tsdb.enable-native-histograms` flag. #5986 #6010 #6020 * [FEATURE] Querier: Enable querying native histogram chunks. #5944 #6031 * [FEATURE] Query Frontend: Support native histogram in query frontend response. #5996 #6043 diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 50fdbdde75..b0330fd454 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -3333,6 +3333,7 @@ query_rejection: # them. [query_attributes: | default = []] +# Deprecated(use ruler.query-offset instead) and will be removed in v1.19.0: # Duration to delay the evaluation of rules to ensure the underlying metrics # have been pushed to Cortex. # CLI flag: -ruler.evaluation-delay-duration diff --git a/integration/configs.go b/integration/configs.go index 07d9e78b15..c95a6bdc76 100644 --- a/integration/configs.go +++ b/integration/configs.go @@ -63,14 +63,6 @@ receivers: labels: {} annotations: {} ` - - cortexRulerEvalStaleNanConfigYaml = `groups: -- name: rule - interval: 1s - rules: - - record: stale_nan_eval - expr: a_sometimes_stale_nan_series * 2 -` ) var ( diff --git a/integration/ruler_test.go b/integration/ruler_test.go index 2da8db2e01..c05fd2ceb4 100644 --- a/integration/ruler_test.go +++ b/integration/ruler_test.go @@ -10,7 +10,6 @@ import ( "crypto/x509/pkix" "encoding/json" "fmt" - "math" "math/rand" "net/http" "os" @@ -24,7 +23,6 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/rulefmt" - "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/prompb" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -200,119 +198,6 @@ func TestRulerAPISingleBinary(t *testing.T) { require.NoError(t, cortexRestarted.WaitSumMetrics(e2e.Equals(1), "cortex_ruler_managers_total")) } -func TestRulerEvaluationDelay(t *testing.T) { - s, err := e2e.NewScenario(networkName) - require.NoError(t, err) - defer s.Close() - - namespace := "ns" - user := "fake" - - evaluationDelay := time.Minute * 5 - - configOverrides := map[string]string{ - "-ruler-storage.local.directory": filepath.Join(e2e.ContainerSharedDir, "ruler_configs"), - "-ruler.poll-interval": "2s", - "-ruler.rule-path": filepath.Join(e2e.ContainerSharedDir, "rule_tmp/"), - "-ruler.evaluation-delay-duration": evaluationDelay.String(), - } - - // Start Cortex components. - require.NoError(t, copyFileToSharedDir(s, "docs/configuration/single-process-config-blocks-local.yaml", cortexConfigFile)) - require.NoError(t, writeFileToSharedDir(s, filepath.Join("ruler_configs", user, namespace), []byte(cortexRulerEvalStaleNanConfigYaml))) - cortex := e2ecortex.NewSingleBinaryWithConfigFile("cortex", cortexConfigFile, configOverrides, "", 9009, 9095) - require.NoError(t, s.StartAndWaitReady(cortex)) - - // Create a client with the ruler address configured - c, err := e2ecortex.NewClient(cortex.HTTPEndpoint(), cortex.HTTPEndpoint(), "", cortex.HTTPEndpoint(), "") - require.NoError(t, err) - - now := time.Now() - - // Generate series that includes stale nans - samplesToSend := 10 - series := prompb.TimeSeries{ - Labels: []prompb.Label{ - {Name: "__name__", Value: "a_sometimes_stale_nan_series"}, - {Name: "instance", Value: "sometimes-stale"}, - }, - } - series.Samples = make([]prompb.Sample, samplesToSend) - posStale := 2 - - // Create samples, that are delayed by the evaluation delay with increasing values. - for pos := range series.Samples { - series.Samples[pos].Timestamp = e2e.TimeToMilliseconds(now.Add(-evaluationDelay).Add(time.Duration(pos) * time.Second)) - series.Samples[pos].Value = float64(pos + 1) - - // insert staleness marker at the positions marked by posStale - if pos == posStale { - series.Samples[pos].Value = math.Float64frombits(value.StaleNaN) - } - } - - // Insert metrics - res, err := c.Push([]prompb.TimeSeries{series}) - require.NoError(t, err) - require.Equal(t, 200, res.StatusCode) - - // Get number of rule evaluations just after push - ruleEvaluationsAfterPush, err := cortex.SumMetrics([]string{"cortex_prometheus_rule_evaluations_total"}) - require.NoError(t, err) - - // Wait until the rule is evaluated for the first time - require.NoError(t, cortex.WaitSumMetrics(e2e.Greater(ruleEvaluationsAfterPush[0]), "cortex_prometheus_rule_evaluations_total")) - - // Query the timestamp of the latest result to ensure the evaluation is delayed - result, err := c.Query("timestamp(stale_nan_eval)", now) - require.NoError(t, err) - require.Equal(t, model.ValVector, result.Type()) - - vector := result.(model.Vector) - require.Equal(t, 1, vector.Len(), "expect one sample returned") - - // 290 seconds gives 10 seconds of slack between the rule evaluation and the query - // to account for CI latency, but ensures the latest evaluation was in the past. - var maxDiff int64 = 290_000 - require.GreaterOrEqual(t, e2e.TimeToMilliseconds(time.Now())-int64(vector[0].Value)*1000, maxDiff) - - // Wait until all the pushed samples have been evaluated by the rule. This - // ensures that rule results are successfully written even after a - // staleness period. - require.NoError(t, cortex.WaitSumMetrics(e2e.GreaterOrEqual(ruleEvaluationsAfterPush[0]+float64(samplesToSend)), "cortex_prometheus_rule_evaluations_total")) - - // query all results to verify rules have been evaluated correctly - result, err = c.QueryRange("stale_nan_eval", now.Add(-evaluationDelay), now, time.Second) - require.NoError(t, err) - require.Equal(t, model.ValMatrix, result.Type()) - - matrix := result.(model.Matrix) - require.GreaterOrEqual(t, 1, matrix.Len(), "expect at least a series returned") - - // Iterate through the values recorded and ensure they exist as expected. - inputPos := 0 - for _, m := range matrix { - for _, v := range m.Values { - // Skip values for stale positions - if inputPos == posStale { - inputPos++ - } - - expectedValue := model.SampleValue(2 * (inputPos + 1)) - require.Equal(t, expectedValue, v.Value) - - // Look for next value - inputPos++ - - // We have found all input values - if inputPos >= len(series.Samples) { - break - } - } - } - require.Equal(t, len(series.Samples), inputPos, "expect to have returned all evaluations") -} - func TestRulerSharding(t *testing.T) { const numRulesGroups = 100 diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 9c3fd2f0f9..21b609a4fd 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -13,7 +13,6 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/metadata" - "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/notifier" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" @@ -46,27 +45,15 @@ type PusherAppender struct { histogramLabels []labels.Labels histograms []cortexpb.Histogram userID string - evaluationDelay time.Duration } func (a *PusherAppender) AppendHistogram(_ storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { if h == nil && fh == nil { return 0, errors.New("no histogram") } - if h != nil { - // A histogram sample is considered stale if its sum is set to NaN. - // https://github.com/prometheus/prometheus/blob/b6ef745016fa9472fdd0ae20f75a9682e01d1e5c/tsdb/head_append.go#L339-L346 - if a.evaluationDelay > 0 && (value.IsStaleNaN(h.Sum)) { - t -= a.evaluationDelay.Milliseconds() - } a.histograms = append(a.histograms, cortexpb.HistogramToHistogramProto(t, h)) } else { - // A histogram sample is considered stale if its sum is set to NaN. - // https://github.com/prometheus/prometheus/blob/b6ef745016fa9472fdd0ae20f75a9682e01d1e5c/tsdb/head_append.go#L339-L346 - if a.evaluationDelay > 0 && (value.IsStaleNaN(fh.Sum)) { - t -= a.evaluationDelay.Milliseconds() - } a.histograms = append(a.histograms, cortexpb.FloatHistogramToHistogramProto(t, fh)) } a.histogramLabels = append(a.histogramLabels, l) @@ -75,19 +62,6 @@ func (a *PusherAppender) AppendHistogram(_ storage.SeriesRef, l labels.Labels, t func (a *PusherAppender) Append(_ storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { a.labels = append(a.labels, l) - - // Adapt staleness markers for ruler evaluation delay. As the upstream code - // is using the actual time, when there is a no longer available series. - // This then causes 'out of order' append failures once the series is - // becoming available again. - // see https://github.com/prometheus/prometheus/blob/6c56a1faaaad07317ff585bda75b99bdba0517ad/rules/manager.go#L647-L660 - // Similar to staleness markers, the rule manager also appends actual time to the ALERTS and ALERTS_FOR_STATE series. - // See: https://github.com/prometheus/prometheus/blob/ae086c73cb4d6db9e8b67d5038d3704fea6aec4a/rules/alerting.go#L414-L417 - metricName := l.Get(labels.MetricName) - if a.evaluationDelay > 0 && (value.IsStaleNaN(v) || metricName == "ALERTS" || metricName == "ALERTS_FOR_STATE") { - t -= a.evaluationDelay.Milliseconds() - } - a.samples = append(a.samples, cortexpb.Sample{ TimestampMs: t, Value: v, @@ -164,16 +138,14 @@ func (t *PusherAppendable) Appender(ctx context.Context) storage.Appender { failedWrites: t.failedWrites, totalWrites: t.totalWrites, - ctx: ctx, - pusher: t.pusher, - userID: t.userID, - evaluationDelay: t.rulesLimits.EvaluationDelay(t.userID), + ctx: ctx, + pusher: t.pusher, + userID: t.userID, } } // RulesLimits defines limits used by Ruler. type RulesLimits interface { - EvaluationDelay(userID string) time.Duration MaxQueryLength(userID string) time.Duration RulerTenantShardSize(userID string) int RulerMaxRuleGroupsPerTenant(userID string) int @@ -182,7 +154,7 @@ type RulesLimits interface { DisabledRuleGroups(userID string) validation.DisabledRuleGroups } -// EngineQueryFunc returns a new engine query function by passing an altered timestamp. +// EngineQueryFunc returns a new engine query function validating max queryLength. // Modified from Prometheus rules.EngineQueryFunc // https://github.com/prometheus/prometheus/blob/v2.39.1/rules/manager.go#L189. func EngineQueryFunc(engine promql.QueryEngine, q storage.Queryable, overrides RulesLimits, userID string, lookbackDelta time.Duration) rules.QueryFunc { @@ -202,8 +174,7 @@ func EngineQueryFunc(engine promql.QueryEngine, q storage.Queryable, overrides R } } - evaluationDelay := overrides.EvaluationDelay(userID) - q, err := engine.NewInstantQuery(ctx, q, nil, qs, t.Add(-evaluationDelay)) + q, err := engine.NewInstantQuery(ctx, q, nil, qs, t) if err != nil { return nil, err } diff --git a/pkg/ruler/compat_test.go b/pkg/ruler/compat_test.go index cf4dc238d6..28c261f549 100644 --- a/pkg/ruler/compat_test.go +++ b/pkg/ruler/compat_test.go @@ -43,7 +43,6 @@ func TestPusherAppendable(t *testing.T) { lbls1 := cortexpb.FromLabelsToLabelAdapters(labels.FromMap(map[string]string{labels.MetricName: "foo_bar"})) lbls2 := cortexpb.FromLabelsToLabelAdapters(labels.FromMap(map[string]string{labels.MetricName: "ALERTS", labels.AlertName: "boop"})) - lbls3 := cortexpb.FromLabelsToLabelAdapters(labels.FromMap(map[string]string{labels.MetricName: "ALERTS_FOR_STATE", labels.AlertName: "boop"})) testHistogram := tsdbutil.GenerateTestHistogram(1) testFloatHistogram := tsdbutil.GenerateTestFloatHistogram(2) @@ -55,14 +54,13 @@ func TestPusherAppendable(t *testing.T) { for _, tc := range []struct { name string series string - evalDelay time.Duration value float64 histogram *histogram.Histogram floatHistogram *histogram.FloatHistogram expectedReq *cortexpb.WriteRequest }{ { - name: "tenant without delay, normal value", + name: "tenant, normal value", series: "foo_bar", value: 1.234, expectedReq: &cortexpb.WriteRequest{ @@ -80,7 +78,7 @@ func TestPusherAppendable(t *testing.T) { }, }, { - name: "tenant without delay, stale nan value", + name: "tenant, stale nan value", series: "foo_bar", value: math.Float64frombits(value.StaleNaN), expectedReq: &cortexpb.WriteRequest{ @@ -98,45 +96,7 @@ func TestPusherAppendable(t *testing.T) { }, }, { - name: "tenant with delay, normal value", - series: "foo_bar", - value: 1.234, - evalDelay: time.Minute, - expectedReq: &cortexpb.WriteRequest{ - Timeseries: []cortexpb.PreallocTimeseries{ - { - TimeSeries: &cortexpb.TimeSeries{ - Labels: lbls1, - Samples: []cortexpb.Sample{ - {Value: 1.234, TimestampMs: 120_000}, - }, - }, - }, - }, - Source: cortexpb.RULE, - }, - }, - { - name: "tenant with delay, stale nan value", - series: "foo_bar", - value: math.Float64frombits(value.StaleNaN), - evalDelay: time.Minute, - expectedReq: &cortexpb.WriteRequest{ - Timeseries: []cortexpb.PreallocTimeseries{ - { - TimeSeries: &cortexpb.TimeSeries{ - Labels: lbls1, - Samples: []cortexpb.Sample{ - {Value: math.Float64frombits(value.StaleNaN), TimestampMs: 60_000}, - }, - }, - }, - }, - Source: cortexpb.RULE, - }, - }, - { - name: "ALERTS without delay, normal value", + name: "ALERTS, normal value", series: `ALERTS{alertname="boop"}`, value: 1.234, expectedReq: &cortexpb.WriteRequest{ @@ -154,7 +114,7 @@ func TestPusherAppendable(t *testing.T) { }, }, { - name: "ALERTS without delay, stale nan value", + name: "ALERTS, stale nan value", series: `ALERTS{alertname="boop"}`, value: math.Float64frombits(value.StaleNaN), expectedReq: &cortexpb.WriteRequest{ @@ -172,45 +132,7 @@ func TestPusherAppendable(t *testing.T) { }, }, { - name: "ALERTS with delay, normal value", - series: `ALERTS{alertname="boop"}`, - value: 1.234, - evalDelay: time.Minute, - expectedReq: &cortexpb.WriteRequest{ - Timeseries: []cortexpb.PreallocTimeseries{ - { - TimeSeries: &cortexpb.TimeSeries{ - Labels: lbls2, - Samples: []cortexpb.Sample{ - {Value: 1.234, TimestampMs: 60_000}, - }, - }, - }, - }, - Source: cortexpb.RULE, - }, - }, - { - name: "ALERTS with delay, stale nan value", - series: `ALERTS_FOR_STATE{alertname="boop"}`, - value: math.Float64frombits(value.StaleNaN), - evalDelay: time.Minute, - expectedReq: &cortexpb.WriteRequest{ - Timeseries: []cortexpb.PreallocTimeseries{ - { - TimeSeries: &cortexpb.TimeSeries{ - Labels: lbls3, - Samples: []cortexpb.Sample{ - {Value: math.Float64frombits(value.StaleNaN), TimestampMs: 60_000}, - }, - }, - }, - }, - Source: cortexpb.RULE, - }, - }, - { - name: "tenant without delay, normal histogram", + name: "tenant, normal histogram", series: "foo_bar", histogram: testHistogram, expectedReq: &cortexpb.WriteRequest{ @@ -228,7 +150,7 @@ func TestPusherAppendable(t *testing.T) { }, }, { - name: "tenant without delay, float histogram", + name: "tenant, float histogram", series: "foo_bar", floatHistogram: testFloatHistogram, expectedReq: &cortexpb.WriteRequest{ @@ -246,7 +168,7 @@ func TestPusherAppendable(t *testing.T) { }, }, { - name: "tenant without delay, both sample and histogram", + name: "tenant, both sample and histogram", series: "foo_bar", value: 1.234, histogram: testHistogram, @@ -273,7 +195,7 @@ func TestPusherAppendable(t *testing.T) { }, }, { - name: "tenant without delay, both sample and float histogram", + name: "tenant, both sample and float histogram", series: "foo_bar", value: 1.234, floatHistogram: testFloatHistogram, @@ -299,106 +221,9 @@ func TestPusherAppendable(t *testing.T) { Source: cortexpb.RULE, }, }, - { - name: "tenant with delay and NaN sample, normal histogram", - series: "foo_bar", - value: math.Float64frombits(value.StaleNaN), - evalDelay: time.Minute, - histogram: testHistogram, - expectedReq: &cortexpb.WriteRequest{ - Timeseries: []cortexpb.PreallocTimeseries{ - { - TimeSeries: &cortexpb.TimeSeries{ - Labels: lbls1, - Samples: []cortexpb.Sample{ - {Value: math.Float64frombits(value.StaleNaN), TimestampMs: 60_000}, - }, - }, - }, - { - TimeSeries: &cortexpb.TimeSeries{ - Labels: lbls1, - Histograms: []cortexpb.Histogram{ - cortexpb.HistogramToHistogramProto(120_000, testHistogram), - }, - }, - }, - }, - Source: cortexpb.RULE, - }, - }, - { - name: "tenant with delay and NaN sample, float histogram", - series: "foo_bar", - value: math.Float64frombits(value.StaleNaN), - evalDelay: time.Minute, - floatHistogram: testFloatHistogram, - expectedReq: &cortexpb.WriteRequest{ - Timeseries: []cortexpb.PreallocTimeseries{ - { - TimeSeries: &cortexpb.TimeSeries{ - Labels: lbls1, - Samples: []cortexpb.Sample{ - {Value: math.Float64frombits(value.StaleNaN), TimestampMs: 60_000}, - }, - }, - }, - { - TimeSeries: &cortexpb.TimeSeries{ - Labels: lbls1, - Histograms: []cortexpb.Histogram{ - cortexpb.FloatHistogramToHistogramProto(120_000, testFloatHistogram), - }, - }, - }, - }, - Source: cortexpb.RULE, - }, - }, - { - name: "tenant with delay, NaN histogram", - series: "foo_bar", - histogram: testHistogramWithNaN, - evalDelay: time.Minute, - expectedReq: &cortexpb.WriteRequest{ - Timeseries: []cortexpb.PreallocTimeseries{ - { - TimeSeries: &cortexpb.TimeSeries{ - Labels: lbls1, - Histograms: []cortexpb.Histogram{ - cortexpb.HistogramToHistogramProto(60_000, testHistogramWithNaN), - }, - }, - }, - }, - Source: cortexpb.RULE, - }, - }, - { - name: "tenant with delay, NaN float histogram", - series: "foo_bar", - floatHistogram: testFloatHistogramWithNaN, - evalDelay: time.Minute, - expectedReq: &cortexpb.WriteRequest{ - Timeseries: []cortexpb.PreallocTimeseries{ - { - TimeSeries: &cortexpb.TimeSeries{ - Labels: lbls1, - Histograms: []cortexpb.Histogram{ - cortexpb.FloatHistogramToHistogramProto(60_000, testFloatHistogramWithNaN), - }, - }, - }, - }, - Source: cortexpb.RULE, - }, - }, } { t.Run(tc.name, func(t *testing.T) { ctx := context.Background() - pa.rulesLimits = &ruleLimits{ - evalDelay: tc.evalDelay, - } lbls, err := parser.ParseMetric(tc.series) require.NoError(t, err) @@ -461,7 +286,7 @@ func TestPusherErrors(t *testing.T) { writes := prometheus.NewCounter(prometheus.CounterOpts{}) failures := prometheus.NewCounter(prometheus.CounterOpts{}) - pa := NewPusherAppendable(pusher, "user-1", ruleLimits{evalDelay: 10 * time.Second}, writes, failures) + pa := NewPusherAppendable(pusher, "user-1", ruleLimits{}, writes, failures) lbls, err := parser.ParseMetric("foo_bar") require.NoError(t, err) diff --git a/pkg/ruler/ruler_ring_test.go b/pkg/ruler/ruler_ring_test.go index 95f8009912..c40ada8964 100644 --- a/pkg/ruler/ruler_ring_test.go +++ b/pkg/ruler/ruler_ring_test.go @@ -223,7 +223,7 @@ func TestGetReplicationSetForListRule(t *testing.T) { } r, _ := buildRuler(t, cfg, nil, store, nil) - r.limits = ruleLimits{evalDelay: 0} + r.limits = ruleLimits{} rulerRing := r.ring // We start ruler's ring, but nothing else (not even lifecycler). diff --git a/pkg/ruler/ruler_test.go b/pkg/ruler/ruler_test.go index 68a42548e5..c0d9e43804 100644 --- a/pkg/ruler/ruler_test.go +++ b/pkg/ruler/ruler_test.go @@ -82,7 +82,6 @@ func defaultRulerConfig(t testing.TB) Config { } type ruleLimits struct { - evalDelay time.Duration tenantShard int maxRulesPerRuleGroup int maxRuleGroups int @@ -91,10 +90,6 @@ type ruleLimits struct { queryOffset time.Duration } -func (r ruleLimits) EvaluationDelay(_ string) time.Duration { - return r.evalDelay -} - func (r ruleLimits) RulerTenantShardSize(_ string) int { return r.tenantShard } @@ -178,7 +173,7 @@ func testSetup(t *testing.T, querierTestConfig *querier.TestConfig) (*promql.Eng reg := prometheus.NewRegistry() queryable := testQueryableFunc(querierTestConfig, reg, l) - return engine, queryable, pusher, l, ruleLimits{evalDelay: 0, maxRuleGroups: 20, maxRulesPerRuleGroup: 15}, reg + return engine, queryable, pusher, l, ruleLimits{maxRuleGroups: 20, maxRulesPerRuleGroup: 15}, reg } func newManager(t *testing.T, cfg Config) *DefaultMultiTenantManager { @@ -971,7 +966,7 @@ func TestGetRules(t *testing.T) { } r, _ := buildRuler(t, cfg, nil, store, rulerAddrMap) - r.limits = ruleLimits{evalDelay: 0, tenantShard: tc.shuffleShardSize} + r.limits = ruleLimits{tenantShard: tc.shuffleShardSize} rulerAddrMap[id] = r if r.ring != nil { require.NoError(t, services.StartAndAwaitRunning(context.Background(), r.ring)) @@ -1208,7 +1203,7 @@ func TestGetRulesFromBackup(t *testing.T) { } r, _ := buildRuler(t, cfg, nil, store, rulerAddrMap) - r.limits = ruleLimits{evalDelay: 0, tenantShard: 3} + r.limits = ruleLimits{tenantShard: 3} rulerAddrMap[id] = r if r.ring != nil { require.NoError(t, services.StartAndAwaitRunning(context.Background(), r.ring)) @@ -1792,7 +1787,7 @@ func TestSharding(t *testing.T) { } r, _ := buildRuler(t, cfg, nil, store, nil) - r.limits = ruleLimits{evalDelay: 0, tenantShard: tc.shuffleShardSize} + r.limits = ruleLimits{tenantShard: tc.shuffleShardSize} if forceRing != nil { r.ring = forceRing @@ -1941,7 +1936,7 @@ func Test_LoadPartialGroups(t *testing.T) { } r1, manager := buildRuler(t, cfg, nil, store, nil) - r1.limits = ruleLimits{evalDelay: 0, tenantShard: 1} + r1.limits = ruleLimits{tenantShard: 1} require.NoError(t, services.StartAndAwaitRunning(context.Background(), r1)) t.Cleanup(r1.StopAsync) @@ -2465,7 +2460,7 @@ func TestRulerDisablesRuleGroups(t *testing.T) { } r, _ := buildRuler(t, cfg, nil, store, nil) - r.limits = ruleLimits{evalDelay: 0, tenantShard: 3, disabledRuleGroups: tc.disabledRuleGroups} + r.limits = ruleLimits{tenantShard: 3, disabledRuleGroups: tc.disabledRuleGroups} if forceRing != nil { r.ring = forceRing diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 2e885a6c40..87173abb13 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -11,6 +11,7 @@ import ( "time" "github.com/cespare/xxhash/v2" + "github.com/go-kit/log/level" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" @@ -265,7 +266,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.IntVar(&l.MaxOutstandingPerTenant, "frontend.max-outstanding-requests-per-tenant", 100, "Maximum number of outstanding requests per tenant per request queue (either query frontend or query scheduler); requests beyond this error with HTTP 429.") - f.Var(&l.RulerEvaluationDelay, "ruler.evaluation-delay-duration", "Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed to Cortex.") + f.Var(&l.RulerEvaluationDelay, "ruler.evaluation-delay-duration", "Deprecated(use ruler.query-offset instead) and will be removed in v1.19.0: Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed to Cortex.") f.IntVar(&l.RulerTenantShardSize, "ruler.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by ruler. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.") f.IntVar(&l.RulerMaxRulesPerRuleGroup, "ruler.max-rules-per-rule-group", 0, "Maximum number of rules per rule group per-tenant. 0 to disable.") f.IntVar(&l.RulerMaxRuleGroupsPerTenant, "ruler.max-rule-groups-per-tenant", 0, "Maximum number of rule groups per-tenant. 0 to disable.") @@ -758,11 +759,6 @@ func (o *Overrides) IngestionTenantShardSize(userID string) int { return o.GetOverridesForUser(userID).IngestionTenantShardSize } -// EvaluationDelay returns the rules evaluation delay for a given user. -func (o *Overrides) EvaluationDelay(userID string) time.Duration { - return time.Duration(o.GetOverridesForUser(userID).RulerEvaluationDelay) -} - // CompactorBlocksRetentionPeriod returns the retention period for a given user. func (o *Overrides) CompactorBlocksRetentionPeriod(userID string) time.Duration { return time.Duration(o.GetOverridesForUser(userID).CompactorBlocksRetentionPeriod) @@ -795,7 +791,13 @@ func (o *Overrides) RulerMaxRuleGroupsPerTenant(userID string) int { // RulerQueryOffset returns the rule query offset for a given user. func (o *Overrides) RulerQueryOffset(userID string) time.Duration { - return time.Duration(o.GetOverridesForUser(userID).RulerQueryOffset) + ruleOffset := time.Duration(o.GetOverridesForUser(userID).RulerQueryOffset) + evaluationDelay := time.Duration(o.GetOverridesForUser(userID).RulerEvaluationDelay) + if evaluationDelay > ruleOffset { + level.Warn(util_log.Logger).Log("msg", "ruler.query-offset was overridden by highest value in [Deprecated]ruler.evaluation-delay-duration", "ruler.query-offset", ruleOffset, "ruler.evaluation-delay-duration", evaluationDelay) + return evaluationDelay + } + return ruleOffset } // StoreGatewayTenantShardSize returns the store-gateway shard size for a given user. diff --git a/pkg/util/validation/limits_test.go b/pkg/util/validation/limits_test.go index 05807f63c9..997988ada9 100644 --- a/pkg/util/validation/limits_test.go +++ b/pkg/util/validation/limits_test.go @@ -764,3 +764,21 @@ func TestCompileQueryPriorityRegex(t *testing.T) { require.NoError(t, err) require.Nil(t, l.QueryPriority.Priorities[0].QueryAttributes[0].CompiledRegex) } + +func TestEvaluationDelayHigherThanRulerQueryOffset(t *testing.T) { + tenant := "tenant" + evaluationDelay := time.Duration(10) + tenantLimits := map[string]*Limits{ + tenant: { + RulerQueryOffset: 5, + RulerEvaluationDelay: model.Duration(evaluationDelay), + }, + } + + defaults := Limits{} + ov, err := NewOverrides(defaults, newMockTenantLimits(tenantLimits)) + require.NoError(t, err) + + rulerQueryOffset := ov.RulerQueryOffset(tenant) + assert.Equal(t, evaluationDelay, rulerQueryOffset) +}