From e488a3b5de1fd56a955e4f4ac8bd613f1144fe51 Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Thu, 15 Oct 2020 16:46:45 +0300 Subject: [PATCH 01/16] fixes most of topk, bottomk ordered tests when instant queries are used --- .../handler/prometheus/native/read_common.go | 1 + src/query/functions/aggregation/take.go | 132 +++++++++++++++--- src/query/functions/aggregation/take_test.go | 72 +++++++++- src/query/functions/utils/heap.go | 47 +++++-- src/query/functions/utils/heap_test.go | 31 ++++ src/query/models/query_context.go | 2 + .../compatibility/testdata/aggregators.test | 59 ++++---- 7 files changed, 281 insertions(+), 63 deletions(-) diff --git a/src/query/api/v1/handler/prometheus/native/read_common.go b/src/query/api/v1/handler/prometheus/native/read_common.go index 434329d464..7f863ba201 100644 --- a/src/query/api/v1/handler/prometheus/native/read_common.go +++ b/src/query/api/v1/handler/prometheus/native/read_common.go @@ -90,6 +90,7 @@ func ParseRequest( QueryContextOptions: models.QueryContextOptions{ LimitMaxTimeseries: fetchOpts.SeriesLimit, LimitMaxDocs: fetchOpts.DocsLimit, + Instantaneous: instantaneous, }} restrictOpts := fetchOpts.RestrictQueryOptions.GetRestrictByType() diff --git a/src/query/functions/aggregation/take.go b/src/query/functions/aggregation/take.go index eab7ceee2c..abfa21274b 100644 --- a/src/query/functions/aggregation/take.go +++ b/src/query/functions/aggregation/take.go @@ -22,14 +22,13 @@ package aggregation import ( "fmt" - "math" - "github.com/m3db/m3/src/query/block" "github.com/m3db/m3/src/query/executor/transform" "github.com/m3db/m3/src/query/functions/utils" "github.com/m3db/m3/src/query/models" "github.com/m3db/m3/src/query/parser" "github.com/m3db/m3/src/query/util" + "math" ) const ( @@ -39,7 +38,13 @@ const ( TopKType = "topk" ) +type ValueAndMeta struct { + Val float64 + SeriesMeta block.SeriesMeta +} + type takeFunc func(values []float64, buckets [][]int) []float64 +type takeInstantFunc func(values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []ValueAndMeta // NewTakeOp creates a new takeK operation func NewTakeOp( @@ -52,19 +57,27 @@ func NewTakeOp( } var fn takeFunc + var fnInstant takeInstantFunc k := int(params.Parameter) + if k < 1 { fn = func(values []float64, buckets [][]int) []float64 { - return takeNone(values, buckets) + return takeNone(values) + } + fnInstant = func(values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []ValueAndMeta { + return takeInstantNone(values, seriesMetas) } } else { heap := utils.NewFloatHeap(takeTop, k) fn = func(values []float64, buckets [][]int) []float64 { return takeFn(heap, values, buckets) } + fnInstant = func(values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []ValueAndMeta { + return takeInstantFn(heap, values, buckets, seriesMetas); + } } - return newTakeOp(params, opType, fn), nil + return newTakeOp(params, opType, fn, fnInstant), nil } // takeOp stores required properties for take ops @@ -72,6 +85,7 @@ type takeOp struct { params NodeParams opType string takeFunc takeFunc + takeInstantFunc takeInstantFunc } // OpType for the operator @@ -95,11 +109,12 @@ func (o takeOp) Node( } } -func newTakeOp(params NodeParams, opType string, takeFunc takeFunc) takeOp { +func newTakeOp(params NodeParams, opType string, takeFunc takeFunc, takeInstantFunc takeInstantFunc) takeOp { return takeOp{ params: params, opType: opType, takeFunc: takeFunc, + takeInstantFunc: takeInstantFunc, } } @@ -126,6 +141,7 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, return nil, err } + instantaneous := queryCtx.Options.Instantaneous params := n.op.params meta := b.Meta() seriesMetas := utils.FlattenMetadata(meta, stepIter.SeriesMeta()) @@ -136,22 +152,52 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, seriesMetas, ) - // retain original metadatas - builder, err := n.controller.BlockBuilder(queryCtx, meta, stepIter.SeriesMeta()) + builder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) if err != nil { return nil, err } - - if err = builder.AddCols(stepIter.StepCount()); err != nil { + stepCount := stepIter.StepCount() + if err = builder.AddCols(stepCount); err != nil { return nil, err } - for index := 0; stepIter.Next(); index++ { - step := stepIter.Current() - values := step.Values() - aggregatedValues := n.op.takeFunc(values, buckets) - if err := builder.AppendValues(index, aggregatedValues); err != nil { - return nil, err + if instantaneous { + for index := 0; stepIter.Next(); index++ { + step := stepIter.Current() + values := step.Values() + if isLastStep(index, stepCount) { + aggregatedValues := n.op.takeInstantFunc(values, buckets, seriesMetas) + var valuesToSet = make([]float64, len(aggregatedValues)) + for i := range aggregatedValues { + valuesToSet[i] = aggregatedValues[i].Val + } + + if err := builder.AppendValues(index, valuesToSet); err != nil { + return nil, err + } + + var rowValues = make([]float64, stepCount) + for i, value := range aggregatedValues { + rowValues[len(rowValues)-1] = value.Val + if err := builder.SetRow(i, rowValues, value.SeriesMeta); err != nil { + return nil, err + } + } + } else { + if err := builder.AppendValues(index, values); err != nil { + return nil, err + } + } + } + } else { + for index := 0; stepIter.Next(); index++ { + step := stepIter.Current() + values := step.Values() + + aggregatedValues := n.op.takeFunc(values, buckets) + if err := builder.AppendValues(index, aggregatedValues); err != nil { + return nil, err + } } } @@ -162,19 +208,32 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, return builder.Build(), nil } +func isLastStep(stepIndex int, stepCount int) bool { + return stepIndex == stepCount - 1 +} + // shortcut to return empty when taking <= 0 values -func takeNone(values []float64, buckets [][]int) []float64 { +func takeNone(values []float64) []float64 { util.Memset(values, math.NaN()) return values } -func takeFn(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 { - cap := heap.Cap() +func takeInstantNone(values []float64, seriesMetas []block.SeriesMeta) []ValueAndMeta { + result := make([]ValueAndMeta, len(values)) + for i := range result { + result[i].Val = math.NaN() + result[i].SeriesMeta = seriesMetas[i] + } + return result +} + +func takeFn(heap *utils.FloatHeap, values []float64, buckets [][]int) []float64 { + capacity := heap.Cap() for _, bucket := range buckets { // If this bucket's length is less than or equal to the heap's // capacity do not need to clear any values from the input vector, // as they are all included in the output. - if len(bucket) <= cap { + if len(bucket) <= capacity { continue } @@ -198,3 +257,38 @@ func takeFn(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 { return values } + +func takeInstantFn(heap *utils.FloatHeap, values []float64, buckets [][]int, metas []block.SeriesMeta) []ValueAndMeta { + var result = make([]ValueAndMeta, len(values)) + for i := range result { + result[i] = ValueAndMeta{ + Val: values[i], + SeriesMeta: metas[i], + } + } + + for _, bucket := range buckets { + for _, idx := range bucket { + val := values[idx] + if !math.IsNaN(val) { + heap.Push(val, idx) + } + } + + valIndexPairs := heap.FlushInOrder() + for ix, pair := range valIndexPairs { + prevIndex := pair.Index + prevMeta := metas[prevIndex] + idx := bucket[ix] + + result[idx].Val = pair.Val + result[idx].SeriesMeta = prevMeta + } + + //clear remaining values + for i := len(valIndexPairs); i < len(bucket); i++ { + result[bucket[i]].Val = math.NaN() + } + } + return result +} \ No newline at end of file diff --git a/src/query/functions/aggregation/take_test.go b/src/query/functions/aggregation/take_test.go index c0a5bef13b..bf86af9c25 100644 --- a/src/query/functions/aggregation/take_test.go +++ b/src/query/functions/aggregation/take_test.go @@ -21,20 +21,71 @@ package aggregation import ( - "math" - "testing" - + "fmt" + "github.com/m3db/m3/src/query/block" "github.com/m3db/m3/src/query/executor/transform" "github.com/m3db/m3/src/query/functions/utils" "github.com/m3db/m3/src/query/models" "github.com/m3db/m3/src/query/parser" "github.com/m3db/m3/src/query/test" "github.com/m3db/m3/src/query/test/executor" + "math" + "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +func TestTakeInstantFn(t *testing.T) { + valuesMin := []float64{1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1} + buckets := [][]int{{0, 1, 2, 3}, {4}, {5, 6, 7, 8}} + + expectedMin := []ValueAndMeta{ + {Val: 1.1, SeriesMeta: seriesMetasTakeOrdered[0]}, + {Val: 2.1, SeriesMeta: seriesMetasTakeOrdered[1]}, + {Val: 3.1, SeriesMeta: seriesMetasTakeOrdered[2]}, + {Val: math.NaN(), SeriesMeta: seriesMetasTakeOrdered[3]}, + + {Val: 5.1, SeriesMeta: seriesMetasTakeOrdered[4]}, + + {Val: 6.1, SeriesMeta: seriesMetasTakeOrdered[5]}, + {Val: 7.1, SeriesMeta: seriesMetasTakeOrdered[6]}, + {Val: 8.1, SeriesMeta: seriesMetasTakeOrdered[7]}, + {Val: math.NaN(), SeriesMeta: seriesMetasTakeOrdered[8]}, + } + + size := 3 + minHeap := utils.NewFloatHeap(false, size) + actual := takeInstantFn(minHeap, valuesMin, buckets, seriesMetasTakeOrdered) //9 + + actualString := fmt.Sprint(actual) + expectedString := fmt.Sprint(expectedMin) + + require.EqualValues(t, expectedString, actualString) + + valuesMax := []float64{1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1} + expectedMax := []ValueAndMeta{ + {Val: 4.1, SeriesMeta: seriesMetasTakeOrdered[3]}, + {Val: 3.1, SeriesMeta: seriesMetasTakeOrdered[2]}, + {Val: 2.1, SeriesMeta: seriesMetasTakeOrdered[1]}, + {Val: math.NaN(), SeriesMeta: seriesMetasTakeOrdered[3]}, + + {Val: 5.1, SeriesMeta: seriesMetasTakeOrdered[4]}, + + {Val: 9.1, SeriesMeta: seriesMetasTakeOrdered[8]}, + {Val: 8.1, SeriesMeta: seriesMetasTakeOrdered[7]}, + {Val: 7.1, SeriesMeta: seriesMetasTakeOrdered[6]}, + {Val: math.NaN(), SeriesMeta: seriesMetasTakeOrdered[8]}, + } + + maxHeap := utils.NewFloatHeap(true, size) + actual = takeInstantFn(maxHeap, valuesMax, buckets, seriesMetasTakeOrdered) + actualString = fmt.Sprint(actual) + expectedString = fmt.Sprint(expectedMax) + + assert.EqualValues(t, expectedString, actualString) +} + func TestTakeFn(t *testing.T) { valuesMin := []float64{1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1} buckets := [][]int{{0, 1, 2, 3}, {4}, {5, 6, 7}} @@ -59,6 +110,20 @@ func TestTakeFn(t *testing.T) { test.EqualsWithNans(t, 1.1, actualQ) } +var ( + seriesMetasTakeOrdered = []block.SeriesMeta{ + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "0"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "1"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "2"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "0"}, {N: "group", V: "canary"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "1"}, {N: "group", V: "canary"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "0"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "1"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "0"}, {N: "group", V: "canary"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "1"}, {N: "group", V: "canary"}})}, + } +) + func processTakeOp(t *testing.T, op parser.Params) *executor.SinkNode { bl := test.NewBlockFromValuesWithSeriesMeta(bounds, seriesMetas, v) c, sink := executor.NewControllerWithSink(parser.NodeID(1)) @@ -98,6 +163,7 @@ func TestTakeTopFunctionFilteringWithoutA(t *testing.T) { }) require.NoError(t, err) sink := processTakeOp(t, op) + expected := [][]float64{ // Taking bottomk(1) of first two series, keeping both series {0, math.NaN(), math.NaN(), math.NaN(), math.NaN()}, diff --git a/src/query/functions/utils/heap.go b/src/query/functions/utils/heap.go index 1e3078025a..7978fbbf49 100644 --- a/src/query/functions/utils/heap.go +++ b/src/query/functions/utils/heap.go @@ -30,16 +30,16 @@ type ValueIndexPair struct { Index int } -type lessFn func(ValueIndexPair, ValueIndexPair) bool +type lessFn func(*ValueIndexPair, *ValueIndexPair) bool -func maxHeapLess(i, j ValueIndexPair) bool { +func maxHeapLess(i, j *ValueIndexPair) bool { if i.Val == j.Val { return i.Index > j.Index } return i.Val < j.Val } -func minHeapLess(i, j ValueIndexPair) bool { +func minHeapLess(i, j *ValueIndexPair) bool { if i.Val == j.Val { return i.Index > j.Index } @@ -56,7 +56,7 @@ type FloatHeap struct { // NewFloatHeap builds a new FloatHeap based on first parameter // and a capacity given by second parameter. Zero and negative // values for maxSize provide an unbounded FloatHeap -func NewFloatHeap(isMaxHeap bool, capacity int) FloatHeap { +func NewFloatHeap(isMaxHeap bool, capacity int) *FloatHeap { var less lessFn if isMaxHeap { less = maxHeapLess @@ -68,13 +68,17 @@ func NewFloatHeap(isMaxHeap bool, capacity int) FloatHeap { capacity = 0 } + if capacity > 10_000 { + capacity = 10_000 + } + floatHeap := &floatHeap{ heap: make([]ValueIndexPair, 0, capacity), less: less, } heap.Init(floatHeap) - return FloatHeap{ + return &FloatHeap{ isMaxHeap: isMaxHeap, capacity: capacity, floatHeap: floatHeap, @@ -82,7 +86,7 @@ func NewFloatHeap(isMaxHeap bool, capacity int) FloatHeap { } // Push pushes a value and index pair to the heap -func (fh FloatHeap) Push(value float64, index int) { +func (fh *FloatHeap) Push(value float64, index int) { h := fh.floatHeap // If capacity is zero or negative, allow infinite size heap if fh.capacity > 0 { @@ -121,13 +125,21 @@ func (fh FloatHeap) Push(value float64, index int) { }) } +func (fh *FloatHeap) Pop() (ValueIndexPair, bool) { + h := fh.floatHeap.heap + if len(h) == 0 { + return ValueIndexPair{}, false + } + return heap.Pop(fh.floatHeap).(ValueIndexPair), true +} + // Len returns the current length of the heap -func (fh FloatHeap) Len() int { +func (fh *FloatHeap) Len() int { return fh.floatHeap.Len() } // Cap returns the capacity of the heap -func (fh FloatHeap) Cap() int { +func (fh *FloatHeap) Cap() int { return fh.capacity } @@ -137,14 +149,27 @@ func (fh *FloatHeap) Reset() { } // Flush flushes the float heap and resets it. Does not guarantee order. -func (fh FloatHeap) Flush() []ValueIndexPair { +func (fh *FloatHeap) Flush() []ValueIndexPair { values := fh.floatHeap.heap fh.Reset() return values } +// FlushInOrder flushes the float heap and returns values in order. +func (fh *FloatHeap) FlushInOrder() []ValueIndexPair { + result := make([]ValueIndexPair, len(fh.floatHeap.heap)) + + for i := len(result)-1; i >= 0; i-- { + if e, ok := fh.Pop(); ok { + result[i] = e + } + } + + return result +} + // Peek reveals the top value of the heap without mutating the heap. -func (fh FloatHeap) Peek() (ValueIndexPair, bool) { +func (fh *FloatHeap) Peek() (ValueIndexPair, bool) { h := fh.floatHeap.heap if len(h) == 0 { return ValueIndexPair{}, false @@ -168,7 +193,7 @@ func (h *floatHeap) Len() int { // Less is true if value of i is less than value of j func (h *floatHeap) Less(i, j int) bool { - return h.less(h.heap[i], h.heap[j]) + return h.less(&h.heap[i], &h.heap[j]) } // Swap swaps values at these indices diff --git a/src/query/functions/utils/heap_test.go b/src/query/functions/utils/heap_test.go index c8a8e0f842..298d5ad453 100644 --- a/src/query/functions/utils/heap_test.go +++ b/src/query/functions/utils/heap_test.go @@ -216,3 +216,34 @@ func TestNegativeCapacityHeap(t *testing.T) { assert.Equal(t, testArray[pair.Index], pair.Val) } } + +func TestFlushOrdered(t *testing.T) { + maxHeap := NewFloatHeap(true, 3) + + maxHeap.Push(0.1, 0) + maxHeap.Push(1.1, 1) + maxHeap.Push(2.1, 2) + maxHeap.Push(3.1, 3) + + actualMax := maxHeap.FlushInOrder() + + assert.Equal(t, []ValueIndexPair{ + {Val: 3.1, Index: 3}, + {Val: 2.1, Index: 2}, + {Val: 1.1, Index: 1}, + }, actualMax) + + minHeap := NewFloatHeap(false, 3) + minHeap.Push(0.1, 0) + minHeap.Push(1.1, 1) + minHeap.Push(2.1, 2) + minHeap.Push(3.1, 3) + + actualMin := minHeap.FlushInOrder() + + assert.Equal(t, []ValueIndexPair{ + {Val: 0.1, Index: 0}, + {Val: 1.1, Index: 1}, + {Val: 2.1, Index: 2}, + }, actualMin) +} diff --git a/src/query/models/query_context.go b/src/query/models/query_context.go index abb87f47cf..75a36af2e9 100644 --- a/src/query/models/query_context.go +++ b/src/query/models/query_context.go @@ -48,6 +48,8 @@ type QueryContextOptions struct { LimitMaxDocs int // RequireExhaustive results in an error if the query exceeds the series limit. RequireExhaustive bool + // Is instant query + Instantaneous bool RestrictFetchType *RestrictFetchTypeQueryContextOptions } diff --git a/src/query/test/compatibility/testdata/aggregators.test b/src/query/test/compatibility/testdata/aggregators.test index 716395e8b7..e6aba9e099 100644 --- a/src/query/test/compatibility/testdata/aggregators.test +++ b/src/query/test/compatibility/testdata/aggregators.test @@ -161,43 +161,42 @@ load 5m http_requests{job="app-server", instance="1", group="canary"} 0+80x10 foo 3+0x10 -# FAILING issue #12. All topk and bottomk tests are failing. -#eval_ordered instant at 50m topk(3, http_requests) -# http_requests{group="canary", instance="1", job="app-server"} 800 -# http_requests{group="canary", instance="0", job="app-server"} 700 -# http_requests{group="production", instance="1", job="app-server"} 600 +eval_ordered instant at 50m topk(3, http_requests) + http_requests{group="canary", instance="1", job="app-server"} 800 + http_requests{group="canary", instance="0", job="app-server"} 700 + http_requests{group="production", instance="1", job="app-server"} 600 -#eval_ordered instant at 50m topk((3), (http_requests)) -# http_requests{group="canary", instance="1", job="app-server"} 800 -# http_requests{group="canary", instance="0", job="app-server"} 700 -# http_requests{group="production", instance="1", job="app-server"} 600 +eval_ordered instant at 50m topk((3), (http_requests)) + http_requests{group="canary", instance="1", job="app-server"} 800 + http_requests{group="canary", instance="0", job="app-server"} 700 + http_requests{group="production", instance="1", job="app-server"} 600 -#eval_ordered instant at 50m topk(5, http_requests{group="canary",job="app-server"}) -# http_requests{group="canary", instance="1", job="app-server"} 800 -# http_requests{group="canary", instance="0", job="app-server"} 700 +eval_ordered instant at 50m topk(5, http_requests{group="canary",job="app-server"}) + http_requests{group="canary", instance="1", job="app-server"} 800 + http_requests{group="canary", instance="0", job="app-server"} 700 -#eval_ordered instant at 50m bottomk(3, http_requests) -# http_requests{group="production", instance="0", job="api-server"} 100 -# http_requests{group="production", instance="1", job="api-server"} 200 -# http_requests{group="canary", instance="0", job="api-server"} 300 +eval_ordered instant at 50m bottomk(3, http_requests) + http_requests{group="production", instance="0", job="api-server"} 100 + http_requests{group="production", instance="1", job="api-server"} 200 + http_requests{group="canary", instance="0", job="api-server"} 300 -#eval_ordered instant at 50m bottomk(5, http_requests{group="canary",job="app-server"}) -# http_requests{group="canary", instance="0", job="app-server"} 700 -# http_requests{group="canary", instance="1", job="app-server"} 800 +eval_ordered instant at 50m bottomk(5, http_requests{group="canary",job="app-server"}) + http_requests{group="canary", instance="0", job="app-server"} 700 + http_requests{group="canary", instance="1", job="app-server"} 800 eval instant at 50m topk by (group) (1, http_requests) http_requests{group="production", instance="1", job="app-server"} 600 http_requests{group="canary", instance="1", job="app-server"} 800 -#eval instant at 50m bottomk by (group) (2, http_requests) -# http_requests{group="canary", instance="0", job="api-server"} 300 -# http_requests{group="canary", instance="1", job="api-server"} 400 -# http_requests{group="production", instance="0", job="api-server"} 100 -# http_requests{group="production", instance="1", job="api-server"} 200 +eval instant at 50m bottomk by (group) (2, http_requests) + http_requests{group="canary", instance="0", job="api-server"} 300 + http_requests{group="canary", instance="1", job="api-server"} 400 + http_requests{group="production", instance="0", job="api-server"} 100 + http_requests{group="production", instance="1", job="api-server"} 200 -#eval_ordered instant at 50m bottomk by (group) (2, http_requests{group="production"}) -# http_requests{group="production", instance="0", job="api-server"} 100 -# http_requests{group="production", instance="1", job="api-server"} 200 +eval_ordered instant at 50m bottomk by (group) (2, http_requests{group="production"}) + http_requests{group="production", instance="0", job="api-server"} 100 + http_requests{group="production", instance="1", job="api-server"} 200 # Test NaN is sorted away from the top/bottom. #eval_ordered instant at 50m topk(3, http_requests{job="api-server",group="production"}) @@ -211,9 +210,9 @@ eval instant at 50m topk by (group) (1, http_requests) # http_requests{job="api-server", instance="2", group="production"} NaN # Test topk and bottomk allocate min(k, input_vector) for results vector -#eval_ordered instant at 50m bottomk(9999999999, http_requests{job="app-server",group="canary"}) -# http_requests{group="canary", instance="0", job="app-server"} 700 -# http_requests{group="canary", instance="1", job="app-server"} 800 +eval_ordered instant at 50m bottomk(9999999999, http_requests{job="app-server",group="canary"}) + http_requests{group="canary", instance="0", job="app-server"} 700 + http_requests{group="canary", instance="1", job="app-server"} 800 #eval_ordered instant at 50m topk(9999999999, http_requests{job="api-server",group="production"}) # http_requests{job="api-server", instance="1", group="production"} 200 From f6ed6e12fca31c8c3dcf18c051a038c14722f53a Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Fri, 16 Oct 2020 11:33:37 +0300 Subject: [PATCH 02/16] formatted some code and extracted instant logic into separate method --- src/query/functions/aggregation/take.go | 92 ++++++++++++++----------- src/query/functions/utils/heap.go | 2 +- 2 files changed, 52 insertions(+), 42 deletions(-) diff --git a/src/query/functions/aggregation/take.go b/src/query/functions/aggregation/take.go index abfa21274b..f4de783cab 100644 --- a/src/query/functions/aggregation/take.go +++ b/src/query/functions/aggregation/take.go @@ -39,7 +39,7 @@ const ( ) type ValueAndMeta struct { - Val float64 + Val float64 SeriesMeta block.SeriesMeta } @@ -73,7 +73,7 @@ func NewTakeOp( return takeFn(heap, values, buckets) } fnInstant = func(values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []ValueAndMeta { - return takeInstantFn(heap, values, buckets, seriesMetas); + return takeInstantFn(heap, values, buckets, seriesMetas) } } @@ -82,9 +82,9 @@ func NewTakeOp( // takeOp stores required properties for take ops type takeOp struct { - params NodeParams - opType string - takeFunc takeFunc + params NodeParams + opType string + takeFunc takeFunc takeInstantFunc takeInstantFunc } @@ -111,9 +111,9 @@ func (o takeOp) Node( func newTakeOp(params NodeParams, opType string, takeFunc takeFunc, takeInstantFunc takeInstantFunc) takeOp { return takeOp{ - params: params, - opType: opType, - takeFunc: takeFunc, + params: params, + opType: opType, + takeFunc: takeFunc, takeInstantFunc: takeInstantFunc, } } @@ -151,43 +151,18 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, []byte(n.op.opType), seriesMetas, ) - builder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) if err != nil { return nil, err } - stepCount := stepIter.StepCount() - if err = builder.AddCols(stepCount); err != nil { + + if err = builder.AddCols(stepIter.StepCount()); err != nil { return nil, err } if instantaneous { - for index := 0; stepIter.Next(); index++ { - step := stepIter.Current() - values := step.Values() - if isLastStep(index, stepCount) { - aggregatedValues := n.op.takeInstantFunc(values, buckets, seriesMetas) - var valuesToSet = make([]float64, len(aggregatedValues)) - for i := range aggregatedValues { - valuesToSet[i] = aggregatedValues[i].Val - } - - if err := builder.AppendValues(index, valuesToSet); err != nil { - return nil, err - } - - var rowValues = make([]float64, stepCount) - for i, value := range aggregatedValues { - rowValues[len(rowValues)-1] = value.Val - if err := builder.SetRow(i, rowValues, value.SeriesMeta); err != nil { - return nil, err - } - } - } else { - if err := builder.AppendValues(index, values); err != nil { - return nil, err - } - } + if err := n.appendValuesInstant(builder, stepIter, seriesMetas, buckets); err != nil { + return nil, err } } else { for index := 0; stepIter.Next(); index++ { @@ -196,8 +171,8 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, aggregatedValues := n.op.takeFunc(values, buckets) if err := builder.AppendValues(index, aggregatedValues); err != nil { - return nil, err - } + return nil, err + } } } @@ -208,8 +183,43 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, return builder.Build(), nil } +func (n *takeNode) appendValuesInstant(builder block.Builder, stepIter block.StepIter, seriesMetas []block.SeriesMeta, buckets [][]int) error { + stepCount := stepIter.StepCount() + for index := 0; stepIter.Next(); index++ { + step := stepIter.Current() + values := step.Values() + + if isLastStep(index, stepCount) { + //we only care for the last step values for the instant query + aggregatedValues := n.op.takeInstantFunc(values, buckets, seriesMetas) + for i := range aggregatedValues { + values[i] = aggregatedValues[i].Val + } + + if err := builder.AppendValues(index, values); err != nil { + return err + } + + //apply series metas in order + var rowValues = make([]float64, stepCount) + for i, value := range aggregatedValues { + rowValues[len(rowValues)-1] = value.Val + if err := builder.SetRow(i, rowValues, value.SeriesMeta); err != nil { + return err + } + } + return nil + } + + if err := builder.AppendValues(index, values); err != nil { + return err + } + } + return nil +} + func isLastStep(stepIndex int, stepCount int) bool { - return stepIndex == stepCount - 1 + return stepIndex == stepCount-1 } // shortcut to return empty when taking <= 0 values @@ -291,4 +301,4 @@ func takeInstantFn(heap *utils.FloatHeap, values []float64, buckets [][]int, met } } return result -} \ No newline at end of file +} diff --git a/src/query/functions/utils/heap.go b/src/query/functions/utils/heap.go index 7978fbbf49..29d15bbe0a 100644 --- a/src/query/functions/utils/heap.go +++ b/src/query/functions/utils/heap.go @@ -159,7 +159,7 @@ func (fh *FloatHeap) Flush() []ValueIndexPair { func (fh *FloatHeap) FlushInOrder() []ValueIndexPair { result := make([]ValueIndexPair, len(fh.floatHeap.heap)) - for i := len(result)-1; i >= 0; i-- { + for i := len(result) - 1; i >= 0; i-- { if e, ok := fh.Pop(); ok { result[i] = e } From 690dcd38c1187d85beae8b3297b70f3b54d947aa Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Fri, 16 Oct 2020 14:11:10 +0300 Subject: [PATCH 03/16] Changes after review. Small optimization in appendValuesInstant. --- src/query/functions/aggregation/take.go | 47 ++++++++------------ src/query/functions/aggregation/take_test.go | 36 +++++++-------- src/query/functions/utils/heap.go | 2 +- src/query/functions/utils/heap_test.go | 2 + 4 files changed, 39 insertions(+), 48 deletions(-) diff --git a/src/query/functions/aggregation/take.go b/src/query/functions/aggregation/take.go index f4de783cab..6c14c2fd27 100644 --- a/src/query/functions/aggregation/take.go +++ b/src/query/functions/aggregation/take.go @@ -39,8 +39,8 @@ const ( ) type ValueAndMeta struct { - Val float64 - SeriesMeta block.SeriesMeta + val float64 + seriesMeta block.SeriesMeta } type takeFunc func(values []float64, buckets [][]int) []float64 @@ -166,9 +166,7 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, } } else { for index := 0; stepIter.Next(); index++ { - step := stepIter.Current() - values := step.Values() - + values := stepIter.Current().Values() aggregatedValues := n.op.takeFunc(values, buckets) if err := builder.AppendValues(index, aggregatedValues); err != nil { return nil, err @@ -186,35 +184,26 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, func (n *takeNode) appendValuesInstant(builder block.Builder, stepIter block.StepIter, seriesMetas []block.SeriesMeta, buckets [][]int) error { stepCount := stepIter.StepCount() for index := 0; stepIter.Next(); index++ { - step := stepIter.Current() - values := step.Values() + values := stepIter.Current().Values() + + if err := builder.AppendValues(index, values); err != nil { + return err + } if isLastStep(index, stepCount) { //we only care for the last step values for the instant query aggregatedValues := n.op.takeInstantFunc(values, buckets, seriesMetas) - for i := range aggregatedValues { - values[i] = aggregatedValues[i].Val - } - - if err := builder.AppendValues(index, values); err != nil { - return err - } - //apply series metas in order var rowValues = make([]float64, stepCount) for i, value := range aggregatedValues { - rowValues[len(rowValues)-1] = value.Val - if err := builder.SetRow(i, rowValues, value.SeriesMeta); err != nil { + rowValues[len(rowValues)-1] = value.val + if err := builder.SetRow(i, rowValues, value.seriesMeta); err != nil { return err } } - return nil - } - - if err := builder.AppendValues(index, values); err != nil { - return err } } + return nil } @@ -231,8 +220,8 @@ func takeNone(values []float64) []float64 { func takeInstantNone(values []float64, seriesMetas []block.SeriesMeta) []ValueAndMeta { result := make([]ValueAndMeta, len(values)) for i := range result { - result[i].Val = math.NaN() - result[i].SeriesMeta = seriesMetas[i] + result[i].val = math.NaN() + result[i].seriesMeta = seriesMetas[i] } return result } @@ -272,8 +261,8 @@ func takeInstantFn(heap *utils.FloatHeap, values []float64, buckets [][]int, met var result = make([]ValueAndMeta, len(values)) for i := range result { result[i] = ValueAndMeta{ - Val: values[i], - SeriesMeta: metas[i], + val: values[i], + seriesMeta: metas[i], } } @@ -291,13 +280,13 @@ func takeInstantFn(heap *utils.FloatHeap, values []float64, buckets [][]int, met prevMeta := metas[prevIndex] idx := bucket[ix] - result[idx].Val = pair.Val - result[idx].SeriesMeta = prevMeta + result[idx].val = pair.Val + result[idx].seriesMeta = prevMeta } //clear remaining values for i := len(valIndexPairs); i < len(bucket); i++ { - result[bucket[i]].Val = math.NaN() + result[bucket[i]].val = math.NaN() } } return result diff --git a/src/query/functions/aggregation/take_test.go b/src/query/functions/aggregation/take_test.go index bf86af9c25..6a5ff4675d 100644 --- a/src/query/functions/aggregation/take_test.go +++ b/src/query/functions/aggregation/take_test.go @@ -41,17 +41,17 @@ func TestTakeInstantFn(t *testing.T) { buckets := [][]int{{0, 1, 2, 3}, {4}, {5, 6, 7, 8}} expectedMin := []ValueAndMeta{ - {Val: 1.1, SeriesMeta: seriesMetasTakeOrdered[0]}, - {Val: 2.1, SeriesMeta: seriesMetasTakeOrdered[1]}, - {Val: 3.1, SeriesMeta: seriesMetasTakeOrdered[2]}, - {Val: math.NaN(), SeriesMeta: seriesMetasTakeOrdered[3]}, + {val: 1.1, seriesMeta: seriesMetasTakeOrdered[0]}, + {val: 2.1, seriesMeta: seriesMetasTakeOrdered[1]}, + {val: 3.1, seriesMeta: seriesMetasTakeOrdered[2]}, + {val: math.NaN(), seriesMeta: seriesMetasTakeOrdered[3]}, - {Val: 5.1, SeriesMeta: seriesMetasTakeOrdered[4]}, + {val: 5.1, seriesMeta: seriesMetasTakeOrdered[4]}, - {Val: 6.1, SeriesMeta: seriesMetasTakeOrdered[5]}, - {Val: 7.1, SeriesMeta: seriesMetasTakeOrdered[6]}, - {Val: 8.1, SeriesMeta: seriesMetasTakeOrdered[7]}, - {Val: math.NaN(), SeriesMeta: seriesMetasTakeOrdered[8]}, + {val: 6.1, seriesMeta: seriesMetasTakeOrdered[5]}, + {val: 7.1, seriesMeta: seriesMetasTakeOrdered[6]}, + {val: 8.1, seriesMeta: seriesMetasTakeOrdered[7]}, + {val: math.NaN(), seriesMeta: seriesMetasTakeOrdered[8]}, } size := 3 @@ -65,17 +65,17 @@ func TestTakeInstantFn(t *testing.T) { valuesMax := []float64{1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1} expectedMax := []ValueAndMeta{ - {Val: 4.1, SeriesMeta: seriesMetasTakeOrdered[3]}, - {Val: 3.1, SeriesMeta: seriesMetasTakeOrdered[2]}, - {Val: 2.1, SeriesMeta: seriesMetasTakeOrdered[1]}, - {Val: math.NaN(), SeriesMeta: seriesMetasTakeOrdered[3]}, + {val: 4.1, seriesMeta: seriesMetasTakeOrdered[3]}, + {val: 3.1, seriesMeta: seriesMetasTakeOrdered[2]}, + {val: 2.1, seriesMeta: seriesMetasTakeOrdered[1]}, + {val: math.NaN(), seriesMeta: seriesMetasTakeOrdered[3]}, - {Val: 5.1, SeriesMeta: seriesMetasTakeOrdered[4]}, + {val: 5.1, seriesMeta: seriesMetasTakeOrdered[4]}, - {Val: 9.1, SeriesMeta: seriesMetasTakeOrdered[8]}, - {Val: 8.1, SeriesMeta: seriesMetasTakeOrdered[7]}, - {Val: 7.1, SeriesMeta: seriesMetasTakeOrdered[6]}, - {Val: math.NaN(), SeriesMeta: seriesMetasTakeOrdered[8]}, + {val: 9.1, seriesMeta: seriesMetasTakeOrdered[8]}, + {val: 8.1, seriesMeta: seriesMetasTakeOrdered[7]}, + {val: 7.1, seriesMeta: seriesMetasTakeOrdered[6]}, + {val: math.NaN(), seriesMeta: seriesMetasTakeOrdered[8]}, } maxHeap := utils.NewFloatHeap(true, size) diff --git a/src/query/functions/utils/heap.go b/src/query/functions/utils/heap.go index 29d15bbe0a..6a67035e11 100644 --- a/src/query/functions/utils/heap.go +++ b/src/query/functions/utils/heap.go @@ -157,7 +157,7 @@ func (fh *FloatHeap) Flush() []ValueIndexPair { // FlushInOrder flushes the float heap and returns values in order. func (fh *FloatHeap) FlushInOrder() []ValueIndexPair { - result := make([]ValueIndexPair, len(fh.floatHeap.heap)) + result := make([]ValueIndexPair, fh.Len()) for i := len(result) - 1; i >= 0; i-- { if e, ok := fh.Pop(); ok { diff --git a/src/query/functions/utils/heap_test.go b/src/query/functions/utils/heap_test.go index 298d5ad453..661ec403f7 100644 --- a/src/query/functions/utils/heap_test.go +++ b/src/query/functions/utils/heap_test.go @@ -232,6 +232,7 @@ func TestFlushOrdered(t *testing.T) { {Val: 2.1, Index: 2}, {Val: 1.1, Index: 1}, }, actualMax) + assert.Equal(t, 0, maxHeap.Len()) minHeap := NewFloatHeap(false, 3) minHeap.Push(0.1, 0) @@ -246,4 +247,5 @@ func TestFlushOrdered(t *testing.T) { {Val: 1.1, Index: 1}, {Val: 2.1, Index: 2}, }, actualMin) + assert.Equal(t, 0, minHeap.Len()) } From 5c7cf6510334f033ceb243b31d954bcd80e0675c Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Fri, 16 Oct 2020 15:39:20 +0300 Subject: [PATCH 04/16] fixed imports. made valueAndMeta struct package private. --- src/query/functions/aggregation/take.go | 21 ++++++++++---------- src/query/functions/aggregation/take_test.go | 10 +++++----- src/query/functions/utils/heap.go | 7 +++++-- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/query/functions/aggregation/take.go b/src/query/functions/aggregation/take.go index 6c14c2fd27..cd2a79856e 100644 --- a/src/query/functions/aggregation/take.go +++ b/src/query/functions/aggregation/take.go @@ -22,13 +22,14 @@ package aggregation import ( "fmt" + "math" + "github.com/m3db/m3/src/query/block" "github.com/m3db/m3/src/query/executor/transform" "github.com/m3db/m3/src/query/functions/utils" "github.com/m3db/m3/src/query/models" "github.com/m3db/m3/src/query/parser" "github.com/m3db/m3/src/query/util" - "math" ) const ( @@ -38,13 +39,13 @@ const ( TopKType = "topk" ) -type ValueAndMeta struct { +type valueAndMeta struct { val float64 seriesMeta block.SeriesMeta } type takeFunc func(values []float64, buckets [][]int) []float64 -type takeInstantFunc func(values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []ValueAndMeta +type takeInstantFunc func(values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta // NewTakeOp creates a new takeK operation func NewTakeOp( @@ -64,7 +65,7 @@ func NewTakeOp( fn = func(values []float64, buckets [][]int) []float64 { return takeNone(values) } - fnInstant = func(values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []ValueAndMeta { + fnInstant = func(values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta { return takeInstantNone(values, seriesMetas) } } else { @@ -72,7 +73,7 @@ func NewTakeOp( fn = func(values []float64, buckets [][]int) []float64 { return takeFn(heap, values, buckets) } - fnInstant = func(values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []ValueAndMeta { + fnInstant = func(values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta { return takeInstantFn(heap, values, buckets, seriesMetas) } } @@ -217,8 +218,8 @@ func takeNone(values []float64) []float64 { return values } -func takeInstantNone(values []float64, seriesMetas []block.SeriesMeta) []ValueAndMeta { - result := make([]ValueAndMeta, len(values)) +func takeInstantNone(values []float64, seriesMetas []block.SeriesMeta) []valueAndMeta { + result := make([]valueAndMeta, len(values)) for i := range result { result[i].val = math.NaN() result[i].seriesMeta = seriesMetas[i] @@ -257,10 +258,10 @@ func takeFn(heap *utils.FloatHeap, values []float64, buckets [][]int) []float64 return values } -func takeInstantFn(heap *utils.FloatHeap, values []float64, buckets [][]int, metas []block.SeriesMeta) []ValueAndMeta { - var result = make([]ValueAndMeta, len(values)) +func takeInstantFn(heap *utils.FloatHeap, values []float64, buckets [][]int, metas []block.SeriesMeta) []valueAndMeta { + var result = make([]valueAndMeta, len(values)) for i := range result { - result[i] = ValueAndMeta{ + result[i] = valueAndMeta{ val: values[i], seriesMeta: metas[i], } diff --git a/src/query/functions/aggregation/take_test.go b/src/query/functions/aggregation/take_test.go index 6a5ff4675d..718c7f861a 100644 --- a/src/query/functions/aggregation/take_test.go +++ b/src/query/functions/aggregation/take_test.go @@ -22,6 +22,9 @@ package aggregation import ( "fmt" + "math" + "testing" + "github.com/m3db/m3/src/query/block" "github.com/m3db/m3/src/query/executor/transform" "github.com/m3db/m3/src/query/functions/utils" @@ -29,9 +32,6 @@ import ( "github.com/m3db/m3/src/query/parser" "github.com/m3db/m3/src/query/test" "github.com/m3db/m3/src/query/test/executor" - "math" - "testing" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -40,7 +40,7 @@ func TestTakeInstantFn(t *testing.T) { valuesMin := []float64{1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1} buckets := [][]int{{0, 1, 2, 3}, {4}, {5, 6, 7, 8}} - expectedMin := []ValueAndMeta{ + expectedMin := []valueAndMeta{ {val: 1.1, seriesMeta: seriesMetasTakeOrdered[0]}, {val: 2.1, seriesMeta: seriesMetasTakeOrdered[1]}, {val: 3.1, seriesMeta: seriesMetasTakeOrdered[2]}, @@ -64,7 +64,7 @@ func TestTakeInstantFn(t *testing.T) { require.EqualValues(t, expectedString, actualString) valuesMax := []float64{1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1} - expectedMax := []ValueAndMeta{ + expectedMax := []valueAndMeta{ {val: 4.1, seriesMeta: seriesMetasTakeOrdered[3]}, {val: 3.1, seriesMeta: seriesMetasTakeOrdered[2]}, {val: 2.1, seriesMeta: seriesMetasTakeOrdered[1]}, diff --git a/src/query/functions/utils/heap.go b/src/query/functions/utils/heap.go index 6a67035e11..44ef7b016a 100644 --- a/src/query/functions/utils/heap.go +++ b/src/query/functions/utils/heap.go @@ -53,6 +53,8 @@ type FloatHeap struct { floatHeap *floatHeap } +const limitMaxValues = 10_000 + // NewFloatHeap builds a new FloatHeap based on first parameter // and a capacity given by second parameter. Zero and negative // values for maxSize provide an unbounded FloatHeap @@ -68,8 +70,9 @@ func NewFloatHeap(isMaxHeap bool, capacity int) *FloatHeap { capacity = 0 } - if capacity > 10_000 { - capacity = 10_000 + // we limit heap capacity because we don't want to go OOM + if capacity > limitMaxValues { + capacity = limitMaxValues } floatHeap := &floatHeap{ From 66746ac15713734554721c04ab815e01b80670ca Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Mon, 19 Oct 2020 13:20:24 +0300 Subject: [PATCH 05/16] Optimized instant queries - now only single block will be created. OrderedFlush() now sorts the results instead popping them one by one. FloatHeap won't be created if it's not used. --- src/query/functions/aggregation/take.go | 134 +++++++++++++++--------- src/query/functions/utils/heap.go | 45 ++++---- src/query/functions/utils/heap_test.go | 37 ++++++- 3 files changed, 140 insertions(+), 76 deletions(-) diff --git a/src/query/functions/aggregation/take.go b/src/query/functions/aggregation/take.go index cd2a79856e..a3eefe2f50 100644 --- a/src/query/functions/aggregation/take.go +++ b/src/query/functions/aggregation/take.go @@ -44,47 +44,43 @@ type valueAndMeta struct { seriesMeta block.SeriesMeta } -type takeFunc func(values []float64, buckets [][]int) []float64 -type takeInstantFunc func(values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta +type takeFunc func(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 +type takeInstantFunc func(heap utils.FloatHeap, values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta +type takeValuesFunc func(values []float64, buckets [][]int) []float64 // NewTakeOp creates a new takeK operation func NewTakeOp( opType string, params NodeParams, ) (parser.Params, error) { - takeTop := opType == TopKType - if !takeTop && opType != BottomKType { - return baseOp{}, fmt.Errorf("operator not supported: %s", opType) - } - var fn takeFunc var fnInstant takeInstantFunc k := int(params.Parameter) if k < 1 { - fn = func(values []float64, buckets [][]int) []float64 { + fn = func(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 { return takeNone(values) } - fnInstant = func(values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta { + fnInstant = func(heap utils.FloatHeap, values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta { return takeInstantNone(values, seriesMetas) } } else { - heap := utils.NewFloatHeap(takeTop, k) - fn = func(values []float64, buckets [][]int) []float64 { + fn = func(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 { return takeFn(heap, values, buckets) } - fnInstant = func(values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta { + fnInstant = func(heap utils.FloatHeap, values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta { return takeInstantFn(heap, values, buckets, seriesMetas) } } - return newTakeOp(params, opType, fn, fnInstant), nil + return newTakeOp(params, opType, k, fn, fnInstant), nil } // takeOp stores required properties for take ops type takeOp struct { params NodeParams opType string + k int takeFunc takeFunc takeInstantFunc takeInstantFunc } @@ -110,10 +106,11 @@ func (o takeOp) Node( } } -func newTakeOp(params NodeParams, opType string, takeFunc takeFunc, takeInstantFunc takeInstantFunc) takeOp { +func newTakeOp(params NodeParams, opType string, k int, takeFunc takeFunc, takeInstantFunc takeInstantFunc) takeOp { return takeOp{ params: params, opType: opType, + k: k, takeFunc: takeFunc, takeInstantFunc: takeInstantFunc, } @@ -143,6 +140,11 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, } instantaneous := queryCtx.Options.Instantaneous + takeTop := n.op.opType == TopKType + if !takeTop && n.op.opType != BottomKType { + return nil, fmt.Errorf("operator not supported: %s", n.op.opType) + } + params := n.op.params meta := b.Meta() seriesMetas := utils.FlattenMetadata(meta, stepIter.SeriesMeta()) @@ -152,60 +154,92 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, []byte(n.op.opType), seriesMetas, ) - builder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) - if err != nil { - return nil, err - } - - if err = builder.AddCols(stepIter.StepCount()); err != nil { - return nil, err - } - + seriesCount := len(seriesMetas) if instantaneous { - if err := n.appendValuesInstant(builder, stepIter, seriesMetas, buckets); err != nil { + heap := utils.NewFloatHeap(takeTop, utils.Min(n.op.k, seriesCount)) + return n.processBlockInstantaneous(heap, queryCtx, b.Meta(), stepIter, seriesMetas, buckets) + } else { + fnTake := n.resolveTakeFn(seriesCount, takeTop) + builder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) + if err != nil { return nil, err } - } else { + + if err = builder.AddCols(stepIter.StepCount()); err != nil { + return nil, err + } + for index := 0; stepIter.Next(); index++ { values := stepIter.Current().Values() - aggregatedValues := n.op.takeFunc(values, buckets) - if err := builder.AppendValues(index, aggregatedValues); err != nil { + if err := builder.AppendValues(index, fnTake(values, buckets)); err != nil { return nil, err } } + if err = stepIter.Err(); err != nil { + return nil, err + } + return builder.Build(), nil } +} - if err = stepIter.Err(); err != nil { - return nil, err +func (n *takeNode) resolveTakeFn(seriesCount int, takeTop bool) takeValuesFunc { + if n.op.k < seriesCount { + heap := utils.NewFloatHeap(takeTop, n.op.k) + return func (values []float64, buckets [][]int) []float64 { + return n.op.takeFunc(heap, values, buckets) + } } - return builder.Build(), nil + return func (values []float64, buckets [][]int) []float64 { + return values + } } -func (n *takeNode) appendValuesInstant(builder block.Builder, stepIter block.StepIter, seriesMetas []block.SeriesMeta, buckets [][]int) error { +func (n *takeNode) processBlockInstantaneous( + heap utils.FloatHeap, + queryCtx *models.QueryContext, + metadata block.Metadata, + stepIter block.StepIter, + seriesMetas []block.SeriesMeta, + buckets [][]int) (block.Block, error) { stepCount := stepIter.StepCount() - for index := 0; stepIter.Next(); index++ { - values := stepIter.Current().Values() - - if err := builder.AppendValues(index, values); err != nil { - return err - } + for index := 0; stepIter.Next(); index++ { if isLastStep(index, stepCount) { //we only care for the last step values for the instant query - aggregatedValues := n.op.takeInstantFunc(values, buckets, seriesMetas) - //apply series metas in order - var rowValues = make([]float64, stepCount) - for i, value := range aggregatedValues { - rowValues[len(rowValues)-1] = value.val - if err := builder.SetRow(i, rowValues, value.seriesMeta); err != nil { - return err - } + step := stepIter.Current() + values := step.Values() + takenSortedValues := n.op.takeInstantFunc(heap, values, buckets, seriesMetas) + for i := range takenSortedValues { + values[i] = takenSortedValues[i].val + seriesMetas[i] = takenSortedValues[i].seriesMeta + } + + //adjust bounds to contain single step + time, err := metadata.Bounds.TimeForIndex(index) + if err != nil { + return nil, err } + metadata.Bounds = models.Bounds{ + Start: time, + Duration: metadata.Bounds.StepSize, + StepSize: metadata.Bounds.StepSize, + } + + blockBuilder, err := n.controller.BlockBuilder(queryCtx, metadata, seriesMetas) + if err != nil { + return nil, err + } + if err = blockBuilder.AddCols(1); err != nil { + return nil, err + } + if err := blockBuilder.AppendValues(0, values); err != nil { + return nil, err + } + return blockBuilder.Build(), nil } } - - return nil + return nil, fmt.Errorf("no data to return: %s", n.op.opType) } func isLastStep(stepIndex int, stepCount int) bool { @@ -227,7 +261,7 @@ func takeInstantNone(values []float64, seriesMetas []block.SeriesMeta) []valueAn return result } -func takeFn(heap *utils.FloatHeap, values []float64, buckets [][]int) []float64 { +func takeFn(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 { capacity := heap.Cap() for _, bucket := range buckets { // If this bucket's length is less than or equal to the heap's @@ -258,7 +292,7 @@ func takeFn(heap *utils.FloatHeap, values []float64, buckets [][]int) []float64 return values } -func takeInstantFn(heap *utils.FloatHeap, values []float64, buckets [][]int, metas []block.SeriesMeta) []valueAndMeta { +func takeInstantFn(heap utils.FloatHeap, values []float64, buckets [][]int, metas []block.SeriesMeta) []valueAndMeta { var result = make([]valueAndMeta, len(values)) for i := range result { result[i] = valueAndMeta{ @@ -275,7 +309,7 @@ func takeInstantFn(heap *utils.FloatHeap, values []float64, buckets [][]int, met } } - valIndexPairs := heap.FlushInOrder() + valIndexPairs := heap.OrderedFlush() for ix, pair := range valIndexPairs { prevIndex := pair.Index prevMeta := metas[prevIndex] diff --git a/src/query/functions/utils/heap.go b/src/query/functions/utils/heap.go index 44ef7b016a..f213a70476 100644 --- a/src/query/functions/utils/heap.go +++ b/src/query/functions/utils/heap.go @@ -22,6 +22,7 @@ package utils import ( "container/heap" + "sort" ) // ValueIndexPair is a pair of float value and index at which it exists @@ -30,22 +31,29 @@ type ValueIndexPair struct { Index int } -type lessFn func(*ValueIndexPair, *ValueIndexPair) bool +type lessFn func(ValueIndexPair, ValueIndexPair) bool -func maxHeapLess(i, j *ValueIndexPair) bool { +func maxHeapLess(i, j ValueIndexPair) bool { if i.Val == j.Val { return i.Index > j.Index } return i.Val < j.Val } -func minHeapLess(i, j *ValueIndexPair) bool { +func minHeapLess(i, j ValueIndexPair) bool { if i.Val == j.Val { return i.Index > j.Index } return i.Val > j.Val } +func Min(a, b int) int { + if a < b { + return a + } + return b +} + // FloatHeap is a heap that can be given a maximum size type FloatHeap struct { isMaxHeap bool @@ -53,12 +61,10 @@ type FloatHeap struct { floatHeap *floatHeap } -const limitMaxValues = 10_000 - // NewFloatHeap builds a new FloatHeap based on first parameter // and a capacity given by second parameter. Zero and negative // values for maxSize provide an unbounded FloatHeap -func NewFloatHeap(isMaxHeap bool, capacity int) *FloatHeap { +func NewFloatHeap(isMaxHeap bool, capacity int) FloatHeap { var less lessFn if isMaxHeap { less = maxHeapLess @@ -70,18 +76,13 @@ func NewFloatHeap(isMaxHeap bool, capacity int) *FloatHeap { capacity = 0 } - // we limit heap capacity because we don't want to go OOM - if capacity > limitMaxValues { - capacity = limitMaxValues - } - floatHeap := &floatHeap{ heap: make([]ValueIndexPair, 0, capacity), less: less, } heap.Init(floatHeap) - return &FloatHeap{ + return FloatHeap{ isMaxHeap: isMaxHeap, capacity: capacity, floatHeap: floatHeap, @@ -158,17 +159,13 @@ func (fh *FloatHeap) Flush() []ValueIndexPair { return values } -// FlushInOrder flushes the float heap and returns values in order. -func (fh *FloatHeap) FlushInOrder() []ValueIndexPair { - result := make([]ValueIndexPair, fh.Len()) - - for i := len(result) - 1; i >= 0; i-- { - if e, ok := fh.Pop(); ok { - result[i] = e - } - } - - return result +// OrderedFlush flushes the float heap and returns values in order. +func (fh *FloatHeap) OrderedFlush() []ValueIndexPair { + flushed := fh.Flush() + sort.Slice(flushed, func(i, j int) bool { + return fh.floatHeap.less(flushed[j], flushed[i]) + }) + return flushed } // Peek reveals the top value of the heap without mutating the heap. @@ -196,7 +193,7 @@ func (h *floatHeap) Len() int { // Less is true if value of i is less than value of j func (h *floatHeap) Less(i, j int) bool { - return h.less(&h.heap[i], &h.heap[j]) + return h.less(h.heap[i], h.heap[j]) } // Swap swaps values at these indices diff --git a/src/query/functions/utils/heap_test.go b/src/query/functions/utils/heap_test.go index 661ec403f7..05d88c8218 100644 --- a/src/query/functions/utils/heap_test.go +++ b/src/query/functions/utils/heap_test.go @@ -225,7 +225,7 @@ func TestFlushOrdered(t *testing.T) { maxHeap.Push(2.1, 2) maxHeap.Push(3.1, 3) - actualMax := maxHeap.FlushInOrder() + actualMax := maxHeap.OrderedFlush() assert.Equal(t, []ValueIndexPair{ {Val: 3.1, Index: 3}, @@ -240,7 +240,40 @@ func TestFlushOrdered(t *testing.T) { minHeap.Push(2.1, 2) minHeap.Push(3.1, 3) - actualMin := minHeap.FlushInOrder() + actualMin := minHeap.OrderedFlush() + + assert.Equal(t, []ValueIndexPair{ + {Val: 0.1, Index: 0}, + {Val: 1.1, Index: 1}, + {Val: 2.1, Index: 2}, + }, actualMin) + assert.Equal(t, 0, minHeap.Len()) +} + +func TestFlushOrderedWhenRandomInsertionOrder(t *testing.T) { + maxHeap := NewFloatHeap(true, 3) + + maxHeap.Push(0.1, 0) + maxHeap.Push(2.1, 2) + maxHeap.Push(1.1, 1) + maxHeap.Push(3.1, 3) + + actualMax := maxHeap.OrderedFlush() + + assert.Equal(t, []ValueIndexPair{ + {Val: 3.1, Index: 3}, + {Val: 2.1, Index: 2}, + {Val: 1.1, Index: 1}, + }, actualMax) + assert.Equal(t, 0, maxHeap.Len()) + + minHeap := NewFloatHeap(false, 3) + minHeap.Push(0.1, 0) + minHeap.Push(2.1, 2) + minHeap.Push(1.1, 1) + minHeap.Push(3.1, 3) + + actualMin := minHeap.OrderedFlush() assert.Equal(t, []ValueIndexPair{ {Val: 0.1, Index: 0}, From 3d12b53c6bb7fb2a5cbd174729cf01ecdfae767f Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Tue, 20 Oct 2020 13:17:09 +0300 Subject: [PATCH 06/16] improved how max series count is calculated --- src/query/functions/aggregation/take.go | 20 ++++++++++++++++---- src/query/functions/aggregation/take_test.go | 15 +++++++-------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/query/functions/aggregation/take.go b/src/query/functions/aggregation/take.go index a3eefe2f50..82c535833e 100644 --- a/src/query/functions/aggregation/take.go +++ b/src/query/functions/aggregation/take.go @@ -154,10 +154,11 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, []byte(n.op.opType), seriesMetas, ) - seriesCount := len(seriesMetas) + + seriesCount := maxSeriesCount(buckets) if instantaneous { heap := utils.NewFloatHeap(takeTop, utils.Min(n.op.k, seriesCount)) - return n.processBlockInstantaneous(heap, queryCtx, b.Meta(), stepIter, seriesMetas, buckets) + return n.processBlockInstantaneous(heap, queryCtx, meta, stepIter, seriesMetas, buckets) } else { fnTake := n.resolveTakeFn(seriesCount, takeTop) builder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) @@ -182,6 +183,18 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, } } +func maxSeriesCount(buckets [][]int) int { + result := 0 + + for _, bucket := range buckets { + if len(bucket) > result { + result = len(bucket) + } + } + + return result +} + func (n *takeNode) resolveTakeFn(seriesCount int, takeTop bool) takeValuesFunc { if n.op.k < seriesCount { heap := utils.NewFloatHeap(takeTop, n.op.k) @@ -207,8 +220,7 @@ func (n *takeNode) processBlockInstantaneous( for index := 0; stepIter.Next(); index++ { if isLastStep(index, stepCount) { //we only care for the last step values for the instant query - step := stepIter.Current() - values := step.Values() + values := stepIter.Current().Values() takenSortedValues := n.op.takeInstantFunc(heap, values, buckets, seriesMetas) for i := range takenSortedValues { values[i] = takenSortedValues[i].val diff --git a/src/query/functions/aggregation/take_test.go b/src/query/functions/aggregation/take_test.go index 718c7f861a..08ffcd8cc0 100644 --- a/src/query/functions/aggregation/take_test.go +++ b/src/query/functions/aggregation/take_test.go @@ -32,7 +32,6 @@ import ( "github.com/m3db/m3/src/query/parser" "github.com/m3db/m3/src/query/test" "github.com/m3db/m3/src/query/test/executor" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -83,7 +82,7 @@ func TestTakeInstantFn(t *testing.T) { actualString = fmt.Sprint(actual) expectedString = fmt.Sprint(expectedMax) - assert.EqualValues(t, expectedString, actualString) + require.EqualValues(t, expectedString, actualString) } func TestTakeFn(t *testing.T) { @@ -152,9 +151,9 @@ func TestTakeBottomFunctionFilteringWithoutA(t *testing.T) { } // Should have the same metas as when started - assert.Equal(t, seriesMetas, sink.Metas) + require.Equal(t, seriesMetas, sink.Metas) test.EqualsWithNansWithDelta(t, expected, sink.Values, math.Pow10(-5)) - assert.Equal(t, bounds, sink.Meta.Bounds) + require.Equal(t, bounds, sink.Meta.Bounds) } func TestTakeTopFunctionFilteringWithoutA(t *testing.T) { @@ -177,9 +176,9 @@ func TestTakeTopFunctionFilteringWithoutA(t *testing.T) { } // Should have the same metas as when started - assert.Equal(t, seriesMetas, sink.Metas) + require.Equal(t, seriesMetas, sink.Metas) test.EqualsWithNansWithDelta(t, expected, sink.Values, math.Pow10(-5)) - assert.Equal(t, bounds, sink.Meta.Bounds) + require.Equal(t, bounds, sink.Meta.Bounds) } func TestTakeTopFunctionFilteringWithoutALessThanOne(t *testing.T) { @@ -201,7 +200,7 @@ func TestTakeTopFunctionFilteringWithoutALessThanOne(t *testing.T) { } // Should have the same metas as when started - assert.Equal(t, seriesMetas, sink.Metas) + require.Equal(t, seriesMetas, sink.Metas) test.EqualsWithNansWithDelta(t, expected, sink.Values, math.Pow10(-5)) - assert.Equal(t, bounds, sink.Meta.Bounds) + require.Equal(t, bounds, sink.Meta.Bounds) } From d8a5cc948e0bad506d74233ecb5ed8619cc4e1d8 Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Tue, 20 Oct 2020 15:51:50 +0300 Subject: [PATCH 07/16] removed unused method --- src/query/functions/utils/heap.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/query/functions/utils/heap.go b/src/query/functions/utils/heap.go index f213a70476..1b44ff5aea 100644 --- a/src/query/functions/utils/heap.go +++ b/src/query/functions/utils/heap.go @@ -129,14 +129,6 @@ func (fh *FloatHeap) Push(value float64, index int) { }) } -func (fh *FloatHeap) Pop() (ValueIndexPair, bool) { - h := fh.floatHeap.heap - if len(h) == 0 { - return ValueIndexPair{}, false - } - return heap.Pop(fh.floatHeap).(ValueIndexPair), true -} - // Len returns the current length of the heap func (fh *FloatHeap) Len() int { return fh.floatHeap.Len() From a7496e1018cd212aff31f223f5d10242f5c4025d Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Wed, 21 Oct 2020 14:59:16 +0300 Subject: [PATCH 08/16] initial implementation --- src/query/functions/linear/sort.go | 175 +++++++++++++++++- src/query/functions/linear/sort_test.go | 38 ++++ src/query/parser/promql/matchers.go | 5 +- .../testdata/functions_custom.test | 34 ++++ 4 files changed, 247 insertions(+), 5 deletions(-) create mode 100644 src/query/functions/linear/sort_test.go create mode 100644 src/query/test/compatibility/testdata/functions_custom.test diff --git a/src/query/functions/linear/sort.go b/src/query/functions/linear/sort.go index b0f7b88665..cd56bb164f 100644 --- a/src/query/functions/linear/sort.go +++ b/src/query/functions/linear/sort.go @@ -20,13 +20,182 @@ package linear -const ( - // NB: Because Prometheus's sort and sort_desc only look at the last value, - // these functions are essentially noops in M3 as we don't support instant queries. +import ( + "fmt" + "math" + "sort" + + "github.com/m3db/m3/src/query/block" + "github.com/m3db/m3/src/query/executor/transform" + "github.com/m3db/m3/src/query/functions/utils" + "github.com/m3db/m3/src/query/models" + "github.com/m3db/m3/src/query/parser" +) +const ( // SortType returns timeseries elements sorted by their values, in ascending order. SortType = "sort" // SortDescType is the same as sort, but sorts in descending order. SortDescType = "sort_desc" ) + +type sortOp struct { + opType string + lessFn lessFn +} + +// OpType for the operator +func (o sortOp) OpType() string { + return o.opType +} + +// String representation +func (o sortOp) String() string { + return fmt.Sprintf("type: %s", o.opType) +} + +type sortNode struct { + op sortOp + controller *transform.Controller +} + +type valueAndMeta struct { + val float64 + seriesMeta block.SeriesMeta +} + +type lessFn func (i, j float64) bool + +func asc(i, j float64) bool { + return i < j || math.IsNaN(j) && !math.IsNaN(i) +} + +func desc(i, j float64) bool { + return i > j || math.IsNaN(j) && !math.IsNaN(i) +} + +// Node creates an execution node +func (o sortOp) Node( + controller *transform.Controller, + _ transform.Options, +) transform.OpNode { + return &sortNode{ + op: o, + controller: controller, + } +} + +func (n *sortNode) Params() parser.Params { + return n.op +} + +func (n *sortNode) Process(queryCtx *models.QueryContext, ID parser.NodeID, b block.Block) error { + return transform.ProcessSimpleBlock(n, n.controller, queryCtx, ID, b) +} + +func (n *sortNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, b block.Block) (block.Block, error) { + stepIter, err := b.StepIter() + if err != nil { + return nil, err + } + + instantaneous := queryCtx.Options.Instantaneous + + meta := b.Meta() + seriesMetas := utils.FlattenMetadata(meta, stepIter.SeriesMeta()) + stepCount := stepIter.StepCount() + + if !instantaneous { + builder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) + if err != nil { + return nil, err + } + + if err := builder.AddCols(stepCount); err != nil { + return nil, err + } + + for index := 0; stepIter.Next(); index++ { + if err := builder.AppendValues(index, stepIter.Current().Values()); err != nil { + return nil, err + } + } + + if err = stepIter.Err(); err != nil { + return nil, err + } + return builder.Build(), nil + } + + for index := 0; stepIter.Next(); index++ { + if isLastStep(index, stepCount) { + //we only care for the last step values for the instant query + values := stepIter.Current().Values() + valuesToSort := make([]valueAndMeta, len(values)) + for i, value := range values { + valuesToSort[i] = valueAndMeta{ + val: value, + seriesMeta: seriesMetas[i], + } + } + + sort.Slice(valuesToSort, func(i, j int) bool { + return n.op.lessFn(valuesToSort[i].val, valuesToSort[j].val) + }) + + for i := range valuesToSort { + values[i] = valuesToSort[i].val + seriesMetas[i] = valuesToSort[i].seriesMeta + } + + //adjust bounds to contain single step + time, err := meta.Bounds.TimeForIndex(index) + if err != nil { + return nil, err + } + meta.Bounds = models.Bounds{ + Start: time, + Duration: meta.Bounds.StepSize, + StepSize: meta.Bounds.StepSize, + } + + blockBuilder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) + if err != nil { + return nil, err + } + if err = blockBuilder.AddCols(1); err != nil { + return nil, err + } + if err := blockBuilder.AppendValues(0, values); err != nil { + return nil, err + } + if err = stepIter.Err(); err != nil { + return nil, err + } + return blockBuilder.Build(), nil + } + } + + return nil, fmt.Errorf("no data to return: %s", n.op.opType) +} + +func isLastStep(stepIndex int, stepCount int) bool { + return stepIndex == stepCount-1 +} + +func NewSortOp(opType string) (parser.Params, error) { + ascending := opType == SortType + if !ascending && opType != SortDescType { + return nil, fmt.Errorf("operator not supported: %s", opType) + } + + var lessFn lessFn + if ascending { + lessFn = asc + } else { + lessFn = desc + } + + return sortOp{opType, lessFn}, nil +} diff --git a/src/query/functions/linear/sort_test.go b/src/query/functions/linear/sort_test.go new file mode 100644 index 0000000000..44e6222c17 --- /dev/null +++ b/src/query/functions/linear/sort_test.go @@ -0,0 +1,38 @@ +package linear + +import ( + "math" + "sort" + "testing" + + "github.com/m3db/m3/src/query/test" + + "github.com/stretchr/testify/require" +) + +func TestShouldFailWhenOpTypeIsInvalid(t *testing.T) { + _, err := NewSortOp("sortAsc") + require.Error(t, err) +} + +func TestAscSort(t *testing.T) { + actual := []float64{ 5.0, 4.1, math.NaN(), 8.6, 0.1 } + expected := []float64{ 0.1, 4.1, 5.0, 8.6, math.NaN() } + + sort.Slice(actual, func(i, j int) bool { + return asc(actual[i], actual[j]) + }) + + test.EqualsWithNans(t, expected, actual) +} + +func TestDescSort(t *testing.T) { + actual := []float64{ 5.0, 4.1, math.NaN(), 8.6, 0.1 } + expected := []float64{ 8.6, 5.0, 4.1, 0.1, math.NaN() } + + sort.Slice(actual, func(i, j int) bool { + return desc(actual[i], actual[j]) + }) + + test.EqualsWithNans(t, expected, actual) +} \ No newline at end of file diff --git a/src/query/parser/promql/matchers.go b/src/query/parser/promql/matchers.go index af1237cbd3..7a78b1f7b0 100644 --- a/src/query/parser/promql/matchers.go +++ b/src/query/parser/promql/matchers.go @@ -262,10 +262,11 @@ func NewFunctionExpr( p, err = scalar.NewTimeOp(tagOptions) return p, true, err - // NB: no-ops. case linear.SortType, linear.SortDescType: - return nil, false, err + p, err = linear.NewSortOp(name) + return p, true, err + // NB: no-ops. case scalar.ScalarType: return nil, false, err diff --git a/src/query/test/compatibility/testdata/functions_custom.test b/src/query/test/compatibility/testdata/functions_custom.test new file mode 100644 index 0000000000..cd1c9475ba --- /dev/null +++ b/src/query/test/compatibility/testdata/functions_custom.test @@ -0,0 +1,34 @@ +# Tests for sort/sort_desc. +clear +load 5m + http_requests{job="api-server", instance="0", group="production"} 0+10x10 + http_requests{job="api-server", instance="1", group="production"} 0+20x10 + http_requests{job="api-server", instance="0", group="canary"} 0+30x10 + http_requests{job="api-server", instance="1", group="canary"} 0+40x10 + http_requests{job="api-server", instance="2", group="canary"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN + http_requests{job="app-server", instance="0", group="production"} 0+50x10 + http_requests{job="app-server", instance="1", group="production"} 0+60x10 + http_requests{job="app-server", instance="0", group="canary"} 0+70x10 + http_requests{job="app-server", instance="1", group="canary"} 0+80x10 + +eval_ordered instant at 50m sort(http_requests) + http_requests{group="production", instance="0", job="api-server"} 100 + http_requests{group="production", instance="1", job="api-server"} 200 + http_requests{group="canary", instance="0", job="api-server"} 300 + http_requests{group="canary", instance="1", job="api-server"} 400 + http_requests{group="production", instance="0", job="app-server"} 500 + http_requests{group="production", instance="1", job="app-server"} 600 + http_requests{group="canary", instance="0", job="app-server"} 700 + http_requests{group="canary", instance="1", job="app-server"} 800 + +eval_ordered instant at 50m sort_desc(http_requests) + http_requests{group="canary", instance="1", job="app-server"} 800 + http_requests{group="canary", instance="0", job="app-server"} 700 + http_requests{group="production", instance="1", job="app-server"} 600 + http_requests{group="production", instance="0", job="app-server"} 500 + http_requests{group="canary", instance="1", job="api-server"} 400 + http_requests{group="canary", instance="0", job="api-server"} 300 + http_requests{group="production", instance="1", job="api-server"} 200 + http_requests{group="production", instance="0", job="api-server"} 100 + +clear \ No newline at end of file From 39eb74626a526941dd1670188a80766ad762e8d4 Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Thu, 22 Oct 2020 17:38:57 +0300 Subject: [PATCH 09/16] Fixes NaN issues with m3 queries for sort and topk/bottomk. FloatHeap now supports adding NaN values. Uncommented topk, bottomk and sort compatibility tests (now they pass). --- .../api/v1/handler/prometheus/native/read.go | 13 +++- src/query/block/meta.go | 11 ++++ src/query/functions/aggregation/take.go | 52 ++++++--------- src/query/functions/aggregation/take_test.go | 4 -- src/query/functions/linear/sort.go | 66 +++++++++---------- src/query/functions/linear/sort_test.go | 5 +- src/query/functions/utils/heap.go | 17 +++-- src/query/functions/utils/heap_test.go | 44 +++++++++++++ src/query/parser/promql/parse_test.go | 6 +- .../compatibility/testdata/aggregators.test | 24 +++---- .../compatibility/testdata/functions.test | 43 ++++++------ .../testdata/functions_custom.test | 34 ---------- 12 files changed, 171 insertions(+), 148 deletions(-) delete mode 100644 src/query/test/compatibility/testdata/functions_custom.test diff --git a/src/query/api/v1/handler/prometheus/native/read.go b/src/query/api/v1/handler/prometheus/native/read.go index c5f8a958ba..9a3b0f7136 100644 --- a/src/query/api/v1/handler/prometheus/native/read.go +++ b/src/query/api/v1/handler/prometheus/native/read.go @@ -27,6 +27,7 @@ import ( "github.com/m3db/m3/src/query/api/v1/handler" "github.com/m3db/m3/src/query/api/v1/handler/prometheus/handleroptions" "github.com/m3db/m3/src/query/api/v1/options" + "github.com/m3db/m3/src/query/block" "github.com/m3db/m3/src/query/models" "github.com/m3db/m3/src/query/util/logging" xhttp "github.com/m3db/m3/src/x/net/http" @@ -135,8 +136,16 @@ func (h *promReadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { handleroptions.AddWarningHeaders(w, result.Meta) h.promReadMetrics.fetchSuccess.Inc(1) + keepNans := h.opts.Config().ResultOptions.KeepNans + switch result.Meta.KeepNans { + case block.BoolFalse: + keepNans = false + case block.BoolTrue: + keepNans = true + } + if h.instant { - renderResultsInstantaneousJSON(w, result, h.opts.Config().ResultOptions.KeepNans) + renderResultsInstantaneousJSON(w, result, keepNans) return } @@ -148,7 +157,7 @@ func (h *promReadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { err = RenderResultsJSON(w, result, RenderResultsOptions{ Start: parsedOptions.Params.Start, End: parsedOptions.Params.End, - KeepNaNs: h.opts.Config().ResultOptions.KeepNans, + KeepNaNs: keepNans, }) if err != nil { diff --git a/src/query/block/meta.go b/src/query/block/meta.go index 85bdc98347..38ccc303aa 100644 --- a/src/query/block/meta.go +++ b/src/query/block/meta.go @@ -55,6 +55,15 @@ func (m Metadata) String() string { // Warnings is a slice of warnings. type Warnings []Warning +// Nullable bool +type Bool int + +const ( + BoolNull = iota + BoolFalse + BoolTrue +) + // ResultMetadata describes metadata common to each type of query results, // indicating any additional information about the result. type ResultMetadata struct { @@ -68,6 +77,8 @@ type ResultMetadata struct { Warnings Warnings // Resolutions is a list of resolutions for series obtained by this query. Resolutions []time.Duration + // KeepNans indicates if NaNs should be kept when returning results + KeepNans Bool } // NewResultMetadata creates a new result metadata. diff --git a/src/query/functions/aggregation/take.go b/src/query/functions/aggregation/take.go index 82c535833e..e2605629d0 100644 --- a/src/query/functions/aggregation/take.go +++ b/src/query/functions/aggregation/take.go @@ -209,22 +209,25 @@ func (n *takeNode) resolveTakeFn(seriesCount int, takeTop bool) takeValuesFunc { } func (n *takeNode) processBlockInstantaneous( - heap utils.FloatHeap, - queryCtx *models.QueryContext, - metadata block.Metadata, - stepIter block.StepIter, - seriesMetas []block.SeriesMeta, - buckets [][]int) (block.Block, error) { + heap utils.FloatHeap, + queryCtx *models.QueryContext, + metadata block.Metadata, + stepIter block.StepIter, + seriesMetas []block.SeriesMeta, + buckets [][]int) (block.Block, error) { stepCount := stepIter.StepCount() - + metadata.ResultMetadata.KeepNans = block.BoolTrue for index := 0; stepIter.Next(); index++ { if isLastStep(index, stepCount) { //we only care for the last step values for the instant query values := stepIter.Current().Values() takenSortedValues := n.op.takeInstantFunc(heap, values, buckets, seriesMetas) + + blockValues := make([]float64, len(takenSortedValues)) + blockSeries := make([]block.SeriesMeta, len(takenSortedValues)) for i := range takenSortedValues { - values[i] = takenSortedValues[i].val - seriesMetas[i] = takenSortedValues[i].seriesMeta + blockValues[i] = takenSortedValues[i].val + blockSeries[i] = takenSortedValues[i].seriesMeta } //adjust bounds to contain single step @@ -238,14 +241,14 @@ func (n *takeNode) processBlockInstantaneous( StepSize: metadata.Bounds.StepSize, } - blockBuilder, err := n.controller.BlockBuilder(queryCtx, metadata, seriesMetas) + blockBuilder, err := n.controller.BlockBuilder(queryCtx, metadata, blockSeries) if err != nil { return nil, err } if err = blockBuilder.AddCols(1); err != nil { return nil, err } - if err := blockBuilder.AppendValues(0, values); err != nil { + if err := blockBuilder.AppendValues(0, blockValues); err != nil { return nil, err } return blockBuilder.Build(), nil @@ -305,35 +308,22 @@ func takeFn(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 { } func takeInstantFn(heap utils.FloatHeap, values []float64, buckets [][]int, metas []block.SeriesMeta) []valueAndMeta { - var result = make([]valueAndMeta, len(values)) - for i := range result { - result[i] = valueAndMeta{ - val: values[i], - seriesMeta: metas[i], - } - } - + var result []valueAndMeta for _, bucket := range buckets { for _, idx := range bucket { val := values[idx] - if !math.IsNaN(val) { - heap.Push(val, idx) - } + heap.Push(val, idx) } valIndexPairs := heap.OrderedFlush() - for ix, pair := range valIndexPairs { + for _, pair := range valIndexPairs { prevIndex := pair.Index prevMeta := metas[prevIndex] - idx := bucket[ix] - - result[idx].val = pair.Val - result[idx].seriesMeta = prevMeta - } - //clear remaining values - for i := len(valIndexPairs); i < len(bucket); i++ { - result[bucket[i]].val = math.NaN() + result = append(result, valueAndMeta{ + val: pair.Val, + seriesMeta: prevMeta, + }) } } return result diff --git a/src/query/functions/aggregation/take_test.go b/src/query/functions/aggregation/take_test.go index 08ffcd8cc0..3df9d8fbab 100644 --- a/src/query/functions/aggregation/take_test.go +++ b/src/query/functions/aggregation/take_test.go @@ -43,14 +43,12 @@ func TestTakeInstantFn(t *testing.T) { {val: 1.1, seriesMeta: seriesMetasTakeOrdered[0]}, {val: 2.1, seriesMeta: seriesMetasTakeOrdered[1]}, {val: 3.1, seriesMeta: seriesMetasTakeOrdered[2]}, - {val: math.NaN(), seriesMeta: seriesMetasTakeOrdered[3]}, {val: 5.1, seriesMeta: seriesMetasTakeOrdered[4]}, {val: 6.1, seriesMeta: seriesMetasTakeOrdered[5]}, {val: 7.1, seriesMeta: seriesMetasTakeOrdered[6]}, {val: 8.1, seriesMeta: seriesMetasTakeOrdered[7]}, - {val: math.NaN(), seriesMeta: seriesMetasTakeOrdered[8]}, } size := 3 @@ -67,14 +65,12 @@ func TestTakeInstantFn(t *testing.T) { {val: 4.1, seriesMeta: seriesMetasTakeOrdered[3]}, {val: 3.1, seriesMeta: seriesMetasTakeOrdered[2]}, {val: 2.1, seriesMeta: seriesMetasTakeOrdered[1]}, - {val: math.NaN(), seriesMeta: seriesMetasTakeOrdered[3]}, {val: 5.1, seriesMeta: seriesMetasTakeOrdered[4]}, {val: 9.1, seriesMeta: seriesMetasTakeOrdered[8]}, {val: 8.1, seriesMeta: seriesMetasTakeOrdered[7]}, {val: 7.1, seriesMeta: seriesMetasTakeOrdered[6]}, - {val: math.NaN(), seriesMeta: seriesMetasTakeOrdered[8]}, } maxHeap := utils.NewFloatHeap(true, size) diff --git a/src/query/functions/linear/sort.go b/src/query/functions/linear/sort.go index cd56bb164f..314645a0f2 100644 --- a/src/query/functions/linear/sort.go +++ b/src/query/functions/linear/sort.go @@ -22,7 +22,6 @@ package linear import ( "fmt" - "math" "sort" "github.com/m3db/m3/src/query/block" @@ -67,14 +66,6 @@ type valueAndMeta struct { type lessFn func (i, j float64) bool -func asc(i, j float64) bool { - return i < j || math.IsNaN(j) && !math.IsNaN(i) -} - -func desc(i, j float64) bool { - return i > j || math.IsNaN(j) && !math.IsNaN(i) -} - // Node creates an execution node func (o sortOp) Node( controller *transform.Controller, @@ -101,35 +92,19 @@ func (n *sortNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, } instantaneous := queryCtx.Options.Instantaneous - meta := b.Meta() seriesMetas := utils.FlattenMetadata(meta, stepIter.SeriesMeta()) - stepCount := stepIter.StepCount() - - if !instantaneous { - builder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) - if err != nil { - return nil, err - } - - if err := builder.AddCols(stepCount); err != nil { - return nil, err - } - for index := 0; stepIter.Next(); index++ { - if err := builder.AppendValues(index, stepIter.Current().Values()); err != nil { - return nil, err - } - } - - if err = stepIter.Err(); err != nil { - return nil, err - } - return builder.Build(), nil + if instantaneous { + return n.processInstantBlock(queryCtx, stepIter, meta, seriesMetas) } + return n.processNopBlock(queryCtx, stepIter, meta, seriesMetas) +} +func (n *sortNode) processInstantBlock(queryCtx *models.QueryContext, stepIter block.StepIter, meta block.Metadata, seriesMetas []block.SeriesMeta) (block.Block, error) { for index := 0; stepIter.Next(); index++ { - if isLastStep(index, stepCount) { + if isLastStep(index, stepIter.StepCount()) { + meta.ResultMetadata.KeepNans = block.BoolTrue //we only care for the last step values for the instant query values := stepIter.Current().Values() valuesToSort := make([]valueAndMeta, len(values)) @@ -176,10 +151,31 @@ func (n *sortNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, return blockBuilder.Build(), nil } } - return nil, fmt.Errorf("no data to return: %s", n.op.opType) } +func (n *sortNode) processNopBlock(queryCtx *models.QueryContext, stepIter block.StepIter, meta block.Metadata, seriesMetas []block.SeriesMeta) (block.Block, error) { + builder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) + if err != nil { + return nil, err + } + + if err := builder.AddCols(stepIter.StepCount()); err != nil { + return nil, err + } + + for index := 0; stepIter.Next(); index++ { + if err := builder.AppendValues(index, stepIter.Current().Values()); err != nil { + return nil, err + } + } + + if err = stepIter.Err(); err != nil { + return nil, err + } + return builder.Build(), nil +} + func isLastStep(stepIndex int, stepCount int) bool { return stepIndex == stepCount-1 } @@ -192,9 +188,9 @@ func NewSortOp(opType string) (parser.Params, error) { var lessFn lessFn if ascending { - lessFn = asc + lessFn = utils.AscFloat64 } else { - lessFn = desc + lessFn = utils.DescFloat64 } return sortOp{opType, lessFn}, nil diff --git a/src/query/functions/linear/sort_test.go b/src/query/functions/linear/sort_test.go index 44e6222c17..5dfd284fa8 100644 --- a/src/query/functions/linear/sort_test.go +++ b/src/query/functions/linear/sort_test.go @@ -5,6 +5,7 @@ import ( "sort" "testing" + "github.com/m3db/m3/src/query/functions/utils" "github.com/m3db/m3/src/query/test" "github.com/stretchr/testify/require" @@ -20,7 +21,7 @@ func TestAscSort(t *testing.T) { expected := []float64{ 0.1, 4.1, 5.0, 8.6, math.NaN() } sort.Slice(actual, func(i, j int) bool { - return asc(actual[i], actual[j]) + return utils.AscFloat64(actual[i], actual[j]) }) test.EqualsWithNans(t, expected, actual) @@ -31,7 +32,7 @@ func TestDescSort(t *testing.T) { expected := []float64{ 8.6, 5.0, 4.1, 0.1, math.NaN() } sort.Slice(actual, func(i, j int) bool { - return desc(actual[i], actual[j]) + return utils.DescFloat64(actual[i], actual[j]) }) test.EqualsWithNans(t, expected, actual) diff --git a/src/query/functions/utils/heap.go b/src/query/functions/utils/heap.go index 1b44ff5aea..49b2d97c0a 100644 --- a/src/query/functions/utils/heap.go +++ b/src/query/functions/utils/heap.go @@ -22,6 +22,7 @@ package utils import ( "container/heap" + "math" "sort" ) @@ -37,14 +38,22 @@ func maxHeapLess(i, j ValueIndexPair) bool { if i.Val == j.Val { return i.Index > j.Index } - return i.Val < j.Val + return math.IsNaN(i.Val) && !math.IsNaN(j.Val) || i.Val < j.Val } func minHeapLess(i, j ValueIndexPair) bool { if i.Val == j.Val { return i.Index > j.Index } - return i.Val > j.Val + return i.Val > j.Val || math.IsNaN(i.Val) && !math.IsNaN(j.Val) +} + +func AscFloat64(i, j float64) bool { + return i < j || math.IsNaN(j) && !math.IsNaN(i) +} + +func DescFloat64(i, j float64) bool { + return i > j || math.IsNaN(j) && !math.IsNaN(i) } func Min(a, b int) int { @@ -107,8 +116,8 @@ func (fh *FloatHeap) Push(value float64, index int) { // NB(arnikola): unfortunately, can't just replace first // element as it may not respect internal order. Need to // run heap.Fix() to rectify this - if fh.isMaxHeap && value > peek.Val || - (!fh.isMaxHeap && value < peek.Val) { + if (fh.isMaxHeap && DescFloat64(value, peek.Val)) || + (!fh.isMaxHeap && AscFloat64(value, peek.Val)) { h.heap[0] = ValueIndexPair{ Val: value, Index: index, diff --git a/src/query/functions/utils/heap_test.go b/src/query/functions/utils/heap_test.go index 05d88c8218..a6fd7554e7 100644 --- a/src/query/functions/utils/heap_test.go +++ b/src/query/functions/utils/heap_test.go @@ -21,6 +21,8 @@ package utils import ( + "fmt" + "math" "math/rand" "sort" "testing" @@ -217,6 +219,12 @@ func TestNegativeCapacityHeap(t *testing.T) { } } +func equalPairs(t *testing.T, expected, actual []ValueIndexPair) { + e := fmt.Sprint(expected) + a := fmt.Sprint(actual) + assert.Equal(t, e, a) +} + func TestFlushOrdered(t *testing.T) { maxHeap := NewFloatHeap(true, 3) @@ -253,10 +261,12 @@ func TestFlushOrdered(t *testing.T) { func TestFlushOrderedWhenRandomInsertionOrder(t *testing.T) { maxHeap := NewFloatHeap(true, 3) + maxHeap.Push(math.NaN(), 4) maxHeap.Push(0.1, 0) maxHeap.Push(2.1, 2) maxHeap.Push(1.1, 1) maxHeap.Push(3.1, 3) + maxHeap.Push(math.NaN(), 5) actualMax := maxHeap.OrderedFlush() @@ -268,10 +278,12 @@ func TestFlushOrderedWhenRandomInsertionOrder(t *testing.T) { assert.Equal(t, 0, maxHeap.Len()) minHeap := NewFloatHeap(false, 3) + maxHeap.Push(math.NaN(), 4) minHeap.Push(0.1, 0) minHeap.Push(2.1, 2) minHeap.Push(1.1, 1) minHeap.Push(3.1, 3) + maxHeap.Push(math.NaN(), 5) actualMin := minHeap.OrderedFlush() @@ -282,3 +294,35 @@ func TestFlushOrderedWhenRandomInsertionOrder(t *testing.T) { }, actualMin) assert.Equal(t, 0, minHeap.Len()) } + +func TestFlushOrderedWhenRandomInsertionOrderAndTakeNaNs(t *testing.T) { + maxHeap := NewFloatHeap(true, 3) + maxHeap.Push(math.NaN(), 4) + maxHeap.Push(1.1, 1) + maxHeap.Push(3.1, 3) + maxHeap.Push(math.NaN(), 5) + + actualMax := maxHeap.OrderedFlush() + + equalPairs(t, []ValueIndexPair{ + {Val: 3.1, Index: 3}, + {Val: 1.1, Index: 1}, + {Val: math.NaN(), Index: 4}, + }, actualMax) + assert.Equal(t, 0, maxHeap.Len()) + + minHeap := NewFloatHeap(false, 3) + minHeap.Push(math.NaN(), 4) + minHeap.Push(0.1, 0) + minHeap.Push(2.1, 2) + minHeap.Push(math.NaN(), 5) + + actualMin := minHeap.OrderedFlush() + + equalPairs(t, []ValueIndexPair{ + {Val: 0.1, Index: 0}, + {Val: 2.1, Index: 2}, + {Val: math.NaN(), Index: 4}, + }, actualMin) + assert.Equal(t, 0, minHeap.Len()) +} diff --git a/src/query/parser/promql/parse_test.go b/src/query/parser/promql/parse_test.go index 0dd831505b..21cfc44544 100644 --- a/src/query/parser/promql/parse_test.go +++ b/src/query/parser/promql/parse_test.go @@ -274,10 +274,12 @@ func TestSort(t *testing.T) { require.NoError(t, err) transforms, edges, err := p.DAG() require.NoError(t, err) - assert.Len(t, transforms, 1) + assert.Len(t, transforms, 2) assert.Equal(t, transforms[0].Op.OpType(), functions.FetchType) assert.Equal(t, transforms[0].ID, parser.NodeID("0")) - assert.Len(t, edges, 0) + assert.Equal(t, transforms[1].Op.OpType(), tt.expectedType) + assert.Equal(t, transforms[1].ID, parser.NodeID("1")) + assert.Len(t, edges, 1) }) } } diff --git a/src/query/test/compatibility/testdata/aggregators.test b/src/query/test/compatibility/testdata/aggregators.test index e6aba9e099..e23067f79c 100644 --- a/src/query/test/compatibility/testdata/aggregators.test +++ b/src/query/test/compatibility/testdata/aggregators.test @@ -199,25 +199,25 @@ eval_ordered instant at 50m bottomk by (group) (2, http_requests{group="producti http_requests{group="production", instance="1", job="api-server"} 200 # Test NaN is sorted away from the top/bottom. -#eval_ordered instant at 50m topk(3, http_requests{job="api-server",group="production"}) -# http_requests{job="api-server", instance="1", group="production"} 200 -# http_requests{job="api-server", instance="0", group="production"} 100 -# http_requests{job="api-server", instance="2", group="production"} NaN +eval_ordered instant at 50m topk(3, http_requests{job="api-server",group="production"}) + http_requests{job="api-server", instance="1", group="production"} 200 + http_requests{job="api-server", instance="0", group="production"} 100 + http_requests{job="api-server", instance="2", group="production"} NaN -#eval_ordered instant at 50m bottomk(3, http_requests{job="api-server",group="production"}) -# http_requests{job="api-server", instance="0", group="production"} 100 -# http_requests{job="api-server", instance="1", group="production"} 200 -# http_requests{job="api-server", instance="2", group="production"} NaN +eval_ordered instant at 50m bottomk(3, http_requests{job="api-server",group="production"}) + http_requests{job="api-server", instance="0", group="production"} 100 + http_requests{job="api-server", instance="1", group="production"} 200 + http_requests{job="api-server", instance="2", group="production"} NaN # Test topk and bottomk allocate min(k, input_vector) for results vector eval_ordered instant at 50m bottomk(9999999999, http_requests{job="app-server",group="canary"}) http_requests{group="canary", instance="0", job="app-server"} 700 http_requests{group="canary", instance="1", job="app-server"} 800 -#eval_ordered instant at 50m topk(9999999999, http_requests{job="api-server",group="production"}) -# http_requests{job="api-server", instance="1", group="production"} 200 -# http_requests{job="api-server", instance="0", group="production"} 100 -# http_requests{job="api-server", instance="2", group="production"} NaN +eval_ordered instant at 50m topk(9999999999, http_requests{job="api-server",group="production"}) + http_requests{job="api-server", instance="1", group="production"} 200 + http_requests{job="api-server", instance="0", group="production"} 100 + http_requests{job="api-server", instance="2", group="production"} NaN # Bug #5276. #eval_ordered instant at 50m topk(scalar(foo), http_requests) diff --git a/src/query/test/compatibility/testdata/functions.test b/src/query/test/compatibility/testdata/functions.test index bef714ed38..1c40209fed 100644 --- a/src/query/test/compatibility/testdata/functions.test +++ b/src/query/test/compatibility/testdata/functions.test @@ -342,28 +342,27 @@ load 5m http_requests{job="app-server", instance="0", group="canary"} 0+70x10 http_requests{job="app-server", instance="1", group="canary"} 0+80x10 -# FAILING issue #28: -#eval_ordered instant at 50m sort(http_requests) -# http_requests{group="production", instance="0", job="api-server"} 100 -# http_requests{group="production", instance="1", job="api-server"} 200 -# http_requests{group="canary", instance="0", job="api-server"} 300 -# http_requests{group="canary", instance="1", job="api-server"} 400 -# http_requests{group="production", instance="0", job="app-server"} 500 -# http_requests{group="production", instance="1", job="app-server"} 600 -# http_requests{group="canary", instance="0", job="app-server"} 700 -# http_requests{group="canary", instance="1", job="app-server"} 800 -# http_requests{group="canary", instance="2", job="api-server"} NaN - -#eval_ordered instant at 50m sort_desc(http_requests) -# http_requests{group="canary", instance="1", job="app-server"} 800 -# http_requests{group="canary", instance="0", job="app-server"} 700 -# http_requests{group="production", instance="1", job="app-server"} 600 -# http_requests{group="production", instance="0", job="app-server"} 500 -# http_requests{group="canary", instance="1", job="api-server"} 400 -# http_requests{group="canary", instance="0", job="api-server"} 300 -# http_requests{group="production", instance="1", job="api-server"} 200 -# http_requests{group="production", instance="0", job="api-server"} 100 -# http_requests{group="canary", instance="2", job="api-server"} NaN +eval_ordered instant at 50m sort(http_requests) + http_requests{group="production", instance="0", job="api-server"} 100 + http_requests{group="production", instance="1", job="api-server"} 200 + http_requests{group="canary", instance="0", job="api-server"} 300 + http_requests{group="canary", instance="1", job="api-server"} 400 + http_requests{group="production", instance="0", job="app-server"} 500 + http_requests{group="production", instance="1", job="app-server"} 600 + http_requests{group="canary", instance="0", job="app-server"} 700 + http_requests{group="canary", instance="1", job="app-server"} 800 + http_requests{group="canary", instance="2", job="api-server"} NaN + +eval_ordered instant at 50m sort_desc(http_requests) + http_requests{group="canary", instance="1", job="app-server"} 800 + http_requests{group="canary", instance="0", job="app-server"} 700 + http_requests{group="production", instance="1", job="app-server"} 600 + http_requests{group="production", instance="0", job="app-server"} 500 + http_requests{group="canary", instance="1", job="api-server"} 400 + http_requests{group="canary", instance="0", job="api-server"} 300 + http_requests{group="production", instance="1", job="api-server"} 200 + http_requests{group="production", instance="0", job="api-server"} 100 + http_requests{group="canary", instance="2", job="api-server"} NaN # Tests for holt_winters clear diff --git a/src/query/test/compatibility/testdata/functions_custom.test b/src/query/test/compatibility/testdata/functions_custom.test deleted file mode 100644 index cd1c9475ba..0000000000 --- a/src/query/test/compatibility/testdata/functions_custom.test +++ /dev/null @@ -1,34 +0,0 @@ -# Tests for sort/sort_desc. -clear -load 5m - http_requests{job="api-server", instance="0", group="production"} 0+10x10 - http_requests{job="api-server", instance="1", group="production"} 0+20x10 - http_requests{job="api-server", instance="0", group="canary"} 0+30x10 - http_requests{job="api-server", instance="1", group="canary"} 0+40x10 - http_requests{job="api-server", instance="2", group="canary"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN - http_requests{job="app-server", instance="0", group="production"} 0+50x10 - http_requests{job="app-server", instance="1", group="production"} 0+60x10 - http_requests{job="app-server", instance="0", group="canary"} 0+70x10 - http_requests{job="app-server", instance="1", group="canary"} 0+80x10 - -eval_ordered instant at 50m sort(http_requests) - http_requests{group="production", instance="0", job="api-server"} 100 - http_requests{group="production", instance="1", job="api-server"} 200 - http_requests{group="canary", instance="0", job="api-server"} 300 - http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="production", instance="0", job="app-server"} 500 - http_requests{group="production", instance="1", job="app-server"} 600 - http_requests{group="canary", instance="0", job="app-server"} 700 - http_requests{group="canary", instance="1", job="app-server"} 800 - -eval_ordered instant at 50m sort_desc(http_requests) - http_requests{group="canary", instance="1", job="app-server"} 800 - http_requests{group="canary", instance="0", job="app-server"} 700 - http_requests{group="production", instance="1", job="app-server"} 600 - http_requests{group="production", instance="0", job="app-server"} 500 - http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="canary", instance="0", job="api-server"} 300 - http_requests{group="production", instance="1", job="api-server"} 200 - http_requests{group="production", instance="0", job="api-server"} 100 - -clear \ No newline at end of file From 4d2b567a205640ab4a9c4378ae54faef9cd565d6 Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Thu, 22 Oct 2020 18:12:13 +0300 Subject: [PATCH 10/16] Retain old KeepNans flag for non instant queries --- src/query/api/v1/handler/prometheus/native/read.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/api/v1/handler/prometheus/native/read.go b/src/query/api/v1/handler/prometheus/native/read.go index 9a3b0f7136..1428f7ab6c 100644 --- a/src/query/api/v1/handler/prometheus/native/read.go +++ b/src/query/api/v1/handler/prometheus/native/read.go @@ -157,7 +157,7 @@ func (h *promReadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { err = RenderResultsJSON(w, result, RenderResultsOptions{ Start: parsedOptions.Params.Start, End: parsedOptions.Params.End, - KeepNaNs: keepNans, + KeepNaNs: h.opts.Config().ResultOptions.KeepNans, }) if err != nil { From 2f72c61f756bb727c98bc71dac60716e3ee36a82 Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Fri, 23 Oct 2020 13:52:06 +0300 Subject: [PATCH 11/16] Changes after code review. --- src/cmd/services/m3query/config/config.go | 4 +- .../services/m3query/config/config_test.go | 6 +- .../api/v1/handler/prometheus/native/read.go | 14 +- .../v1/handler/prometheus/native/read_test.go | 4 +- .../v1/handler/prometheus/remote/read_test.go | 4 +- src/query/block/meta.go | 13 +- src/query/functions/aggregation/take.go | 175 ++++++++---------- src/query/functions/aggregation/take_test.go | 55 +++--- src/query/functions/linear/sort.go | 114 +++++------- src/query/functions/linear/sort_test.go | 114 ++++++++++-- src/query/functions/utils/heap.go | 14 +- src/query/functions/utils/heap_test.go | 24 +++ src/query/models/query_context.go | 3 +- 13 files changed, 302 insertions(+), 242 deletions(-) diff --git a/src/cmd/services/m3query/config/config.go b/src/cmd/services/m3query/config/config.go index 118b6c95c5..5c8d9662d2 100644 --- a/src/cmd/services/m3query/config/config.go +++ b/src/cmd/services/m3query/config/config.go @@ -207,9 +207,9 @@ type DeprecatedQueryConversionCacheConfiguration struct { // ResultOptions are the result options for query. type ResultOptions struct { - // KeepNans keeps NaNs before returning query results. + // KeepNaNs keeps NaNs before returning query results. // The default is false, which matches Prometheus - KeepNans bool `yaml:"keepNans"` + KeepNaNs bool `yaml:"keepNans"` } // QueryConfiguration is the query configuration. diff --git a/src/cmd/services/m3query/config/config_test.go b/src/cmd/services/m3query/config/config_test.go index 0143abf2fc..430828ee53 100644 --- a/src/cmd/services/m3query/config/config_test.go +++ b/src/cmd/services/m3query/config/config_test.go @@ -275,10 +275,10 @@ func TestTagOptionsConfig(t *testing.T) { func TestKeepNaNsDefault(t *testing.T) { r := ResultOptions{ - KeepNans: true, + KeepNaNs: true, } - assert.Equal(t, true, r.KeepNans) + assert.Equal(t, true, r.KeepNaNs) r = ResultOptions{} - assert.Equal(t, false, r.KeepNans) + assert.Equal(t, false, r.KeepNaNs) } diff --git a/src/query/api/v1/handler/prometheus/native/read.go b/src/query/api/v1/handler/prometheus/native/read.go index 1428f7ab6c..2c4d071a7b 100644 --- a/src/query/api/v1/handler/prometheus/native/read.go +++ b/src/query/api/v1/handler/prometheus/native/read.go @@ -27,7 +27,6 @@ import ( "github.com/m3db/m3/src/query/api/v1/handler" "github.com/m3db/m3/src/query/api/v1/handler/prometheus/handleroptions" "github.com/m3db/m3/src/query/api/v1/options" - "github.com/m3db/m3/src/query/block" "github.com/m3db/m3/src/query/models" "github.com/m3db/m3/src/query/util/logging" xhttp "github.com/m3db/m3/src/x/net/http" @@ -136,16 +135,13 @@ func (h *promReadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { handleroptions.AddWarningHeaders(w, result.Meta) h.promReadMetrics.fetchSuccess.Inc(1) - keepNans := h.opts.Config().ResultOptions.KeepNans - switch result.Meta.KeepNans { - case block.BoolFalse: - keepNans = false - case block.BoolTrue: - keepNans = true + keepNaNs := h.opts.Config().ResultOptions.KeepNaNs + if !keepNaNs { + keepNaNs = result.Meta.KeepNaNs } if h.instant { - renderResultsInstantaneousJSON(w, result, keepNans) + renderResultsInstantaneousJSON(w, result, keepNaNs) return } @@ -157,7 +153,7 @@ func (h *promReadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { err = RenderResultsJSON(w, result, RenderResultsOptions{ Start: parsedOptions.Params.Start, End: parsedOptions.Params.End, - KeepNaNs: h.opts.Config().ResultOptions.KeepNans, + KeepNaNs: h.opts.Config().ResultOptions.KeepNaNs, }) if err != nil { diff --git a/src/query/api/v1/handler/prometheus/native/read_test.go b/src/query/api/v1/handler/prometheus/native/read_test.go index 2e0709c0f6..fd96833b9c 100644 --- a/src/query/api/v1/handler/prometheus/native/read_test.go +++ b/src/query/api/v1/handler/prometheus/native/read_test.go @@ -192,7 +192,7 @@ func newTestSetup() *testSetup { fetchOptsBuilder := handleroptions.NewFetchOptionsBuilder(fetchOptsBuilderCfg) tagOpts := models.NewTagOptions() limitsConfig := config.LimitsConfiguration{} - keepNans := false + keepNaNs := false opts := options.EmptyHandlerOptions(). SetEngine(engine). @@ -203,7 +203,7 @@ func newTestSetup() *testSetup { SetConfig(config.Configuration{ Limits: limitsConfig, ResultOptions: config.ResultOptions{ - KeepNans: keepNans, + KeepNaNs: keepNaNs, }, }) diff --git a/src/query/api/v1/handler/prometheus/remote/read_test.go b/src/query/api/v1/handler/prometheus/remote/read_test.go index aebaebcda2..88be45ace9 100644 --- a/src/query/api/v1/handler/prometheus/remote/read_test.go +++ b/src/query/api/v1/handler/prometheus/remote/read_test.go @@ -379,7 +379,7 @@ func TestMultipleRead(t *testing.T) { handlerOpts := options.EmptyHandlerOptions().SetEngine(engine). SetConfig(config.Configuration{ ResultOptions: config.ResultOptions{ - KeepNans: true, + KeepNaNs: true, }, }) @@ -457,7 +457,7 @@ func TestReadWithOptions(t *testing.T) { handlerOpts := options.EmptyHandlerOptions().SetEngine(engine). SetConfig(config.Configuration{ ResultOptions: config.ResultOptions{ - KeepNans: true, + KeepNaNs: true, }, }) diff --git a/src/query/block/meta.go b/src/query/block/meta.go index 38ccc303aa..0e58ad24b3 100644 --- a/src/query/block/meta.go +++ b/src/query/block/meta.go @@ -55,15 +55,6 @@ func (m Metadata) String() string { // Warnings is a slice of warnings. type Warnings []Warning -// Nullable bool -type Bool int - -const ( - BoolNull = iota - BoolFalse - BoolTrue -) - // ResultMetadata describes metadata common to each type of query results, // indicating any additional information about the result. type ResultMetadata struct { @@ -77,8 +68,8 @@ type ResultMetadata struct { Warnings Warnings // Resolutions is a list of resolutions for series obtained by this query. Resolutions []time.Duration - // KeepNans indicates if NaNs should be kept when returning results - KeepNans Bool + // KeepNaNs indicates if NaNs should be kept when returning results. + KeepNaNs bool } // NewResultMetadata creates a new result metadata. diff --git a/src/query/functions/aggregation/take.go b/src/query/functions/aggregation/take.go index e2605629d0..e792840ac4 100644 --- a/src/query/functions/aggregation/take.go +++ b/src/query/functions/aggregation/take.go @@ -53,26 +53,13 @@ func NewTakeOp( opType string, params NodeParams, ) (parser.Params, error) { - var fn takeFunc - var fnInstant takeInstantFunc k := int(params.Parameter) - - if k < 1 { - fn = func(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 { - return takeNone(values) - } - fnInstant = func(heap utils.FloatHeap, values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta { - return takeInstantNone(values, seriesMetas) - } - } else { - fn = func(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 { - return takeFn(heap, values, buckets) - } - fnInstant = func(heap utils.FloatHeap, values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta { - return takeInstantFn(heap, values, buckets, seriesMetas) - } + fn := func(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 { + return takeFn(heap, values, buckets) + } + fnInstant := func(heap utils.FloatHeap, values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta { + return takeInstantFn(heap, values, buckets, seriesMetas) } - return newTakeOp(params, opType, k, fn, fnInstant), nil } @@ -80,7 +67,7 @@ func NewTakeOp( type takeOp struct { params NodeParams opType string - k int + k int takeFunc takeFunc takeInstantFunc takeInstantFunc } @@ -110,7 +97,7 @@ func newTakeOp(params NodeParams, opType string, k int, takeFunc takeFunc, takeI return takeOp{ params: params, opType: opType, - k: k, + k: k, takeFunc: takeFunc, takeInstantFunc: takeInstantFunc, } @@ -159,28 +146,28 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, if instantaneous { heap := utils.NewFloatHeap(takeTop, utils.Min(n.op.k, seriesCount)) return n.processBlockInstantaneous(heap, queryCtx, meta, stepIter, seriesMetas, buckets) - } else { - fnTake := n.resolveTakeFn(seriesCount, takeTop) - builder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) - if err != nil { - return nil, err - } + } - if err = builder.AddCols(stepIter.StepCount()); err != nil { - return nil, err - } + fnTake := n.resolveTakeFn(seriesCount, takeTop) + builder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) + if err != nil { + return nil, err + } - for index := 0; stepIter.Next(); index++ { - values := stepIter.Current().Values() - if err := builder.AppendValues(index, fnTake(values, buckets)); err != nil { - return nil, err - } - } - if err = stepIter.Err(); err != nil { + if err = builder.AddCols(stepIter.StepCount()); err != nil { + return nil, err + } + + for index := 0; stepIter.Next(); index++ { + values := stepIter.Current().Values() + if err := builder.AppendValues(index, fnTake(values, buckets)); err != nil { return nil, err } - return builder.Build(), nil } + if err = stepIter.Err(); err != nil { + return nil, err + } + return builder.Build(), nil } func maxSeriesCount(buckets [][]int) int { @@ -198,86 +185,77 @@ func maxSeriesCount(buckets [][]int) int { func (n *takeNode) resolveTakeFn(seriesCount int, takeTop bool) takeValuesFunc { if n.op.k < seriesCount { heap := utils.NewFloatHeap(takeTop, n.op.k) - return func (values []float64, buckets [][]int) []float64 { + return func(values []float64, buckets [][]int) []float64 { return n.op.takeFunc(heap, values, buckets) } } - return func (values []float64, buckets [][]int) []float64 { + return func(values []float64, buckets [][]int) []float64 { return values } } func (n *takeNode) processBlockInstantaneous( - heap utils.FloatHeap, - queryCtx *models.QueryContext, - metadata block.Metadata, - stepIter block.StepIter, - seriesMetas []block.SeriesMeta, - buckets [][]int) (block.Block, error) { - stepCount := stepIter.StepCount() - metadata.ResultMetadata.KeepNans = block.BoolTrue - for index := 0; stepIter.Next(); index++ { - if isLastStep(index, stepCount) { - //we only care for the last step values for the instant query - values := stepIter.Current().Values() - takenSortedValues := n.op.takeInstantFunc(heap, values, buckets, seriesMetas) - - blockValues := make([]float64, len(takenSortedValues)) - blockSeries := make([]block.SeriesMeta, len(takenSortedValues)) - for i := range takenSortedValues { - blockValues[i] = takenSortedValues[i].val - blockSeries[i] = takenSortedValues[i].seriesMeta - } - - //adjust bounds to contain single step - time, err := metadata.Bounds.TimeForIndex(index) - if err != nil { - return nil, err - } - metadata.Bounds = models.Bounds{ - Start: time, - Duration: metadata.Bounds.StepSize, - StepSize: metadata.Bounds.StepSize, - } - - blockBuilder, err := n.controller.BlockBuilder(queryCtx, metadata, blockSeries) - if err != nil { - return nil, err - } - if err = blockBuilder.AddCols(1); err != nil { - return nil, err - } - if err := blockBuilder.AppendValues(0, blockValues); err != nil { - return nil, err - } - return blockBuilder.Build(), nil + heap utils.FloatHeap, + queryCtx *models.QueryContext, + metadata block.Metadata, + stepIter block.StepIter, + seriesMetas []block.SeriesMeta, + buckets [][]int) (block.Block, error) { + ixLastStep := stepIter.StepCount() - 1 //we only care for the last step values for the instant query + for i := 0; i <= ixLastStep; i++ { + if !stepIter.Next() { + return nil, fmt.Errorf("invalid step count; expected %d got %d", stepIter.StepCount(), i+1) } } - return nil, fmt.Errorf("no data to return: %s", n.op.opType) -} + metadata.ResultMetadata.KeepNaNs = true + values := stepIter.Current().Values() + takenSortedValues := n.op.takeInstantFunc(heap, values, buckets, seriesMetas) + blockValues, blockSeries := mapToValuesAndSeriesMetas(takenSortedValues) -func isLastStep(stepIndex int, stepCount int) bool { - return stepIndex == stepCount-1 -} + //adjust bounds to contain single step + time, err := metadata.Bounds.TimeForIndex(ixLastStep) + if err != nil { + return nil, err + } + metadata.Bounds = models.Bounds{ + Start: time, + Duration: metadata.Bounds.StepSize, + StepSize: metadata.Bounds.StepSize, + } -// shortcut to return empty when taking <= 0 values -func takeNone(values []float64) []float64 { - util.Memset(values, math.NaN()) - return values + blockBuilder, err := n.controller.BlockBuilder(queryCtx, metadata, blockSeries) + if err != nil { + return nil, err + } + if err = blockBuilder.AddCols(1); err != nil { + return nil, err + } + if err := blockBuilder.AppendValues(0, blockValues); err != nil { + return nil, err + } + if err = stepIter.Err(); err != nil { + return nil, err + } + return blockBuilder.Build(), nil } -func takeInstantNone(values []float64, seriesMetas []block.SeriesMeta) []valueAndMeta { - result := make([]valueAndMeta, len(values)) - for i := range result { - result[i].val = math.NaN() - result[i].seriesMeta = seriesMetas[i] +func mapToValuesAndSeriesMetas(takenSortedValues []valueAndMeta) ([]float64, []block.SeriesMeta) { + blockValues := make([]float64, len(takenSortedValues)) + blockSeries := make([]block.SeriesMeta, len(takenSortedValues)) + for i, sortedValue := range takenSortedValues { + blockValues[i] = sortedValue.val + blockSeries[i] = sortedValue.seriesMeta } - return result + return blockValues, blockSeries } func takeFn(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 { capacity := heap.Cap() + if capacity < 1 { + util.Memset(values, math.NaN()) + return values + } for _, bucket := range buckets { // If this bucket's length is less than or equal to the heap's // capacity do not need to clear any values from the input vector, @@ -309,6 +287,9 @@ func takeFn(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 { func takeInstantFn(heap utils.FloatHeap, values []float64, buckets [][]int, metas []block.SeriesMeta) []valueAndMeta { var result []valueAndMeta + if heap.Cap() < 1 { + return result + } for _, bucket := range buckets { for _, idx := range bucket { val := values[idx] diff --git a/src/query/functions/aggregation/take_test.go b/src/query/functions/aggregation/take_test.go index 3df9d8fbab..854879062c 100644 --- a/src/query/functions/aggregation/take_test.go +++ b/src/query/functions/aggregation/take_test.go @@ -32,6 +32,8 @@ import ( "github.com/m3db/m3/src/query/parser" "github.com/m3db/m3/src/query/test" "github.com/m3db/m3/src/query/test/executor" + + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -39,6 +41,20 @@ func TestTakeInstantFn(t *testing.T) { valuesMin := []float64{1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1} buckets := [][]int{{0, 1, 2, 3}, {4}, {5, 6, 7, 8}} + var ( + seriesMetasTakeOrdered = []block.SeriesMeta{ + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "0"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "1"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "2"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "0"}, {N: "group", V: "canary"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "1"}, {N: "group", V: "canary"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "0"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "1"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "0"}, {N: "group", V: "canary"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "1"}, {N: "group", V: "canary"}})}, + } + ) + expectedMin := []valueAndMeta{ {val: 1.1, seriesMeta: seriesMetasTakeOrdered[0]}, {val: 2.1, seriesMeta: seriesMetasTakeOrdered[1]}, @@ -58,7 +74,7 @@ func TestTakeInstantFn(t *testing.T) { actualString := fmt.Sprint(actual) expectedString := fmt.Sprint(expectedMin) - require.EqualValues(t, expectedString, actualString) + assert.EqualValues(t, expectedString, actualString) valuesMax := []float64{1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1} expectedMax := []valueAndMeta{ @@ -78,7 +94,7 @@ func TestTakeInstantFn(t *testing.T) { actualString = fmt.Sprint(actual) expectedString = fmt.Sprint(expectedMax) - require.EqualValues(t, expectedString, actualString) + assert.EqualValues(t, expectedString, actualString) } func TestTakeFn(t *testing.T) { @@ -105,20 +121,6 @@ func TestTakeFn(t *testing.T) { test.EqualsWithNans(t, 1.1, actualQ) } -var ( - seriesMetasTakeOrdered = []block.SeriesMeta{ - {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "0"}, {N: "group", V: "production"}})}, - {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "1"}, {N: "group", V: "production"}})}, - {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "2"}, {N: "group", V: "production"}})}, - {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "0"}, {N: "group", V: "canary"}})}, - {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "1"}, {N: "group", V: "canary"}})}, - {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "0"}, {N: "group", V: "production"}})}, - {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "1"}, {N: "group", V: "production"}})}, - {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "0"}, {N: "group", V: "canary"}})}, - {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "1"}, {N: "group", V: "canary"}})}, - } -) - func processTakeOp(t *testing.T, op parser.Params) *executor.SinkNode { bl := test.NewBlockFromValuesWithSeriesMeta(bounds, seriesMetas, v) c, sink := executor.NewControllerWithSink(parser.NodeID(1)) @@ -147,9 +149,9 @@ func TestTakeBottomFunctionFilteringWithoutA(t *testing.T) { } // Should have the same metas as when started - require.Equal(t, seriesMetas, sink.Metas) + assert.Equal(t, seriesMetas, sink.Metas) test.EqualsWithNansWithDelta(t, expected, sink.Values, math.Pow10(-5)) - require.Equal(t, bounds, sink.Meta.Bounds) + assert.Equal(t, bounds, sink.Meta.Bounds) } func TestTakeTopFunctionFilteringWithoutA(t *testing.T) { @@ -172,9 +174,9 @@ func TestTakeTopFunctionFilteringWithoutA(t *testing.T) { } // Should have the same metas as when started - require.Equal(t, seriesMetas, sink.Metas) + assert.Equal(t, seriesMetas, sink.Metas) test.EqualsWithNansWithDelta(t, expected, sink.Values, math.Pow10(-5)) - require.Equal(t, bounds, sink.Meta.Bounds) + assert.Equal(t, bounds, sink.Meta.Bounds) } func TestTakeTopFunctionFilteringWithoutALessThanOne(t *testing.T) { @@ -196,7 +198,16 @@ func TestTakeTopFunctionFilteringWithoutALessThanOne(t *testing.T) { } // Should have the same metas as when started - require.Equal(t, seriesMetas, sink.Metas) + assert.Equal(t, seriesMetas, sink.Metas) test.EqualsWithNansWithDelta(t, expected, sink.Values, math.Pow10(-5)) - require.Equal(t, bounds, sink.Meta.Bounds) + assert.Equal(t, bounds, sink.Meta.Bounds) } + +func TestTakeOpParamIsNaN(t *testing.T) { + op, err := NewTakeOp(TopKType, NodeParams{ + Parameter: math.NaN(), + }) + require.NoError(t, err) + assert.True(t, op.(takeOp).k < 0) +} + diff --git a/src/query/functions/linear/sort.go b/src/query/functions/linear/sort.go index 314645a0f2..6f5e8203f7 100644 --- a/src/query/functions/linear/sort.go +++ b/src/query/functions/linear/sort.go @@ -86,98 +86,70 @@ func (n *sortNode) Process(queryCtx *models.QueryContext, ID parser.NodeID, b bl } func (n *sortNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, b block.Block) (block.Block, error) { + if !queryCtx.Options.Instantaneous { + return b, nil + } stepIter, err := b.StepIter() if err != nil { return nil, err } - instantaneous := queryCtx.Options.Instantaneous meta := b.Meta() seriesMetas := utils.FlattenMetadata(meta, stepIter.SeriesMeta()) - - if instantaneous { - return n.processInstantBlock(queryCtx, stepIter, meta, seriesMetas) - } - return n.processNopBlock(queryCtx, stepIter, meta, seriesMetas) + return n.processInstantBlock(queryCtx, stepIter, meta, seriesMetas) } func (n *sortNode) processInstantBlock(queryCtx *models.QueryContext, stepIter block.StepIter, meta block.Metadata, seriesMetas []block.SeriesMeta) (block.Block, error) { - for index := 0; stepIter.Next(); index++ { - if isLastStep(index, stepIter.StepCount()) { - meta.ResultMetadata.KeepNans = block.BoolTrue - //we only care for the last step values for the instant query - values := stepIter.Current().Values() - valuesToSort := make([]valueAndMeta, len(values)) - for i, value := range values { - valuesToSort[i] = valueAndMeta{ - val: value, - seriesMeta: seriesMetas[i], - } - } - - sort.Slice(valuesToSort, func(i, j int) bool { - return n.op.lessFn(valuesToSort[i].val, valuesToSort[j].val) - }) - - for i := range valuesToSort { - values[i] = valuesToSort[i].val - seriesMetas[i] = valuesToSort[i].seriesMeta - } - - //adjust bounds to contain single step - time, err := meta.Bounds.TimeForIndex(index) - if err != nil { - return nil, err - } - meta.Bounds = models.Bounds{ - Start: time, - Duration: meta.Bounds.StepSize, - StepSize: meta.Bounds.StepSize, - } - - blockBuilder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) - if err != nil { - return nil, err - } - if err = blockBuilder.AddCols(1); err != nil { - return nil, err - } - if err := blockBuilder.AppendValues(0, values); err != nil { - return nil, err - } - if err = stepIter.Err(); err != nil { - return nil, err - } - return blockBuilder.Build(), nil + ixLastStep := stepIter.StepCount() - 1 //we only care for the last step values for the instant query + for i := 0; i <= ixLastStep; i++ { + if !stepIter.Next() { + return nil, fmt.Errorf("invalid step count; expected %d got %d", stepIter.StepCount(), i+1) } } - return nil, fmt.Errorf("no data to return: %s", n.op.opType) -} + values := stepIter.Current().Values() + meta.ResultMetadata.KeepNaNs = true + valuesToSort := make([]valueAndMeta, len(values)) + for i, value := range values { + valuesToSort[i] = valueAndMeta{ + val: value, + seriesMeta: seriesMetas[i], + } + } + + sort.Slice(valuesToSort, func(i, j int) bool { + return n.op.lessFn(valuesToSort[i].val, valuesToSort[j].val) + }) -func (n *sortNode) processNopBlock(queryCtx *models.QueryContext, stepIter block.StepIter, meta block.Metadata, seriesMetas []block.SeriesMeta) (block.Block, error) { - builder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) + for i, sorted := range valuesToSort { + values[i] = sorted.val + seriesMetas[i] = sorted.seriesMeta + } + + //adjust bounds to contain single step + time, err := meta.Bounds.TimeForIndex(ixLastStep) if err != nil { return nil, err } + meta.Bounds = models.Bounds{ + Start: time, + Duration: meta.Bounds.StepSize, + StepSize: meta.Bounds.StepSize, + } - if err := builder.AddCols(stepIter.StepCount()); err != nil { + blockBuilder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) + if err != nil { return nil, err } - - for index := 0; stepIter.Next(); index++ { - if err := builder.AppendValues(index, stepIter.Current().Values()); err != nil { - return nil, err - } + if err = blockBuilder.AddCols(1); err != nil { + return nil, err + } + if err := blockBuilder.AppendValues(0, values); err != nil { + return nil, err } - if err = stepIter.Err(); err != nil { return nil, err } - return builder.Build(), nil -} - -func isLastStep(stepIndex int, stepCount int) bool { - return stepIndex == stepCount-1 + return blockBuilder.Build(), nil } func NewSortOp(opType string) (parser.Params, error) { @@ -188,9 +160,9 @@ func NewSortOp(opType string) (parser.Params, error) { var lessFn lessFn if ascending { - lessFn = utils.AscFloat64 + lessFn = utils.LesserWithNaNs } else { - lessFn = utils.DescFloat64 + lessFn = utils.GreaterWithNaNs } return sortOp{opType, lessFn}, nil diff --git a/src/query/functions/linear/sort_test.go b/src/query/functions/linear/sort_test.go index 5dfd284fa8..8f59c23605 100644 --- a/src/query/functions/linear/sort_test.go +++ b/src/query/functions/linear/sort_test.go @@ -2,12 +2,17 @@ package linear import ( "math" - "sort" "testing" + "time" - "github.com/m3db/m3/src/query/functions/utils" + "github.com/m3db/m3/src/query/block" + "github.com/m3db/m3/src/query/executor/transform" + "github.com/m3db/m3/src/query/models" + "github.com/m3db/m3/src/query/parser" "github.com/m3db/m3/src/query/test" + "github.com/m3db/m3/src/query/test/executor" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -16,24 +21,101 @@ func TestShouldFailWhenOpTypeIsInvalid(t *testing.T) { require.Error(t, err) } -func TestAscSort(t *testing.T) { - actual := []float64{ 5.0, 4.1, math.NaN(), 8.6, 0.1 } - expected := []float64{ 0.1, 4.1, 5.0, 8.6, math.NaN() } +func TestSortAscInstant(t *testing.T) { + op, err := NewSortOp(SortType) + require.NoError(t, err) - sort.Slice(actual, func(i, j int) bool { - return utils.AscFloat64(actual[i], actual[j]) - }) + sink := processSortOp(t, op, true) - test.EqualsWithNans(t, expected, actual) + expected := [][]float64{ + {100}, + {200}, + {300}, + {400}, + {500}, + {600}, + {700}, + {800}, + {math.NaN()}, + } + + assert.Equal(t, seriesMetas, sink.Metas) + test.EqualsWithNansWithDelta(t, expected, sink.Values, math.Pow10(-5)) } -func TestDescSort(t *testing.T) { - actual := []float64{ 5.0, 4.1, math.NaN(), 8.6, 0.1 } - expected := []float64{ 8.6, 5.0, 4.1, 0.1, math.NaN() } +func TestSortDescInstant(t *testing.T) { + op, err := NewSortOp(SortDescType) + require.NoError(t, err) + + sink := processSortOp(t, op, true) + + expected := [][]float64{ + {800}, + {700}, + {600}, + {500}, + {400}, + {300}, + {200}, + {100}, + {math.NaN()}, + } + + assert.Equal(t, seriesMetas, sink.Metas) + test.EqualsWithNansWithDelta(t, expected, sink.Values, math.Pow10(-5)) +} + +func TestSortNop(t *testing.T) { + op, err := NewSortOp(SortDescType) + require.NoError(t, err) + + sink := processSortOp(t, op, false) - sort.Slice(actual, func(i, j int) bool { - return utils.DescFloat64(actual[i], actual[j]) - }) + expected := v + + assert.Equal(t, seriesMetas, sink.Metas) + test.EqualsWithNansWithDelta(t, expected, sink.Values, math.Pow10(-5)) +} + +var ( + seriesMetas = []block.SeriesMeta{ + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "0"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "1"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "2"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "0"}, {N: "group", V: "canary"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "api-server"}, {N: "instance", V: "1"}, {N: "group", V: "canary"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "0"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "1"}, {N: "group", V: "production"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "0"}, {N: "group", V: "canary"}})}, + {Tags: test.StringTagsToTags(test.StringTags{{N: "job", V: "app-server"}, {N: "instance", V: "1"}, {N: "group", V: "canary"}})}, + } + + v = [][]float64{ + {60, 70, 80, 90, 100}, + {150, 160, 170, 180, 200}, + {180, 210, 240, 270, 300}, + {240, 280, 320, 360, 400}, + {math.NaN(), math.NaN(), math.NaN(), math.NaN(), math.NaN()}, + {300, 350, 400, 450, 500}, + {360, 420, 480, 540, 600}, + {320, 390, 460, 530, 700}, + {480, 560, 640, 720, 800}, + } + + bounds = models.Bounds{ + Start: time.Now(), + Duration: time.Minute * 5, + StepSize: time.Minute, + } +) - test.EqualsWithNans(t, expected, actual) +func processSortOp(t *testing.T, op parser.Params, instant bool) *executor.SinkNode { + bl := test.NewBlockFromValuesWithSeriesMeta(bounds, seriesMetas, v) + c, sink := executor.NewControllerWithSink(parser.NodeID(1)) + node := op.(sortOp).Node(c, transform.Options{}) + queryContext := models.NoopQueryContext() + queryContext.Options.Instantaneous = instant + err := node.Process(queryContext, parser.NodeID(0), bl) + require.NoError(t, err) + return sink } \ No newline at end of file diff --git a/src/query/functions/utils/heap.go b/src/query/functions/utils/heap.go index 49b2d97c0a..8ac7c7f5fb 100644 --- a/src/query/functions/utils/heap.go +++ b/src/query/functions/utils/heap.go @@ -35,24 +35,26 @@ type ValueIndexPair struct { type lessFn func(ValueIndexPair, ValueIndexPair) bool func maxHeapLess(i, j ValueIndexPair) bool { - if i.Val == j.Val { + if i.Val == j.Val || math.IsNaN(i.Val) && math.IsNaN(j.Val) { return i.Index > j.Index } return math.IsNaN(i.Val) && !math.IsNaN(j.Val) || i.Val < j.Val } func minHeapLess(i, j ValueIndexPair) bool { - if i.Val == j.Val { + if i.Val == j.Val || math.IsNaN(i.Val) && math.IsNaN(j.Val) { return i.Index > j.Index } return i.Val > j.Val || math.IsNaN(i.Val) && !math.IsNaN(j.Val) } -func AscFloat64(i, j float64) bool { +// Compares two float64 values which one is lesser with NaNs. NaNs are always sorted away. +func LesserWithNaNs(i, j float64) bool { return i < j || math.IsNaN(j) && !math.IsNaN(i) } -func DescFloat64(i, j float64) bool { +// Compares two float64 values which one is greater with NaNs. NaNs are always sorted away. +func GreaterWithNaNs(i, j float64) bool { return i > j || math.IsNaN(j) && !math.IsNaN(i) } @@ -116,8 +118,8 @@ func (fh *FloatHeap) Push(value float64, index int) { // NB(arnikola): unfortunately, can't just replace first // element as it may not respect internal order. Need to // run heap.Fix() to rectify this - if (fh.isMaxHeap && DescFloat64(value, peek.Val)) || - (!fh.isMaxHeap && AscFloat64(value, peek.Val)) { + if (fh.isMaxHeap && GreaterWithNaNs(value, peek.Val)) || + (!fh.isMaxHeap && LesserWithNaNs(value, peek.Val)) { h.heap[0] = ValueIndexPair{ Val: value, Index: index, diff --git a/src/query/functions/utils/heap_test.go b/src/query/functions/utils/heap_test.go index a6fd7554e7..f8f517aff3 100644 --- a/src/query/functions/utils/heap_test.go +++ b/src/query/functions/utils/heap_test.go @@ -27,6 +27,8 @@ import ( "sort" "testing" + "github.com/m3db/m3/src/query/test" + "github.com/stretchr/testify/assert" ) @@ -326,3 +328,25 @@ func TestFlushOrderedWhenRandomInsertionOrderAndTakeNaNs(t *testing.T) { }, actualMin) assert.Equal(t, 0, minHeap.Len()) } + +func TestSortLesserWithNaNs(t *testing.T) { + actual := []float64{ 5.0, 4.1, math.NaN(), 8.6, 0.1 } + expected := []float64{ 0.1, 4.1, 5.0, 8.6, math.NaN() } + + sort.Slice(actual, func(i, j int) bool { + return LesserWithNaNs(actual[i], actual[j]) + }) + + test.EqualsWithNans(t, expected, actual) +} + +func TestSortGreaterWithNaNs(t *testing.T) { + actual := []float64{ 5.0, 4.1, math.NaN(), 8.6, 0.1 } + expected := []float64{ 8.6, 5.0, 4.1, 0.1, math.NaN() } + + sort.Slice(actual, func(i, j int) bool { + return GreaterWithNaNs(actual[i], actual[j]) + }) + + test.EqualsWithNans(t, expected, actual) +} diff --git a/src/query/models/query_context.go b/src/query/models/query_context.go index 75a36af2e9..b8088e5b64 100644 --- a/src/query/models/query_context.go +++ b/src/query/models/query_context.go @@ -48,8 +48,9 @@ type QueryContextOptions struct { LimitMaxDocs int // RequireExhaustive results in an error if the query exceeds the series limit. RequireExhaustive bool - // Is instant query + // Instantaneous indicates an instant query. Instantaneous bool + // RestrictFetchType restricts the query fetches. RestrictFetchType *RestrictFetchTypeQueryContextOptions } From 3ce91194e720136a20b0a6fcb19acf10d9dd1ee2 Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Fri, 23 Oct 2020 15:17:41 +0300 Subject: [PATCH 12/16] Small cleanup. --- src/query/functions/aggregation/take.go | 22 ++++++---------------- src/query/functions/linear/sort.go | 4 +--- src/query/functions/utils/heap.go | 2 +- 3 files changed, 8 insertions(+), 20 deletions(-) diff --git a/src/query/functions/aggregation/take.go b/src/query/functions/aggregation/take.go index e792840ac4..6549e5cb90 100644 --- a/src/query/functions/aggregation/take.go +++ b/src/query/functions/aggregation/take.go @@ -46,7 +46,6 @@ type valueAndMeta struct { type takeFunc func(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 type takeInstantFunc func(heap utils.FloatHeap, values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta -type takeValuesFunc func(values []float64, buckets [][]int) []float64 // NewTakeOp creates a new takeK operation func NewTakeOp( @@ -148,7 +147,11 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, return n.processBlockInstantaneous(heap, queryCtx, meta, stepIter, seriesMetas, buckets) } - fnTake := n.resolveTakeFn(seriesCount, takeTop) + if n.op.k >= seriesCount { + return b, nil + } + + heap := utils.NewFloatHeap(takeTop, n.op.k) builder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) if err != nil { return nil, err @@ -160,7 +163,7 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, for index := 0; stepIter.Next(); index++ { values := stepIter.Current().Values() - if err := builder.AppendValues(index, fnTake(values, buckets)); err != nil { + if err := builder.AppendValues(index, n.op.takeFunc(heap, values, buckets)); err != nil { return nil, err } } @@ -182,19 +185,6 @@ func maxSeriesCount(buckets [][]int) int { return result } -func (n *takeNode) resolveTakeFn(seriesCount int, takeTop bool) takeValuesFunc { - if n.op.k < seriesCount { - heap := utils.NewFloatHeap(takeTop, n.op.k) - return func(values []float64, buckets [][]int) []float64 { - return n.op.takeFunc(heap, values, buckets) - } - } - - return func(values []float64, buckets [][]int) []float64 { - return values - } -} - func (n *takeNode) processBlockInstantaneous( heap utils.FloatHeap, queryCtx *models.QueryContext, diff --git a/src/query/functions/linear/sort.go b/src/query/functions/linear/sort.go index 6f5e8203f7..9d696b9d04 100644 --- a/src/query/functions/linear/sort.go +++ b/src/query/functions/linear/sort.go @@ -158,11 +158,9 @@ func NewSortOp(opType string) (parser.Params, error) { return nil, fmt.Errorf("operator not supported: %s", opType) } - var lessFn lessFn + lessFn := utils.GreaterWithNaNs if ascending { lessFn = utils.LesserWithNaNs - } else { - lessFn = utils.GreaterWithNaNs } return sortOp{opType, lessFn}, nil diff --git a/src/query/functions/utils/heap.go b/src/query/functions/utils/heap.go index 8ac7c7f5fb..36a984212b 100644 --- a/src/query/functions/utils/heap.go +++ b/src/query/functions/utils/heap.go @@ -38,7 +38,7 @@ func maxHeapLess(i, j ValueIndexPair) bool { if i.Val == j.Val || math.IsNaN(i.Val) && math.IsNaN(j.Val) { return i.Index > j.Index } - return math.IsNaN(i.Val) && !math.IsNaN(j.Val) || i.Val < j.Val + return i.Val < j.Val || math.IsNaN(i.Val) && !math.IsNaN(j.Val) } func minHeapLess(i, j ValueIndexPair) bool { From bd3f326bd21383bca9c81fc1f51927a935bf619f Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Mon, 26 Oct 2020 16:28:54 +0200 Subject: [PATCH 13/16] Added copyright header to sort_test.go. equalsPairs now checks individual struct elements instead of their printed values. --- src/query/functions/linear/sort_test.go | 22 +++++++++++++++++++++- src/query/functions/utils/heap_test.go | 9 +++++---- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/query/functions/linear/sort_test.go b/src/query/functions/linear/sort_test.go index 8f59c23605..e1a4d46b3e 100644 --- a/src/query/functions/linear/sort_test.go +++ b/src/query/functions/linear/sort_test.go @@ -1,3 +1,23 @@ +// Copyright (c) 2020 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + package linear import ( @@ -118,4 +138,4 @@ func processSortOp(t *testing.T, op parser.Params, instant bool) *executor.SinkN err := node.Process(queryContext, parser.NodeID(0), bl) require.NoError(t, err) return sink -} \ No newline at end of file +} diff --git a/src/query/functions/utils/heap_test.go b/src/query/functions/utils/heap_test.go index f8f517aff3..79e2a305fa 100644 --- a/src/query/functions/utils/heap_test.go +++ b/src/query/functions/utils/heap_test.go @@ -21,7 +21,6 @@ package utils import ( - "fmt" "math" "math/rand" "sort" @@ -222,9 +221,11 @@ func TestNegativeCapacityHeap(t *testing.T) { } func equalPairs(t *testing.T, expected, actual []ValueIndexPair) { - e := fmt.Sprint(expected) - a := fmt.Sprint(actual) - assert.Equal(t, e, a) + assert.Equal(t, len(expected), len(actual)) + for i, e := range expected { + test.EqualsWithNans(t, e.Val, actual[i].Val) + assert.Equal(t, e.Index, actual[i].Index) + } } func TestFlushOrdered(t *testing.T) { From 7b0216c4ab6069511ffd2d274f250b2af14c9417 Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Mon, 26 Oct 2020 18:23:32 +0200 Subject: [PATCH 14/16] better init --- src/query/functions/aggregation/take.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/functions/aggregation/take.go b/src/query/functions/aggregation/take.go index 6549e5cb90..fa2697ccf9 100644 --- a/src/query/functions/aggregation/take.go +++ b/src/query/functions/aggregation/take.go @@ -276,7 +276,7 @@ func takeFn(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 { } func takeInstantFn(heap utils.FloatHeap, values []float64, buckets [][]int, metas []block.SeriesMeta) []valueAndMeta { - var result []valueAndMeta + var result = make([]valueAndMeta, 0, heap.Cap()) if heap.Cap() < 1 { return result } From 795f181dd343477fd92f6be5a73cd2715385d14d Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Thu, 29 Oct 2020 12:38:59 +0200 Subject: [PATCH 15/16] Removed Min() function in favour of using this logic inline. Used append in place when indices are not needed. Documented and extracted some comparison functions for better readability and clearness. --- src/query/functions/aggregation/take.go | 17 +++++++++------ src/query/functions/utils/heap.go | 28 ++++++++++++++----------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/query/functions/aggregation/take.go b/src/query/functions/aggregation/take.go index fa2697ccf9..098cea41d9 100644 --- a/src/query/functions/aggregation/take.go +++ b/src/query/functions/aggregation/take.go @@ -143,7 +143,12 @@ func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, seriesCount := maxSeriesCount(buckets) if instantaneous { - heap := utils.NewFloatHeap(takeTop, utils.Min(n.op.k, seriesCount)) + heapSize := seriesCount + if n.op.k < seriesCount { + heapSize = n.op.k + } + + heap := utils.NewFloatHeap(takeTop, heapSize) return n.processBlockInstantaneous(heap, queryCtx, meta, stepIter, seriesMetas, buckets) } @@ -231,11 +236,11 @@ func (n *takeNode) processBlockInstantaneous( } func mapToValuesAndSeriesMetas(takenSortedValues []valueAndMeta) ([]float64, []block.SeriesMeta) { - blockValues := make([]float64, len(takenSortedValues)) - blockSeries := make([]block.SeriesMeta, len(takenSortedValues)) - for i, sortedValue := range takenSortedValues { - blockValues[i] = sortedValue.val - blockSeries[i] = sortedValue.seriesMeta + blockValues := make([]float64, 0, len(takenSortedValues)) + blockSeries := make([]block.SeriesMeta, 0, len(takenSortedValues)) + for _, sortedValue := range takenSortedValues { + blockValues = append(blockValues, sortedValue.val) + blockSeries = append(blockSeries, sortedValue.seriesMeta) } return blockValues, blockSeries } diff --git a/src/query/functions/utils/heap.go b/src/query/functions/utils/heap.go index 36a984212b..fff38716e1 100644 --- a/src/query/functions/utils/heap.go +++ b/src/query/functions/utils/heap.go @@ -35,17 +35,28 @@ type ValueIndexPair struct { type lessFn func(ValueIndexPair, ValueIndexPair) bool func maxHeapLess(i, j ValueIndexPair) bool { - if i.Val == j.Val || math.IsNaN(i.Val) && math.IsNaN(j.Val) { + if equalWithNaNs(i.Val, j.Val) { return i.Index > j.Index } - return i.Val < j.Val || math.IsNaN(i.Val) && !math.IsNaN(j.Val) + return i.Val < j.Val || lesserIfNaNs(i.Val, j.Val) } func minHeapLess(i, j ValueIndexPair) bool { - if i.Val == j.Val || math.IsNaN(i.Val) && math.IsNaN(j.Val) { + if equalWithNaNs(i.Val, j.Val) { return i.Index > j.Index } - return i.Val > j.Val || math.IsNaN(i.Val) && !math.IsNaN(j.Val) + return i.Val > j.Val || lesserIfNaNs(i.Val, j.Val) +} + +// Compares two floats for equality with NaNs taken into account. +func equalWithNaNs(i,j float64) bool { + return i == j || math.IsNaN(i) && math.IsNaN(j) +} + +// Checks which value is less if first of them is NaN. +// Basically, we do not want to add NaNs to the heap when it has reached it's cap so this fn should be used to prevent this. +func lesserIfNaNs(i,j float64) bool { + return math.IsNaN(i) && !math.IsNaN(j) } // Compares two float64 values which one is lesser with NaNs. NaNs are always sorted away. @@ -58,13 +69,6 @@ func GreaterWithNaNs(i, j float64) bool { return i > j || math.IsNaN(j) && !math.IsNaN(i) } -func Min(a, b int) int { - if a < b { - return a - } - return b -} - // FloatHeap is a heap that can be given a maximum size type FloatHeap struct { isMaxHeap bool @@ -166,7 +170,7 @@ func (fh *FloatHeap) Flush() []ValueIndexPair { func (fh *FloatHeap) OrderedFlush() []ValueIndexPair { flushed := fh.Flush() sort.Slice(flushed, func(i, j int) bool { - return fh.floatHeap.less(flushed[j], flushed[i]) + return !fh.floatHeap.less(flushed[i], flushed[j]) //reverse sort }) return flushed } From 30a71193da52e10e5a02f59aa6b0c8ac0f195323 Mon Sep 17 00:00:00 2001 From: Linas Naginionis Date: Thu, 5 Nov 2020 09:34:56 +0200 Subject: [PATCH 16/16] Updated doc comment --- src/query/functions/utils/heap.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/functions/utils/heap.go b/src/query/functions/utils/heap.go index fff38716e1..1587a1da00 100644 --- a/src/query/functions/utils/heap.go +++ b/src/query/functions/utils/heap.go @@ -53,7 +53,7 @@ func equalWithNaNs(i,j float64) bool { return i == j || math.IsNaN(i) && math.IsNaN(j) } -// Checks which value is less if first of them is NaN. +// Compares NaNs. // Basically, we do not want to add NaNs to the heap when it has reached it's cap so this fn should be used to prevent this. func lesserIfNaNs(i,j float64) bool { return math.IsNaN(i) && !math.IsNaN(j)