From 80953c15520caccea89db2207e7913073c569da2 Mon Sep 17 00:00:00 2001 From: Rebecca Taft Date: Sun, 13 Feb 2022 21:46:52 -0600 Subject: [PATCH] opt: fix bug in histogram estimation code for multi-column spans This commit fixes a bug in the histogram estimation code, which could cause the optimizer to think that an index scan produced 0 rows, when in fact it produced a large number. This was due to an inaccurate assumption in the histogram filtering code that if a span had an exclusive boundary, the upper bound of the span was excluded from the histogram. However, this failed to account for the fact that we support constraining a histogram with multi-column spans, and we can select different column offsets to use to constrain the histogram. The assumption above is only valid if the column offset corresponds to the last column in the span key. This logic has now been fixed. Fixes #76485 Release note (performance improvement): Fixed a bug in the histogram estimation code that could cause the optimizer to think a scan of a multi-column index would produce 0 rows, when in fact it would produce many rows. This could cause the optimizer to choose a suboptimal plan. This bug has now been fixed, making it less likely for the optimizer to choose a suboptimal plan when multiple multi-column indexes are available. --- pkg/sql/opt/memo/testdata/stats/scan | 55 ++++++++++ pkg/sql/opt/props/histogram.go | 16 ++- pkg/sql/opt/props/histogram_test.go | 157 +++++++++++++++++++++++---- 3 files changed, 202 insertions(+), 26 deletions(-) diff --git a/pkg/sql/opt/memo/testdata/stats/scan b/pkg/sql/opt/memo/testdata/stats/scan index 586169654778..dd2bc1812b8f 100644 --- a/pkg/sql/opt/memo/testdata/stats/scan +++ b/pkg/sql/opt/memo/testdata/stats/scan @@ -2162,3 +2162,58 @@ select │ └── fd: (2)-->(5) └── filters └── st_coveredby(geom:1, '0101000000000000000000F03F000000000000F03F') [type=bool, outer=(1), immutable, constraints=(/1: (/NULL - ])] + +# Regression test for #76485. Ensure that we do not estimate 0 rows +# when filtering a histogram with a multi-column span. + +exec-ddl +CREATE TABLE t76485 ( + a INT, + b INT, + c STRING, + INDEX (a, c), + INDEX (a, b, c) +) +---- + +exec-ddl +ALTER TABLE t76485 INJECT STATISTICS '[ + { + "columns": [ + "a" + ], + "created_at": "2022-02-12 23:43:16.318153", + "distinct_count": 10, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 0, + "num_range": 0, + "upper_bound": "10" + }, + { + "distinct_range": 9, + "num_eq": 10000, + "num_range": 9, + "upper_bound": "20" + } + ], + "histo_col_type": "INT8", + "histo_version": 1, + "name": "__auto__", + "null_count": 0, + "row_count": 10009 + } +]' +---- + +opt +SELECT * FROM t76485 WHERE a = 20 AND b = 30 AND c < 'foo'; +---- +scan t76485@secondary + ├── columns: a:1(int!null) b:2(int!null) c:3(string!null) + ├── constraint: /1/2/3/4: (/20/30/NULL - /20/30/'foo') + ├── stats: [rows=9.3108651, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=9.3108651, null(3)=0, distinct(1,2)=1, null(1,2)=0, distinct(1-3)=9.3108651, null(1-3)=0] + │ histogram(1)= 0 9.3109 + │ <---- 20 - + └── fd: ()-->(1,2) diff --git a/pkg/sql/opt/props/histogram.go b/pkg/sql/opt/props/histogram.go index 3f3be3429198..e29853a00dc2 100644 --- a/pkg/sql/opt/props/histogram.go +++ b/pkg/sql/opt/props/histogram.go @@ -603,12 +603,18 @@ func getFilteredBucket( // Determine whether this span includes the original upper bound of the // bucket. - isSpanEndBoundaryInclusive := filteredSpan.EndBoundary() == constraint.IncludeBoundary - includesOriginalUpperBound := isSpanEndBoundaryInclusive && cmpSpanEndBucketEnd == 0 - if iter.desc { - isSpanStartBoundaryInclusive := filteredSpan.StartBoundary() == constraint.IncludeBoundary - includesOriginalUpperBound = isSpanStartBoundaryInclusive && cmpSpanStartBucketStart == 0 + var keyLength, cmp int + var keyBoundaryInclusive bool + if !iter.desc { + keyLength = filteredSpan.EndKey().Length() + keyBoundaryInclusive = filteredSpan.EndBoundary() == constraint.IncludeBoundary + cmp = cmpSpanEndBucketEnd + } else { + keyLength = filteredSpan.StartKey().Length() + keyBoundaryInclusive = filteredSpan.StartBoundary() == constraint.IncludeBoundary + cmp = cmpSpanStartBucketStart } + includesOriginalUpperBound := cmp == 0 && ((colOffset < keyLength-1) || keyBoundaryInclusive) // Calculate the new value for numEq. var numEq float64 diff --git a/pkg/sql/opt/props/histogram_test.go b/pkg/sql/opt/props/histogram_test.go index 41fb081741b3..4b0095c15e20 100644 --- a/pkg/sql/opt/props/histogram_test.go +++ b/pkg/sql/opt/props/histogram_test.go @@ -343,7 +343,7 @@ func TestFilterBucket(t *testing.T) { } runTestCase := func( - h *Histogram, span *constraint.Span, desc bool, + h *Histogram, span *constraint.Span, desc bool, colOffset int, ) (actual *cat.HistogramBucket, err error) { defer func() { // Any errors will be propagated as panics. @@ -364,19 +364,19 @@ func TestFilterBucket(t *testing.T) { // mark the lower bound of the second bucket. Set the iterator to point to // the second bucket. iter.setIdx(1) - b := getFilteredBucket(&iter, &keyCtx, span, 0 /* colIdx */) + b := getFilteredBucket(&iter, &keyCtx, span, colOffset) roundBucket(b) return b, nil } - runTest := func(h *Histogram, testData []testCase, typ types.Family) { + runTest := func(h *Histogram, testData []testCase, colOffset int, typs ...types.Family) { for _, testCase := range testData { - span := constraint.ParseSpan(&evalCtx, testCase.span, typ) + span := constraint.ParseSpan(&evalCtx, testCase.span, typs...) ascAndDesc := []constraint.Span{span, makeDescSpan(&span)} // Make sure all test cases work with both ascending and descending columns. for i, span := range ascAndDesc { - actual, err := runTestCase(h, &span, i == 1 /* desc */) + actual, err := runTestCase(h, &span, i == 1 /* desc */, colOffset) if err != nil && !testCase.isError { t.Fatalf("for span %s got error %v", testCase.span, err) } else if err == nil { @@ -437,7 +437,7 @@ func TestFilterBucket(t *testing.T) { }, } - runTest(h, testData, types.IntFamily) + runTest(h, testData, 0 /* colOffset */, types.IntFamily) }) t.Run("float", func(t *testing.T) { @@ -472,7 +472,7 @@ func TestFilterBucket(t *testing.T) { }, } - runTest(h, testData, types.FloatFamily) + runTest(h, testData, 0 /* colOffset */, types.FloatFamily) }) t.Run("decimal", func(t *testing.T) { @@ -509,7 +509,7 @@ func TestFilterBucket(t *testing.T) { }, } - runTest(h, testData, types.DecimalFamily) + runTest(h, testData, 0 /* colOffset */, types.DecimalFamily) }) t.Run("date", func(t *testing.T) { @@ -542,7 +542,7 @@ func TestFilterBucket(t *testing.T) { }, } - runTest(h, testData, types.DateFamily) + runTest(h, testData, 0 /* colOffset */, types.DateFamily) }) t.Run("timestamp", func(t *testing.T) { @@ -575,7 +575,7 @@ func TestFilterBucket(t *testing.T) { }, } - runTest(h, testData, types.TimestampFamily) + runTest(h, testData, 0 /* colOffset */, types.TimestampFamily) }) t.Run("time", func(t *testing.T) { @@ -608,7 +608,7 @@ func TestFilterBucket(t *testing.T) { }, } - runTest(h, testData, types.TimeFamily) + runTest(h, testData, 0 /* colOffset */, types.TimeFamily) }) t.Run("timetz", func(t *testing.T) { @@ -692,8 +692,8 @@ func TestFilterBucket(t *testing.T) { }, } - runTest(h1, testData1, types.TimeTZFamily) - runTest(h2, testData2, types.TimeTZFamily) + runTest(h1, testData1, 0 /* colOffset */, types.TimeTZFamily) + runTest(h2, testData2, 0 /* colOffset */, types.TimeTZFamily) }) t.Run("string", func(t *testing.T) { @@ -754,9 +754,9 @@ func TestFilterBucket(t *testing.T) { }, } - runTest(h1, t1, types.StringFamily) - runTest(h2, t2, types.StringFamily) - runTest(h3, t3, types.StringFamily) + runTest(h1, t1, 0 /* colOffset */, types.StringFamily) + runTest(h2, t2, 0 /* colOffset */, types.StringFamily) + runTest(h3, t3, 0 /* colOffset */, types.StringFamily) }) t.Run("uuid", func(t *testing.T) { @@ -812,8 +812,8 @@ func TestFilterBucket(t *testing.T) { }, } - runTest(h1, t1, types.UuidFamily) - runTest(h2, t2, types.UuidFamily) + runTest(h1, t1, 0 /* colOffset */, types.UuidFamily) + runTest(h2, t2, 0 /* colOffset */, types.UuidFamily) }) t.Run("inet", func(t *testing.T) { @@ -888,9 +888,124 @@ func TestFilterBucket(t *testing.T) { }, } - runTest(h1, t1, types.INetFamily) - runTest(h2, t2, types.INetFamily) - runTest(h3, t3, types.INetFamily) + runTest(h1, t1, 0 /* colOffset */, types.INetFamily) + runTest(h2, t2, 0 /* colOffset */, types.INetFamily) + runTest(h3, t3, 0 /* colOffset */, types.INetFamily) + }) + + t.Run("multi-col", func(t *testing.T) { + h1 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ + {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(tree.NewDInt(0))}, + {NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDInt(10)}, + }} + t1 := []testCase{ + { + span: "[/0 - /5/foo)", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 5, DistinctRange: 5, UpperBound: tree.NewDInt(5)}, + }, + { + span: "(/2/foo - /9]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 7, DistinctRange: 7, UpperBound: tree.NewDInt(9)}, + }, + { + span: "[/2 - /10/foo)", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 8, DistinctRange: 8, UpperBound: tree.NewDInt(10)}, + }, + { + span: "[/10/ - /10/foo)", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)}, + }, + { + span: "(/10/foo - /10]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)}, + }, + { + span: "[/10/ - /10/foo]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)}, + }, + { + span: "[/10/foo - /10]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)}, + }, + { + span: "[/10/bar - /10/foo)", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)}, + }, + { + span: "(/10/bar - /10/foo]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)}, + }, + { + span: "[/10/bar - /10/foo]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)}, + }, + { + span: "(/10/bar - /10/foo)", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)}, + }, + } + + h2 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ + {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(tree.NewDString("a"))}, + {NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDString("c")}, + }} + t2 := []testCase{ + { + span: "[/0/a\x00 - /0/b/foo)", + expected: &cat.HistogramBucket{NumEq: 0, NumRange: 4.99, DistinctRange: 4.99, UpperBound: tree.NewDString("b")}, + }, + { + span: "(/0/a\x00/foo - /0/b]", + expected: &cat.HistogramBucket{NumEq: 0, NumRange: 4.99, DistinctRange: 4.99, UpperBound: tree.NewDString("b")}, + }, + { + span: "(/0/a\x00/foo - /0/c)", + expected: &cat.HistogramBucket{NumEq: 0, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDString("c")}, + }, + { + span: "(/0/a\x00/foo - /0/c]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDString("c")}, + }, + { + span: "[/0/a\x00 - /0/c/foo)", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDString("c")}, + }, + { + span: "[/0/c/ - /0/c/foo)", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")}, + }, + { + span: "(/0/c/foo - /0/c]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")}, + }, + { + span: "[/0/c/ - /0/c/foo]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")}, + }, + { + span: "[/0/c/foo - /0/c]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")}, + }, + { + span: "[/0/c/bar - /0/c/foo)", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")}, + }, + { + span: "(/0/c/bar - /0/c/foo]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")}, + }, + { + span: "[/0/c/bar - /0/c/foo]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")}, + }, + { + span: "(/0/c/bar - /0/c/foo)", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")}, + }, + } + + runTest(h1, t1, 0 /* colOffset */) + runTest(h2, t2, 1 /* colOffset */) }) }