Skip to content

Commit

Permalink
Merge pull request cockroachdb#76555 from rytaft/backport21.2-76486
Browse files Browse the repository at this point in the history
release-21.2: opt: fix bug in histogram estimation code for multi-column spans
  • Loading branch information
rytaft committed Feb 15, 2022
2 parents c7cfac5 + 80953c1 commit df90f09
Show file tree
Hide file tree
Showing 3 changed files with 202 additions and 26 deletions.
55 changes: 55 additions & 0 deletions pkg/sql/opt/memo/testdata/stats/scan
Original file line number Diff line number Diff line change
Expand Up @@ -2162,3 +2162,58 @@ select
│ └── fd: (2)-->(5)
└── filters
└── st_coveredby(geom:1, '0101000000000000000000F03F000000000000F03F') [type=bool, outer=(1), immutable, constraints=(/1: (/NULL - ])]

# Regression test for #76485. Ensure that we do not estimate 0 rows
# when filtering a histogram with a multi-column span.

exec-ddl
CREATE TABLE t76485 (
a INT,
b INT,
c STRING,
INDEX (a, c),
INDEX (a, b, c)
)
----

exec-ddl
ALTER TABLE t76485 INJECT STATISTICS '[
{
"columns": [
"a"
],
"created_at": "2022-02-12 23:43:16.318153",
"distinct_count": 10,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "10"
},
{
"distinct_range": 9,
"num_eq": 10000,
"num_range": 9,
"upper_bound": "20"
}
],
"histo_col_type": "INT8",
"histo_version": 1,
"name": "__auto__",
"null_count": 0,
"row_count": 10009
}
]'
----

opt
SELECT * FROM t76485 WHERE a = 20 AND b = 30 AND c < 'foo';
----
scan t76485@secondary
├── columns: a:1(int!null) b:2(int!null) c:3(string!null)
├── constraint: /1/2/3/4: (/20/30/NULL - /20/30/'foo')
├── stats: [rows=9.3108651, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=9.3108651, null(3)=0, distinct(1,2)=1, null(1,2)=0, distinct(1-3)=9.3108651, null(1-3)=0]
│ histogram(1)= 0 9.3109
│ <---- 20 -
└── fd: ()-->(1,2)
16 changes: 11 additions & 5 deletions pkg/sql/opt/props/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -603,12 +603,18 @@ func getFilteredBucket(

// Determine whether this span includes the original upper bound of the
// bucket.
isSpanEndBoundaryInclusive := filteredSpan.EndBoundary() == constraint.IncludeBoundary
includesOriginalUpperBound := isSpanEndBoundaryInclusive && cmpSpanEndBucketEnd == 0
if iter.desc {
isSpanStartBoundaryInclusive := filteredSpan.StartBoundary() == constraint.IncludeBoundary
includesOriginalUpperBound = isSpanStartBoundaryInclusive && cmpSpanStartBucketStart == 0
var keyLength, cmp int
var keyBoundaryInclusive bool
if !iter.desc {
keyLength = filteredSpan.EndKey().Length()
keyBoundaryInclusive = filteredSpan.EndBoundary() == constraint.IncludeBoundary
cmp = cmpSpanEndBucketEnd
} else {
keyLength = filteredSpan.StartKey().Length()
keyBoundaryInclusive = filteredSpan.StartBoundary() == constraint.IncludeBoundary
cmp = cmpSpanStartBucketStart
}
includesOriginalUpperBound := cmp == 0 && ((colOffset < keyLength-1) || keyBoundaryInclusive)

// Calculate the new value for numEq.
var numEq float64
Expand Down
157 changes: 136 additions & 21 deletions pkg/sql/opt/props/histogram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ func TestFilterBucket(t *testing.T) {
}

runTestCase := func(
h *Histogram, span *constraint.Span, desc bool,
h *Histogram, span *constraint.Span, desc bool, colOffset int,
) (actual *cat.HistogramBucket, err error) {
defer func() {
// Any errors will be propagated as panics.
Expand All @@ -364,19 +364,19 @@ func TestFilterBucket(t *testing.T) {
// mark the lower bound of the second bucket. Set the iterator to point to
// the second bucket.
iter.setIdx(1)
b := getFilteredBucket(&iter, &keyCtx, span, 0 /* colIdx */)
b := getFilteredBucket(&iter, &keyCtx, span, colOffset)
roundBucket(b)
return b, nil
}

runTest := func(h *Histogram, testData []testCase, typ types.Family) {
runTest := func(h *Histogram, testData []testCase, colOffset int, typs ...types.Family) {
for _, testCase := range testData {
span := constraint.ParseSpan(&evalCtx, testCase.span, typ)
span := constraint.ParseSpan(&evalCtx, testCase.span, typs...)
ascAndDesc := []constraint.Span{span, makeDescSpan(&span)}

// Make sure all test cases work with both ascending and descending columns.
for i, span := range ascAndDesc {
actual, err := runTestCase(h, &span, i == 1 /* desc */)
actual, err := runTestCase(h, &span, i == 1 /* desc */, colOffset)
if err != nil && !testCase.isError {
t.Fatalf("for span %s got error %v", testCase.span, err)
} else if err == nil {
Expand Down Expand Up @@ -437,7 +437,7 @@ func TestFilterBucket(t *testing.T) {
},
}

runTest(h, testData, types.IntFamily)
runTest(h, testData, 0 /* colOffset */, types.IntFamily)
})

t.Run("float", func(t *testing.T) {
Expand Down Expand Up @@ -472,7 +472,7 @@ func TestFilterBucket(t *testing.T) {
},
}

runTest(h, testData, types.FloatFamily)
runTest(h, testData, 0 /* colOffset */, types.FloatFamily)
})

t.Run("decimal", func(t *testing.T) {
Expand Down Expand Up @@ -509,7 +509,7 @@ func TestFilterBucket(t *testing.T) {
},
}

runTest(h, testData, types.DecimalFamily)
runTest(h, testData, 0 /* colOffset */, types.DecimalFamily)
})

t.Run("date", func(t *testing.T) {
Expand Down Expand Up @@ -542,7 +542,7 @@ func TestFilterBucket(t *testing.T) {
},
}

runTest(h, testData, types.DateFamily)
runTest(h, testData, 0 /* colOffset */, types.DateFamily)
})

t.Run("timestamp", func(t *testing.T) {
Expand Down Expand Up @@ -575,7 +575,7 @@ func TestFilterBucket(t *testing.T) {
},
}

runTest(h, testData, types.TimestampFamily)
runTest(h, testData, 0 /* colOffset */, types.TimestampFamily)
})

t.Run("time", func(t *testing.T) {
Expand Down Expand Up @@ -608,7 +608,7 @@ func TestFilterBucket(t *testing.T) {
},
}

runTest(h, testData, types.TimeFamily)
runTest(h, testData, 0 /* colOffset */, types.TimeFamily)
})

t.Run("timetz", func(t *testing.T) {
Expand Down Expand Up @@ -692,8 +692,8 @@ func TestFilterBucket(t *testing.T) {
},
}

runTest(h1, testData1, types.TimeTZFamily)
runTest(h2, testData2, types.TimeTZFamily)
runTest(h1, testData1, 0 /* colOffset */, types.TimeTZFamily)
runTest(h2, testData2, 0 /* colOffset */, types.TimeTZFamily)
})

t.Run("string", func(t *testing.T) {
Expand Down Expand Up @@ -754,9 +754,9 @@ func TestFilterBucket(t *testing.T) {
},
}

runTest(h1, t1, types.StringFamily)
runTest(h2, t2, types.StringFamily)
runTest(h3, t3, types.StringFamily)
runTest(h1, t1, 0 /* colOffset */, types.StringFamily)
runTest(h2, t2, 0 /* colOffset */, types.StringFamily)
runTest(h3, t3, 0 /* colOffset */, types.StringFamily)
})

t.Run("uuid", func(t *testing.T) {
Expand Down Expand Up @@ -812,8 +812,8 @@ func TestFilterBucket(t *testing.T) {
},
}

runTest(h1, t1, types.UuidFamily)
runTest(h2, t2, types.UuidFamily)
runTest(h1, t1, 0 /* colOffset */, types.UuidFamily)
runTest(h2, t2, 0 /* colOffset */, types.UuidFamily)
})

t.Run("inet", func(t *testing.T) {
Expand Down Expand Up @@ -888,9 +888,124 @@ func TestFilterBucket(t *testing.T) {
},
}

runTest(h1, t1, types.INetFamily)
runTest(h2, t2, types.INetFamily)
runTest(h3, t3, types.INetFamily)
runTest(h1, t1, 0 /* colOffset */, types.INetFamily)
runTest(h2, t2, 0 /* colOffset */, types.INetFamily)
runTest(h3, t3, 0 /* colOffset */, types.INetFamily)
})

t.Run("multi-col", func(t *testing.T) {
h1 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{
{NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(tree.NewDInt(0))},
{NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDInt(10)},
}}
t1 := []testCase{
{
span: "[/0 - /5/foo)",
expected: &cat.HistogramBucket{NumEq: 1, NumRange: 5, DistinctRange: 5, UpperBound: tree.NewDInt(5)},
},
{
span: "(/2/foo - /9]",
expected: &cat.HistogramBucket{NumEq: 1, NumRange: 7, DistinctRange: 7, UpperBound: tree.NewDInt(9)},
},
{
span: "[/2 - /10/foo)",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 8, DistinctRange: 8, UpperBound: tree.NewDInt(10)},
},
{
span: "[/10/ - /10/foo)",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)},
},
{
span: "(/10/foo - /10]",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)},
},
{
span: "[/10/ - /10/foo]",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)},
},
{
span: "[/10/foo - /10]",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)},
},
{
span: "[/10/bar - /10/foo)",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)},
},
{
span: "(/10/bar - /10/foo]",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)},
},
{
span: "[/10/bar - /10/foo]",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)},
},
{
span: "(/10/bar - /10/foo)",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)},
},
}

h2 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{
{NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(tree.NewDString("a"))},
{NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDString("c")},
}}
t2 := []testCase{
{
span: "[/0/a\x00 - /0/b/foo)",
expected: &cat.HistogramBucket{NumEq: 0, NumRange: 4.99, DistinctRange: 4.99, UpperBound: tree.NewDString("b")},
},
{
span: "(/0/a\x00/foo - /0/b]",
expected: &cat.HistogramBucket{NumEq: 0, NumRange: 4.99, DistinctRange: 4.99, UpperBound: tree.NewDString("b")},
},
{
span: "(/0/a\x00/foo - /0/c)",
expected: &cat.HistogramBucket{NumEq: 0, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDString("c")},
},
{
span: "(/0/a\x00/foo - /0/c]",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDString("c")},
},
{
span: "[/0/a\x00 - /0/c/foo)",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDString("c")},
},
{
span: "[/0/c/ - /0/c/foo)",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")},
},
{
span: "(/0/c/foo - /0/c]",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")},
},
{
span: "[/0/c/ - /0/c/foo]",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")},
},
{
span: "[/0/c/foo - /0/c]",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")},
},
{
span: "[/0/c/bar - /0/c/foo)",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")},
},
{
span: "(/0/c/bar - /0/c/foo]",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")},
},
{
span: "[/0/c/bar - /0/c/foo]",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")},
},
{
span: "(/0/c/bar - /0/c/foo)",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("c")},
},
}

runTest(h1, t1, 0 /* colOffset */)
runTest(h2, t2, 1 /* colOffset */)
})

}
Expand Down

0 comments on commit df90f09

Please sign in to comment.