Skip to content

Commit

Permalink
stats: do not split excluded lower value ranges (#12009) (#12172)
Browse files Browse the repository at this point in the history
  • Loading branch information
alivxxx authored and sre-bot committed Sep 13, 2019
1 parent 9059790 commit c3c04c6
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 101 deletions.
20 changes: 13 additions & 7 deletions statistics/feedback.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,15 +309,21 @@ func buildBucketFeedback(h *Histogram, feedback *QueryFeedback) (map[int]*Bucket
if skip {
continue
}
idx, _ := h.Bounds.LowerBound(0, fb.lower)
idx := h.Bounds.UpperBound(0, fb.lower)
bktIdx := 0
// The last bucket also stores the feedback that falls outside the upper bound.
if idx >= h.Bounds.NumRows()-2 {
if idx >= h.Bounds.NumRows()-1 {
bktIdx = h.Len() - 1
} else if h.Len() == 1 {
bktIdx = 0
} else {
bktIdx = idx / 2
if idx == 0 {
bktIdx = 0
} else {
bktIdx = (idx - 1) / 2
}
// Make sure that this feedback lies within the bucket.
if chunk.Compare(h.Bounds.GetRow(2*bktIdx+1), 0, fb.upper) < 0 {
if chunk.Compare(h.Bounds.GetRow(2*(bktIdx+1)), 0, fb.upper) < 0 {
continue
}
}
Expand Down Expand Up @@ -954,11 +960,11 @@ func formatBuckets(hg *Histogram, lowBkt, highBkt, idxCols int) string {
return hg.bucketToString(lowBkt, idxCols)
}
if lowBkt+1 == highBkt {
return fmt.Sprintf("%s, %s", hg.bucketToString(lowBkt, 0), hg.bucketToString(highBkt, 0))
return fmt.Sprintf("%s, %s", hg.bucketToString(lowBkt, idxCols), hg.bucketToString(highBkt, idxCols))
}
// We do not care about the middle buckets; only their number and total count are printed.
return fmt.Sprintf("%s, (%d buckets, total count %d), %s", hg.bucketToString(lowBkt, 0),
highBkt-lowBkt-1, hg.Buckets[highBkt-1].Count-hg.Buckets[lowBkt].Count, hg.bucketToString(highBkt, 0))
return fmt.Sprintf("%s, (%d buckets, total count %d), %s", hg.bucketToString(lowBkt, idxCols),
highBkt-lowBkt-1, hg.Buckets[highBkt-1].Count-hg.Buckets[lowBkt].Count, hg.bucketToString(highBkt, idxCols))
}

func colRangeToStr(c *Column, ran *ranger.Range, actual int64, factor float64) string {
Expand Down
15 changes: 7 additions & 8 deletions statistics/feedback_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,13 @@ func (s *testFeedbackSuite) TestUpdateHistogram(c *C) {
defaultBucketCount = 7
defer func() { defaultBucketCount = originBucketCount }()
c.Assert(UpdateHistogram(q.Hist(), q).ToString(0), Equals,
"column:0 ndv:10058 totColSize:0\n"+
"num: 10000 lower_bound: 0 upper_bound: 1 repeats: 0\n"+
"num: 9 lower_bound: 2 upper_bound: 7 repeats: 0\n"+
"num: 11 lower_bound: 8 upper_bound: 19 repeats: 0\n"+
"num: 0 lower_bound: 20 upper_bound: 20 repeats: 0\n"+
"num: 18 lower_bound: 21 upper_bound: 39 repeats: 0\n"+
"num: 18 lower_bound: 40 upper_bound: 58 repeats: 0\n"+
"num: 2 lower_bound: 59 upper_bound: 60 repeats: 0")
"column:0 ndv:10053 totColSize:0\n"+
"num: 10001 lower_bound: 0 upper_bound: 2 repeats: 0\n"+
"num: 7 lower_bound: 2 upper_bound: 5 repeats: 0\n"+
"num: 4 lower_bound: 5 upper_bound: 7 repeats: 0\n"+
"num: 11 lower_bound: 10 upper_bound: 20 repeats: 0\n"+
"num: 19 lower_bound: 30 upper_bound: 49 repeats: 0\n"+
"num: 11 lower_bound: 50 upper_bound: 60 repeats: 0")
}

func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
Expand Down
57 changes: 31 additions & 26 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -559,41 +559,43 @@ func (hg *Histogram) typeMatch(ranges []*ranger.Range) bool {
return true
}

// SplitRange splits the range according to the histogram upper bound. Note that we treat last bucket's upper bound
// as inf, so all the split ranges will totally fall in one of the (-inf, u(0)], (u(0), u(1)],...(u(n-3), u(n-2)],
// (u(n-2), +inf), where n is the number of buckets, u(i) is the i-th bucket's upper bound.
// SplitRange splits the ranges at the histogram buckets' lower bounds. Note that we treat the first bucket's lower bound
// as -inf and the last bucket's upper bound as +inf, so every split range falls entirely within one of (-inf, l(1)),
// [l(1), l(2)), ..., [l(n-2), l(n-1)), [l(n-1), +inf), where n is the number of buckets and l(i) is the i-th bucket's lower bound.
func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, oldRanges []*ranger.Range, encoded bool) ([]*ranger.Range, bool) {
if !hg.typeMatch(oldRanges) {
return oldRanges, false
}
// Treat the only bucket as (-inf, +inf), so we do not need to split it.
if hg.Len() == 1 {
return oldRanges, true
}
ranges := make([]*ranger.Range, 0, len(oldRanges))
for _, ran := range oldRanges {
ranges = append(ranges, ran.Clone())
}
split := make([]*ranger.Range, 0, len(ranges))
for len(ranges) > 0 {
// Find the last bound that greater or equal to the LowVal.
// Find the first bound that is greater than the LowVal.
idx := hg.Bounds.UpperBound(0, &ranges[0].LowVal[0])
if !ranges[0].LowExclude && idx > 0 {
cmp := chunk.Compare(hg.Bounds.GetRow(idx-1), 0, &ranges[0].LowVal[0])
if cmp == 0 {
idx--
}
}
// Treat last bucket's upper bound as inf, so we do not need split any more.
if idx >= hg.Bounds.NumRows()-2 {
// Treat the last bucket's upper bound as +inf, so we do not need to split any more.
if idx >= hg.Bounds.NumRows()-1 {
split = append(split, ranges...)
break
}
// Get the corresponding upper bound.
if idx%2 == 0 {
// Treat the first bucket's lower bound as -inf, so just advance to the next lower bound.
if idx == 0 {
idx = 2
}
// Get the next lower bound.
if idx%2 == 1 {
idx++
}
upperBound := hg.Bounds.GetRow(idx)
lowerBound := hg.Bounds.GetRow(idx)
var i int
// Find the first range that need to be split by the upper bound.
// Find the first range that needs to be split by the lower bound.
for ; i < len(ranges); i++ {
if chunk.Compare(upperBound, 0, &ranges[i].HighVal[0]) < 0 {
if chunk.Compare(lowerBound, 0, &ranges[i].HighVal[0]) <= 0 {
break
}
}
Expand All @@ -602,17 +604,20 @@ func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, oldRanges []*range
if len(ranges) == 0 {
break
}
// Split according to the upper bound.
cmp := chunk.Compare(upperBound, 0, &ranges[0].LowVal[0])
if cmp > 0 || (cmp == 0 && !ranges[0].LowExclude) {
upper := upperBound.GetDatum(0, hg.tp)
split = append(split, &ranger.Range{
// Split according to the lower bound.
cmp := chunk.Compare(lowerBound, 0, &ranges[0].LowVal[0])
if cmp > 0 {
lower := lowerBound.GetDatum(0, hg.tp)
newRange := &ranger.Range{
LowExclude: ranges[0].LowExclude,
LowVal: []types.Datum{ranges[0].LowVal[0]},
HighVal: []types.Datum{upper},
HighExclude: false})
ranges[0].LowVal[0] = upper
ranges[0].LowExclude = true
HighVal: []types.Datum{lower},
HighExclude: true}
if validRange(sc, newRange, encoded) {
split = append(split, newRange)
}
ranges[0].LowVal[0] = lower
ranges[0].LowExclude = false
if !validRange(sc, ranges[0], encoded) {
ranges = ranges[1:]
}
Expand Down
Loading

0 comments on commit c3c04c6

Please sign in to comment.