Skip to content

Commit

Permalink
minor reset
Browse files Browse the repository at this point in the history
  • Loading branch information
terry1purcell committed May 17, 2024
1 parent f5ac93e commit fc7136c
Showing 1 changed file with 9 additions and 5 deletions.
14 changes: 9 additions & 5 deletions pkg/statistics/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,18 +326,23 @@ func BuildHistAndTopN(
continue
}
// case 2, meet a different value: counting for the "current" is complete
// case 2-1, now topn is empty: append the "current" count directly
// case 2-1, do not add a count of 1
if curCnt == 1 {
cur, curCnt = sampleBytes, 1
continue
}
// case 2-2, now topn is empty: append the "current" count directly if the count is greater than 1
if len(topNList) == 0 {
topNList = append(topNList, TopNMeta{Encoded: cur, Count: uint64(curCnt)})
cur, curCnt = sampleBytes, 1
continue
}
// case 2-2, now topn is full, and the "current" count is less than the least count in the topn: no need to insert the "current"
// case 2-3, now topn is full, and the "current" count is less than the least count in the topn: no need to insert the "current"
if len(topNList) >= numTopN && uint64(curCnt) <= topNList[len(topNList)-1].Count {
cur, curCnt = sampleBytes, 1
continue
}
// case 2-3, now topn is not full, or the "current" count is larger than the least count in the topn: need to find a slot to insert the "current"
// case 2-4, now topn is not full, or the "current" count is larger than the least count in the topn: need to find a slot to insert the "current"
j := len(topNList)
for ; j > 0; j-- {
if uint64(curCnt) < topNList[j-1].Count {
Expand Down Expand Up @@ -454,8 +459,7 @@ func BuildHistAndTopN(
//
// We assume that the selectivity of the values not in the top-n list is 1/remained_ndv, which is the internal implementation of EqualRowCount
func pruneTopNItem(topns []TopNMeta, ndv, nullCount, sampleRows, totalRows int64) []TopNMeta {
// If the sampleRows holds all rows, or NDV of samples equals to actual NDV, we just return the TopN directly.
if sampleRows == totalRows || totalRows <= 1 || int64(len(topns)) >= ndv || len(topns) == 0 {
if totalRows <= 1 || len(topns) == 0 {
return topns
}
// Sum the occurrences except the least common one from the top-n list. To check whether the least common one is worth
Expand Down

0 comments on commit fc7136c

Please sign in to comment.