From fc7136ca3c28df5273f5024bdfe40116e7996564 Mon Sep 17 00:00:00 2001 From: terry1purcell Date: Fri, 17 May 2024 16:21:32 -0700 Subject: [PATCH] minor reset --- pkg/statistics/builder.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pkg/statistics/builder.go b/pkg/statistics/builder.go index c71247f9bb078..1d4c3ffe0b3c5 100644 --- a/pkg/statistics/builder.go +++ b/pkg/statistics/builder.go @@ -326,18 +326,23 @@ func BuildHistAndTopN( continue } // case 2, meet a different value: counting for the "current" is complete - // case 2-1, now topn is empty: append the "current" count directly + // case 2-1, the "current" value was seen only once: do not add it to the topn + if curCnt == 1 { + cur, curCnt = sampleBytes, 1 + continue + } + // case 2-2, now topn is empty: append the "current" count directly, since its count is greater than 1 if len(topNList) == 0 { topNList = append(topNList, TopNMeta{Encoded: cur, Count: uint64(curCnt)}) cur, curCnt = sampleBytes, 1 continue } - // case 2-2, now topn is full, and the "current" count is less than the least count in the topn: no need to insert the "current" + // case 2-3, now topn is full, and the "current" count is less than the least count in the topn: no need to insert the "current" if len(topNList) >= numTopN && uint64(curCnt) <= topNList[len(topNList)-1].Count { cur, curCnt = sampleBytes, 1 continue } - // case 2-3, now topn is not full, or the "current" count is larger than the least count in the topn: need to find a slot to insert the "current" + // case 2-4, now topn is not full, or the "current" count is larger than the least count in the topn: need to find a slot to insert the "current" j := len(topNList) for ; j > 0; j-- { if uint64(curCnt) < topNList[j-1].Count { @@ -454,8 +459,7 @@ func BuildHistAndTopN( // // We assume that the ones not in the top-n list's selectivity is 1/remained_ndv which is the internal implementation of EqualRowCount func pruneTopNItem(topns []TopNMeta, ndv, nullCount, sampleRows, totalRows int64) 
[]TopNMeta { - // If the sampleRows holds all rows, or NDV of samples equals to actual NDV, we just return the TopN directly. - if sampleRows == totalRows || totalRows <= 1 || int64(len(topns)) >= ndv || len(topns) == 0 { + if totalRows <= 1 || len(topns) == 0 { return topns } // Sum the occurrence except the least common one from the top-n list. To check whether the lest common one is worth