pingcap · zz-jason · Jan 14, 2019 · Oct 16, 2018 · Oct 16, 2018 · Oct 16, 2018
diff --git a/distsql/request_builder.go b/distsql/request_builder.go
@@ -173,7 +173,8 @@ func TableRangesToKVRanges(tid int64, ranges []*ranger.Range, fb *statistics.Que
 	if fb == nil || fb.Hist() == nil {
 		return tableRangesToKVRangesWithoutSplit(tid, ranges)
 	}
-	ranges = fb.Hist().SplitRange(ranges)
+	// EncodeInt doesn't need a *stmtctx.StatementContext, so nil is passed here.
+	ranges = fb.Hist().SplitRange(nil, ranges, false)
 	krs := make([]kv.KeyRange, 0, len(ranges))
 	feedbackRanges := make([]*ranger.Range, 0, len(ranges))
 	for _, ran := range ranges {
@@ -260,7 +261,7 @@ func IndexRangesToKVRanges(sc *stmtctx.StatementContext, tid, idxID int64, range
 		feedbackRanges = append(feedbackRanges, &ranger.Range{LowVal: []types.Datum{types.NewBytesDatum(low)},
 			HighVal: []types.Datum{types.NewBytesDatum(high)}, LowExclude: false, HighExclude: true})
 	}
-	feedbackRanges = fb.Hist().SplitRange(feedbackRanges)
+	feedbackRanges = fb.Hist().SplitRange(sc, feedbackRanges, true)
 	krs := make([]kv.KeyRange, 0, len(feedbackRanges))
 	for _, ran := range feedbackRanges {
 		low, high := ran.LowVal[0].GetBytes(), ran.HighVal[0].GetBytes()

diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go
@@ -459,7 +459,7 @@ func (p *LogicalJoin) constructInnerTableScan(ds *DataSource, pk *expression.Col
 	var rowCount float64
 	pkHist, ok := ds.statisticTable.Columns[pk.ID]
 	if ok && !ds.statisticTable.Pseudo {
-		rowCount = pkHist.AvgCountPerValue(ds.statisticTable.Count)
+		rowCount = pkHist.AvgCountPerNotNullValue(ds.statisticTable.Count)
 	} else {
 		rowCount = ds.statisticTable.PseudoAvgCountPerValue()
 	}
@@ -506,7 +506,7 @@ func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexIn
 	var rowCount float64
 	idxHist, ok := ds.statisticTable.Indices[idx.ID]
 	if ok && !ds.statisticTable.Pseudo {
-		rowCount = idxHist.AvgCountPerValue(ds.statisticTable.Count)
+		rowCount = idxHist.AvgCountPerNotNullValue(ds.statisticTable.Count)
 	} else {
 		rowCount = ds.statisticTable.PseudoAvgCountPerValue()
 	}

diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go
@@ -448,7 +448,7 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
 	if corColInAccessConds {
 		idxHist, ok := ds.stats.HistColl.Indices[path.index.ID]
 		if ok && !ds.stats.HistColl.Pseudo {
-			path.countAfterAccess = idxHist.AvgCountPerValue(ds.statisticTable.Count)
+			path.countAfterAccess = idxHist.AvgCountPerNotNullValue(ds.statisticTable.Count)
 		} else {
 			path.countAfterAccess = ds.statisticTable.PseudoAvgCountPerValue()
 		}
@@ -459,7 +459,7 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
 		path.countAfterAccess = math.Min(ds.stats.RowCount/selectionFactor, float64(ds.statisticTable.Count))
 	}
 	if path.indexFilters != nil {
-		selectivity, err := ds.stats.HistColl.Selectivity(ds.ctx, path.indexFilters)
+		selectivity, _, err := ds.stats.HistColl.Selectivity(ds.ctx, path.indexFilters)
 		if err != nil {
 			log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
 			selectivity = selectionFactor

diff --git a/planner/core/stats.go b/planner/core/stats.go
@@ -18,6 +18,7 @@ import (
 
 	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/planner/property"
+	"github.com/pingcap/tidb/statistics"
 	log "github.com/sirupsen/logrus"
 )
 
@@ -74,7 +75,7 @@ func (p *baseLogicalPlan) DeriveStats(childStats []*property.StatsInfo) (*proper
 	return profile, nil
 }
 
-func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *property.StatsInfo {
+func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) (*property.StatsInfo, *statistics.HistColl) {
 	profile := &property.StatsInfo{
 		RowCount:       float64(ds.statisticTable.Count),
 		Cardinality:    make([]float64, len(ds.Columns)),
@@ -91,12 +92,16 @@ func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *property.Stat
 		}
 	}
 	ds.stats = profile
-	selectivity, err := profile.HistColl.Selectivity(ds.ctx, conds)
+	selectivity, nodes, err := profile.HistColl.Selectivity(ds.ctx, conds)
 	if err != nil {
 		log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
 		selectivity = selectionFactor
 	}
-	return profile.Scale(selectivity)
+	if ds.ctx.GetSessionVars().OptimizerSelectivityLevel >= 1 && ds.stats.HistColl != nil {
+		finalHist := ds.stats.HistColl.NewHistCollBySelectivity(ds.ctx.GetSessionVars().StmtCtx, nodes)
+		return profile, finalHist
+	}
+	return profile.Scale(selectivity), nil
 }
 
 // DeriveStats implement LogicalPlan DeriveStats interface.
@@ -105,7 +110,8 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S
 	for i, expr := range ds.pushedDownConds {
 		ds.pushedDownConds[i] = expression.PushDownNot(nil, expr, false)
 	}
-	ds.stats = ds.getStatsByFilter(ds.pushedDownConds)
+	var finalHist *statistics.HistColl
+	ds.stats, finalHist = ds.getStatsByFilter(ds.pushedDownConds)
 	for _, path := range ds.possibleAccessPaths {
 		if path.isTablePath {
 			noIntervalRanges, err := ds.deriveTablePathStats(path)
@@ -131,6 +137,9 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S
 			break
 		}
 	}
+	if ds.ctx.GetSessionVars().OptimizerSelectivityLevel >= 1 {
+		ds.stats.HistColl = finalHist
+	}
 	return ds.stats, nil
 }
 

diff --git a/planner/property/stats_info.go b/planner/property/stats_info.go
@@ -24,7 +24,7 @@ type StatsInfo struct {
 	RowCount    float64
 	Cardinality []float64
 
-	HistColl statistics.HistColl
+	HistColl *statistics.HistColl
 	// UsePseudoStats indicates whether the StatsInfo is calculated using the
 	// pseudo statistics on a table.
 	UsePseudoStats bool

diff --git a/statistics/feedback.go b/statistics/feedback.go
@@ -295,7 +295,7 @@ func buildBucketFeedback(h *Histogram, feedback *QueryFeedback) (map[int]*Bucket
 	total := 0
 	sc := &stmtctx.StatementContext{TimeZone: time.UTC}
 	kind := feedback.feedback[0].lower.Kind()
-	min, max := getMinValue(kind, h.tp), getMaxValue(kind, h.tp)
+	min, max := getMinValue(kind, h.Tp), getMaxValue(kind, h.Tp)
 	for _, fb := range feedback.feedback {
 		skip, err := fb.adjustFeedbackBoundaries(sc, &min, &max)
 		if err != nil {
@@ -607,7 +607,7 @@ func UpdateCMSketch(c *CMSketch, eqFeedbacks []feedback) *CMSketch {
 }
 
 func buildNewHistogram(h *Histogram, buckets []bucket) *Histogram {
-	hist := NewHistogram(h.ID, h.NDV, h.NullCount, h.LastUpdateVersion, h.tp, len(buckets), h.TotColSize)
+	hist := NewHistogram(h.ID, h.NDV, h.NullCount, h.LastUpdateVersion, h.Tp, len(buckets), h.TotColSize)
 	preCount := int64(0)
 	for _, bkt := range buckets {
 		hist.AppendBucket(bkt.lower, bkt.upper, bkt.count+preCount, bkt.repeat)
@@ -623,7 +623,7 @@ type queryFeedback struct {
 	HashValues  []uint64
 	IndexRanges [][]byte
 	// Counts is the number of scan keys in each range. It first stores the count for `IntRanges`, `IndexRanges` or `ColumnRanges`.
-	// After that, it stores the ranges for `HashValues`.
+	// After that, it stores the ranges for `HashValues`.
 	Counts       []int64
 	ColumnRanges [][]byte
 }
@@ -808,7 +808,7 @@ func (q *QueryFeedback) recalculateExpectCount(h *Handle) error {
 	if tablePseudo == false {
 		return nil
 	}
-	isIndex := q.hist.tp.Tp == mysql.TypeBlob
+	isIndex := q.hist.Tp.Tp == mysql.TypeBlob
 	id := q.hist.ID
 	if isIndex && (t.Indices[id] == nil || t.Indices[id].NotAccurate() == false) {
 		return nil
@@ -1050,7 +1050,7 @@ func dumpFeedbackForIndex(h *Handle, q *QueryFeedback, t *Table) error {
 		equalityCount, rangeCount = getNewCountForIndex(equalityCount, rangeCount, float64(t.Count), float64(q.feedback[i].count))
 		value := types.NewBytesDatum(bytes)
 		q.feedback[i] = feedback{lower: &value, upper: &value, count: int64(equalityCount)}
-		err = rangeFB.dumpRangeFeedback(h, &rang, rangeCount)
+		err = rangeFB.dumpRangeFeedback(sc, h, &rang, rangeCount)
 		if err != nil {
 			log.Debug("dump range feedback failed:", err)
 			continue
@@ -1059,9 +1059,8 @@ func dumpFeedbackForIndex(h *Handle, q *QueryFeedback, t *Table) error {
 	return errors.Trace(h.dumpFeedbackToKV(q))
 }
 
-func (q *QueryFeedback) dumpRangeFeedback(h *Handle, ran *ranger.Range, rangeCount float64) error {
+func (q *QueryFeedback) dumpRangeFeedback(sc *stmtctx.StatementContext, h *Handle, ran *ranger.Range, rangeCount float64) error {
 	if q.tp == indexType {
-		sc := &stmtctx.StatementContext{TimeZone: time.UTC}
 		lower, err := codec.EncodeKey(sc, nil, ran.LowVal[0])
 		if err != nil {
 			return errors.Trace(err)
@@ -1078,13 +1077,13 @@ func (q *QueryFeedback) dumpRangeFeedback(h *Handle, ran *ranger.Range, rangeCou
 			return nil
 		}
 		if ran.LowVal[0].Kind() == types.KindMinNotNull {
-			ran.LowVal[0] = getMinValue(k, q.hist.tp)
+			ran.LowVal[0] = getMinValue(k, q.hist.Tp)
 		}
 		if ran.HighVal[0].Kind() == types.KindMaxValue {
-			ran.HighVal[0] = getMaxValue(k, q.hist.tp)
+			ran.HighVal[0] = getMaxValue(k, q.hist.Tp)
 		}
 	}
-	ranges := q.hist.SplitRange([]*ranger.Range{ran})
+	ranges := q.hist.SplitRange(sc, []*ranger.Range{ran}, q.tp == indexType)
 	counts := make([]float64, 0, len(ranges))
 	sum := 0.0
 	for _, r := range ranges {

diff --git a/statistics/feedback_test.go b/statistics/feedback_test.go
@@ -202,7 +202,7 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) {
 		}
 		defaultBucketCount = t.bucketCount
 		bkts = mergeBuckets(bkts, t.isNewBuckets, float64(totalCount))
-		result := buildNewHistogram(&Histogram{tp: types.NewFieldType(mysql.TypeLong)}, bkts).ToString(0)
+		result := buildNewHistogram(&Histogram{Tp: types.NewFieldType(mysql.TypeLong)}, bkts).ToString(0)
 		c.Assert(result, Equals, t.result)
 	}
 }
@@ -228,7 +228,7 @@ func (s *testFeedbackSuite) TestFeedbackEncoding(c *C) {
 	}
 	c.Assert(q.Equal(rq), IsTrue)
 
-	hist.tp = types.NewFieldType(mysql.TypeBlob)
+	hist.Tp = types.NewFieldType(mysql.TypeBlob)
 	q = &QueryFeedback{hist: hist}
 	q.feedback = append(q.feedback, feedback{encodeInt(0), encodeInt(3), 1, 0})
 	q.feedback = append(q.feedback, feedback{encodeInt(0), encodeInt(1), 1, 0})