Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner, statistics: build new histogram using range information #7921

Merged
merged 32 commits into from
Jan 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
a9f7ade
planner, statistics: build new histogram using range information
winoros Oct 16, 2018
c2b1708
Merge branch 'master' into selectivity
winoros Oct 16, 2018
14be004
fix check
winoros Oct 16, 2018
763f5c1
add unit-test file.
winoros Oct 16, 2018
5b61c12
address comments
winoros Oct 24, 2018
caea198
Merge branch 'master' into selectivity
winoros Oct 25, 2018
e9df1a3
fix strange build failed error
winoros Oct 25, 2018
37dbb1f
fix and add test when there's null, -inf or +inf
winoros Oct 25, 2018
80819b8
Merge branch 'master' into selectivity
winoros Nov 5, 2018
c26f85f
fix bug during merging
winoros Nov 6, 2018
63db3e3
address comments for column type
winoros Nov 9, 2018
51a6a5c
clean code
winoros Nov 12, 2018
27886bb
address comments
winoros Nov 15, 2018
c53f289
deal with null
winoros Nov 15, 2018
bd9944a
modify unit test
winoros Nov 29, 2018
6ba1148
address comments
winoros Nov 29, 2018
28970e1
Update statistics/histogram.go
zz-jason Dec 10, 2018
d55c26e
address comments
winoros Dec 12, 2018
8de9cc7
fix lint
winoros Dec 13, 2018
e679269
tiny change
winoros Dec 13, 2018
1293ffd
Merge branch 'master' into selectivity
winoros Jan 2, 2019
4d3cc20
remove useless err check
winoros Jan 7, 2019
13a6d77
address comment
winoros Jan 8, 2019
cefba96
address comment
winoros Jan 11, 2019
8a7bb77
Merge branch 'master' into selectivity
winoros Jan 11, 2019
3a9aa6c
fix merge error
winoros Jan 11, 2019
1d0a21d
Merge branch 'master' into selectivity
zz-jason Jan 14, 2019
8006aee
fix feedback error
winoros Jan 14, 2019
79a211b
Merge branch 'selectivity' of https://github.com/winoros/tidb into se…
winoros Jan 14, 2019
b6f57cf
remove debug log
winoros Jan 14, 2019
0da8d23
Merge branch 'master' into selectivity
alivxxx Jan 14, 2019
2ee47a1
Merge branch 'master' into selectivity
zz-jason Jan 14, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions distsql/request_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ func TableRangesToKVRanges(tid int64, ranges []*ranger.Range, fb *statistics.Que
if fb == nil || fb.Hist() == nil {
return tableRangesToKVRangesWithoutSplit(tid, ranges)
}
ranges = fb.Hist().SplitRange(ranges)
krs := make([]kv.KeyRange, 0, len(ranges))
feedbackRanges := make([]*ranger.Range, 0, len(ranges))
for _, ran := range ranges {
Expand Down Expand Up @@ -260,7 +259,7 @@ func IndexRangesToKVRanges(sc *stmtctx.StatementContext, tid, idxID int64, range
feedbackRanges = append(feedbackRanges, &ranger.Range{LowVal: []types.Datum{types.NewBytesDatum(low)},
HighVal: []types.Datum{types.NewBytesDatum(high)}, LowExclude: false, HighExclude: true})
}
feedbackRanges = fb.Hist().SplitRange(feedbackRanges)
feedbackRanges = fb.Hist().SplitRange(sc, feedbackRanges, true)
krs := make([]kv.KeyRange, 0, len(feedbackRanges))
for _, ran := range feedbackRanges {
low, high := ran.LowVal[0].GetBytes(), ran.HighVal[0].GetBytes()
Expand Down
4 changes: 4 additions & 0 deletions executor/table_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ func (e *TableReaderExecutor) Open(ctx context.Context) error {
}

e.resultHandler = &tableResultHandler{}
if e.feedback != nil && e.feedback.Hist() != nil {
// EncodeInt doesn't need a *stmtctx.StatementContext.
e.ranges = e.feedback.Hist().SplitRange(nil, e.ranges, false)
}
firstPartRanges, secondPartRanges := splitRanges(e.ranges, e.keepOrder)
firstResult, err := e.buildResp(ctx, firstPartRanges)
if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ func (p *LogicalJoin) constructInnerTableScan(ds *DataSource, pk *expression.Col
var rowCount float64
pkHist, ok := ds.statisticTable.Columns[pk.ID]
if ok && !ds.statisticTable.Pseudo {
rowCount = pkHist.AvgCountPerValue(ds.statisticTable.Count)
rowCount = pkHist.AvgCountPerNotNullValue(ds.statisticTable.Count)
} else {
rowCount = ds.statisticTable.PseudoAvgCountPerValue()
}
Expand Down Expand Up @@ -506,7 +506,7 @@ func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexIn
var rowCount float64
idxHist, ok := ds.statisticTable.Indices[idx.ID]
if ok && !ds.statisticTable.Pseudo {
rowCount = idxHist.AvgCountPerValue(ds.statisticTable.Count)
rowCount = idxHist.AvgCountPerNotNullValue(ds.statisticTable.Count)
} else {
rowCount = ds.statisticTable.PseudoAvgCountPerValue()
}
Expand Down
4 changes: 2 additions & 2 deletions planner/core/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
if corColInAccessConds {
idxHist, ok := ds.stats.HistColl.Indices[path.index.ID]
if ok && !ds.stats.HistColl.Pseudo {
path.countAfterAccess = idxHist.AvgCountPerValue(ds.statisticTable.Count)
path.countAfterAccess = idxHist.AvgCountPerNotNullValue(ds.statisticTable.Count)
} else {
path.countAfterAccess = ds.statisticTable.PseudoAvgCountPerValue()
}
Expand All @@ -461,7 +461,7 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
path.countAfterAccess = math.Min(ds.stats.RowCount/selectionFactor, float64(ds.statisticTable.Count))
}
if path.indexFilters != nil {
selectivity, err := ds.stats.HistColl.Selectivity(ds.ctx, path.indexFilters)
selectivity, _, err := ds.stats.HistColl.Selectivity(ds.ctx, path.indexFilters)
if err != nil {
log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
selectivity = selectionFactor
Expand Down
17 changes: 15 additions & 2 deletions planner/core/rule_column_pruning.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"github.com/pingcap/parser/ast"
"github.com/pingcap/parser/model"
"github.com/pingcap/parser/mysql"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/infoschema"
)
Expand Down Expand Up @@ -155,7 +156,15 @@ func (p *LogicalUnionScan) PruneColumns(parentUsedCols []*expression.Column) {
// PruneColumns implements LogicalPlan interface.
func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column) {
used := getUsedList(parentUsedCols, ds.schema)
var (
handleCol *expression.Column
handleColInfo *model.ColumnInfo
)
for i := len(used) - 1; i >= 0; i-- {
if ds.tableInfo.PKIsHandle && mysql.HasPriKeyFlag(ds.Columns[i].Flag) {
handleCol = ds.schema.Columns[i]
handleColInfo = ds.Columns[i]
}
if !used[i] {
ds.schema.Columns = append(ds.schema.Columns[:i], ds.schema.Columns[i+1:]...)
ds.Columns = append(ds.Columns[:i], ds.Columns[i+1:]...)
Expand All @@ -169,8 +178,12 @@ func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column) {
// For SQL like `select 1 from t`, tikv's response will be empty if no column is in schema.
// So we'll force to push one if schema doesn't have any column.
if ds.schema.Len() == 0 && !infoschema.IsMemoryDB(ds.DBName.L) {
ds.Columns = append(ds.Columns, model.NewExtraHandleColInfo())
ds.schema.Append(ds.newExtraHandleSchemaCol())
if handleCol == nil {
handleCol = ds.newExtraHandleSchemaCol()
handleColInfo = model.NewExtraHandleColInfo()
}
ds.Columns = append(ds.Columns, handleColInfo)
ds.schema.Append(handleCol)
}
}

Expand Down
17 changes: 13 additions & 4 deletions planner/core/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (

"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/planner/property"
"github.com/pingcap/tidb/statistics"
log "github.com/sirupsen/logrus"
)

Expand Down Expand Up @@ -74,7 +75,7 @@ func (p *baseLogicalPlan) DeriveStats(childStats []*property.StatsInfo) (*proper
return profile, nil
}

func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *property.StatsInfo {
func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) (*property.StatsInfo, *statistics.HistColl) {
profile := &property.StatsInfo{
RowCount: float64(ds.statisticTable.Count),
Cardinality: make([]float64, len(ds.Columns)),
Expand All @@ -91,12 +92,16 @@ func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *property.Stat
}
}
ds.stats = profile
selectivity, err := profile.HistColl.Selectivity(ds.ctx, conds)
selectivity, nodes, err := profile.HistColl.Selectivity(ds.ctx, conds)
if err != nil {
log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
selectivity = selectionFactor
}
return profile.Scale(selectivity)
if ds.ctx.GetSessionVars().OptimizerSelectivityLevel >= 1 && ds.stats.HistColl != nil {
finalHist := ds.stats.HistColl.NewHistCollBySelectivity(ds.ctx.GetSessionVars().StmtCtx, nodes)
return profile, finalHist
}
return profile.Scale(selectivity), nil
}

// DeriveStats implements LogicalPlan DeriveStats interface.
Expand All @@ -105,7 +110,8 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S
for i, expr := range ds.pushedDownConds {
ds.pushedDownConds[i] = expression.PushDownNot(nil, expr, false)
}
ds.stats = ds.getStatsByFilter(ds.pushedDownConds)
var finalHist *statistics.HistColl
ds.stats, finalHist = ds.getStatsByFilter(ds.pushedDownConds)
for _, path := range ds.possibleAccessPaths {
if path.isTablePath {
noIntervalRanges, err := ds.deriveTablePathStats(path)
Expand All @@ -131,6 +137,9 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S
break
}
}
if ds.ctx.GetSessionVars().OptimizerSelectivityLevel >= 1 {
ds.stats.HistColl = finalHist
eurekaka marked this conversation as resolved.
Show resolved Hide resolved
}
return ds.stats, nil
}

Expand Down
2 changes: 1 addition & 1 deletion planner/property/stats_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ type StatsInfo struct {
RowCount float64
Cardinality []float64

HistColl statistics.HistColl
HistColl *statistics.HistColl
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So it may be nil now? We should check all the places that use it to avoid a nil pointer dereference.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this PR, it's safe. DataSource always holds a histogram.

// UsePseudoStats indicates whether the StatsInfo is calculated using the
// pseudo statistics on a table.
UsePseudoStats bool
Expand Down
21 changes: 10 additions & 11 deletions statistics/feedback.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ func buildBucketFeedback(h *Histogram, feedback *QueryFeedback) (map[int]*Bucket
}
total := 0
sc := &stmtctx.StatementContext{TimeZone: time.UTC}
min, max := getMinValue(h.tp), getMaxValue(h.tp)
min, max := getMinValue(h.Tp), getMaxValue(h.Tp)
for _, fb := range feedback.feedback {
skip, err := fb.adjustFeedbackBoundaries(sc, &min, &max)
if err != nil {
Expand Down Expand Up @@ -606,7 +606,7 @@ func UpdateCMSketch(c *CMSketch, eqFeedbacks []feedback) *CMSketch {
}

func buildNewHistogram(h *Histogram, buckets []bucket) *Histogram {
hist := NewHistogram(h.ID, h.NDV, h.NullCount, h.LastUpdateVersion, h.tp, len(buckets), h.TotColSize)
hist := NewHistogram(h.ID, h.NDV, h.NullCount, h.LastUpdateVersion, h.Tp, len(buckets), h.TotColSize)
preCount := int64(0)
for _, bkt := range buckets {
hist.AppendBucket(bkt.lower, bkt.upper, bkt.count+preCount, bkt.repeat)
Expand All @@ -622,7 +622,7 @@ type queryFeedback struct {
HashValues []uint64
IndexRanges [][]byte
// Counts is the number of scan keys in each range. It first stores the count for `IntRanges`, `IndexRanges` or `ColumnRanges`.
// After that, it stores the ranges for `HashValues`.
// After that, it stores the Ranges for `HashValues`.
Counts []int64
ColumnRanges [][]byte
}
Expand Down Expand Up @@ -814,7 +814,7 @@ func (q *QueryFeedback) recalculateExpectCount(h *Handle) error {
if tablePseudo == false {
return nil
}
isIndex := q.hist.tp.Tp == mysql.TypeBlob
isIndex := q.hist.Tp.Tp == mysql.TypeBlob
id := q.hist.ID
if isIndex && (t.Indices[id] == nil || t.Indices[id].NotAccurate() == false) {
return nil
Expand Down Expand Up @@ -1056,7 +1056,7 @@ func dumpFeedbackForIndex(h *Handle, q *QueryFeedback, t *Table) error {
equalityCount, rangeCount = getNewCountForIndex(equalityCount, rangeCount, float64(t.Count), float64(q.feedback[i].count))
value := types.NewBytesDatum(bytes)
q.feedback[i] = feedback{lower: &value, upper: &value, count: int64(equalityCount)}
err = rangeFB.dumpRangeFeedback(h, &rang, rangeCount)
err = rangeFB.dumpRangeFeedback(sc, h, &rang, rangeCount)
if err != nil {
log.Debug("dump range feedback failed:", err)
continue
Expand All @@ -1065,9 +1065,8 @@ func dumpFeedbackForIndex(h *Handle, q *QueryFeedback, t *Table) error {
return errors.Trace(h.dumpFeedbackToKV(q))
}

func (q *QueryFeedback) dumpRangeFeedback(h *Handle, ran *ranger.Range, rangeCount float64) error {
func (q *QueryFeedback) dumpRangeFeedback(sc *stmtctx.StatementContext, h *Handle, ran *ranger.Range, rangeCount float64) error {
if q.tp == indexType {
sc := &stmtctx.StatementContext{TimeZone: time.UTC}
lower, err := codec.EncodeKey(sc, nil, ran.LowVal[0])
if err != nil {
return errors.Trace(err)
Expand All @@ -1079,17 +1078,17 @@ func (q *QueryFeedback) dumpRangeFeedback(h *Handle, ran *ranger.Range, rangeCou
ran.LowVal[0].SetBytes(lower)
ran.HighVal[0].SetBytes(upper)
} else {
if !supportColumnType(q.hist.tp) {
if !supportColumnType(q.hist.Tp) {
return nil
}
if ran.LowVal[0].Kind() == types.KindMinNotNull {
ran.LowVal[0] = getMinValue(q.hist.tp)
ran.LowVal[0] = getMinValue(q.hist.Tp)
}
if ran.HighVal[0].Kind() == types.KindMaxValue {
ran.HighVal[0] = getMaxValue(q.hist.tp)
ran.HighVal[0] = getMaxValue(q.hist.Tp)
}
}
ranges := q.hist.SplitRange([]*ranger.Range{ran})
ranges := q.hist.SplitRange(sc, []*ranger.Range{ran}, q.tp == indexType)
counts := make([]float64, 0, len(ranges))
sum := 0.0
for _, r := range ranges {
Expand Down
4 changes: 2 additions & 2 deletions statistics/feedback_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) {
}
defaultBucketCount = t.bucketCount
bkts = mergeBuckets(bkts, t.isNewBuckets, float64(totalCount))
result := buildNewHistogram(&Histogram{tp: types.NewFieldType(mysql.TypeLong)}, bkts).ToString(0)
result := buildNewHistogram(&Histogram{Tp: types.NewFieldType(mysql.TypeLong)}, bkts).ToString(0)
c.Assert(result, Equals, t.result)
}
}
Expand All @@ -228,7 +228,7 @@ func (s *testFeedbackSuite) TestFeedbackEncoding(c *C) {
}
c.Assert(q.Equal(rq), IsTrue)

hist.tp = types.NewFieldType(mysql.TypeBlob)
hist.Tp = types.NewFieldType(mysql.TypeBlob)
q = &QueryFeedback{hist: hist}
q.feedback = append(q.feedback, feedback{encodeInt(0), encodeInt(3), 1, 0})
q.feedback = append(q.feedback, feedback{encodeInt(0), encodeInt(1), 1, 0})
Expand Down
Loading