Skip to content

Commit

Permalink
optimize stats when table not flushed yet (#21008)
Browse files Browse the repository at this point in the history
optimize stats when table not flushed yet

Approved by: @ouyuanning, @heni02, @sukki37
  • Loading branch information
badboynt1 authored Dec 30, 2024
1 parent 8d91524 commit 634912a
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 10 deletions.
22 changes: 13 additions & 9 deletions pkg/sql/plan/apply_indices.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,18 +399,22 @@ func (builder *QueryBuilder) applyIndicesForFiltersRegularIndex(nodeID int32, no
}
}

if catalog.IsFakePkName(node.TableDef.Pkey.PkeyColName) {
// for cluster by table, make it less prone to go index
if node.Stats.Selectivity >= InFilterSelectivityLimit/2 || node.Stats.Outcnt >= InFilterCardLimitNonPK {
//default stats means this table maybe not flushed yet, then we don't skip the index
ignoreStats := IsDefaultStats(node.Stats)
if !ignoreStats {
if catalog.IsFakePkName(node.TableDef.Pkey.PkeyColName) {
// for cluster by table, make it less prone to go index
if node.Stats.Selectivity >= InFilterSelectivityLimit/2 || node.Stats.Outcnt >= InFilterCardLimitNonPK {
return nodeID
}
}
if node.Stats.Selectivity > InFilterSelectivityLimit || node.Stats.Outcnt > float64(GetInFilterCardLimitOnPK(builder.compCtx.GetProcess().GetService(), node.Stats.TableCnt)) {
return nodeID
}
}
if node.Stats.Selectivity > InFilterSelectivityLimit || node.Stats.Outcnt > float64(GetInFilterCardLimitOnPK(builder.compCtx.GetProcess().GetService(), node.Stats.TableCnt)) {
return nodeID
}

// Apply unique/secondary indices for point select
idxToChoose, filterIdx := builder.getMostSelectiveIndexForPointSelect(indexes, node)
idxToChoose, filterIdx := builder.getMostSelectiveIndexForPointSelect(indexes, node, ignoreStats)
if idxToChoose != -1 {
retID, idxTableNodeID := builder.applyIndexJoin(indexes[idxToChoose], node, EqualIndexCondition, filterIdx, scanSnapshot)
builder.applyExtraFiltersOnIndex(indexes[idxToChoose], node, builder.qry.Nodes[idxTableNodeID], filterIdx)
Expand Down Expand Up @@ -848,7 +852,7 @@ func (builder *QueryBuilder) applyIndexJoin(idxDef *IndexDef, node *plan.Node, f
return joinNodeID, idxTableNodeID
}

func (builder *QueryBuilder) getMostSelectiveIndexForPointSelect(indexes []*IndexDef, node *plan.Node) (int, []int32) {
func (builder *QueryBuilder) getMostSelectiveIndexForPointSelect(indexes []*IndexDef, node *plan.Node, ignoreStats bool) (int, []int32) {
currentSel := 1.0
currentIdx := -1
savedFilterIdx := make([]int32, 0)
Expand Down Expand Up @@ -889,7 +893,7 @@ func (builder *QueryBuilder) getMostSelectiveIndexForPointSelect(indexes []*Inde
}
filterIdx = append(filterIdx, idx)
filter := node.FilterList[idx]
if filter.Selectivity <= InFilterSelectivityLimit && node.Stats.TableCnt*filter.Selectivity <= float64(GetInFilterCardLimitOnPK(builder.compCtx.GetProcess().GetService(), node.Stats.TableCnt)) {
if ignoreStats || (filter.Selectivity <= InFilterSelectivityLimit && node.Stats.TableCnt*filter.Selectivity <= float64(GetInFilterCardLimitOnPK(builder.compCtx.GetProcess().GetService(), node.Stats.TableCnt))) {
usePartialIndex = true
}
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/plan/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -1415,6 +1415,10 @@ func DefaultBigStats() *plan.Stats {
return stats
}

// IsDefaultStats reports whether stats still holds the placeholder values
// assigned when no real statistics have been collected for a table
// (e.g. the table has not been flushed yet). Callers use this to avoid
// basing index-selection decisions on fake numbers.
//
// NOTE(review): the constants below mirror the values assigned in
// DefaultStats (TableCnt/Cost/Outcnt = 1000, Selectivity = 1,
// BlockNum = 1, Rowsize = 100) — keep the two functions in sync if the
// defaults ever change. Exact float equality is intentional here: the
// values are set verbatim, never computed.
func IsDefaultStats(stats *plan.Stats) bool {
	// Defensive: absent stats are not "default stats"; avoid a nil deref.
	if stats == nil {
		return false
	}
	return stats.Cost == 1000 &&
		stats.TableCnt == 1000 &&
		stats.Outcnt == 1000 &&
		stats.Selectivity == 1 &&
		stats.BlockNum == 1 &&
		stats.Rowsize == 100
}

func DefaultStats() *plan.Stats {
stats := new(Stats)
stats.TableCnt = 1000
Expand Down
14 changes: 14 additions & 0 deletions test/distributed/cases/optimizer/index.result
Original file line number Diff line number Diff line change
Expand Up @@ -272,4 +272,18 @@ drop table t1;
create table t1(a bigint, b bigint default null, c int, primary key(a), key(b));
insert into t1(a,b,c) select result,result%10, result from generate_series(1,3000000)g;
delete from t1 where b = 1;
drop table t1;
create table t1(c1 int, c2 int, c3 int, key(c1));
explain select * from t1 where c1=1;
TP QUERY PLAN
Project
-> Join
Join Type: INDEX
Join Cond: (t1.__mo_fake_pk_col = #[1,0])
Runtime Filter Build: #[-1,0]
-> Table Scan on d1.t1 [ForceOneCN]
Filter Cond: (t1.c1 = 1)
Runtime Filter Probe: t1.__mo_fake_pk_col
-> Index Table Scan on t1.c1 [ForceOneCN]
Filter Cond: prefix_eq(#[0,0])
drop database d1;
6 changes: 5 additions & 1 deletion test/distributed/cases/optimizer/index.test
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,8 @@ drop table t1;
create table t1(a bigint, b bigint default null, c int, primary key(a), key(b));
insert into t1(a,b,c) select result,result%10, result from generate_series(1,3000000)g;
delete from t1 where b = 1;
drop database d1;
drop table t1;
create table t1(c1 int, c2 int, c3 int, key(c1));
-- @separator:table
explain select * from t1 where c1=1;
drop database d1;

0 comments on commit 634912a

Please sign in to comment.