Skip to content

Commit

Permalink
planner, sessionctx: reintroduce #41996 through optimizer fix control (
Browse files Browse the repository at this point in the history
  • Loading branch information
time-and-fate authored Jun 26, 2023
1 parent 90c05ae commit bc80cf9
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 23 deletions.
84 changes: 74 additions & 10 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ import (
"github.com/pingcap/tidb/planner/property"
"github.com/pingcap/tidb/planner/util"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/mathutil"
"github.com/pingcap/tidb/util/plancodec"
"github.com/pingcap/tidb/util/ranger"
"github.com/pingcap/tidb/util/set"
Expand Down Expand Up @@ -949,7 +951,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
maxOneRow = ok && (sf.FuncName.L == ast.EQ)
}
}
innerTask := p.constructInnerIndexScanTask(wrapper, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, innerJoinKeys, rangeInfo, false, false, avgInnerRowCnt, maxOneRow)
innerTask := p.constructInnerIndexScanTask(wrapper, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, innerJoinKeys, helper.idxOff2KeyOff, rangeInfo, false, false, avgInnerRowCnt, maxOneRow)
failpoint.Inject("MockOnlyEnableIndexHashJoin", func(val failpoint.Value) {
if val.(bool) && !p.ctx.GetSessionVars().InRestrictedSQL {
failpoint.Return(p.constructIndexHashJoin(prop, outerIdx, innerTask, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager))
Expand All @@ -964,7 +966,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
// Because we can't keep order for union scan, if there is a union scan in inner task,
// we can't construct index merge join.
if us == nil {
innerTask2 := p.constructInnerIndexScanTask(wrapper, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, innerJoinKeys, rangeInfo, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt, maxOneRow)
innerTask2 := p.constructInnerIndexScanTask(wrapper, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, innerJoinKeys, helper.idxOff2KeyOff, rangeInfo, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt, maxOneRow)
if innerTask2 != nil {
joins = append(joins, p.constructIndexMergeJoin(prop, outerIdx, innerTask2, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...)
}
Expand Down Expand Up @@ -1152,13 +1154,63 @@ func (*LogicalJoin) constructInnerUnionScan(us *LogicalUnionScan, reader Physica
return physicalUnionScan
}

// getColsNDVLowerBoundFromHistColl tries to get a lower bound of the NDV of columns (whose uniqueIDs are colUIDs).
func getColsNDVLowerBoundFromHistColl(colUIDs []int64, histColl *statistics.HistColl) int64 {
if len(colUIDs) == 0 || histColl == nil {
return -1
}

// 1. Try to get NDV from column stats if it's a single column.
if len(colUIDs) == 1 && histColl.Columns != nil {
uid := colUIDs[0]
if colStats, ok := histColl.Columns[uid]; ok && colStats != nil && colStats.IsStatsInitialized() {
return colStats.NDV
}
}

slices.Sort(colUIDs)

// 2. Try to get NDV from index stats.
// Note that we don't need to specially handle prefix index here, because the NDV of a prefix index is
// equal or less than the corresponding normal index, and that's safe here since we want a lower bound.
for idxID, idxCols := range histColl.Idx2ColumnIDs {
if len(idxCols) != len(colUIDs) {
continue
}
orderedIdxCols := make([]int64, len(idxCols))
copy(orderedIdxCols, idxCols)
slices.Sort(orderedIdxCols)
if !slices.Equal(orderedIdxCols, colUIDs) {
continue
}
if idxStats, ok := histColl.Indices[idxID]; ok && idxStats != nil && idxStats.IsStatsInitialized() {
return idxStats.NDV
}
}

// TODO: if there's an index that contains the expected columns, we can also make use of its NDV.
// For example, NDV(a,b,c) / NDV(c) is a safe lower bound of NDV(a,b).

// 3. If we still haven't got an NDV, we use the maximum NDV in the column stats as a lower bound.
maxNDV := int64(-1)
for _, uid := range colUIDs {
colStats := histColl.Columns[uid]
if colStats == nil || !colStats.IsStatsInitialized() {
continue
}
maxNDV = mathutil.Max(maxNDV, colStats.NDV)
}
return maxNDV
}

// constructInnerIndexScanTask is specially used to construct the inner plan for PhysicalIndexJoin.
func (p *LogicalJoin) constructInnerIndexScanTask(
wrapper *indexJoinInnerChildWrapper,
path *util.AccessPath,
ranges ranger.Ranges,
filterConds []expression.Expression,
_ []*expression.Column,
idxOffset2joinKeyOffset []int,
rangeInfo string,
keepOrder bool,
desc bool,
Expand Down Expand Up @@ -1250,18 +1302,30 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
is.initSchema(append(path.FullIdxCols, ds.commonHandleCols...), cop.tablePlan != nil)
indexConds, tblConds := ds.splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens)

// Note: due to a regression in JOB workload, we need to revert the logic below for now.
// Note: due to a regression in JOB workload, we use the optimizer fix control to enable this for now.
//
// Because we are estimating an average row count of the inner side corresponding to each row from the outer side,
// the estimated row count of the IndexScan should be no larger than (total row count / NDV of join key columns).
// We use it as an upper bound here.
// We can calculate the lower bound of the NDV therefore we can get an upper bound of the row count here.
rowCountUpperBound := -1.0
//if ds.tableStats != nil {
// joinKeyNDV := getColsNDVLowerBoundFromHistColl(innerJoinKeys, ds.tableStats.HistColl)
// if joinKeyNDV > 0 {
// rowCountUpperBound = ds.tableStats.RowCount / float64(joinKeyNDV)
// }
//}
fixValue, ok := ds.ctx.GetSessionVars().GetOptimizerFixControlValue(variable.TiDBOptFixControl44855)
if ok && variable.TiDBOptOn(fixValue) && ds.tableStats != nil {
usedColIDs := make([]int64, 0)
// We only consider columns in this index that (1) are used to probe as join key,
// and (2) are not prefix column in the index (for which we can't easily get a lower bound)
for idxOffset, joinKeyOffset := range idxOffset2joinKeyOffset {
if joinKeyOffset < 0 ||
path.FullIdxColLens[idxOffset] != types.UnspecifiedLength ||
path.FullIdxCols[idxOffset] == nil {
continue
}
usedColIDs = append(usedColIDs, path.FullIdxCols[idxOffset].UniqueID)
}
joinKeyNDV := getColsNDVLowerBoundFromHistColl(usedColIDs, ds.tableStats.HistColl)
if joinKeyNDV > 0 {
rowCountUpperBound = ds.tableStats.RowCount / float64(joinKeyNDV)
}
}

if rowCountUpperBound > 0 {
rowCount = math.Min(rowCount, rowCountUpperBound)
Expand Down
3 changes: 3 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -1527,6 +1527,9 @@ var (
TiDBOptFixControl44830 uint64 = 44830
// TiDBOptFixControl44823 controls the maximum number of parameters for a query that can be cached in the Plan Cache.
TiDBOptFixControl44823 uint64 = 44823
// TiDBOptFixControl44855 controls whether to use a more accurate upper bound when estimating row count of index
// range scan under inner side of index join.
TiDBOptFixControl44855 uint64 = 44855
)

// GetOptimizerFixControlValue returns the specified value of the optimizer fix control.
Expand Down
40 changes: 27 additions & 13 deletions statistics/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -777,19 +777,33 @@ func TestIndexJoinInnerRowCountUpperBound(t *testing.T) {
stat := h.GetTableStats(tblInfo)
stat.HistColl = mockStatsTbl.HistColl

testKit.MustQuery("explain format = 'brief' " +
"select /*+ inl_join(t2) */ * from (select * from t where t.a < 1) as t1 join t t2 where t2.a = 0 and t1.a = t2.b").
Check(testkit.Rows(
"IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)",
"├─TableReader(Build) 1000.00 root data:Selection",
"│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))",
"│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 1000000.00 root ",
" ├─Selection(Build) 500000000.00 cop[tikv] not(isnull(test.t.b))",
" │ └─IndexRangeScan 500000000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 1000000.00 cop[tikv] eq(test.t.a, 0)",
" └─TableRowIDScan 500000000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
))
query := "explain format = 'brief' " +
"select /*+ inl_join(t2) */ * from (select * from t where t.a < 1) as t1 join t t2 where t2.a = 0 and t1.a = t2.b"

testKit.MustQuery(query).Check(testkit.Rows(
"IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)",
"├─TableReader(Build) 1000.00 root data:Selection",
"│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))",
"│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 1000000.00 root ",
" ├─Selection(Build) 500000000.00 cop[tikv] not(isnull(test.t.b))",
" │ └─IndexRangeScan 500000000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 1000000.00 cop[tikv] eq(test.t.a, 0)",
" └─TableRowIDScan 500000000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
))

testKit.MustExec("set @@tidb_opt_fix_control = '44855:ON'")
testKit.MustQuery(query).Check(testkit.Rows(
"IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)",
"├─TableReader(Build) 1000.00 root data:Selection",
"│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))",
"│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 1000000.00 root ",
" ├─Selection(Build) 1000000.00 cop[tikv] not(isnull(test.t.b))",
" │ └─IndexRangeScan 1000000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 1000000.00 cop[tikv] eq(test.t.a, 0)",
" └─TableRowIDScan 1000000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
))
}

func TestOrderingIdxSelectivityThreshold(t *testing.T) {
Expand Down

0 comments on commit bc80cf9

Please sign in to comment.