Skip to content

Commit

Permalink
This is an automated cherry-pick of pingcap#44865
Browse files Browse the repository at this point in the history
Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io>
  • Loading branch information
time-and-fate authored and ti-chi-bot committed Jun 26, 2023
1 parent 38ef1f9 commit f60f5bd
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 23 deletions.
75 changes: 65 additions & 10 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ import (
"github.com/pingcap/tidb/planner/property"
"github.com/pingcap/tidb/planner/util"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/mathutil"
"github.com/pingcap/tidb/util/plancodec"
"github.com/pingcap/tidb/util/ranger"
"github.com/pingcap/tidb/util/set"
Expand Down Expand Up @@ -947,7 +949,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
maxOneRow = ok && (sf.FuncName.L == ast.EQ)
}
}
innerTask := p.constructInnerIndexScanTask(wrapper, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, innerJoinKeys, rangeInfo, false, false, avgInnerRowCnt, maxOneRow)
innerTask := p.constructInnerIndexScanTask(wrapper, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, innerJoinKeys, helper.idxOff2KeyOff, rangeInfo, false, false, avgInnerRowCnt, maxOneRow)
failpoint.Inject("MockOnlyEnableIndexHashJoin", func(val failpoint.Value) {
if val.(bool) && !p.ctx.GetSessionVars().InRestrictedSQL {
failpoint.Return(p.constructIndexHashJoin(prop, outerIdx, innerTask, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager))
Expand All @@ -962,7 +964,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
// Because we can't keep order for union scan, if there is a union scan in inner task,
// we can't construct index merge join.
if us == nil {
innerTask2 := p.constructInnerIndexScanTask(wrapper, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, innerJoinKeys, rangeInfo, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt, maxOneRow)
innerTask2 := p.constructInnerIndexScanTask(wrapper, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, innerJoinKeys, helper.idxOff2KeyOff, rangeInfo, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt, maxOneRow)
if innerTask2 != nil {
joins = append(joins, p.constructIndexMergeJoin(prop, outerIdx, innerTask2, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...)
}
Expand Down Expand Up @@ -1150,6 +1152,7 @@ func (p *LogicalJoin) constructInnerUnionScan(us *LogicalUnionScan, reader Physi
return physicalUnionScan
}

<<<<<<< HEAD
func getColsNDVLowerBoundFromHistColl(cols []*expression.Column, histColl *statistics.HistColl) int64 {
if len(cols) == 0 || histColl == nil {
return -1
Expand All @@ -1161,21 +1164,39 @@ func getColsNDVLowerBoundFromHistColl(cols []*expression.Column, histColl *stati

// Note that we don't need to specially handle prefix index in this function, because the NDV of a prefix index is
// equal or less than the corresponding normal index, and that's safe here since we want a lower bound.
=======
// getColsNDVLowerBoundFromHistColl tries to get a lower bound of the NDV of columns (whose uniqueIDs are colUIDs).
func getColsNDVLowerBoundFromHistColl(colUIDs []int64, histColl *statistics.HistColl) int64 {
if len(colUIDs) == 0 || histColl == nil {
return -1
}
>>>>>>> bc80cf9024d (planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865))

// 1. Try to get NDV from column stats if it's a single column.
if len(colUIDs) == 1 && histColl.Columns != nil {
uid := colUIDs[0]
<<<<<<< HEAD
if colStats, ok := histColl.Columns[uid]; ok && colStats != nil {
=======
if colStats, ok := histColl.Columns[uid]; ok && colStats != nil && colStats.IsStatsInitialized() {
>>>>>>> bc80cf9024d (planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865))
return colStats.NDV
}
}

slices.Sort(colUIDs)
<<<<<<< HEAD
if histColl.Indices == nil || histColl.Idx2ColumnIDs == nil {
return -1
}

// 2. Try to get NDV from index stats.
=======

// 2. Try to get NDV from index stats.
// Note that we don't need to specially handle prefix index here, because the NDV of a prefix index is
// equal or less than the corresponding normal index, and that's safe here since we want a lower bound.
>>>>>>> bc80cf9024d (planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865))
for idxID, idxCols := range histColl.Idx2ColumnIDs {
if len(idxCols) != len(colUIDs) {
continue
Expand All @@ -1186,14 +1207,19 @@ func getColsNDVLowerBoundFromHistColl(cols []*expression.Column, histColl *stati
if !slices.Equal(orderedIdxCols, colUIDs) {
continue
}
<<<<<<< HEAD
if idxStats, ok := histColl.Indices[idxID]; ok && idxStats != nil {
=======
if idxStats, ok := histColl.Indices[idxID]; ok && idxStats != nil && idxStats.IsStatsInitialized() {
>>>>>>> bc80cf9024d (planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865))
return idxStats.NDV
}
}

// TODO: if there's an index that contains the expected columns, we can also make use of its NDV.
// For example, NDV(a,b,c) / NDV(c) is a safe lower bound of NDV(a,b).

<<<<<<< HEAD
// 3. If we still haven't got an NDV, we use the minimal NDV in the column stats as a lower bound.
// This would happen when len(cols) > 1 and no proper index stats are available.
minNDV := int64(-1)
Expand All @@ -1211,6 +1237,18 @@ func getColsNDVLowerBoundFromHistColl(cols []*expression.Column, histColl *stati
}
}
return minNDV
=======
// 3. If we still haven't got an NDV, we use the maximum NDV in the column stats as a lower bound.
maxNDV := int64(-1)
for _, uid := range colUIDs {
colStats := histColl.Columns[uid]
if colStats == nil || !colStats.IsStatsInitialized() {
continue
}
maxNDV = mathutil.Max(maxNDV, colStats.NDV)
}
return maxNDV
>>>>>>> bc80cf9024d (planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865))
}

// constructInnerIndexScanTask is specially used to construct the inner plan for PhysicalIndexJoin.
Expand All @@ -1219,7 +1257,12 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
path *util.AccessPath,
ranges ranger.Ranges,
filterConds []expression.Expression,
<<<<<<< HEAD
innerJoinKeys []*expression.Column,
=======
_ []*expression.Column,
idxOffset2joinKeyOffset []int,
>>>>>>> bc80cf9024d (planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865))
rangeInfo string,
keepOrder bool,
desc bool,
Expand Down Expand Up @@ -1311,18 +1354,30 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
is.initSchema(append(path.FullIdxCols, ds.commonHandleCols...), cop.tablePlan != nil)
indexConds, tblConds := ds.splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens)

// Note: due to a regression in JOB workload, we need to revert the logic below for now.
// Note: due to a regression in JOB workload, we use the optimizer fix control to enable this for now.
//
// Because we are estimating an average row count of the inner side corresponding to each row from the outer side,
// the estimated row count of the IndexScan should be no larger than (total row count / NDV of join key columns).
// We use it as an upper bound here.
// We can calculate the lower bound of the NDV therefore we can get an upper bound of the row count here.
rowCountUpperBound := -1.0
//if ds.tableStats != nil {
// joinKeyNDV := getColsNDVLowerBoundFromHistColl(innerJoinKeys, ds.tableStats.HistColl)
// if joinKeyNDV > 0 {
// rowCountUpperBound = ds.tableStats.RowCount / float64(joinKeyNDV)
// }
//}
fixValue, ok := ds.ctx.GetSessionVars().GetOptimizerFixControlValue(variable.TiDBOptFixControl44855)
if ok && variable.TiDBOptOn(fixValue) && ds.tableStats != nil {
usedColIDs := make([]int64, 0)
// We only consider columns in this index that (1) are used to probe as join key,
// and (2) are not prefix column in the index (for which we can't easily get a lower bound)
for idxOffset, joinKeyOffset := range idxOffset2joinKeyOffset {
if joinKeyOffset < 0 ||
path.FullIdxColLens[idxOffset] != types.UnspecifiedLength ||
path.FullIdxCols[idxOffset] == nil {
continue
}
usedColIDs = append(usedColIDs, path.FullIdxCols[idxOffset].UniqueID)
}
joinKeyNDV := getColsNDVLowerBoundFromHistColl(usedColIDs, ds.tableStats.HistColl)
if joinKeyNDV > 0 {
rowCountUpperBound = ds.tableStats.RowCount / float64(joinKeyNDV)
}
}

if rowCountUpperBound > 0 {
rowCount = math.Min(rowCount, rowCountUpperBound)
Expand Down
32 changes: 32 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -1481,6 +1481,38 @@ type SessionVars struct {
// Whether to lock duplicate keys in INSERT IGNORE and REPLACE statements,
// or unchanged unique keys in UPDATE statements, see PR #42210 and #42713
LockUnchangedKeys bool
<<<<<<< HEAD
=======

// AnalyzeSkipColumnTypes indicates the column types whose statistics would not be collected when executing the ANALYZE command.
AnalyzeSkipColumnTypes map[string]struct{}
}

var (
// The variables below are keys for the optimizer fix control. Each one is the
// uint64 key under which a fix's setting is looked up in the session variables.

// TiDBOptFixControl44262 controls whether dynamic-mode access to partitioned tables is allowed
// when global-stats are missing (#44262).
TiDBOptFixControl44262 uint64 = 44262
// TiDBOptFixControl44389 controls whether non-point ranges of some CNF items are considered when building ranges.
TiDBOptFixControl44389 uint64 = 44389
// TiDBOptFixControl44830 controls whether Batch/PointGet plans from some complex scenarios may be cached.
// See #44830 for more details.
TiDBOptFixControl44830 uint64 = 44830
// TiDBOptFixControl44823 controls the maximum number of parameters for a query that can be cached in the Plan Cache.
TiDBOptFixControl44823 uint64 = 44823
// TiDBOptFixControl44855 controls whether to use a more accurate upper bound when estimating the row count of
// an index range scan on the inner side of an index join.
TiDBOptFixControl44855 uint64 = 44855
)

// GetOptimizerFixControlValue returns the specified value of the optimizer fix control.
//
// It looks key up in s.OptimizerFixControl. When the key is present, value is the
// stored string and exist is true. When s.OptimizerFixControl is nil or the key is
// absent, it returns ("", false).
func (s *SessionVars) GetOptimizerFixControlValue(key uint64) (value string, exist bool) {
// Nothing to look up when the fix control collection has not been set.
if s.OptimizerFixControl == nil {
return "", false
}
// The two-value lookup reports presence separately from the stored value.
value, exist = s.OptimizerFixControl[key]
return
>>>>>>> bc80cf9024d (planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865))
}

// planReplayerSessionFinishedTaskKeyLen is used to control the max size for the finished plan replayer task key in session
Expand Down
40 changes: 27 additions & 13 deletions statistics/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -781,19 +781,33 @@ func TestIndexJoinInnerRowCountUpperBound(t *testing.T) {
stat := h.GetTableStats(tblInfo)
stat.HistColl = mockStatsTbl.HistColl

testKit.MustQuery("explain format = 'brief' " +
"select /*+ inl_join(t2) */ * from (select * from t where t.a < 1) as t1 join t t2 where t2.a = 0 and t1.a = t2.b").
Check(testkit.Rows(
"IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)",
"├─TableReader(Build) 1000.00 root data:Selection",
"│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))",
"│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 1000000.00 root ",
" ├─Selection(Build) 500000000.00 cop[tikv] not(isnull(test.t.b))",
" │ └─IndexRangeScan 500000000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 1000000.00 cop[tikv] eq(test.t.a, 0)",
" └─TableRowIDScan 500000000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
))
query := "explain format = 'brief' " +
"select /*+ inl_join(t2) */ * from (select * from t where t.a < 1) as t1 join t t2 where t2.a = 0 and t1.a = t2.b"

testKit.MustQuery(query).Check(testkit.Rows(
"IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)",
"├─TableReader(Build) 1000.00 root data:Selection",
"│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))",
"│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 1000000.00 root ",
" ├─Selection(Build) 500000000.00 cop[tikv] not(isnull(test.t.b))",
" │ └─IndexRangeScan 500000000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 1000000.00 cop[tikv] eq(test.t.a, 0)",
" └─TableRowIDScan 500000000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
))

testKit.MustExec("set @@tidb_opt_fix_control = '44855:ON'")
testKit.MustQuery(query).Check(testkit.Rows(
"IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)",
"├─TableReader(Build) 1000.00 root data:Selection",
"│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))",
"│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 1000000.00 root ",
" ├─Selection(Build) 1000000.00 cop[tikv] not(isnull(test.t.b))",
" │ └─IndexRangeScan 1000000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 1000000.00 cop[tikv] eq(test.t.a, 0)",
" └─TableRowIDScan 1000000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
))
}

func TestOrderingIdxSelectivityThreshold(t *testing.T) {
Expand Down

0 comments on commit f60f5bd

Please sign in to comment.