Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

*: cherry-pick tidb_opt_fix_control and fix 44855 to v6.1.5 #44968

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 91 additions & 3 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,18 @@ import (
"github.com/pingcap/tidb/planner/property"
"github.com/pingcap/tidb/planner/util"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/mathutil"
"github.com/pingcap/tidb/util/plancodec"
"github.com/pingcap/tidb/util/ranger"
"github.com/pingcap/tidb/util/set"
"go.uber.org/zap"
"golang.org/x/exp/slices"
)

func (p *LogicalUnionScan) exhaustPhysicalPlans(prop *property.PhysicalProperty) ([]PhysicalPlan, bool, error) {
Expand Down Expand Up @@ -855,7 +858,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
maxOneRow = ok && (sf.FuncName.L == ast.EQ)
}
}
innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, outerJoinKeys, us, rangeInfo, false, false, avgInnerRowCnt, maxOneRow)
innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, outerJoinKeys, helper.idxOff2KeyOff, us, rangeInfo, false, false, avgInnerRowCnt, maxOneRow)
failpoint.Inject("MockOnlyEnableIndexHashJoin", func(val failpoint.Value) {
if val.(bool) {
failpoint.Return(p.constructIndexHashJoin(prop, outerIdx, innerTask, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager))
Expand All @@ -870,7 +873,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
// Because we can't keep order for union scan, if there is a union scan in inner task,
// we can't construct index merge join.
if us == nil {
innerTask2 := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, outerJoinKeys, us, rangeInfo, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt, maxOneRow)
innerTask2 := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, outerJoinKeys, helper.idxOff2KeyOff, us, rangeInfo, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt, maxOneRow)
if innerTask2 != nil {
joins = append(joins, p.constructIndexMergeJoin(prop, outerIdx, innerTask2, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...)
}
Expand Down Expand Up @@ -1015,13 +1018,63 @@ func (p *LogicalJoin) constructInnerUnionScan(us *LogicalUnionScan, reader Physi
return physicalUnionScan
}

// getColsNDVLowerBoundFromHistColl tries to get a lower bound of the NDV (number of distinct values) of the
// column combination whose unique IDs are colUIDs, using the statistics held in histColl.
//
// It returns -1 when colUIDs is empty, histColl is nil, or no positive NDV can be found; every other return
// value is positive. colUIDs is never modified: a private copy is sorted for the comparison with index columns.
func getColsNDVLowerBoundFromHistColl(colUIDs []int64, histColl *statistics.HistColl) int64 {
	if len(colUIDs) == 0 || histColl == nil {
		return -1
	}

	// 1. Try to get NDV from column stats if it's a single column.
	// Looking up a missing key (or a nil map) yields a nil entry, which is rejected below.
	if len(colUIDs) == 1 {
		if colStats := histColl.Columns[colUIDs[0]]; colStats != nil && colStats.Histogram.NDV > 0 {
			return colStats.Histogram.NDV
		}
	}

	// Sort a copy rather than the argument so the caller's slice keeps its original order.
	sortedUIDs := make([]int64, len(colUIDs))
	copy(sortedUIDs, colUIDs)
	slices.Sort(sortedUIDs)

	// 2. Try to get NDV from index stats.
	// Note that we don't need to specially handle prefix index here, because the NDV of a prefix index is
	// equal to or less than that of the corresponding normal index, and that's safe here since we want a
	// lower bound.
	// Several indexes may cover exactly the same column set. Each of their NDVs is a valid lower bound, so we
	// take the largest one; this also keeps the result independent of the map iteration order.
	idxNDV := int64(-1)
	for idxID, idxCols := range histColl.Idx2ColumnIDs {
		if len(idxCols) != len(sortedUIDs) {
			continue
		}
		orderedIdxCols := make([]int64, len(idxCols))
		copy(orderedIdxCols, idxCols)
		slices.Sort(orderedIdxCols)
		if !slices.Equal(orderedIdxCols, sortedUIDs) {
			continue
		}
		if idxStats := histColl.Indices[idxID]; idxStats != nil && idxStats.NDV > 0 {
			idxNDV = mathutil.Max(idxNDV, idxStats.NDV)
		}
	}
	if idxNDV > 0 {
		return idxNDV
	}

	// TODO: if there's an index that contains the expected columns, we can also make use of its NDV.
	// For example, NDV(a,b,c) / NDV(c) is a safe lower bound of NDV(a,b).

	// 3. If we still haven't got an NDV, we use the maximum NDV in the column stats as a lower bound.
	maxNDV := int64(-1)
	for _, uid := range sortedUIDs {
		colStats := histColl.Columns[uid]
		if colStats == nil || colStats.Histogram.NDV <= 0 {
			continue
		}
		maxNDV = mathutil.Max(maxNDV, colStats.Histogram.NDV)
	}
	return maxNDV
}

// constructInnerIndexScanTask is specially used to construct the inner plan for PhysicalIndexJoin.
func (p *LogicalJoin) constructInnerIndexScanTask(
ds *DataSource,
path *util.AccessPath,
ranges ranger.Ranges,
filterConds []expression.Expression,
outerJoinKeys []*expression.Column,
_ []*expression.Column,
idxOffset2joinKeyOffset []int,
us *LogicalUnionScan,
rangeInfo string,
keepOrder bool,
Expand Down Expand Up @@ -1107,6 +1160,35 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
}
is.initSchema(append(path.FullIdxCols, ds.commonHandleCols...), cop.tablePlan != nil)
indexConds, tblConds := ds.splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)

// Note: due to a regression in JOB workload, we use the optimizer fix control to enable this for now.
//
// Because we are estimating an average row count of the inner side corresponding to each row from the outer side,
// the estimated row count of the IndexScan should be no larger than (total row count / NDV of join key columns).
// We can calculate the lower bound of the NDV therefore we can get an upper bound of the row count here.
rowCountUpperBound := -1.0
fixValue, ok := ds.ctx.GetSessionVars().GetOptimizerFixControlValue(variable.TiDBOptFixControl44855)
if ok && variable.TiDBOptOn(fixValue) && ds.tableStats != nil {
usedColIDs := make([]int64, 0)
// We only consider columns in this index that (1) are used to probe as join key,
// and (2) are not prefix columns in the index (for which we can't easily get a lower bound).
for idxOffset, joinKeyOffset := range idxOffset2joinKeyOffset {
if joinKeyOffset < 0 ||
path.FullIdxColLens[idxOffset] != types.UnspecifiedLength ||
path.FullIdxCols[idxOffset] == nil {
continue
}
usedColIDs = append(usedColIDs, path.FullIdxCols[idxOffset].UniqueID)
}
joinKeyNDV := getColsNDVLowerBoundFromHistColl(usedColIDs, ds.tableStats.HistColl)
if joinKeyNDV > 0 {
rowCountUpperBound = ds.tableStats.RowCount / float64(joinKeyNDV)
}
}

if rowCountUpperBound > 0 {
rowCount = math.Min(rowCount, rowCountUpperBound)
}
if maxOneRow {
// Theoretically, this line is unnecessary because row count estimation of join should guarantee rowCount is not larger
// than 1.0; however, there may be rowCount larger than 1.0 in reality, e.g, pseudo statistics cases, which does not reflect
Expand All @@ -1129,6 +1211,9 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
// rowCount is computed from result row count of join, which has already accounted the filters on DataSource,
// i.e, rowCount equals to `countAfterIndex * selectivity`.
cnt := rowCount / selectivity
if rowCountUpperBound > 0 {
cnt = math.Min(cnt, rowCountUpperBound)
}
if maxOneRow {
cnt = math.Min(cnt, 1.0)
}
Expand All @@ -1142,6 +1227,9 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
selectivity = SelectionFactor
}
cnt := tmpPath.CountAfterIndex / selectivity
if rowCountUpperBound > 0 {
cnt = math.Min(cnt, rowCountUpperBound)
}
if maxOneRow {
cnt = math.Min(cnt, 1.0)
}
Expand Down
20 changes: 20 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,9 @@ type SessionVars struct {
// EnablePseudoForOutdatedStats if using pseudo for outdated stats
EnablePseudoForOutdatedStats bool

// OptimizerFixControl controls some details of the optimizer behavior through the tidb_opt_fix_control variable.
OptimizerFixControl map[uint64]string

// RegardNULLAsPoint if regard NULL as Point
RegardNULLAsPoint bool

Expand Down Expand Up @@ -1057,6 +1060,23 @@ type SessionVars struct {
EnableAnalyzeSnapshot bool
}

// Keys accepted by the optimizer fix control (the tidb_opt_fix_control variable).
var (
	// TiDBOptFixControl44855 is the fix-control key that controls whether to use a more accurate upper bound
	// when estimating the row count of an index range scan under the inner side of an index join.
	TiDBOptFixControl44855 = uint64(44855)
)

// GetOptimizerFixControlValue looks up key in s.OptimizerFixControl.
// It returns the stored value and true when the key is present, and "" and false otherwise
// (which includes the case where the map has never been set).
func (s *SessionVars) GetOptimizerFixControlValue(key uint64) (value string, exist bool) {
	// Indexing a nil map is safe in Go and yields the zero value with ok == false,
	// so an unset map needs no separate branch.
	value, exist = s.OptimizerFixControl[key]
	return value, exist
}

// InitStatementContext initializes a StatementContext, the object is reused to reduce allocation.
func (s *SessionVars) InitStatementContext() *stmtctx.StatementContext {
sc := &s.cachedStmtCtx[0]
Expand Down
28 changes: 28 additions & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -1543,6 +1543,34 @@ var defaultSysVars = []*SysVar{
s.BatchPendingTiFlashCount = b
return nil
}},
{
	Scope:           ScopeGlobal | ScopeSession,
	Name:            TiDBOptFixControl,
	Value:           "",
	Type:            TypeStr,
	IsHintUpdatable: true,
	// SetSession parses val as a comma-separated list of "<number>:<value>" items and stores the result in
	// s.OptimizerFixControl. Empty items are skipped. The number and the value are trimmed of surrounding
	// whitespace, and the number must parse as an unsigned 64-bit decimal integer. When a number appears more
	// than once, the last value wins and a warning is appended to the statement context. On any parse error
	// the previous s.OptimizerFixControl is left untouched.
	SetSession: func(s *SessionVars, val string) error {
		parsed := make(map[uint64]string)
		for _, item := range strings.Split(val, ",") {
			if item == "" {
				continue
			}
			// Split at the first colon only; anything after it belongs to the value.
			parts := strings.SplitN(item, ":", 2)
			if len(parts) < 2 {
				return errors.New("invalid fix control: colon not found")
			}
			id, err := strconv.ParseUint(strings.TrimSpace(parts[0]), 10, 64)
			if err != nil {
				return err
			}
			setting := strings.TrimSpace(parts[1])
			if previous, dup := parsed[id]; dup {
				s.StmtCtx.AppendWarning(
					errors.Errorf("found repeated fix control: %d:%s is overwritten with %s", id, previous, setting))
			}
			parsed[id] = setting
		}
		s.OptimizerFixControl = parsed
		return nil
	},
},
{Scope: ScopeGlobal | ScopeSession, Name: TiDBIgnorePreparedCacheCloseStmt, Value: BoolToOnOff(DefTiDBIgnorePreparedCacheCloseStmt), Type: TypeBool,
SetSession: func(vars *SessionVars, s string) error {
vars.IgnorePreparedCacheCloseStmt = TiDBOptOn(s)
Expand Down
3 changes: 3 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,9 @@ const (
// TiDBEnablePseudoForOutdatedStats indicates whether use pseudo for outdated stats
TiDBEnablePseudoForOutdatedStats = "tidb_enable_pseudo_for_outdated_stats"

// TiDBOptFixControl makes the user able to control some details of the optimizer behavior.
TiDBOptFixControl = "tidb_opt_fix_control"

// TiDBRegardNULLAsPoint indicates whether regard NULL as point when optimizing
TiDBRegardNULLAsPoint = "tidb_regard_null_as_point"

Expand Down
63 changes: 63 additions & 0 deletions statistics/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ import (

"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/statistics/handle"
"github.com/pingcap/tidb/testkit"
"github.com/pingcap/tidb/testkit/testdata"
"github.com/pingcap/tidb/types"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -642,3 +644,64 @@ func TestCrossValidationSelectivity(t *testing.T) {
"└─Selection 0.00 cop[tikv] gt(test.t.c, 1000)",
" └─TableRangeScan 2.00 cop[tikv] table:t range:(1 0,1 1000), keep order:false"))
}

// TestIndexJoinInnerRowCountUpperBound checks the estimated row counts that EXPLAIN reports for an index
// join over mocked statistics, first with tidb_opt_fix_control left untouched and then with it set to
// '44855:ON'.
// With the fix control enabled, the expected estimates of the probe-side Selection(Build), IndexRangeScan
// and TableRowIDScan rows drop from 500000.00 to 1000.00; every other expected row stays the same.
func TestIndexJoinInnerRowCountUpperBound(t *testing.T) {
store, dom, clean := testkit.CreateMockStoreAndDomain(t)
defer clean()
testKit := testkit.NewTestKit(t, store)
h := dom.StatsHandle()
testKit.MustExec("use test")
testKit.MustExec("drop table if exists t")
testKit.MustExec("create table t(a int, b int, index idx(b))")
// Take the pending event from the stats handle's DDL event channel and let the handle process it.
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
is := dom.InfoSchema()
tb, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
require.NoError(t, err)
tblInfo := tb.Meta()
// Mock the stats:
// The two columns are the same.
// From 0 to 499, each value has 1000 rows. Therefore, NDV is 500 and total row count is 500000.
// NOTE(review): the actual value range comes from generateIntDatum(1, 500), which is defined elsewhere —
// confirm it matches the range stated above.
mockStatsTbl := mockStatsTable(tblInfo, 500000)
colValues, err := generateIntDatum(1, 500)
require.NoError(t, err)
// Install the same mocked histogram for column IDs 1 and 2, paired with tblInfo.Columns[0] and [1].
for i := 1; i <= 2; i++ {
mockStatsTbl.Columns[int64(i)] = &statistics.Column{
Histogram: *mockStatsHistogram(int64(i), colValues, 1000, types.NewFieldType(mysql.TypeLonglong)),
Count: 500000,
Info: tblInfo.Columns[i-1],
Loaded: true,
StatsVer: 2,
}
}
generateMapsForMockStatsTbl(mockStatsTbl)
// Replace the HistColl of the stats object returned by the handle with the mocked one.
stat := h.GetTableStats(tblInfo)
stat.HistColl = mockStatsTbl.HistColl

// The same query is explained twice; the inl_join hint asks for an index join with t2 as the inner side.
query := "explain format = 'brief' " +
"select /*+ inl_join(t2) */ * from (select * from t where t.a < 1) as t1 join t t2 where t2.a = 0 and t1.a = t2.b"

// Expected plan before tidb_opt_fix_control is set in this session.
testKit.MustQuery(query).Check(testkit.Rows(
"IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)",
"├─TableReader(Build) 1000.00 root data:Selection",
"│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))",
"│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 1000.00 root ",
" ├─Selection(Build) 500000.00 cop[tikv] not(isnull(test.t.b))",
" │ └─IndexRangeScan 500000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 1000.00 cop[tikv] eq(test.t.a, 0)",
" └─TableRowIDScan 500000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
))

// Expected plan after enabling fix control 44855: the probe-side scan estimates are 1000.00.
testKit.MustExec("set @@tidb_opt_fix_control = '44855:ON'")
testKit.MustQuery(query).Check(testkit.Rows(
"IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)",
"├─TableReader(Build) 1000.00 root data:Selection",
"│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))",
"│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 1000.00 root ",
" ├─Selection(Build) 1000.00 cop[tikv] not(isnull(test.t.b))",
" │ └─IndexRangeScan 1000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 1000.00 cop[tikv] eq(test.t.a, 0)",
" └─TableRowIDScan 1000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
))
}