Skip to content

Commit

Permalink
planner: fix index range intersection for in-list and other predicates (
Browse files Browse the repository at this point in the history
#58029)

close #57694
  • Loading branch information
ghazalfamilyusa authored Dec 10, 2024
1 parent 7e5096e commit 2a1f646
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 0 deletions.
2 changes: 2 additions & 0 deletions pkg/planner/core/casetest/index/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ func TestRangeIntersection(t *testing.T) {
tk.MustExec("use test")
tk.MustExec(`set @@tidb_opt_fix_control = "54337:ON"`)
tk.MustExec("create table t1 (a1 int, b1 int, c1 int, key pkx (a1,b1));")

tk.MustExec("create table t_inlist_test(a1 int,b1 int,c1 varbinary(767) DEFAULT NULL, KEY twoColIndex (a1,b1));")
tk.MustExec("insert into t1 values (1,1,1);")
tk.MustExec("insert into t1 values (null,1,1);")
tk.MustExec("insert into t1 values (1,null,1);")
Expand Down
4 changes: 4 additions & 0 deletions pkg/planner/core/casetest/index/testdata/index_range_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@
"select /*+ USE_INDEX(tablename,PKK) */ count(*) from tablename where (primary_key,secondary_key,timestamp) >= ('1primary_key_start','3secondary_key_start','1707885658544000000') and (primary_key,secondary_key,timestamp) <= ('2primary_key_end','4secondary_key_end','2707885658544000000');",
"SELECT /*+ USE_INDEX(t,PK) */ a FROM tnull WHERE a IN (42) OR (a IS TRUE AND a IS NULL);",
"SELECT id7 FROM tkey_string WHERE id7 > 'large' AND id7 < 'x-small';",
// IN list and range intersection
"SELECT 1 FROM t_inlist_test FORCE INDEX (twoColIndex) WHERE a1 IN (44, 70, 76) AND (a1 > 70 OR (a1 = 70 AND b1 > 41));",
"SELECT 1 FROM t_inlist_test FORCE INDEX (twoColIndex) WHERE a1 IN (44,45) AND (a1 > 70 OR (a1 = 70 AND b1 > 41));",
"SELECT 1 FROM t_inlist_test FORCE INDEX (twoColIndex) WHERE a1 IN (70, 73, 76) AND (a1 > 70 OR (a1 = 70 AND b1 > 41));",
// Empty intersections.
"select count(*) from t1 where (a1, b1) > (1, 10) and (a1, b1) < (0, 20)",
"select count(*) from t1 where (a1, b1) > (1, 10) and (a1, b1) < (2, 20) and b1 <5",
Expand Down
26 changes: 26 additions & 0 deletions pkg/planner/core/casetest/index/testdata/index_range_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,32 @@
"x-large"
]
},
{
"SQL": "SELECT 1 FROM t_inlist_test FORCE INDEX (twoColIndex) WHERE a1 IN (44, 70, 76) AND (a1 > 70 OR (a1 = 70 AND b1 > 41));",
"Plan": [
"Projection 43.33 root 1->Column#5",
"└─IndexReader 54.17 root index:IndexRangeScan",
" └─IndexRangeScan 54.17 cop[tikv] table:t_inlist_test, index:twoColIndex(a1, b1) range:(70 41,70 +inf], [76,76], keep order:false, stats:pseudo"
],
"Result": null
},
{
"SQL": "SELECT 1 FROM t_inlist_test FORCE INDEX (twoColIndex) WHERE a1 IN (44,45) AND (a1 > 70 OR (a1 = 70 AND b1 > 41));",
"Plan": [
"Projection 6.71 root 1->Column#5",
"└─TableDual 6.71 root rows:0"
],
"Result": null
},
{
"SQL": "SELECT 1 FROM t_inlist_test FORCE INDEX (twoColIndex) WHERE a1 IN (70, 73, 76) AND (a1 > 70 OR (a1 = 70 AND b1 > 41));",
"Plan": [
"Projection 53.33 root 1->Column#5",
"└─IndexReader 53.33 root index:IndexRangeScan",
" └─IndexRangeScan 53.33 cop[tikv] table:t_inlist_test, index:twoColIndex(a1, b1) range:(70 41,70 +inf], [73,73], [76,76], keep order:false, stats:pseudo"
],
"Result": null
},
{
"SQL": "select count(*) from t1 where (a1, b1) > (1, 10) and (a1, b1) < (0, 20)",
"Plan": [
Expand Down
28 changes: 28 additions & 0 deletions pkg/util/ranger/detacher.go
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,30 @@ func unionColumnValues(lhs, rhs []*valueInfo) []*valueInfo {
return lhs
}

// Check which detach result is more selective. This function is called to choose between point ranges and the best CNF ranges.
// This is needed because sometimes the best CNF has full intersection and is more selective,
// and other times it is not when the intersection is not applied.
func chooseBetweenRangeAndPoint(sctx *rangerctx.RangerContext, r1 *DetachRangeResult, r2 *cnfItemRangeResult) {
if fixcontrol.GetBoolWithDefault(sctx.OptimizerFixControl, fixcontrol.Fix54337, false) {
if r1 != nil && len(r1.Ranges) > 0 && r2 != nil && r2.rangeResult != nil {
r1Minusr2 := removeConditions(sctx.ExprCtx.GetEvalCtx(), r1.AccessConds, r2.rangeResult.AccessConds)
r2Minusr1 := removeConditions(sctx.ExprCtx.GetEvalCtx(), r2.rangeResult.AccessConds, r1.AccessConds)
// r2 is considered more selective (and more useful) than r1 if its AccessConds are a superset of r1's AccessConds.
// This means that r1.AccessConds minus r2.AccessConds should result in an empty set.
// The function `removeConditions` is used to perform this subtraction.
// For example, if A = {t1.a1 IN (44, 70, 76)} and B = {t1.a1 IN (44, 70, 76), (t1.a1 > 70 OR (t1.a1 = 70 AND t1.b1 > 41))},
// then A-B is empty and therefore B is a superset of A.
// Avoid the case when both r1 and r2 have the same AccessConds (r2Minusr1 is not empty).
if len(r1Minusr2) == 0 && len(r2Minusr1) > 0 {
// Update final result and just update: Ranges, AccessConds and RemainedConds
r1.RemainedConds = removeConditions(sctx.ExprCtx.GetEvalCtx(), r1.RemainedConds, r2.rangeResult.AccessConds)
r1.Ranges = r2.rangeResult.Ranges
r1.AccessConds = r2.rangeResult.AccessConds
}
}
}
}

// detachCNFCondAndBuildRangeForIndex will detach the index filters from table filters. These conditions are connected with `and`
// It will first find the point query column and then extract the range query column.
// considerDNF is true means it will try to extract access conditions from the DNF expressions.
Expand Down Expand Up @@ -510,6 +534,10 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
return res, nil
}
res.RemainedConds = append(res.RemainedConds, tailRes.RemainedConds...)
// Check if `bestCNFItemRes` is more selective than the ranges derived from the IN list.
// This can occur if `bestCNFItemRes` represents the intersection of the IN list values
// and additional conditions, resulting in a more restrictive filter.
chooseBetweenRangeAndPoint(d.sctx, res, bestCNFItemRes)
return res, nil
}
// `eqOrInCount` must be 0 when coming here.
Expand Down

0 comments on commit 2a1f646

Please sign in to comment.