Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

util/ranger: support use like to build range for new collation columns | tidb-test=pr/2247 (#48522) #50907

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions pkg/planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2221,14 +2221,14 @@ func TestPlanCacheForIndexJoinRangeFallback(t *testing.T) {
tk.MustExec("drop table if exists t1, t2")
tk.MustExec("create table t1(a int, b varchar(10), c varchar(10), index idx_a_b(a, b))")
tk.MustExec("create table t2(d int)")
tk.MustExec("set @@tidb_opt_range_max_size=1275")
// 1275 is enough for [? a,? a], [? b,? b], [? c,? c] but is not enough for [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc].
tk.MustExec("set @@tidb_opt_range_max_size=1260")
// 1260 is enough for [? a,? a], [? b,? b], [? c,? c] but is not enough for [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc].
rows := tk.MustQuery("explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in ('a', 'b', 'c')").Rows()
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, a, b, c)]"))
tk.MustQuery("show warnings").Check(testkit.Rows())
rows = tk.MustQuery("explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in ('aaaaaa', 'bbbbbb', 'cccccc');").Rows()
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d)]"))
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 1275 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))
require.Contains(t, rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d)]")
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 1260 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))

tk.MustExec("prepare stmt1 from 'select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in (?, ?, ?)'")
tk.MustExec("set @a='a', @b='b', @c='c'")
Expand All @@ -2243,13 +2243,13 @@ func TestPlanCacheForIndexJoinRangeFallback(t *testing.T) {
tk.Session().SetSessionManager(&testkit.MockSessionManager{PS: ps})
rows = tk.MustQuery(fmt.Sprintf("explain for connection %d", tkProcess.ID)).Rows()
// We don't limit range mem usage when rebuilding index join ranges for the cached plan. So [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc] can be built.
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, aaaaaa, bbbbbb, cccccc)]"))
require.Contains(t, rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, aaaaaa, bbbbbb, cccccc)]")

// Test the plan with range fallback would not be put into cache.
tk.MustExec("prepare stmt2 from 'select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in (?, ?, ?, ?, ?)'")
tk.MustExec("set @a='a', @b='b', @c='c', @d='d', @e='e'")
tk.MustExec("execute stmt2 using @a, @b, @c, @d, @e")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows("Warning 1105 Memory capacity of 1275 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen",
tk.MustQuery("show warnings").Sort().Check(testkit.Rows("Warning 1105 Memory capacity of 1260 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen",
"Warning 1105 skip prepared plan-cache: in-list is too long"))
tk.MustExec("execute stmt2 using @a, @b, @c, @d, @e")
tk.MustQuery("select @@last_plan_from_cache").Check(testkit.Rows("0"))
Expand Down
18 changes: 13 additions & 5 deletions pkg/planner/core/testdata/index_merge_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@
"IndexMerge 0.00 root type: intersection",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is1(s1) range:[\"Abc\",\"Abc\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t5, index:is2(s2) range:(\"zzz\",+inf], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t5, index:is3(s3) range:[-inf,\"B啊a\"), keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is4(s4) range:[\"CcC\",\"CcC\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t5, index:is3(s3) range:[-inf,\"\\x0eJ\\xfb@\\xd5J\\x0e3\"), keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is4(s4) range:[\"CCC\",\"CCC\"], keep order:false, stats:pseudo",
"└─TableRowIDScan(Probe) 0.00 cop[tikv] table:t5 keep order:false, stats:pseudo"
],
"Result": [
Expand All @@ -144,7 +144,7 @@
"Plan": [
"IndexMerge 0.03 root type: intersection",
"├─IndexRangeScan(Build) 33.33 cop[tikv] table:t6, index:PRIMARY(s1, s2) range:(\"Abc\" \"zzz\",\"Abc\" +inf], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t6, index:is3(s3) range:[\"A啊a\",\"A啊a\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t6, index:is3(s3) range:[\"\\x0e3\\xfb@\\xd5J\\x0e3\",\"\\x0e3\\xfb@\\xd5J\\x0e3\"], keep order:false, stats:pseudo",
"└─Selection(Probe) 0.03 cop[tikv] gt(test.t6.s2, \"zzz\"), not(like(test.t6.s4, \"Cd_\", 92))",
" └─TableRowIDScan 0.03 cop[tikv] table:t6 keep order:false, stats:pseudo"
],
Expand Down Expand Up @@ -172,13 +172,21 @@
{
"SQL": "select /*+ use_index_merge(t8, primary,is2,is3,is4,is5) */ * from t8 where s1 like '啊A%' and s2 > 'abc' and s3 > 'cba' and s4 in ('aA', '??') and s5 = 'test,2'",
"Plan": [
<<<<<<< HEAD
"Selection 1.42 root eq(test.t8.s5, \"test,2\")",
"└─IndexMerge 0.59 root type: intersection",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is2(s2) range:(0x616263,+inf], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is3(s3) range:(0x636261,+inf], keep order:false, stats:pseudo",
=======
"Selection 0.04 root eq(test.t8.s5, \"test,2\")",
"└─IndexMerge 0.06 root type: intersection",
" ├─IndexRangeScan(Build) 250.00 cop[tikv] table:t8, index:PRIMARY(s1) range:[\"UJ\\x00A\",\"UJ\\x00B\"), keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is2(s2) range:(\"abc\",+inf], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is3(s3) range:(\"cba\",+inf], keep order:false, stats:pseudo",
>>>>>>> e053c27f068 (util/ranger: support use `like` to build range for new collation columns (#48522))
" ├─IndexRangeScan(Build) 20.00 cop[tikv] table:t8, index:is4(s4) range:[\"aA\",\"aA\"], [\"??\",\"??\"], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.59 cop[tikv] gt(test.t8.s3, \"cba\"), like(test.t8.s1, \"啊A%\", 92)",
" └─TableRowIDScan 2.22 cop[tikv] table:t8 keep order:false, stats:pseudo"
" └─Selection(Probe) 0.06 cop[tikv] gt(test.t8.s3, \"cba\"), like(test.t8.s1, \"啊A%\", 92)",
" └─TableRowIDScan 0.06 cop[tikv] table:t8 keep order:false, stats:pseudo"
],
"Result": [
"啊aabbccdd abcc cccc aA tEsT,2"
Expand Down
4 changes: 4 additions & 0 deletions pkg/util/ranger/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@ go_library(
"//pkg/util/codec",
"//pkg/util/collate",
"//pkg/util/dbterror",
<<<<<<< HEAD
"//pkg/util/mathutil",
=======
"//pkg/util/hack",
>>>>>>> e053c27f068 (util/ranger: support use `like` to build range for new collation columns (#48522))
"@com_github_pingcap_errors//:errors",
],
)
Expand Down
10 changes: 0 additions & 10 deletions pkg/util/ranger/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,16 +139,6 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction

func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isAccessCond, shouldReserve bool) {
_, collation := scalar.CharsetAndCollation()
if collate.NewCollationEnabled() && !collate.IsBinCollation(collation) {
// The algorithm constructs the range in byte-level: for example, ab% is mapped to [ab, ac] by adding 1 to the last byte.
// However, this is incorrect for non-binary collation strings because the sort key order is not the same as byte order.
// For example, "`%" is mapped to the range [`, a](where ` is 0x60 and a is 0x61).
// Because the collation utf8_general_ci is case-insensitive, a and A have the same sort key.
// Finally, the range comes to be [`, A], which is actually an empty range.
// See https://github.com/pingcap/tidb/issues/31174 for more details.
// In short, when the column type is non-binary collation string, we cannot use `like` expressions to generate the range.
return false, true
}
if !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().GetCollate(), collation) {
return false, true
}
Expand Down
31 changes: 25 additions & 6 deletions pkg/util/ranger/detacher.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ func compareCNFItemRangeResult(curResult, bestResult *cnfItemRangeResult) (curIs
// e.g., for input CNF expressions ((a,b) in ((1,1),(2,2))) and a > 1 and ((a,b,c) in ((1,1,1),(2,2,2)))
// ((a,b,c) in ((1,1,1),(2,2,2))) would be extracted.
func extractBestCNFItemRanges(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column,
lengths []int, rangeMaxSize int64) (*cnfItemRangeResult, []*valueInfo, error) {
lengths []int, rangeMaxSize int64, convertToSortKey bool) (*cnfItemRangeResult, []*valueInfo, error) {
if len(conds) < 2 {
return nil, nil, nil
}
Expand All @@ -261,7 +261,7 @@ func extractBestCNFItemRanges(sctx sessionctx.Context, conds []expression.Expres
// We build ranges for `(a,b) in ((1,1),(1,2))` and get `[1 1, 1 1] [1 2, 1 2]`, which are point ranges and we can
// append `c = 1` to the point ranges. However, if we choose to merge consecutive ranges here, we get `[1 1, 1 2]`,
// which are not point ranges, and we cannot append `c = 1` anymore.
res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize)
res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize, convertToSortKey)
if err != nil {
return nil, nil, err
}
Expand Down Expand Up @@ -376,7 +376,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
optPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan,
}
if considerDNF {
bestCNFItemRes, columnValues, err := extractBestCNFItemRanges(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize)
bestCNFItemRes, columnValues, err := extractBestCNFItemRanges(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize, d.convertToSortKey)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -627,12 +627,22 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
}
// Multiple Eq/In conditions for one column in CNF, apply intersection on them
// Lazily compute the points for the previously visited Eq/In
newTp := newFieldType(cols[offset].GetType())
collator := collate.GetCollator(cols[offset].GetType().GetCollate())
if mergedAccesses[offset] == nil {
mergedAccesses[offset] = accesses[offset]
<<<<<<< HEAD
points[offset] = rb.build(accesses[offset], collator)
}
points[offset] = rb.intersection(points[offset], rb.build(cond, collator), collator)
=======
// Note that this is a relatively special usage of build(). We will restore the points back to Expression for
// later use and may build the Expression to points again.
// We need to keep the original value here, which means we neither cut prefix nor convert to sort key.
points[offset] = rb.build(accesses[offset], newTp, types.UnspecifiedLength, false)
}
points[offset] = rb.intersection(points[offset], rb.build(cond, newTp, types.UnspecifiedLength, false), collator)
>>>>>>> e053c27f068 (util/ranger: support use `like` to build range for new collation columns (#48522))
if len(points[offset]) == 0 { // Early termination if false expression found
if expression.MaybeOverOptimized4PlanCache(sctx, conditions) {
// `a>@x and a<@y` --> `invalid-range if @x>=@y`
Expand Down Expand Up @@ -772,9 +782,14 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression
if shouldReserve {
hasResidual = true
}
<<<<<<< HEAD
points := rb.build(item, collate.GetCollator(newTpSlice[0].GetCollate()))
=======
points := rb.build(item, newTpSlice[0], d.lengths[0], d.convertToSortKey)
tmpNewTp := convertStringFTToBinaryCollate(newTpSlice[0])
>>>>>>> e053c27f068 (util/ranger: support use `like` to build range for new collation columns (#48522))
// TODO: restrict the mem usage of ranges
ranges, rangeFallback, err := points2Ranges(d.sctx, points, newTpSlice[0], d.rangeMaxSize)
ranges, rangeFallback, err := points2Ranges(d.sctx, points, tmpNewTp, d.rangeMaxSize)
if err != nil {
return nil, nil, nil, false, errors.Trace(err)
}
Expand Down Expand Up @@ -870,6 +885,7 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expre
cols: cols,
lengths: lengths,
mergeConsecutive: true,
convertToSortKey: true,
rangeMaxSize: rangeMaxSize,
}
return d.detachCondAndBuildRangeForCols()
Expand All @@ -878,13 +894,14 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expre
// detachCondAndBuildRangeWithoutMerging detaches the index filters from table filters and uses them to build ranges.
// When building ranges, it doesn't merge consecutive ranges.
func detachCondAndBuildRangeWithoutMerging(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
lengths []int, rangeMaxSize int64) (*DetachRangeResult, error) {
lengths []int, rangeMaxSize int64, convertToSortKey bool) (*DetachRangeResult, error) {
d := &rangeDetacher{
sctx: sctx,
allConds: conditions,
cols: cols,
lengths: lengths,
mergeConsecutive: false,
convertToSortKey: convertToSortKey,
rangeMaxSize: rangeMaxSize,
}
return d.detachCondAndBuildRangeForCols()
Expand All @@ -896,7 +913,7 @@ func detachCondAndBuildRangeWithoutMerging(sctx sessionctx.Context, conditions [
// The returned values are encapsulated into a struct DetachRangeResult, see its comments for explanation.
func DetachCondAndBuildRangeForPartition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
lengths []int, rangeMaxSize int64) (*DetachRangeResult, error) {
return detachCondAndBuildRangeWithoutMerging(sctx, conditions, cols, lengths, rangeMaxSize)
return detachCondAndBuildRangeWithoutMerging(sctx, conditions, cols, lengths, rangeMaxSize, false)
}

type rangeDetacher struct {
Expand All @@ -905,6 +922,7 @@ type rangeDetacher struct {
cols []*expression.Column
lengths []int
mergeConsecutive bool
convertToSortKey bool
rangeMaxSize int64
}

Expand Down Expand Up @@ -951,6 +969,7 @@ func DetachSimpleCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions [
cols: cols,
lengths: lengths,
mergeConsecutive: true,
convertToSortKey: true,
rangeMaxSize: rangeMaxSize,
}
res, err := d.detachCNFCondAndBuildRangeForIndex(conditions, newTpSlice, false)
Expand Down
Loading