Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

*: support use like to build range for new collation columns #51164

Merged
merged 11 commits into from
Feb 20, 2024
6 changes: 4 additions & 2 deletions pkg/executor/point_get.go
Original file line number Diff line number Diff line change
Expand Up @@ -540,11 +540,13 @@ func EncodeUniqueIndexValuesForKey(ctx sessionctx.Context, tblInfo *model.TableI
colInfo := tblInfo.Columns[idxInfo.Columns[i].Offset]
// table.CastValue will append 0x0 if the string value's length is smaller than the BINARY column's length.
// So we don't use CastValue for string value for now.
// TODO: merge two if branch.
// TODO: The first if branch should have been removed, because the functionality of setting the collation of the datum
// has been moved to util/ranger (normal path) and getNameValuePairs/getPointGetValue (fast path). But this change
// will be cherry-picked to a hotfix, so we choose to be a bit conservative and keep this for now.
if colInfo.GetType() == mysql.TypeString || colInfo.GetType() == mysql.TypeVarString || colInfo.GetType() == mysql.TypeVarchar {
var str string
str, err = idxVals[i].ToString()
idxVals[i].SetString(str, colInfo.FieldType.GetCollate())
idxVals[i].SetString(str, idxVals[i].Collation())
} else if colInfo.GetType() == mysql.TypeEnum && (idxVals[i].Kind() == types.KindString || idxVals[i].Kind() == types.KindBytes || idxVals[i].Kind() == types.KindBinaryLiteral) {
var str string
var e types.Enum
Expand Down
2 changes: 1 addition & 1 deletion pkg/planner/core/casetest/index/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ func TestIndexMergeSingleCaseCouldFeelIndexMergeHint(t *testing.T) {
tk.MustQuery("explain format=\"brief\" SELECT /*+ use_index_merge(t, nslc) */ * FROM t WHERE 57260686 member of (fpi) AND \"OC8p1763XTkt.org/s/link\" member of (nslc) LIMIT 1;").Check(
testkit.Rows("Limit 1.00 root offset:0, count:1",
"└─IndexMerge 1.00 root type: union",
" ├─IndexRangeScan(Build) 1.00 cop[tikv] table:t, index:nslc(cast(`nslc` as char(1000) array), point_of_sale_country) range:[0x4F4338703137363358546B742E6F72672F732F6C696E6B,0x4F4338703137363358546B742E6F72672F732F6C696E6B], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 1.00 cop[tikv] table:t, index:nslc(cast(`nslc` as char(1000) array), point_of_sale_country) range:[\"OC8p1763XTkt.org/s/link\",\"OC8p1763XTkt.org/s/link\"], keep order:false, stats:pseudo",
" └─Limit(Probe) 1.00 cop[tikv] offset:0, count:1",
" └─Selection 1.00 cop[tikv] json_memberof(cast(57260686, json BINARY), test.t.fpi)",
" └─TableRowIDScan 1.00 cop[tikv] table:t keep order:false, stats:pseudo"))
Expand Down
12 changes: 6 additions & 6 deletions pkg/planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2230,14 +2230,14 @@ func TestPlanCacheForIndexJoinRangeFallback(t *testing.T) {
tk.MustExec("drop table if exists t1, t2")
tk.MustExec("create table t1(a int, b varchar(10), c varchar(10), index idx_a_b(a, b))")
tk.MustExec("create table t2(d int)")
tk.MustExec("set @@tidb_opt_range_max_size=1275")
// 1275 is enough for [? a,? a], [? b,? b], [? c,? c] but is not enough for [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc].
tk.MustExec("set @@tidb_opt_range_max_size=1260")
// 1260 is enough for [? a,? a], [? b,? b], [? c,? c] but is not enough for [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc].
rows := tk.MustQuery("explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in ('a', 'b', 'c')").Rows()
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, a, b, c)]"))
tk.MustQuery("show warnings").Check(testkit.Rows())
rows = tk.MustQuery("explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in ('aaaaaa', 'bbbbbb', 'cccccc');").Rows()
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d)]"))
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 1275 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))
require.Contains(t, rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d)]")
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 1260 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))

tk.MustExec("prepare stmt1 from 'select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in (?, ?, ?)'")
tk.MustExec("set @a='a', @b='b', @c='c'")
Expand All @@ -2252,13 +2252,13 @@ func TestPlanCacheForIndexJoinRangeFallback(t *testing.T) {
tk.Session().SetSessionManager(&testkit.MockSessionManager{PS: ps})
rows = tk.MustQuery(fmt.Sprintf("explain for connection %d", tkProcess.ID)).Rows()
// We don't limit range mem usage when rebuilding index join ranges for the cached plan. So [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc] can be built.
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, aaaaaa, bbbbbb, cccccc)]"))
require.Contains(t, rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, aaaaaa, bbbbbb, cccccc)]")

// Test the plan with range fallback would not be put into cache.
tk.MustExec("prepare stmt2 from 'select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in (?, ?, ?, ?, ?)'")
tk.MustExec("set @a='a', @b='b', @c='c', @d='d', @e='e'")
tk.MustExec("execute stmt2 using @a, @b, @c, @d, @e")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows("Warning 1105 Memory capacity of 1275 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen",
tk.MustQuery("show warnings").Sort().Check(testkit.Rows("Warning 1105 Memory capacity of 1260 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen",
"Warning 1105 skip prepared plan-cache: in-list is too long"))
tk.MustExec("execute stmt2 using @a, @b, @c, @d, @e")
tk.MustQuery("select @@last_plan_from_cache").Check(testkit.Rows("0"))
Expand Down
15 changes: 14 additions & 1 deletion pkg/planner/core/point_get_plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -1438,7 +1438,15 @@ func getNameValuePairs(ctx sessionctx.Context, tbl *model.TableInfo, tblName mod
col := model.FindColumnInfo(tbl.Cols(), colName.Name.Name.L)
if col == nil { // Handling the case when the column is _tidb_rowid.
return append(nvPairs, nameValuePair{colName: colName.Name.Name.L, colFieldType: types.NewFieldType(mysql.TypeLonglong), value: d, con: con}), false
} else if col.GetType() == mysql.TypeString && col.GetCollate() == charset.CollationBin { // This type we needn't to pad `\0` in here.
}

// As in buildFromBinOp in util/ranger, when we build a key from the expression to do a range scan or point get on
// a string column, we should set the collation of the string datum to the collation of the column.
if col.FieldType.EvalType() == types.ETString && (d.Kind() == types.KindString || d.Kind() == types.KindBinaryLiteral) {
d.SetString(d.GetString(), col.FieldType.GetCollate())
}

if col.GetType() == mysql.TypeString && col.GetCollate() == charset.CollationBin { // This type we needn't to pad `\0` in here.
return append(nvPairs, nameValuePair{colName: colName.Name.Name.L, colFieldType: &col.FieldType, value: d, con: con}), false
}
if !checkCanConvertInPointGet(col, d) {
Expand Down Expand Up @@ -1468,6 +1476,11 @@ func getPointGetValue(stmtCtx *stmtctx.StatementContext, col *model.ColumnInfo,
if !checkCanConvertInPointGet(col, *d) {
return nil
}
// As in buildFromBinOp in util/ranger, when we build a key from the expression to do a range scan or point get on
// a string column, we should set the collation of the string datum to the collation of the column.
if col.FieldType.EvalType() == types.ETString && (d.Kind() == types.KindString || d.Kind() == types.KindBinaryLiteral) {
d.SetString(d.GetString(), col.FieldType.GetCollate())
}
dVal, err := d.ConvertTo(stmtCtx, &col.FieldType)
if err != nil {
return nil
Expand Down
19 changes: 10 additions & 9 deletions pkg/planner/core/testdata/index_merge_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@
"IndexMerge 0.00 root type: intersection",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is1(s1) range:[\"Abc\",\"Abc\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t5, index:is2(s2) range:(\"zzz\",+inf], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t5, index:is3(s3) range:[-inf,\"B啊a\"), keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is4(s4) range:[\"CcC\",\"CcC\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t5, index:is3(s3) range:[-inf,\"\\x0eJ\\xfb@\\xd5J\\x0e3\"), keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is4(s4) range:[\"CCC\",\"CCC\"], keep order:false, stats:pseudo",
"└─TableRowIDScan(Probe) 0.00 cop[tikv] table:t5 keep order:false, stats:pseudo"
],
"Result": [
Expand All @@ -144,7 +144,7 @@
"Plan": [
"IndexMerge 0.03 root type: intersection",
"├─IndexRangeScan(Build) 33.33 cop[tikv] table:t6, index:PRIMARY(s1, s2) range:(\"Abc\" \"zzz\",\"Abc\" +inf], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t6, index:is3(s3) range:[\"A啊a\",\"A啊a\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t6, index:is3(s3) range:[\"\\x0e3\\xfb@\\xd5J\\x0e3\",\"\\x0e3\\xfb@\\xd5J\\x0e3\"], keep order:false, stats:pseudo",
"└─Selection(Probe) 0.03 cop[tikv] gt(test.t6.s2, \"zzz\"), not(like(test.t6.s4, \"Cd_\", 92))",
" └─TableRowIDScan 0.03 cop[tikv] table:t6 keep order:false, stats:pseudo"
],
Expand Down Expand Up @@ -172,13 +172,14 @@
{
"SQL": "select /*+ use_index_merge(t8, primary,is2,is3,is4,is5) */ * from t8 where s1 like '啊A%' and s2 > 'abc' and s3 > 'cba' and s4 in ('aA', '??') and s5 = 'test,2'",
"Plan": [
"Selection 1.42 root eq(test.t8.s5, \"test,2\")",
"└─IndexMerge 0.59 root type: intersection",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is2(s2) range:(0x616263,+inf], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is3(s3) range:(0x636261,+inf], keep order:false, stats:pseudo",
"Selection 0.04 root eq(test.t8.s5, \"test,2\")",
"└─IndexMerge 0.06 root type: intersection",
" ├─IndexRangeScan(Build) 250.00 cop[tikv] table:t8, index:PRIMARY(s1) range:[\"UJ\\x00A\",\"UJ\\x00B\"), keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is2(s2) range:(\"abc\",+inf], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is3(s3) range:(\"cba\",+inf], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 20.00 cop[tikv] table:t8, index:is4(s4) range:[\"aA\",\"aA\"], [\"??\",\"??\"], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.59 cop[tikv] gt(test.t8.s3, \"cba\"), like(test.t8.s1, \"啊A%\", 92)",
" └─TableRowIDScan 2.22 cop[tikv] table:t8 keep order:false, stats:pseudo"
" └─Selection(Probe) 0.06 cop[tikv] gt(test.t8.s3, \"cba\"), like(test.t8.s1, \"啊A%\", 92)",
" └─TableRowIDScan 0.06 cop[tikv] table:t8 keep order:false, stats:pseudo"
],
"Result": [
"啊aabbccdd abcc cccc aA tEsT,2"
Expand Down
1 change: 1 addition & 0 deletions pkg/util/ranger/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ go_library(
"//pkg/util/codec",
"//pkg/util/collate",
"//pkg/util/dbterror",
"//pkg/util/hack",
"//pkg/util/mathutil",
"@com_github_pingcap_errors//:errors",
],
Expand Down
12 changes: 2 additions & 10 deletions pkg/util/ranger/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ import (
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/collate"
)

// conditionChecker checks if this condition can be pushed to index planner.
type conditionChecker struct {
ctx sessionctx.Context
checkerCol *expression.Column
length int
optPrefixIndexSingleScan bool
Expand Down Expand Up @@ -139,16 +141,6 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction

func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isAccessCond, shouldReserve bool) {
_, collation := scalar.CharsetAndCollation()
if collate.NewCollationEnabled() && !collate.IsBinCollation(collation) {
// The algorithm constructs the range in byte-level: for example, ab% is mapped to [ab, ac] by adding 1 to the last byte.
// However, this is incorrect for non-binary collation strings because the sort key order is not the same as byte order.
// For example, "`%" is mapped to the range [`, a](where ` is 0x60 and a is 0x61).
// Because the collation utf8_general_ci is case-insensitive, a and A have the same sort key.
// Finally, the range comes to be [`, A], which is actually an empty range.
// See https://github.com/pingcap/tidb/issues/31174 for more details.
// In short, when the column type is non-binary collation string, we cannot use `like` expressions to generate the range.
return false, true
}
if !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().GetCollate(), collation) {
return false, true
}
Expand Down
Loading