diff --git a/planner/core/BUILD.bazel b/planner/core/BUILD.bazel index 1a7ec1196f0f1..b7f37923547be 100644 --- a/planner/core/BUILD.bazel +++ b/planner/core/BUILD.bazel @@ -171,6 +171,7 @@ go_test( "flat_plan_test.go", "fragment_test.go", "indexmerge_intersection_test.go", + "indexmerge_path_test.go", "indexmerge_test.go", "integration_partition_test.go", "integration_test.go", diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 639bc15dbdc98..f14e343610b5d 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -1419,6 +1419,12 @@ func (ds *DataSource) addSelection4PlanCache(task *rootTask, stats *property.Sta // convertToIndexScan converts the DataSource to index scan with idx. func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candidate *candidatePath, _ *physicalOptimizeOp) (task task, err error) { + if candidate.path.Index.MVIndex { + // MVIndex is special since different index rows may return the same _row_id and this can break some assumptions of IndexReader. + // Currently only support using IndexMerge to access MVIndex instead of IndexReader. + // TODO: make IndexReader support accessing MVIndex directly. + return invalidTask, nil + } if !candidate.path.IsSingleScan { // If it's parent requires single read task, return max cost. if prop.TaskTp == property.CopSingleReadTaskType { diff --git a/planner/core/indexmerge_path.go b/planner/core/indexmerge_path.go index 51f950bf7d61a..f0ecf02a00231 100644 --- a/planner/core/indexmerge_path.go +++ b/planner/core/indexmerge_path.go @@ -26,6 +26,7 @@ import ( "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/planner/util" + "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/ranger" "go.uber.org/zap" @@ -63,7 +64,19 @@ func (ds *DataSource) generateIndexMergePath() error { _, remaining := expression.PushDownExprs(stmtCtx, indexMergeConds, ds.ctx.GetClient(), kv.UnSpecified) stmtCtx.SetWarnings(warnings) stmtCtx.SetExtraWarnings(extraWarnings) - if len(remaining) != 0 { + + remainingExpr := 0 + for _, expr := range remaining { + // Handle these 3 functions specially since they can be used to access MVIndex. + if sf, ok := expr.(*expression.ScalarFunction); ok { + if sf.FuncName.L == ast.JSONMemberOf || sf.FuncName.L == ast.JSONOverlaps || + sf.FuncName.L == ast.JSONContains { + continue + } + } + remainingExpr++ + } + if remainingExpr > 0 { needConsiderIndexMerge = false } } @@ -435,8 +448,16 @@ func (ds *DataSource) generateAndPruneIndexMergePath(indexMergeConds []expressio if indexMergeAndPath != nil { ds.possibleAccessPaths = append(ds.possibleAccessPaths, indexMergeAndPath) } + // 3. Generate possible IndexMerge paths for MVIndex. + mvIndexMergePath, err := ds.generateIndexMergeJSONMVIndexPath(regularPathCount, indexMergeConds) + if err != nil { + return err + } + if mvIndexMergePath != nil { + ds.possibleAccessPaths = append(ds.possibleAccessPaths, mvIndexMergePath...) + } - // 3. If needed, append a warning if no IndexMerge is generated. + // 4. If needed, append a warning if no IndexMerge is generated. // If without hints, it means that `enableIndexMerge` is true if len(ds.indexMergeHints) == 0 { @@ -467,3 +488,146 @@ func (ds *DataSource) generateAndPruneIndexMergePath(indexMergeConds []expressio } return nil } + +// generateIndexMergeJSONMVIndexPath generates paths for (json_member_of / json_overlaps / json_contains) on multi-valued index. +/* + 1. select * from t where 1 member of (a) + IndexMerge(AND) + IndexRangeScan(a, [1,1]) + TableRowIdScan(t) + 2. select * from t where json_contains(a, '[1, 2, 3]') + IndexMerge(AND) + IndexRangeScan(a, [1,1]) + IndexRangeScan(a, [2,2]) + IndexRangeScan(a, [3,3]) + TableRowIdScan(t) + 3. select * from t where json_overlap(a, '[1, 2, 3]') + IndexMerge(OR) + IndexRangeScan(a, [1,1]) + IndexRangeScan(a, [2,2]) + IndexRangeScan(a, [3,3]) + TableRowIdScan(t) +*/ +func (ds *DataSource) generateIndexMergeJSONMVIndexPath(normalPathCnt int, filters []expression.Expression) (mvIndexPaths []*util.AccessPath, err error) { + for idx := 0; idx < normalPathCnt; idx++ { + if ds.possibleAccessPaths[idx].IsTablePath() || ds.possibleAccessPaths[idx].Index == nil || !ds.possibleAccessPaths[idx].Index.MVIndex { + continue // not a MVIndex path + } + if !ds.isSpecifiedInIndexMergeHints(ds.possibleAccessPaths[idx].Index.Name.L) { + continue // for safety, only consider using MVIndex when there is a `use_index_merge` hint now. + // TODO: remove this limitation + } + + // Step 1. Extract the underlying JSON column from MVIndex Info. + mvIndex := ds.possibleAccessPaths[idx].Index + if len(mvIndex.Columns) != 1 { + // only support single-column MVIndex now: idx((cast(a->'$.zip' as signed array))) + // TODO: support composite MVIndex idx((x, cast(a->'$.zip' as int array), z)) + continue + } + mvVirColOffset := mvIndex.Columns[0].Offset + mvVirColMeta := ds.table.Meta().Cols()[mvVirColOffset] + + var virCol *expression.Column + for _, ce := range ds.TblCols { + if ce.ID == mvVirColMeta.ID { + virCol = ce.Clone().(*expression.Column) + virCol.RetType = ce.GetType().ArrayType() // use the underlying type directly: JSON-ARRAY(INT) --> INT + break + } + } + // unwrap the outside cast: cast(json_extract(test.t.a, $.zip), JSON) --> json_extract(test.t.a, $.zip) + targetJSONPath, ok := unwrapJSONCast(virCol.VirtualExpr) + if !ok { + continue + } + + // Step 2. Iterate all filters and generate corresponding IndexMerge paths. + for filterIdx, filter := range filters { + // Step 2.1. Extract jsonPath and vals from json_member / json_overlaps / json_contains functions. + sf, ok := filter.(*expression.ScalarFunction) + if !ok { + continue + } + + var jsonPath expression.Expression + var vals []expression.Expression + switch sf.FuncName.L { + case ast.JSONMemberOf: // (1 member of a->'$.zip') + jsonPath = sf.GetArgs()[1] + v, ok := unwrapJSONCast(sf.GetArgs()[0]) // cast(1 as json) --> 1 + if !ok { + continue + } + vals = append(vals, v) + case ast.JSONOverlaps: // (json_overlaps(a->'$.zip', '[1, 2, 3]') + continue // TODO: support json_overlaps + case ast.JSONContains: // (json_contains(a->'$.zip', '[1, 2, 3]') + continue // TODO: support json_contains + default: + continue + } + + // Step 2.2. Check some limitations. + if jsonPath == nil || len(vals) == 0 { + continue + } + if !jsonPath.Equal(ds.ctx, targetJSONPath) { + continue // not on the same JSON col + } + // only support INT now + // TODO: support more types + if jsonPath.GetType().EvalType() == types.ETInt { + continue + } + allInt := true + // TODO: support using IndexLookUp to handle single-value cases. + for _, v := range vals { + if v.GetType().EvalType() != types.ETInt { + allInt = false + } + } + if !allInt { + continue + } + + // Step 2.3. Generate a IndexMerge Path of this filter on the current MVIndex. + var partialPaths []*util.AccessPath + for _, v := range vals { + partialPath := &util.AccessPath{Index: mvIndex} + partialPath.Ranges = ranger.FullRange() + // TODO: get the actual column length of this virtual column + partialPath.IdxCols, partialPath.IdxColLens = []*expression.Column{virCol}, []int{types.UnspecifiedLength} + partialPath.FullIdxCols, partialPath.FullIdxColLens = []*expression.Column{virCol}, []int{types.UnspecifiedLength} + + // calculate the path range with the condition `a->'$.zip' = 1`. + eq, err := expression.NewFunction(ds.ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), virCol, v) + if err != nil { + return nil, err + } + if err = ds.detachCondAndBuildRangeForPath(partialPath, []expression.Expression{eq}); err != nil { + return nil, err + } + + partialPaths = append(partialPaths, partialPath) + } + indexMergePath := ds.buildIndexMergeOrPath(filters, partialPaths, filterIdx) + mvIndexPaths = append(mvIndexPaths, indexMergePath) + } + } + return +} + +func unwrapJSONCast(expr expression.Expression) (expression.Expression, bool) { + if expr == nil { + return nil, false + } + sf, ok := expr.(*expression.ScalarFunction) + if !ok { + return nil, false + } + if sf == nil || sf.FuncName.L != ast.Cast || sf.GetType().EvalType() != types.ETJson { + return nil, false + } + return sf.GetArgs()[0], true +} diff --git a/planner/core/indexmerge_path_test.go b/planner/core/indexmerge_path_test.go new file mode 100644 index 0000000000000..fcb0d27903c64 --- /dev/null +++ b/planner/core/indexmerge_path_test.go @@ -0,0 +1,53 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package core_test + +import ( + "testing" + + "github.com/pingcap/tidb/planner/core" + "github.com/pingcap/tidb/testkit" + "github.com/pingcap/tidb/testkit/testdata" +) + +func TestIndexMergeJSONMemberOf(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec(`create table t( +a int, j0 json, j1 json, +index j0_0((cast(j0->'$.path0' as signed array))), +index j0_1((cast(j0->'$.path1' as signed array))), +index j1((cast(j1 as signed array))))`) + + var input []string + var output []struct { + SQL string + Plan []string + } + planSuiteData := core.GetIndexMergeSuiteData() + planSuiteData.LoadTestCases(t, &input, &output) + + for i, query := range input { + testdata.OnRecord(func() { + output[i].SQL = query + }) + result := tk.MustQuery("explain format = 'brief' " + query) + testdata.OnRecord(func() { + output[i].Plan = testdata.ConvertRowsToStrings(result.Rows()) + }) + result.Check(testkit.Rows(output[i].Plan...)) + } +} diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index 6bc8a677bc6de..f60c1f617e467 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -4673,7 +4673,10 @@ func (b *PlanBuilder) buildDataSource(ctx context.Context, tn *ast.TableName, as if i < len(columns) { if columns[i].IsGenerated() && !columns[i].GeneratedStored { var err error + originVal := b.allowBuildCastArray + b.allowBuildCastArray = true expr, _, err = b.rewrite(ctx, columns[i].GeneratedExpr, ds, nil, true) + b.allowBuildCastArray = originVal if err != nil { return nil, err } diff --git a/planner/core/testdata/index_merge_suite_in.json b/planner/core/testdata/index_merge_suite_in.json index db7ebacdb29c7..2841de33bae0c 100644 --- a/planner/core/testdata/index_merge_suite_in.json +++ b/planner/core/testdata/index_merge_suite_in.json @@ -1,4 +1,14 @@ [ + { + "name": "TestIndexMergeJSONMemberOf", + "cases": [ + "select /*+ use_index_merge(t, j0_0) */ * from t where (1 member of (j0->'$.path0'))", + "select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.path1')) and a<10", + "select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.XXX')) and a<10", + "select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10", + "select /*+ use_index_merge(t, j1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10" + ] + }, { "name": "TestIndexMergePathGeneration", "cases": [ diff --git a/planner/core/testdata/index_merge_suite_out.json b/planner/core/testdata/index_merge_suite_out.json index 3d67e5e372251..31427fbf4c7e0 100644 --- a/planner/core/testdata/index_merge_suite_out.json +++ b/planner/core/testdata/index_merge_suite_out.json @@ -1,4 +1,57 @@ [ + { + "Name": "TestIndexMergeJSONMemberOf", + "Cases": [ + { + "SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where (1 member of (j0->'$.path0'))", + "Plan": [ + "Selection 0.00 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.path0\"))", + "└─IndexMerge 0.00 root type: union", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.path1')) and a<10", + "Plan": [ + "Selection 0.00 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.path1\"))", + "└─IndexMerge 0.00 root type: union", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_1(cast(json_extract(`j0`, _utf8mb4'$.path1') as signed array)) range:[1,1], keep order:false, stats:pseudo", + " └─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)", + " └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.XXX')) and a<10", + "Plan": [ + "Selection 2658.67 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.XXX\"))", + "└─TableReader 3323.33 root data:Selection", + " └─Selection 3323.33 cop[tikv] lt(test.t.a, 10)", + " └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10", + "Plan": [ + "Selection 0.00 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.path1\")), json_memberof(cast(2, json BINARY), test.t.j1)", + "└─IndexMerge 0.00 root type: union", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_1(cast(json_extract(`j0`, _utf8mb4'$.path1') as signed array)) range:[1,1], keep order:false, stats:pseudo", + " └─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)", + " └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ use_index_merge(t, j1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10", + "Plan": [ + "Selection 0.00 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.path1\")), json_memberof(cast(2, json BINARY), test.t.j1)", + "└─IndexMerge 0.00 root type: union", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j1(cast(`j1` as signed array)) range:[2,2], keep order:false, stats:pseudo", + " └─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)", + " └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo" + ] + } + ] + }, { "Name": "TestIndexMergePathGeneration", "Cases": [