From c8dd552f284ff92a27e7936f7b604f91632aa9ce Mon Sep 17 00:00:00 2001 From: Harshit Gangal Date: Tue, 20 Jul 2021 22:57:47 +0530 Subject: [PATCH 1/9] distinct planning to use ordered_aggregates primitive than distinct primitive to support varchar type columns Signed-off-by: Harshit Gangal --- go/vt/vtgate/planbuilder/horizon_planning.go | 88 +++++++++++++++++++ go/vt/vtgate/planbuilder/route_planning.go | 37 +------- .../planbuilder/testdata/aggr_cases.txt | 33 ++++--- .../planbuilder/testdata/select_cases.txt | 43 ++++++++- 4 files changed, 156 insertions(+), 45 deletions(-) diff --git a/go/vt/vtgate/planbuilder/horizon_planning.go b/go/vt/vtgate/planbuilder/horizon_planning.go index dc27578b58b..05be96ab024 100644 --- a/go/vt/vtgate/planbuilder/horizon_planning.go +++ b/go/vt/vtgate/planbuilder/horizon_planning.go @@ -457,3 +457,91 @@ func allLeft(orderExprs []abstract.OrderBy, semTable *semantics.SemTable, lhsTab } return true } + +func (hp *horizonPlanning) planDistinct() error { + if !hp.qp.NeedsDistinct() { + return nil + } + switch p := hp.plan.(type) { + case *route: + // we always make the underlying query distinct, + // and then we might also add a distinct operator on top if it is needed + p.Select.MakeDistinct() + if !p.isSingleShard() && !selectHasUniqueVindex(hp.vschema, hp.semTable, hp.qp.SelectExprs) { + return hp.pushDistinct() + } + return nil + case *joinGen4: + return hp.pushDistinct() + case *orderedAggregate: + eaggr := &engine.OrderedAggregate{} + oa := &orderedAggregate{ + resultsBuilder: resultsBuilder{ + logicalPlanCommon: newBuilderCommon(hp.plan), + weightStrings: make(map[*resultColumn]int), + truncater: eaggr, + }, + eaggr: eaggr, + } + for _, sExpr := range hp.qp.SelectExprs { + found := false + for _, grpParam := range p.eaggr.GroupByKeys { + if sqlparser.EqualsExpr(sExpr.Col.Expr, grpParam.Expr) { + found = true + eaggr.GroupByKeys = append(eaggr.GroupByKeys, grpParam) + break + } + } + if found { + continue + } + for _, aggrParam := range p.eaggr.Aggregates { + if sqlparser.EqualsExpr(sExpr.Col.Expr, aggrParam.Expr) { + found = true + eaggr.GroupByKeys = append(eaggr.GroupByKeys, engine.GroupByParams{KeyCol: aggrParam.Col, WeightStringCol: -1}) + break + } + } + if !found { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "[BUG] unable to plan distinct query as the column is not projected: %s", sqlparser.String(sExpr.Col)) + } + } + hp.plan = oa + return nil + default: + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unknown plan type for DISTINCT %T", hp.plan) + } +} + +func (hp *horizonPlanning) pushDistinct() error { + eaggr := &engine.OrderedAggregate{} + oa := &orderedAggregate{ + resultsBuilder: resultsBuilder{ + logicalPlanCommon: newBuilderCommon(hp.plan), + weightStrings: make(map[*resultColumn]int), + truncater: eaggr, + }, + eaggr: eaggr, + } + for index, sExpr := range hp.qp.SelectExprs { + grpParam := engine.GroupByParams{KeyCol: index, WeightStringCol: -1} + _, wOffset, added, err := wrapAndPushExpr(sExpr.Col.Expr, sExpr.Col.Expr, hp.plan, hp.semTable) + if err != nil { + return err + } + hp.needsTruncation = hp.needsTruncation || added + grpParam.WeightStringCol = wOffset + eaggr.GroupByKeys = append(eaggr.GroupByKeys, grpParam) + } + hp.plan = oa + return nil +} + +func selectHasUniqueVindex(vschema ContextVSchema, semTable *semantics.SemTable, sel []abstract.SelectExpr) bool { + for _, expr := range sel { + if exprHasUniqueVindex(vschema, semTable, expr.Col.Expr) { + return true + } + } + return false +} diff --git a/go/vt/vtgate/planbuilder/route_planning.go b/go/vt/vtgate/planbuilder/route_planning.go index 92033cd6a34..9270e395050 100644 --- a/go/vt/vtgate/planbuilder/route_planning.go +++ b/go/vt/vtgate/planbuilder/route_planning.go @@ -238,16 +238,14 @@ func (hp horizonPlanning) planHorizon() (logicalPlan, error) { } } - err = hp.truncateColumnsIfNeeded() + err = hp.planDistinct() if err != nil { return nil, err } - if hp.qp.NeedsDistinct() { - hp.plan, err = pushDistinct(hp.plan, hp.semTable, hp.vschema, hp.qp) - if err != nil { - return nil, err - } + err = hp.truncateColumnsIfNeeded() + if err != nil { + return nil, err } return hp.plan, nil @@ -272,33 +270,6 @@ func (hp horizonPlanning) truncateColumnsIfNeeded() error { return nil } -func pushDistinct(plan logicalPlan, semTable *semantics.SemTable, vschema ContextVSchema, qp *abstract.QueryProjection) (logicalPlan, error) { - switch p := plan.(type) { - case *route: - // we always make the underlying query distinct, - // and then we might also add a distinct operator on top if it is needed - p.Select.MakeDistinct() - if !p.isSingleShard() && !selectHasUniqueVindex(vschema, semTable, qp.SelectExprs) { - plan = newDistinct(plan) - } - return plan, nil - case *orderedAggregate, *joinGen4: - return newDistinct(plan), nil - - default: - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unknown plan type for DISTINCT %T", plan) - } -} - -func selectHasUniqueVindex(vschema ContextVSchema, semTable *semantics.SemTable, sel []abstract.SelectExpr) bool { - for _, expr := range sel { - if exprHasUniqueVindex(vschema, semTable, expr.Col.Expr) { - return true - } - } - return false -} - func exprHasUniqueVindex(vschema ContextVSchema, semTable *semantics.SemTable, expr sqlparser.Expr) bool { col, isCol := expr.(*sqlparser.ColName) if !isCol { diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt index 373cde13a9b..65a3c6ddc13 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt @@ -398,7 +398,10 @@ Gen4 plan same as above "QueryType": "SELECT", "Original": "select distinct col1, col2 from user group by col1", "Instructions": { - "OperatorType": "Distinct", + "OperatorType": "Aggregate", + "Variant": "Ordered", + "GroupBy": "(0|2), (1|3)", + "ResultColumns": 2, "Inputs": [ { "OperatorType": "Route", @@ -407,8 +410,8 @@ Gen4 plan same as above "Name": "user", "Sharded": true }, - "FieldQuery": "select col1, col2 from `user` where 1 != 1", - "Query": "select distinct col1, col2 from `user`", + "FieldQuery": "select col1, col2, weight_string(col1), weight_string(col2) from `user` where 1 != 1", + "Query": "select distinct col1, col2, weight_string(col1), weight_string(col2) from `user`", "Table": "`user`" } ] @@ -811,7 +814,10 @@ Gen4 error: In aggregated query without GROUP BY, expression of SELECT list cont "QueryType": "SELECT", "Original": "select distinct col from user", "Instructions": { - "OperatorType": "Distinct", + "OperatorType": "Aggregate", + "Variant": "Ordered", + "GroupBy": "(0|1)", + "ResultColumns": 1, "Inputs": [ { "OperatorType": "Route", @@ -820,8 +826,8 @@ Gen4 error: In aggregated query without GROUP BY, expression of SELECT list cont "Name": "user", "Sharded": true }, - "FieldQuery": "select col from `user` where 1 != 1", - "Query": "select distinct col from `user`", + "FieldQuery": "select col, weight_string(col) from `user` where 1 != 1", + "Query": "select distinct col, weight_string(col) from `user`", "Table": "`user`" } ] @@ -1767,14 +1773,16 @@ Gen4 error: In aggregated query without GROUP BY, expression of SELECT list cont "QueryType": "SELECT", "Original": "select distinct a, count(*) from user group by a", "Instructions": { - "OperatorType": "Distinct", + "OperatorType": "Aggregate", + "Variant": "Ordered", + "GroupBy": "(0|2), 1", + "ResultColumns": 2, "Inputs": [ { "OperatorType": "Aggregate", "Variant": "Ordered", "Aggregates": "count(1)", "GroupBy": "(0|2)", - "ResultColumns": 2, "Inputs": [ { "OperatorType": "Route", @@ -1838,7 +1846,10 @@ Gen4 plan same as above "QueryType": "SELECT", "Original": "select distinct col1, col2 from user group by col1, col2", "Instructions": { - "OperatorType": "Distinct", + "OperatorType": "Aggregate", + "Variant": "Ordered", + "GroupBy": "(0|2), (1|3)", + "ResultColumns": 2, "Inputs": [ { "OperatorType": "Route", @@ -1847,8 +1858,8 @@ Gen4 plan same as above "Name": "user", "Sharded": true }, - "FieldQuery": "select col1, col2 from `user` where 1 != 1", - "Query": "select distinct col1, col2 from `user`", + "FieldQuery": "select col1, col2, weight_string(col1), weight_string(col2) from `user` where 1 != 1", + "Query": "select distinct col1, col2, weight_string(col1), weight_string(col2) from `user`", "Table": "`user`" } ] diff --git a/go/vt/vtgate/planbuilder/testdata/select_cases.txt b/go/vt/vtgate/planbuilder/testdata/select_cases.txt index b1c805cc4a6..ddc0bfa1949 100644 --- a/go/vt/vtgate/planbuilder/testdata/select_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/select_cases.txt @@ -1753,4 +1753,45 @@ Gen4 plan same as above ] } } -Gen4 plan same as above +{ + "QueryType": "SELECT", + "Original": "select distinct user.a from user join user_extra", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "GroupBy": "(0|1)", + "ResultColumns": 1, + "Inputs": [ + { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "-1,-2", + "TableName": "`user`_user_extra", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select `user`.a, weight_string(`user`.a) from `user` where 1 != 1", + "Query": "select `user`.a, weight_string(`user`.a) from `user`", + "Table": "`user`" + }, + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select 1 from user_extra where 1 != 1", + "Query": "select 1 from user_extra", + "Table": "user_extra" + } + ] + } + ] + } +} From be820a70af03d98b4cb88e557240149f0904ecf6 Mon Sep 17 00:00:00 2001 From: Harshit Gangal Date: Tue, 20 Jul 2021 23:21:51 +0530 Subject: [PATCH 2/9] some code refactor in distinct oa plan Signed-off-by: Harshit Gangal --- go/vt/vtgate/planbuilder/horizon_planning.go | 70 +++++++++++--------- go/vt/vtgate/planbuilder/plan_test.go | 26 +++----- 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/go/vt/vtgate/planbuilder/horizon_planning.go b/go/vt/vtgate/planbuilder/horizon_planning.go index 05be96ab024..c33721e4d0b 100644 --- a/go/vt/vtgate/planbuilder/horizon_planning.go +++ b/go/vt/vtgate/planbuilder/horizon_planning.go @@ -474,43 +474,47 @@ func (hp *horizonPlanning) planDistinct() error { case *joinGen4: return hp.pushDistinct() case *orderedAggregate: - eaggr := &engine.OrderedAggregate{} - oa := &orderedAggregate{ - resultsBuilder: resultsBuilder{ - logicalPlanCommon: newBuilderCommon(hp.plan), - weightStrings: make(map[*resultColumn]int), - truncater: eaggr, - }, - eaggr: eaggr, - } - for _, sExpr := range hp.qp.SelectExprs { - found := false - for _, grpParam := range p.eaggr.GroupByKeys { - if sqlparser.EqualsExpr(sExpr.Col.Expr, grpParam.Expr) { - found = true - eaggr.GroupByKeys = append(eaggr.GroupByKeys, grpParam) - break - } - } - if found { - continue - } - for _, aggrParam := range p.eaggr.Aggregates { - if sqlparser.EqualsExpr(sExpr.Col.Expr, aggrParam.Expr) { - found = true - eaggr.GroupByKeys = append(eaggr.GroupByKeys, engine.GroupByParams{KeyCol: aggrParam.Col, WeightStringCol: -1}) - break - } + return hp.planDistinctOA(p) + default: + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unknown plan type for DISTINCT %T", hp.plan) + } +} + +func (hp *horizonPlanning) planDistinctOA(currPlan *orderedAggregate) error { + eaggr := &engine.OrderedAggregate{} + oa := &orderedAggregate{ + resultsBuilder: resultsBuilder{ + logicalPlanCommon: newBuilderCommon(hp.plan), + weightStrings: make(map[*resultColumn]int), + truncater: eaggr, + }, + eaggr: eaggr, + } + for _, sExpr := range hp.qp.SelectExprs { + found := false + for _, grpParam := range currPlan.eaggr.GroupByKeys { + if sqlparser.EqualsExpr(sExpr.Col.Expr, grpParam.Expr) { + found = true + eaggr.GroupByKeys = append(eaggr.GroupByKeys, grpParam) + break } - if !found { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "[BUG] unable to plan distinct query as the column is not projected: %s", sqlparser.String(sExpr.Col)) + } + if found { + continue + } + for _, aggrParam := range currPlan.eaggr.Aggregates { + if sqlparser.EqualsExpr(sExpr.Col.Expr, aggrParam.Expr) { + found = true + eaggr.GroupByKeys = append(eaggr.GroupByKeys, engine.GroupByParams{KeyCol: aggrParam.Col, WeightStringCol: -1}) + break } } - hp.plan = oa - return nil - default: - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unknown plan type for DISTINCT %T", hp.plan) + if !found { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "[BUG] unable to plan distinct query as the column is not projected: %s", sqlparser.String(sExpr.Col)) + } } + hp.plan = oa + return nil } func (hp *horizonPlanning) pushDistinct() error { diff --git a/go/vt/vtgate/planbuilder/plan_test.go b/go/vt/vtgate/planbuilder/plan_test.go index f3ca82587e9..e02c8870b59 100644 --- a/go/vt/vtgate/planbuilder/plan_test.go +++ b/go/vt/vtgate/planbuilder/plan_test.go @@ -464,15 +464,19 @@ func (vw *vschemaWrapper) ErrorIfShardedF(keyspace *vindexes.Keyspace, _, errFmt return nil } +func (vw *vschemaWrapper) currentDb() string { + ksName := "" + if vw.keyspace != nil { + ksName = vw.keyspace.Name + } + return ksName +} + func escapeNewLines(in string) string { return strings.ReplaceAll(in, "\n", "\\n") } func testFile(t *testing.T, filename, tempDir string, vschema *vschemaWrapper, checkGen4equalPlan bool) { - ksName := "" - if vschema.keyspace != nil { - ksName = vschema.keyspace.Name - } var checkAllTests = false t.Run(filename, func(t *testing.T) { expected := &strings.Builder{} @@ -481,7 +485,7 @@ func testFile(t *testing.T, filename, tempDir string, vschema *vschemaWrapper, c for tcase := range iterateExecFile(filename) { t.Run(fmt.Sprintf("%d V3: %s", tcase.lineno, tcase.comments), func(t *testing.T) { vschema.version = V3 - plan, err := TestBuilder(tcase.input, vschema, ksName) + plan, err := TestBuilder(tcase.input, vschema, vschema.currentDb()) out := getPlanOrErrorOutput(err, plan) if out != tcase.output { @@ -558,11 +562,7 @@ func getPlanOutput(tcase testCase, vschema *vschemaWrapper) (out string, err err out = fmt.Sprintf("panicked: %v\n%s", r, string(debug.Stack())) } }() - ksName := "" - if vschema.keyspace != nil { - ksName = vschema.keyspace.Name - } - plan, err := TestBuilder(tcase.input, vschema, ksName) + plan, err := TestBuilder(tcase.input, vschema, vschema.currentDb()) out = getPlanOrErrorOutput(err, plan) return out, err } @@ -747,16 +747,12 @@ func BenchmarkSelectVsDML(b *testing.B) { } func benchmarkPlanner(b *testing.B, version PlannerVersion, testCases []testCase, vschema *vschemaWrapper) { - ksName := "" - if vschema.keyspace != nil { - ksName = vschema.keyspace.Name - } b.ReportAllocs() for n := 0; n < b.N; n++ { for _, tcase := range testCases { if tcase.output2ndPlanner != "" { vschema.version = version - _, _ = TestBuilder(tcase.input, vschema, ksName) + _, _ = TestBuilder(tcase.input, vschema, vschema.currentDb()) } } } From eea503d3e93ac9147e43e8b911956b2316e7ab36 Mon Sep 17 00:00:00 2001 From: Harshit Gangal Date: Wed, 21 Jul 2021 00:18:13 +0530 Subject: [PATCH 3/9] add more supported Gen4 plans Signed-off-by: Harshit Gangal --- .../testdata/postprocess_cases.txt | 103 ++++++++++++++++++ .../planbuilder/testdata/select_cases.txt | 33 +++++- 2 files changed, 135 insertions(+), 1 deletion(-) diff --git a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt index 3d956547db4..54ccb8d1ed6 100644 --- a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt @@ -619,6 +619,49 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select user.col1 as a, user.col2, music.col3 from user join music on user.id = music.id where user.id = 1 order by a", + "Instructions": { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "-2,-3,1", + "TableName": "`user`_music", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectEqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select `user`.id, `user`.col1 as a, `user`.col2, weight_string(`user`.col1) from `user` where 1 != 1", + "OrderBy": "(1|3) ASC", + "Query": "select `user`.id, `user`.col1 as a, `user`.col2, weight_string(`user`.col1) from `user` where `user`.id = 1 order by a asc", + "Table": "`user`", + "Values": [ + 1 + ], + "Vindex": "user_index" + }, + { + "OperatorType": "Route", + "Variant": "SelectEqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select music.col3 from music where 1 != 1", + "Query": "select music.col3 from music where music.id = :user_id", + "Table": "music", + "Values": [ + ":user_id" + ], + "Vindex": "music_user_map" + } + ] + } +} # ORDER BY non-key column for implicit join "select user.col1 as a, user.col2, music.col3 from user, music where user.id = music.id and user.id = 1 order by a" @@ -959,6 +1002,7 @@ Gen4 plan same as above # Order by, invalid column number "select col from user order by 18446744073709551616" "error parsing column number: 18446744073709551616" +Gen4 error: strconv.Atoi: parsing "18446744073709551616": value out of range # Order by, out of range column number "select col from user order by 2" @@ -1449,6 +1493,7 @@ Gen4 plan same as above ] } } +Gen4 error: In aggregated query without GROUP BY, expression of SELECT list contains nonaggregated column 'num'; this is incompatible with sql_mode=only_full_group_by # aggregation and non-aggregations column with order by "select count(id), num from user order by 2" @@ -1483,6 +1528,7 @@ Gen4 plan same as above ] } } +Gen4 error: In aggregated query without GROUP BY, expression of SELECT list contains nonaggregated column 'num'; this is incompatible with sql_mode=only_full_group_by # aggregation and non-aggregations column with group by "select count(id), num from user group by 2" @@ -1511,6 +1557,31 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select count(id), num from user group by 2", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(0)", + "GroupBy": "(1|2)", + "ResultColumns": 2, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select count(id), num, weight_string(num) from `user` where 1 != 1 group by num", + "OrderBy": "(1|2) ASC", + "Query": "select count(id), num, weight_string(num) from `user` group by num order by num asc", + "Table": "`user`" + } + ] + } +} # aggregation and non-aggregations column with group by and order by "select count(id), num from user group by 2 order by 1" @@ -1546,6 +1617,38 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select count(id), num from user group by 2 order by 1", + "Instructions": { + "OperatorType": "Sort", + "Variant": "Memory", + "OrderBy": "0 ASC", + "ResultColumns": 2, + "Inputs": [ + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(0)", + "GroupBy": "(1|2)", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select count(id), num, weight_string(num) from `user` where 1 != 1 group by num", + "OrderBy": "(1|2) ASC", + "Query": "select count(id), num, weight_string(num) from `user` group by num order by num asc", + "Table": "`user`" + } + ] + } + ] + } +} # join order by with ambiguous column reference ; valid in MySQL "select name, name from user, music order by name" diff --git a/go/vt/vtgate/planbuilder/testdata/select_cases.txt b/go/vt/vtgate/planbuilder/testdata/select_cases.txt index ddc0bfa1949..7c8e7c41698 100644 --- a/go/vt/vtgate/planbuilder/testdata/select_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/select_cases.txt @@ -302,11 +302,26 @@ Gen4 plan same as above "Table": "authoritative" } } +{ + "QueryType": "SELECT", + "Original": "select * from authoritative a join authoritative b on a.user_id=b.user_id", + "Instructions": { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select a.user_id as user_id, a.col1 as col1, a.col2 as col2, b.user_id as user_id, b.col1 as col1, b.col2 as col2 from authoritative as a, authoritative as b where 1 != 1", + "Query": "select a.user_id as user_id, a.col1 as col1, a.col2 as col2, b.user_id as user_id, b.col1 as col1, b.col2 as col2 from authoritative as a, authoritative as b where a.user_id = b.user_id", + "Table": "authoritative" + } +} # test table lookup failure for authoritative code path "select a.* from authoritative" "table a not found" -"Unknown table 'a'" +Gen4 error: Unknown table 'a' # select * from qualified authoritative table "select a.* from authoritative a" @@ -376,6 +391,21 @@ Gen4 plan same as above "Table": "authoritative, `user`" } } +{ + "QueryType": "SELECT", + "Original": "select user.id, a.*, user.col1 from authoritative a join user on a.user_id=user.id", + "Instructions": { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select `user`.id, a.user_id as user_id, a.col1 as col1, a.col2 as col2, `user`.col1 from authoritative as a, `user` where 1 != 1", + "Query": "select `user`.id, a.user_id as user_id, a.col1 as col1, a.col2 as col2, `user`.col1 from authoritative as a, `user` where a.user_id = `user`.id", + "Table": "`user`, authoritative" + } +} # auto-resolve anonymous columns for simple route "select col from user join user_extra on user.id = user_extra.user_id" @@ -1702,6 +1732,7 @@ Gen4 plan same as above "Table": "unsharded" } } +Gen4 plan same as above # Union after into outfile is incorrect "select id from user into outfile 'out_file_name' union all select id from music" From 8edfa349304936e717d3d74c940cc96e795cf0e3 Mon Sep 17 00:00:00 2001 From: Harshit Gangal Date: Wed, 21 Jul 2021 12:58:38 +0530 Subject: [PATCH 4/9] upadte more gen4 supported query plans and errors in plan tests Signed-off-by: Harshit Gangal --- .../planbuilder/testdata/aggr_cases.txt | 45 +++++++++++++++ .../testdata/memory_sort_cases.txt | 39 ++++++++++++- .../testdata/postprocess_cases.txt | 56 +++++++++++++++++++ .../planbuilder/testdata/select_cases.txt | 1 - .../planbuilder/testdata/stream_cases.txt | 2 + .../testdata/unsupported_cases.txt | 20 +------ 6 files changed, 144 insertions(+), 19 deletions(-) diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt index 65a3c6ddc13..0955b4b8223 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt @@ -1917,3 +1917,48 @@ Gen4 plan same as above ] } } + +# Aggregates and joins +"select count(*) from user join user_extra" +"unsupported: cross-shard query with aggregates" +{ + "QueryType": "SELECT", + "Original": "select count(*) from user join user_extra", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(0)", + "Inputs": [ + { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "-1", + "TableName": "`user`_user_extra", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select count(*) from `user` where 1 != 1", + "Query": "select count(*) from `user`", + "Table": "`user`" + }, + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select 1 from user_extra where 1 != 1", + "Query": "select 1 from user_extra", + "Table": "user_extra" + } + ] + } + ] + } +} diff --git a/go/vt/vtgate/planbuilder/testdata/memory_sort_cases.txt b/go/vt/vtgate/planbuilder/testdata/memory_sort_cases.txt index 4131b6930ae..e11eba36c58 100644 --- a/go/vt/vtgate/planbuilder/testdata/memory_sort_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/memory_sort_cases.txt @@ -35,6 +35,7 @@ ] } } +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column 'b' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by # scatter aggregate order by references aggregate expression "select a, b, count(*) k from user group by a order by k" @@ -70,6 +71,7 @@ ] } } +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column 'b' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by # scatter aggregate order by references multiple non-group-by expressions "select a, b, count(*) k from user group by a order by b, a, k" @@ -107,6 +109,7 @@ ] } } +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column 'b' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by # scatter aggregate with memory sort and limit "select a, b, count(*) k from user group by a order by k desc limit 10" @@ -148,6 +151,7 @@ ] } } +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column 'b' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by # scatter aggregate with memory sort and order by number "select a, b, count(*) k from user group by a order by 1,3" @@ -185,9 +189,10 @@ ] } } +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column 'b' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by # scatter aggregate with memory sort and order by number, reuse weight_string -# we have to use a meaningless construct to test this. +# we have to use a meaningless construct to test this. TODO: improve to do ordering once for textcol1 "select textcol1 as t, count(*) k from user group by textcol1 order by textcol1, k, textcol1" { "QueryType": "SELECT", @@ -223,6 +228,38 @@ ] } } +{ + "QueryType": "SELECT", + "Original": "select textcol1 as t, count(*) k from user group by textcol1 order by textcol1, k, textcol1", + "Instructions": { + "OperatorType": "Sort", + "Variant": "Memory", + "OrderBy": "(2|3) ASC, 1 ASC, (2|3) ASC", + "ResultColumns": 2, + "Inputs": [ + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(1)", + "GroupBy": "(2|3)", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select textcol1 as t, count(*) as k, textcol1, weight_string(textcol1) from `user` where 1 != 1 group by textcol1", + "OrderBy": "(2|3) ASC", + "Query": "select textcol1 as t, count(*) as k, textcol1, weight_string(textcol1) from `user` group by textcol1 order by textcol1 asc", + "Table": "`user`" + } + ] + } + ] + } +} # order by on a cross-shard subquery "select id from (select user.id, user.col from user join user_extra) as t order by id" diff --git a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt index 54ccb8d1ed6..48b1187d6ce 100644 --- a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt @@ -1709,3 +1709,59 @@ Gen4 error: In aggregated query without GROUP BY, expression of SELECT list cont "Table": "`user`" } } + +# Scatter order by and aggregation: order by column must reference column from select list +"select col, count(*) from user group by col order by c1" +"unsupported: memory sort: order by must reference a column in the select list: c1 asc" +{ + "QueryType": "SELECT", + "Original": "select col, count(*) from user group by col order by c1", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(1)", + "GroupBy": "(0|2)", + "ResultColumns": 2, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select col, count(*), weight_string(col), c1, weight_string(c1) from `user` where 1 != 1 group by col", + "OrderBy": "(3|4) ASC, (0|2) ASC", + "Query": "select col, count(*), weight_string(col), c1, weight_string(c1) from `user` group by col order by c1 asc, col asc", + "Table": "`user`" + } + ] + } +} + +# scatter aggregate with ambiguous aliases +"select distinct a, b as a from user" +"generating order by clause: ambiguous symbol reference: a" +{ + "QueryType": "SELECT", + "Original": "select distinct a, b as a from user", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "GroupBy": "(0|2), (1|4)", + "ResultColumns": 2, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select a, b as a, weight_string(a), b, weight_string(b) from `user` where 1 != 1", + "Query": "select distinct a, b as a, weight_string(a), b, weight_string(b) from `user`", + "Table": "`user`" + } + ] + } +} diff --git a/go/vt/vtgate/planbuilder/testdata/select_cases.txt b/go/vt/vtgate/planbuilder/testdata/select_cases.txt index 7c8e7c41698..8c71190c5b2 100644 --- a/go/vt/vtgate/planbuilder/testdata/select_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/select_cases.txt @@ -1732,7 +1732,6 @@ Gen4 plan same as above "Table": "unsharded" } } -Gen4 plan same as above # Union after into outfile is incorrect "select id from user into outfile 'out_file_name' union all select id from music" diff --git a/go/vt/vtgate/planbuilder/testdata/stream_cases.txt b/go/vt/vtgate/planbuilder/testdata/stream_cases.txt index e2a70f7cb75..90853e9df92 100644 --- a/go/vt/vtgate/planbuilder/testdata/stream_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/stream_cases.txt @@ -13,6 +13,7 @@ "Table": "music" } } +Gen4 plan same as above #vstream table "vstream * from user where pos > 'a4afea21-a320-11eb-a37a-98af65a6dc4a:1-44' limit 1000" @@ -31,3 +32,4 @@ "Table": "user" } } +Gen4 plan same as above diff --git a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt index 5d32673c74b..9e76a20596a 100644 --- a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt @@ -110,17 +110,10 @@ Gen4 error: aggregate functions take a single argument 'count(a, b)' "select count(distinct a), count(distinct b) from user" "unsupported: only one distinct aggregation allowed in a select: count(distinct b)" -# scatter aggregate group by doesn't reference select list -"select id from user group by col" -"unsupported: in scatter query: group by column must reference column in SELECT list" - # scatter aggregate symtab lookup error "select id, b as id, count(*) from user order by id" "ambiguous symbol reference: id" - -# scatter aggregate with ambiguous aliases -"select distinct a, b as a from user" -"generating order by clause: ambiguous symbol reference: a" +Gen4 error: In aggregated query without GROUP BY, expression of SELECT list contains nonaggregated column 'id'; this is incompatible with sql_mode=only_full_group_by # scatter aggregate complex order by "select id from user group by id order by id+1" @@ -130,17 +123,10 @@ Gen4 error: aggregate functions take a single argument 'count(a, b)' "select col, count(*) from user group by col order by col+1" "unsupported: in scatter query: complex order by expression: col + 1" -# Scatter order by and aggregation: order by column must reference column from select list -"select col, count(*) from user group by col order by c1" -"unsupported: memory sort: order by must reference a column in the select list: c1 asc" - -# Aggregates and joins -"select count(*) from user join user_extra" -"unsupported: cross-shard query with aggregates" - # Aggregate detection (group_concat) "select group_concat(user.a) from user join user_extra" "unsupported: cross-shard query with aggregates" +Gen4 error: unsupported: in scatter query: complex aggregate expression # Aggregate detection (group by) "select user.a from user join user_extra group by user.a" @@ -496,4 +482,4 @@ Gen4 plan same as above ] } } -"Mixing of aggregation and non-aggregation columns is not allowed if there is no GROUP BY clause" +Gen4 error: In aggregated query without GROUP BY, expression of SELECT list contains nonaggregated column 'id'; this is incompatible with sql_mode=only_full_group_by From 30bb92d9e0542cfe3eae062b478716b07c1fe29c Mon Sep 17 00:00:00 2001 From: Harshit Gangal Date: Wed, 21 Jul 2021 16:31:29 +0530 Subject: [PATCH 5/9] check and fail query if non-aggregation columns are not in group by expressions Signed-off-by: Harshit Gangal --- .../planbuilder/abstract/queryprojection.go | 2 +- .../planbuilder/testdata/aggr_cases.txt | 41 ++----------------- .../testdata/unsupported_cases.txt | 2 + 3 files changed, 6 insertions(+), 39 deletions(-) diff --git a/go/vt/vtgate/planbuilder/abstract/queryprojection.go b/go/vt/vtgate/planbuilder/abstract/queryprojection.go index d8a0be7b875..35348bf3a0f 100644 --- a/go/vt/vtgate/planbuilder/abstract/queryprojection.go +++ b/go/vt/vtgate/planbuilder/abstract/queryprojection.go @@ -94,7 +94,7 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { qp.GroupByExprs = append(qp.GroupByExprs, GroupBy{Inner: expr, WeightStrExpr: weightStrExpr}) } - if qp.HasAggr { + if qp.HasAggr || len(qp.GroupByExprs) > 0 { expr := qp.getNonAggrExprNotMatchingGroupByExprs() // if we have aggregation functions, non aggregating columns and GROUP BY, // the non-aggregating expressions must all be listed in the GROUP BY list diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt index 0955b4b8223..bfcb3f044ca 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt @@ -97,21 +97,7 @@ Gen4 plan same as above "Table": "`user`" } } -{ - "QueryType": "SELECT", - "Original": "select distinct col1, id from user group by col1", - "Instructions": { - "OperatorType": "Route", - "Variant": "SelectScatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select col1, id from `user` where 1 != 1", - "Query": "select distinct col1, id from `user`", - "Table": "`user`" - } -} +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column 'id' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by # scatter group by a text column "select count(*), a, textcol1, b from user group by a, textcol1, b" @@ -394,29 +380,7 @@ Gen4 plan same as above ] } } -{ - "QueryType": "SELECT", - "Original": "select distinct col1, col2 from user group by col1", - "Instructions": { - "OperatorType": "Aggregate", - "Variant": "Ordered", - "GroupBy": "(0|2), (1|3)", - "ResultColumns": 2, - "Inputs": [ - { - "OperatorType": "Route", - "Variant": "SelectScatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select col1, col2, weight_string(col1), weight_string(col2) from `user` where 1 != 1", - "Query": "select distinct col1, col2, weight_string(col1), weight_string(col2) from `user`", - "Table": "`user`" - } - ] - } -} +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column 'col2' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by # aggregate on RHS subquery (tests symbol table merge) "select user.a, t.b from user join (select count(*) b from unsharded) as t" @@ -1805,6 +1769,7 @@ Gen4 error: In aggregated query without GROUP BY, expression of SELECT list cont # Group by invalid column number (code is duplicated from symab). "select id from user group by 1.1" "column number is not an int" +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column 'id' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by # Group by out of range column number (code is duplicated from symab). "select id from user group by 2" diff --git a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt index 9e76a20596a..f8ef960ded8 100644 --- a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt @@ -91,10 +91,12 @@ Gen4 plan same as above # group by must reference select list "select a from user group by b" "unsupported: in scatter query: group by column must reference column in SELECT list" +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column 'a' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by # complex group by expression "select a from user group by a+1" "unsupported: in scatter query: only simple references allowed" +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column 'a' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by # Complex aggregate expression on scatter "select 1+count(*) from user" From 1e8b3bdd25e3788d1f3c6c50c52f21d992b30584 Mon Sep 17 00:00:00 2001 From: Harshit Gangal Date: Wed, 21 Jul 2021 21:45:33 +0530 Subject: [PATCH 6/9] group by semantic analysis Signed-off-by: Harshit Gangal --- go/vt/vtgate/semantics/analyzer.go | 75 ++++++++++++++----------- go/vt/vtgate/semantics/analyzer_test.go | 36 ++++++++++++ 2 files changed, 78 insertions(+), 33 deletions(-) diff --git a/go/vt/vtgate/semantics/analyzer.go b/go/vt/vtgate/semantics/analyzer.go index 3caada76040..6294392c72f 100644 --- a/go/vt/vtgate/semantics/analyzer.go +++ b/go/vt/vtgate/semantics/analyzer.go @@ -154,39 +154,12 @@ func (a *analyzer) analyzeDown(cursor *sqlparser.Cursor) bool { case sqlparser.OrderBy: a.changeScopeForOrderBy(cursor) case *sqlparser.Order: - l, ok := node.Expr.(*sqlparser.Literal) - if !ok { - break - } - if l.Type != sqlparser.IntVal { - break - } - currScope := a.currentScope() - num, err := strconv.Atoi(l.Val) - if err != nil { - a.err = err - break - } - if num < 1 || num > len(currScope.selectExprs) { - a.err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.BadFieldError, "Unknown column '%d' in 'order clause'", num) - break - } - - expr, ok := currScope.selectExprs[num-1].(*sqlparser.AliasedExpr) - if !ok { - break + a.analyzeOrderByGroupByExprForLiteral(node.Expr, "order clause") + case sqlparser.GroupBy: + a.changeScopeForOrderBy(cursor) + for _, grpExpr := range node { + a.analyzeOrderByGroupByExprForLiteral(grpExpr, "group statement") } - - var deps TableSet - _ = sqlparser.Walk(func(node sqlparser.SQLNode) (kontinue bool, err error) { - expr, ok := node.(sqlparser.Expr) - if ok { - deps = deps.Merge(a.exprDeps[expr]) - } - return true, nil - }, expr.Expr) - - a.exprDeps[node.Expr] = deps case *sqlparser.ColName: t, err := a.resolveColumn(node, current) if err != nil { @@ -214,6 +187,42 @@ func (a *analyzer) analyzeDown(cursor *sqlparser.Cursor) bool { return true } +func (a *analyzer) analyzeOrderByGroupByExprForLiteral(input sqlparser.Expr, caller string) { + l, ok := input.(*sqlparser.Literal) + if !ok { + return + } + if l.Type != sqlparser.IntVal { + return + } + currScope := a.currentScope() + num, err := strconv.Atoi(l.Val) + if err != nil { + a.err = err + return + } + if num < 1 || num > len(currScope.selectExprs) { + a.err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.BadFieldError, "Unknown column '%d' in '%s'", num, caller) + return + } + + expr, ok := currScope.selectExprs[num-1].(*sqlparser.AliasedExpr) + if !ok { + return + } + + var deps TableSet + _ = sqlparser.Walk(func(node sqlparser.SQLNode) (kontinue bool, err error) { + expr, ok := node.(sqlparser.Expr) + if ok { + deps = deps.Merge(a.exprDeps[expr]) + } + return true, nil + }, expr.Expr) + + a.exprDeps[input] = deps +} + func (a *analyzer) changeScopeForOrderBy(cursor *sqlparser.Cursor) { sel, ok := cursor.Parent().(*sqlparser.Select) if !ok { @@ -366,7 +375,7 @@ func (a *analyzer) analyzeUp(cursor *sqlparser.Cursor) bool { if isParentSelect(cursor) { a.popProjection() } - case *sqlparser.Union, *sqlparser.Select, sqlparser.OrderBy: + case *sqlparser.Union, *sqlparser.Select, sqlparser.OrderBy, sqlparser.GroupBy: a.popScope() case sqlparser.TableExpr: if isParentSelect(cursor) { diff --git a/go/vt/vtgate/semantics/analyzer_test.go b/go/vt/vtgate/semantics/analyzer_test.go index 8ad7712a9a8..7ceb99277c4 100644 --- a/go/vt/vtgate/semantics/analyzer_test.go +++ b/go/vt/vtgate/semantics/analyzer_test.go @@ -143,6 +143,42 @@ func TestOrderByBindingSingleTable(t *testing.T) { }) } +func TestGroupByBindingSingleTable(t *testing.T) { + t.Run("positive tests", func(t *testing.T) { + tcases := []struct { + sql string + deps TableSet + }{{ + "select col from tabl group by col", + T1, + }, { + "select col from tabl group by tabl.col", + T1, + }, { + "select col from tabl group by d.tabl.col", + T1, + }, { + "select col from tabl group by 1", + T1, + }, { + "select col as c from tabl group by c", + T1, + }, { + "select 1 as c from tabl group by c", + T0, + }} + for _, tc := range tcases { + t.Run(tc.sql, func(t *testing.T) { + stmt, semTable := parseAndAnalyze(t, tc.sql, "d") + sel, _ := stmt.(*sqlparser.Select) + grp := sel.GroupBy[0] + d := semTable.Dependencies(grp) + require.Equal(t, tc.deps, d, tc.sql) + }) + } + }) +} + func TestBindingSingleAliasedTable(t *testing.T) { t.Run("positive tests", func(t *testing.T) { queries := []string{ From 9b548951c583e346a2423bb4296ca7c54180db6b Mon Sep 17 00:00:00 2001 From: Harshit Gangal Date: Thu, 22 Jul 2021 14:55:39 +0530 Subject: [PATCH 7/9] resolve unqualified columns in analyzer by looking in parent scope if on current scope it is not able to resolve Signed-off-by: Harshit Gangal --- go/vt/vtgate/planbuilder/route_planning.go | 2 +- .../testdata/unsupported_cases.txt | 1 + go/vt/vtgate/semantics/analyzer.go | 6 ++ go/vt/vtgate/semantics/analyzer_test.go | 93 ++++++++++++------- 4 files changed, 67 insertions(+), 35 deletions(-) diff --git a/go/vt/vtgate/planbuilder/route_planning.go b/go/vt/vtgate/planbuilder/route_planning.go index 9270e395050..de775d451b7 100644 --- a/go/vt/vtgate/planbuilder/route_planning.go +++ b/go/vt/vtgate/planbuilder/route_planning.go @@ -189,7 +189,7 @@ type horizonPlanning struct { vtgateGrouping bool } -func (hp horizonPlanning) planHorizon() (logicalPlan, error) { +func (hp *horizonPlanning) planHorizon() (logicalPlan, error) { rb, ok := hp.plan.(*route) if !ok && hp.semTable.ProjectionErr != nil { return nil, hp.semTable.ProjectionErr diff --git a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt index f8ef960ded8..decec3a19ff 100644 --- a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt @@ -137,6 +137,7 @@ Gen4 error: unsupported: in scatter query: complex aggregate expression # group by and ',' joins "select user.id from user, user_extra group by id" "unsupported: cross-shard query with aggregates" +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column '`user`.id' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by # if subquery scatter and ordering, then we don't allow outer constructs to be pushed down. "select count(*) from (select col, user_extra.extra from user join user_extra on user.id = user_extra.user_id order by user_extra.extra) a" diff --git a/go/vt/vtgate/semantics/analyzer.go b/go/vt/vtgate/semantics/analyzer.go index 6294392c72f..86e1d553b05 100644 --- a/go/vt/vtgate/semantics/analyzer.go +++ b/go/vt/vtgate/semantics/analyzer.go @@ -287,6 +287,8 @@ func (a *analyzer) depsForExpr(expr sqlparser.Expr) TableSet { // resolveUnQualifiedColumn func (a *analyzer) resolveUnQualifiedColumn(current *scope, expr *sqlparser.ColName) (TableSet, error) { var tsp *TableSet + +tryAgain: for _, tbl := range current.tables { ts := tbl.DepsFor(expr, a, len(current.tables) == 1) if ts != nil && tsp != nil { @@ -296,6 +298,10 @@ func (a *analyzer) resolveUnQualifiedColumn(current *scope, expr *sqlparser.ColN tsp = ts } } + if tsp == nil && current.parent != nil { + current = current.parent + goto tryAgain + } if tsp == nil { return 0, nil } diff --git a/go/vt/vtgate/semantics/analyzer_test.go b/go/vt/vtgate/semantics/analyzer_test.go index 7ceb99277c4..efee37c6bce 100644 --- a/go/vt/vtgate/semantics/analyzer_test.go +++ b/go/vt/vtgate/semantics/analyzer_test.go @@ -144,39 +144,49 @@ func TestOrderByBindingSingleTable(t *testing.T) { } func TestGroupByBindingSingleTable(t *testing.T) { - t.Run("positive tests", func(t *testing.T) { - tcases := []struct { - sql string - deps TableSet - }{{ - "select col from tabl group by col", - T1, - }, { - "select col from tabl group by tabl.col", - T1, - }, { - "select col from tabl group by d.tabl.col", - T1, - }, { - "select col from tabl group by 1", - T1, - }, { - "select col as c from tabl group by c", - T1, - }, { - "select 1 as c from tabl group by c", - T0, - }} - for _, tc := range tcases { - t.Run(tc.sql, func(t *testing.T) { - stmt, semTable := parseAndAnalyze(t, tc.sql, "d") - sel, _ := stmt.(*sqlparser.Select) - grp := sel.GroupBy[0] - d := semTable.Dependencies(grp) - require.Equal(t, tc.deps, d, tc.sql) - }) - } - }) + tcases := []struct { + sql string + deps TableSet + }{{ + "select col from tabl group by col", + T1, + }, { + "select col from tabl group by tabl.col", + T1, + }, { + "select col from tabl group by d.tabl.col", + T1, + }, { + "select tabl.col as x from tabl group by x", + T1, + }, { + "select tabl.col as x from tabl group by col", + T1, + }, { + "select col from tabl group by 1", + T1, + }, { + "select col as c from tabl group by c", + T1, + }, { + "select 1 as c from tabl group by c", + T0, + }, { + "select t1.id from t1, t2 group by id", + T1, + }, { + "select t.id from t, t1 group by id", + T2, + }} + for _, tc := range tcases { + t.Run(tc.sql, func(t *testing.T) { + stmt, semTable := parseAndAnalyze(t, tc.sql, "d") + sel, _ := stmt.(*sqlparser.Select) + grp := sel.GroupBy[0] + d := semTable.Dependencies(grp) + require.Equal(t, tc.deps, d, tc.sql) + }) + } } func TestBindingSingleAliasedTable(t *testing.T) { @@ -520,9 +530,24 @@ func parseAndAnalyze(t *testing.T, query, dbName string) (sqlparser.Statement, * t.Helper() parse, err := sqlparser.Parse(query) require.NoError(t, err) + + cols1 := []vindexes.Column{{ + Name: sqlparser.NewColIdent("id"), + Type: querypb.Type_INT64, + }} + cols2 := []vindexes.Column{{ + Name: sqlparser.NewColIdent("uid"), + Type: querypb.Type_INT64, + }, { + Name: sqlparser.NewColIdent("name"), + Type: querypb.Type_VARCHAR, + }} + semTable, err := Analyze(parse, dbName, &FakeSI{ Tables: map[string]*vindexes.Table{ - "t": {Name: sqlparser.NewTableIdent("t")}, + "t": {Name: sqlparser.NewTableIdent("t")}, + "t1": {Name: sqlparser.NewTableIdent("t1"), Columns: cols1, ColumnListAuthoritative: true}, + "t2": {Name: sqlparser.NewTableIdent("t2"), Columns: cols2, ColumnListAuthoritative: true}, }, }) require.NoError(t, err) From cbad38860265a0803b856635e0fcd74e6ff39943 Mon Sep 17 00:00:00 2001 From: Harshit Gangal Date: Thu, 22 Jul 2021 17:12:54 +0530 Subject: [PATCH 8/9] support grouping on top of join plan Signed-off-by: Harshit Gangal --- go/vt/vtgate/planbuilder/horizon_planning.go | 11 +- go/vt/vtgate/planbuilder/route_planning.go | 4 +- .../planbuilder/testdata/aggr_cases.txt | 143 ++++++++++++++++++ .../testdata/unsupported_cases.txt | 4 - 4 files changed, 154 insertions(+), 8 deletions(-) diff --git a/go/vt/vtgate/planbuilder/horizon_planning.go b/go/vt/vtgate/planbuilder/horizon_planning.go index c33721e4d0b..ffbf40ea4b7 100644 --- a/go/vt/vtgate/planbuilder/horizon_planning.go +++ b/go/vt/vtgate/planbuilder/horizon_planning.go @@ -128,7 +128,9 @@ func (hp *horizonPlanning) haveToTruncate(v bool) { func (hp *horizonPlanning) planAggregations() error { newPlan := hp.plan var oa *orderedAggregate - if !hasUniqueVindex(hp.vschema, hp.semTable, hp.qp.GroupByExprs) { + uniqVindex := hasUniqueVindex(hp.vschema, hp.semTable, hp.qp.GroupByExprs) + _, joinPlan := hp.plan.(*joinGen4) + if !uniqVindex || joinPlan { eaggr := &engine.OrderedAggregate{} oa = &orderedAggregate{ resultsBuilder: resultsBuilder{ @@ -219,6 +221,9 @@ func planGroupByGen4(groupExpr abstract.GroupBy, plan logicalPlan, semTable *sem sel := node.Select.(*sqlparser.Select) sel.GroupBy = append(sel.GroupBy, groupExpr.Inner) return false, nil + case *joinGen4: + _, _, added, err := wrapAndPushExpr(groupExpr.Inner, groupExpr.WeightStrExpr, node, semTable) + return added, err case *orderedAggregate: keyCol, weightStringOffset, colAdded, err := wrapAndPushExpr(groupExpr.Inner, groupExpr.WeightStrExpr, node.input, semTable) if err != nil { @@ -315,6 +320,8 @@ func planOrderByForRoute(orderExprs []abstract.OrderBy, plan *route, semTable *s return plan, origColCount != plan.Select.GetColumnCount(), nil } +// wrapAndPushExpr pushes the expression and weighted_string function to the plan using semantics.SemTable +// It returns (expr offset, weight_string offset, new_column added, error) func wrapAndPushExpr(expr sqlparser.Expr, weightStrExpr sqlparser.Expr, plan logicalPlan, semTable *semantics.SemTable) (int, int, bool, error) { offset, added, err := pushProjection(&sqlparser.AliasedExpr{Expr: expr}, plan, semTable, true, true) if err != nil { @@ -370,8 +377,6 @@ func (hp *horizonPlanning) planOrderByForJoin(orderExprs []abstract.OrderBy, pla return nil, err } plan.Left = newLeft - hp.needsTruncation = false // since this is a join, we can safely - // add extra columns and not need to truncate them return plan, nil } sortPlan, err := hp.createMemorySortPlan(plan, orderExprs) diff --git a/go/vt/vtgate/planbuilder/route_planning.go b/go/vt/vtgate/planbuilder/route_planning.go index de775d451b7..74a618f267f 100644 --- a/go/vt/vtgate/planbuilder/route_planning.go +++ b/go/vt/vtgate/planbuilder/route_planning.go @@ -251,7 +251,7 @@ func (hp *horizonPlanning) planHorizon() (logicalPlan, error) { return hp.plan, nil } -func (hp horizonPlanning) truncateColumnsIfNeeded() error { +func (hp *horizonPlanning) truncateColumnsIfNeeded() error { if !hp.needsTruncation { return nil } @@ -259,6 +259,8 @@ func (hp horizonPlanning) truncateColumnsIfNeeded() error { switch p := hp.plan.(type) { case *route: p.eroute.SetTruncateColumnCount(hp.sel.GetColumnCount()) + case *joinGen4: + // since this is a join, we can safely add extra columns and not need to truncate them case *orderedAggregate: p.eaggr.SetTruncateColumnCount(hp.sel.GetColumnCount()) case *memorySort: diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt index bfcb3f044ca..544c0bfaa5e 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt @@ -1927,3 +1927,146 @@ Gen4 plan same as above ] } } + +# Grouping on join +"select user.a from user join user_extra group by user.a" +"unsupported: cross-shard query with aggregates" +{ + "QueryType": "SELECT", + "Original": "select user.a from user join user_extra group by user.a", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "GroupBy": "(0|1)", + "ResultColumns": 1, + "Inputs": [ + { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "-1,-2", + "TableName": "`user`_user_extra", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select `user`.a, weight_string(`user`.a) from `user` where 1 != 1", + "OrderBy": "(0|1) ASC", + "Query": "select `user`.a, weight_string(`user`.a) from `user` order by `user`.a asc", + "Table": "`user`" + }, + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select 1 from user_extra where 1 != 1", + "Query": "select 1 from user_extra", + "Table": "user_extra" + } + ] + } + ] + } +} + +# Aggregate on join +"select user.a, count(*) from user join user_extra group by user.a" +"unsupported: cross-shard query with aggregates" +{ + "QueryType": "SELECT", + "Original": "select user.a, count(*) from user join user_extra group by user.a", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(1)", + "GroupBy": "(0|2)", + "ResultColumns": 2, + "Inputs": [ + { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "-1,-2,-3", + "TableName": "`user`_user_extra", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select `user`.a, count(*), weight_string(`user`.a) from `user` where 1 != 1", + "OrderBy": "(0|2) ASC", + "Query": "select `user`.a, count(*), weight_string(`user`.a) from `user` order by `user`.a asc", + "Table": "`user`" + }, + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select 1 from user_extra where 1 != 1", + "Query": "select 1 from user_extra", + "Table": "user_extra" + } + ] + } + ] + } +} + +# Aggregate on other table in join +"select user.a, count(user_extra.a) from user join user_extra group by user.a" +"unsupported: cross-shard query with aggregates" +{ + "QueryType": "SELECT", + "Original": "select user.a, count(user_extra.a) from user join user_extra group by user.a", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(1)", + "GroupBy": "(0|2)", + "ResultColumns": 2, + "Inputs": [ + { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "-1,1,-2", + "TableName": "`user`_user_extra", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select `user`.a, weight_string(`user`.a) from `user` where 1 != 1", + "OrderBy": "(0|1) ASC", + "Query": "select `user`.a, weight_string(`user`.a) from `user` order by `user`.a asc", + "Table": "`user`" + }, + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select count(user_extra.a) from user_extra where 1 != 1", + "Query": "select count(user_extra.a) from user_extra", + "Table": "user_extra" + } + ] + } + ] + } +} diff --git a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt index decec3a19ff..2586b87cd0e 100644 --- a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt @@ -130,10 +130,6 @@ Gen4 error: In aggregated query without GROUP BY, expression of SELECT list cont "unsupported: cross-shard query with aggregates" Gen4 error: unsupported: in scatter query: complex aggregate expression -# Aggregate detection (group by) -"select user.a from user join user_extra group by user.a" -"unsupported: cross-shard query with aggregates" - # group by and ',' joins "select user.id from user, user_extra group by id" "unsupported: cross-shard query with aggregates" From e4c82d38cd769b82e70635cdac3e772f44e30fbc Mon Sep 17 00:00:00 2001 From: Harshit Gangal Date: Thu, 22 Jul 2021 18:24:19 +0530 Subject: [PATCH 9/9] scatter query with aggregation to check order by columns are in group by Signed-off-by: Harshit Gangal --- .../planbuilder/abstract/queryprojection.go | 41 +++++++++++++------ go/vt/vtgate/planbuilder/horizon_planning.go | 6 +-- .../testdata/postprocess_cases.txt | 28 +------------ go/vt/vtgate/semantics/analyzer.go | 2 +- 4 files changed, 35 insertions(+), 42 deletions(-) diff --git a/go/vt/vtgate/planbuilder/abstract/queryprojection.go b/go/vt/vtgate/planbuilder/abstract/queryprojection.go index 35348bf3a0f..412f4fad69e 100644 --- a/go/vt/vtgate/planbuilder/abstract/queryprojection.go +++ b/go/vt/vtgate/planbuilder/abstract/queryprojection.go @@ -94,18 +94,6 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { qp.GroupByExprs = append(qp.GroupByExprs, GroupBy{Inner: expr, WeightStrExpr: weightStrExpr}) } - if qp.HasAggr || len(qp.GroupByExprs) > 0 { - expr := qp.getNonAggrExprNotMatchingGroupByExprs() - // if we have aggregation functions, non aggregating columns and GROUP BY, - // the non-aggregating expressions must all be listed in the GROUP BY list - if expr != nil { - if len(qp.GroupByExprs) > 0 { - return nil, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.WrongFieldWithGroup, "Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column '%s' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by", sqlparser.String(expr)) - } - return nil, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.MixOfGroupFuncAndFields, "In aggregated query without GROUP BY, expression of SELECT list contains nonaggregated column '%s'; this is incompatible with sql_mode=only_full_group_by", sqlparser.String(expr)) - } - } - canPushDownSorting := true for _, order := range sel.OrderBy { expr, weightStrExpr, err := qp.getSimplifiedExpr(order.Expr, "order clause") @@ -121,10 +109,24 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { }) canPushDownSorting = canPushDownSorting && !sqlparser.ContainsAggregation(weightStrExpr) } + + if qp.HasAggr || len(qp.GroupByExprs) > 0 { + expr := qp.getNonAggrExprNotMatchingGroupByExprs() + // if we have aggregation functions, non aggregating columns and GROUP BY, + // the non-aggregating expressions must all be listed in the GROUP BY list + if expr != nil { + if len(qp.GroupByExprs) > 0 { + return nil, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.WrongFieldWithGroup, "Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column '%s' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by", sqlparser.String(expr)) + } + return nil, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.MixOfGroupFuncAndFields, "In aggregated query without GROUP BY, expression of SELECT list contains nonaggregated column '%s'; this is incompatible with sql_mode=only_full_group_by", sqlparser.String(expr)) + } + } + if qp.Distinct && !qp.HasAggr { qp.GroupByExprs = nil } qp.CanPushDownSorting = canPushDownSorting + return qp, nil } @@ -159,6 +161,21 @@ func (qp *QueryProjection) getNonAggrExprNotMatchingGroupByExprs() sqlparser.Exp return expr.Col.Expr } } + for _, order := range qp.OrderExprs { + if sqlparser.IsAggregation(order.WeightStrExpr) { + continue + } + isGroupByOk := false + for _, groupByExpr := range qp.GroupByExprs { + if sqlparser.EqualsExpr(groupByExpr.WeightStrExpr, order.WeightStrExpr) { + isGroupByOk = true + break + } + } + if !isGroupByOk { + return order.Inner.Expr + } + } return nil } diff --git a/go/vt/vtgate/planbuilder/horizon_planning.go b/go/vt/vtgate/planbuilder/horizon_planning.go index ffbf40ea4b7..1b0e5dddf3a 100644 --- a/go/vt/vtgate/planbuilder/horizon_planning.go +++ b/go/vt/vtgate/planbuilder/horizon_planning.go @@ -473,11 +473,11 @@ func (hp *horizonPlanning) planDistinct() error { // and then we might also add a distinct operator on top if it is needed p.Select.MakeDistinct() if !p.isSingleShard() && !selectHasUniqueVindex(hp.vschema, hp.semTable, hp.qp.SelectExprs) { - return hp.pushDistinct() + return hp.addDistinct() } return nil case *joinGen4: - return hp.pushDistinct() + return hp.addDistinct() case *orderedAggregate: return hp.planDistinctOA(p) default: @@ -522,7 +522,7 @@ func (hp *horizonPlanning) planDistinctOA(currPlan *orderedAggregate) error { return nil } -func (hp *horizonPlanning) pushDistinct() error { +func (hp *horizonPlanning) addDistinct() error { eaggr := &engine.OrderedAggregate{} oa := &orderedAggregate{ resultsBuilder: resultsBuilder{ diff --git a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt index 48b1187d6ce..3f081a470f3 100644 --- a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt @@ -1002,7 +1002,7 @@ Gen4 plan same as above # Order by, invalid column number "select col from user order by 18446744073709551616" "error parsing column number: 18446744073709551616" -Gen4 error: strconv.Atoi: parsing "18446744073709551616": value out of range +Gen4 plan same as above # Order by, out of range column number "select col from user order by 2" @@ -1713,31 +1713,7 @@ Gen4 error: In aggregated query without GROUP BY, expression of SELECT list cont # Scatter order by and aggregation: order by column must reference column from select list "select col, count(*) from user group by col order by c1" "unsupported: memory sort: order by must reference a column in the select list: c1 asc" -{ - "QueryType": "SELECT", - "Original": "select col, count(*) from user group by col order by c1", - "Instructions": { - "OperatorType": "Aggregate", - "Variant": "Ordered", - "Aggregates": "count(1)", - "GroupBy": "(0|2)", - "ResultColumns": 2, - "Inputs": [ - { - "OperatorType": "Route", - "Variant": "SelectScatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select col, count(*), weight_string(col), c1, weight_string(c1) from `user` where 1 != 1 group by col", - "OrderBy": "(3|4) ASC, (0|2) ASC", - "Query": "select col, count(*), weight_string(col), c1, weight_string(c1) from `user` group by col order by c1 asc, col asc", - "Table": "`user`" - } - ] - } -} +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column 'c1' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by # scatter aggregate with ambiguous aliases "select distinct a, b as a from user" diff --git a/go/vt/vtgate/semantics/analyzer.go b/go/vt/vtgate/semantics/analyzer.go index 86e1d553b05..2defc115955 100644 --- a/go/vt/vtgate/semantics/analyzer.go +++ b/go/vt/vtgate/semantics/analyzer.go @@ -198,7 +198,7 @@ func (a *analyzer) analyzeOrderByGroupByExprForLiteral(input sqlparser.Expr, cal currScope := a.currentScope() num, err := strconv.Atoi(l.Val) if err != nil { - a.err = err + a.err = vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "error parsing column number: %s", l.Val) return } if num < 1 || num > len(currScope.selectExprs) {