From 1d4b9991505aa28226bd9e808c8242372775a0af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20Taylor?= Date: Mon, 15 Jul 2024 15:30:56 +0200 Subject: [PATCH] fix issue with aggregation inside of derived tables (#16366) Signed-off-by: Andres Taylor --- .../queries/aggregation/aggregation_test.go | 2 + .../planbuilder/operators/SQL_builder.go | 7 ++ .../operators/horizon_expanding.go | 28 ++++- .../planbuilder/operators/queryprojection.go | 5 +- .../planbuilder/testdata/aggr_cases.json | 101 +++++++++++++++++- .../planbuilder/testdata/cte_cases.json | 2 +- 6 files changed, 137 insertions(+), 8 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go b/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go index 7a08dffda19..83840a78516 100644 --- a/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go +++ b/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go @@ -432,12 +432,14 @@ func TestOrderByCount(t *testing.T) { defer closer() mcmp.Exec("insert into t9(id1, id2, id3) values(1, '1', '1'), (2, '2', '2'), (3, '2', '2'), (4, '3', '3'), (5, '3', '3'), (6, '3', '3')") + mcmp.Exec("insert into t1(t1_id, `name`, `value`, shardkey) values(1,'a1','foo',100), (2,'b1','foo',200), (3,'c1','foo',300), (4,'a1','foo',100), (5,'b1','bar',200)") mcmp.Exec("SELECT t9.id2 FROM t9 GROUP BY t9.id2 ORDER BY COUNT(t9.id2) DESC") version, err := cluster.GetMajorVersion("vtgate") require.NoError(t, err) if version == 19 { mcmp.Exec("select COUNT(*) from (select 1 as one FROM t9 WHERE id3 = 3 ORDER BY id1 DESC LIMIT 3 OFFSET 0) subquery_for_count") + mcmp.Exec("select t.id1, t1.name, t.leCount from (select id1, count(*) as leCount from t9 group by 1 order by 2 desc limit 20) t join t1 on t.id1 = t1.t1_id") } } diff --git a/go/vt/vtgate/planbuilder/operators/SQL_builder.go b/go/vt/vtgate/planbuilder/operators/SQL_builder.go index 6339803af31..c58d6d2b002 100644 --- a/go/vt/vtgate/planbuilder/operators/SQL_builder.go +++ b/go/vt/vtgate/planbuilder/operators/SQL_builder.go @@ -465,6 +465,13 @@ func buildAggregation(op *Aggregator, qb *queryBuilder) { qb.addGroupBy(weightStringFor(simplified)) } } + if op.DT != nil { + sel := qb.asSelectStatement() + qb.stmt = nil + qb.addTableExpr(op.DT.Alias, op.DT.Alias, TableID(op), &sqlparser.DerivedTable{ + Select: sel, + }, nil, op.DT.Columns) + } } func buildOrdering(op *Ordering, qb *queryBuilder) { diff --git a/go/vt/vtgate/planbuilder/operators/horizon_expanding.go b/go/vt/vtgate/planbuilder/operators/horizon_expanding.go index 0a95cd5e5e7..c004739ee0e 100644 --- a/go/vt/vtgate/planbuilder/operators/horizon_expanding.go +++ b/go/vt/vtgate/planbuilder/operators/horizon_expanding.go @@ -81,7 +81,7 @@ func expandSelectHorizon(ctx *plancontext.PlanningContext, horizon *Horizon, sel // if we are dealing with a derived table, we need to make sure that the ordering columns // are available outside the derived table for _, order := range horizon.Query.GetOrderBy() { - qp.addColumn(ctx, order.Expr) + qp.addDerivedColumn(ctx, order.Expr) } } op := createProjectionFromSelect(ctx, horizon) @@ -106,7 +106,7 @@ func expandSelectHorizon(ctx *plancontext.PlanningContext, horizon *Horizon, sel } if len(qp.OrderExprs) > 0 { - op = expandOrderBy(ctx, op, qp) + op = expandOrderBy(ctx, op, qp, horizon.Alias) extracted = append(extracted, "Ordering") } @@ -121,7 +121,7 @@ func expandSelectHorizon(ctx *plancontext.PlanningContext, horizon *Horizon, sel return op, Rewrote(fmt.Sprintf("expand SELECT horizon into (%s)", strings.Join(extracted, ", "))) } -func expandOrderBy(ctx *plancontext.PlanningContext, op Operator, qp *QueryProjection) Operator { +func expandOrderBy(ctx *plancontext.PlanningContext, op Operator, qp *QueryProjection, derived string) Operator { var newOrder []OrderBy sqc := &SubQueryBuilder{} proj, ok := op.(*Projection) @@ -131,6 +131,9 @@ func expandOrderBy(ctx *plancontext.PlanningContext, op Operator, qp *QueryProje newExpr, subqs := sqc.pullOutValueSubqueries(ctx, expr.SimplifiedExpr, TableID(op), false) if newExpr == nil { // If no subqueries are found, retain the original order expression + if derived != "" { + expr = exposeOrderingColumn(ctx, qp, expr, derived) + } newOrder = append(newOrder, expr) continue } @@ -164,6 +167,25 @@ func expandOrderBy(ctx *plancontext.PlanningContext, op Operator, qp *QueryProje } } +// exposeOrderingColumn will expose the ordering column to the outer query +func exposeOrderingColumn(ctx *plancontext.PlanningContext, qp *QueryProjection, orderBy OrderBy, derived string) OrderBy { + for _, se := range qp.SelectExprs { + aliasedExpr, err := se.GetAliasedExpr() + if err != nil { + panic(vterrors.VT13001("unexpected expression in select")) + } + if ctx.SemTable.EqualsExprWithDeps(aliasedExpr.Expr, orderBy.SimplifiedExpr) { + newExpr := sqlparser.NewColNameWithQualifier(aliasedExpr.ColumnName(), sqlparser.NewTableName(derived)) + ctx.SemTable.CopySemanticInfo(orderBy.SimplifiedExpr, newExpr) + orderBy.SimplifiedExpr = newExpr + orderBy.Inner = &sqlparser.Order{Expr: newExpr, Direction: orderBy.Inner.Direction} + break + } + } + + return orderBy +} + func createProjectionFromSelect(ctx *plancontext.PlanningContext, horizon *Horizon) Operator { qp := horizon.getQP(ctx) diff --git a/go/vt/vtgate/planbuilder/operators/queryprojection.go b/go/vt/vtgate/planbuilder/operators/queryprojection.go index 3bf22364cbd..e2d9adb315d 100644 --- a/go/vt/vtgate/planbuilder/operators/queryprojection.go +++ b/go/vt/vtgate/planbuilder/operators/queryprojection.go @@ -743,8 +743,9 @@ func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningCont return true } -// addColumn adds a column to the QueryProjection if it is not already present -func (qp *QueryProjection) addColumn(ctx *plancontext.PlanningContext, expr sqlparser.Expr) { +// addColumn adds a column to the QueryProjection if it is not already present. +// It will use a column name that is available on the outside of the derived table +func (qp *QueryProjection) addDerivedColumn(ctx *plancontext.PlanningContext, expr sqlparser.Expr) { for _, selectExpr := range qp.SelectExprs { getExpr, err := selectExpr.GetExpr() if err != nil { diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json index 961f81769a8..06906278a00 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json @@ -716,6 +716,58 @@ ] } }, + { + "comment": "Aggregation with derived table", + "query": "select u.id, u.name, t.num_segments from (select id, count(*) as num_segments from user group by 1 order by 2 desc limit 20) t join unsharded u on u.id = t.id", + "plan": { + "QueryType": "SELECT", + "Original": "select u.id, u.name, t.num_segments from (select id, count(*) as num_segments from user group by 1 order by 2 desc limit 20) t join unsharded u on u.id = t.id", + "Instructions": { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "R:0,R:1,L:0", + "JoinVars": { + "t_id": 1 + }, + "TableName": "`user`_unsharded", + "Inputs": [ + { + "OperatorType": "Limit", + "Count": "20", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select t.num_segments, t.id from (select id, count(*) as num_segments from `user` where 1 != 1 group by id) as t where 1 != 1", + "OrderBy": "0 DESC", + "Query": "select t.num_segments, t.id from (select id, count(*) as num_segments from `user` group by id) as t order by t.num_segments desc limit :__upper_limit", + "Table": "`user`" + } + ] + }, + { + "OperatorType": "Route", + "Variant": "Unsharded", + "Keyspace": { + "Name": "main", + "Sharded": false + }, + "FieldQuery": "select u.id, u.`name` from unsharded as u where 1 != 1", + "Query": "select u.id, u.`name` from unsharded as u where u.id = :t_id", + "Table": "unsharded" + } + ] + }, + "TablesUsed": [ + "main.unsharded", + "user.user" + ] + } + }, { "comment": "scatter aggregate multiple group by (numbers)", "query": "select a, b, count(*) from user group by 2, 1", @@ -3535,7 +3587,7 @@ }, "FieldQuery": "select x.id, x.val1, 1, weight_string(x.val1) from (select id, val1 from `user` where 1 != 1) as x where 1 != 1", "OrderBy": "(1|3) ASC", - "Query": "select x.id, x.val1, 1, weight_string(x.val1) from (select id, val1 from `user` where val2 < 4) as x order by `user`.val1 asc limit :__upper_limit", + "Query": "select x.id, x.val1, 1, weight_string(x.val1) from (select id, val1 from `user` where val2 < 4) as x order by x.val1 asc limit :__upper_limit", "Table": "`user`" } ] @@ -6727,7 +6779,7 @@ }, "FieldQuery": "select subquery_for_count.one, subquery_for_count.id, 1, weight_string(subquery_for_count.id) from (select 1 as one, id from `user` where 1 != 1) as subquery_for_count where 1 != 1", "OrderBy": "(1|3) DESC", - "Query": "select subquery_for_count.one, subquery_for_count.id, 1, weight_string(subquery_for_count.id) from (select 1 as one, id from `user` where `user`.is_not_deleted = true) as subquery_for_count order by id desc limit :__upper_limit", + "Query": "select subquery_for_count.one, subquery_for_count.id, 1, weight_string(subquery_for_count.id) from (select 1 as one, id from `user` where `user`.is_not_deleted = true) as subquery_for_count order by subquery_for_count.id desc limit :__upper_limit", "Table": "`user`" } ] @@ -7003,5 +7055,50 @@ "comment": "baz in the HAVING clause can't be accessed because of the GROUP BY", "query": "select foo, count(bar) as x from user group by foo having baz > avg(baz) order by x", "plan": "Unknown column 'baz' in 'having clause'" + }, + { + "comment": "Aggregation over a ORDER BY/LIMIT inside a derived table", + "query": "SELECT COUNT(*) FROM (SELECT 1 AS one FROM `user` WHERE `user`.`is_not_deleted` = true ORDER BY id DESC LIMIT 25 OFFSET 0) subquery_for_count", + "plan": { + "QueryType": "SELECT", + "Original": "SELECT COUNT(*) FROM (SELECT 1 AS one FROM `user` WHERE `user`.`is_not_deleted` = true ORDER BY id DESC LIMIT 25 OFFSET 0) subquery_for_count", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Scalar", + "Aggregates": "count_star(0) AS count(*)", + "Inputs": [ + { + "OperatorType": "SimpleProjection", + "Columns": [ + 2 + ], + "Inputs": [ + { + "OperatorType": "Limit", + "Count": "25", + "Offset": "0", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select subquery_for_count.one, subquery_for_count.id, 1, weight_string(subquery_for_count.id) from (select 1 as one, id from `user` where 1 != 1) as subquery_for_count where 1 != 1", + "OrderBy": "(1|3) DESC", + "Query": "select subquery_for_count.one, subquery_for_count.id, 1, weight_string(subquery_for_count.id) from (select 1 as one, id from `user` where `user`.is_not_deleted = true) as subquery_for_count order by subquery_for_count.id desc limit :__upper_limit", + "Table": "`user`" + } + ] + } + ] + } + ] + }, + "TablesUsed": [ + "user.user" + ] + } } ] diff --git a/go/vt/vtgate/planbuilder/testdata/cte_cases.json b/go/vt/vtgate/planbuilder/testdata/cte_cases.json index 0d7d9020ac2..51b130d25cc 100644 --- a/go/vt/vtgate/planbuilder/testdata/cte_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/cte_cases.json @@ -348,7 +348,7 @@ }, "FieldQuery": "select x.id, x.val1, 1, weight_string(x.val1) from (select id, val1 from `user` where 1 != 1) as x where 1 != 1", "OrderBy": "(1|3) ASC", - "Query": "select x.id, x.val1, 1, weight_string(x.val1) from (select id, val1 from `user` where val2 < 4) as x order by `user`.val1 asc limit :__upper_limit", + "Query": "select x.id, x.val1, 1, weight_string(x.val1) from (select id, val1 from `user` where val2 < 4) as x order by x.val1 asc limit :__upper_limit", "Table": "`user`" } ]