Skip to content

Commit

Permalink
[SPARK-48307][SQL][FOLLOWUP] Allow outer references in un-referenced …
Browse files Browse the repository at this point in the history
…CTE relations

### What changes were proposed in this pull request?

This is a followup of #46617 .  Subquery expression has a bunch of correlation checks which need to match certain plan shapes. We broke this by leaving `WithCTE` in the plan for un-referenced CTE relations. This PR fixes the issue by skipping CTE plan nodes in correlated subquery expression checks.

### Why are the changes needed?

bug fix
### Does this PR introduce _any_ user-facing change?

no bug is not released yet.

### How was this patch tested?

new tests

### Was this patch authored or co-authored using generative AI tooling?

no

Closes #46869 from cloud-fan/check.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
cloud-fan committed Jun 5, 2024
1 parent 490a4b3 commit d5c33c6
Show file tree
Hide file tree
Showing 9 changed files with 181 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1371,6 +1371,13 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
aggregated,
canContainOuter && SQLConf.get.getConf(SQLConf.DECORRELATE_OFFSET_ENABLED))

// We always inline CTE relations before analysis check, and only un-referenced CTE
// relations will be kept in the plan. Here we should simply skip them and check the
// children, as un-referenced CTE relations won't be executed anyway and doesn't need to
// be restricted by the current subquery correlation limitations.
case _: WithCTE | _: CTERelationDef =>
plan.children.foreach(p => checkPlan(p, aggregated, canContainOuter))

// Category 4: Any other operators not in the above 3 categories
// cannot be on a correlation path, that is they are allowed only
// under a correlation point but they and their descendant operators
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,10 @@ case class WithCTE(plan: LogicalPlan, cteDefs: Seq[CTERelationDef]) extends Logi
def withNewPlan(newPlan: LogicalPlan): WithCTE = {
withNewChildren(children.init :+ newPlan).asInstanceOf[WithCTE]
}

override def maxRows: Option[Long] = plan.maxRows

override def maxRowsPerPartition: Option[Long] = plan.maxRowsPerPartition
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,30 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x]
+- OneRowRelation


-- !query
SELECT (
WITH unreferenced AS (SELECT id)
SELECT 1
) FROM range(1)
-- !query analysis
Project [scalar-subquery#x [] AS scalarsubquery()#x]
: +- Project [1 AS 1#x]
: +- OneRowRelation
+- Range (0, 1, step=1)


-- !query
SELECT (
WITH unreferenced AS (SELECT 1)
SELECT id
) FROM range(1)
-- !query analysis
Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#xL]
: +- Project [outer(id#xL)]
: +- OneRowRelation
+- Range (0, 1, step=1)


-- !query
SELECT * FROM
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,40 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x]
+- OneRowRelation


-- !query
SELECT (
WITH unreferenced AS (SELECT id)
SELECT 1
) FROM range(1)
-- !query analysis
Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#x]
: +- WithCTE
: :- CTERelationDef xxxx, false
: : +- SubqueryAlias unreferenced
: : +- Project [outer(id#xL)]
: : +- OneRowRelation
: +- Project [1 AS 1#x]
: +- OneRowRelation
+- Range (0, 1, step=1)


-- !query
SELECT (
WITH unreferenced AS (SELECT 1)
SELECT id
) FROM range(1)
-- !query analysis
Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#xL]
: +- WithCTE
: :- CTERelationDef xxxx, false
: : +- SubqueryAlias unreferenced
: : +- Project [1 AS 1#x]
: : +- OneRowRelation
: +- Project [outer(id#xL)]
: +- OneRowRelation
+- Range (0, 1, step=1)


-- !query
SELECT * FROM
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,40 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x]
+- OneRowRelation


-- !query
SELECT (
WITH unreferenced AS (SELECT id)
SELECT 1
) FROM range(1)
-- !query analysis
Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#x]
: +- WithCTE
: :- CTERelationDef xxxx, false
: : +- SubqueryAlias unreferenced
: : +- Project [outer(id#xL)]
: : +- OneRowRelation
: +- Project [1 AS 1#x]
: +- OneRowRelation
+- Range (0, 1, step=1)


-- !query
SELECT (
WITH unreferenced AS (SELECT 1)
SELECT id
) FROM range(1)
-- !query analysis
Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#xL]
: +- WithCTE
: :- CTERelationDef xxxx, false
: : +- SubqueryAlias unreferenced
: : +- Project [1 AS 1#x]
: : +- OneRowRelation
: +- Project [outer(id#xL)]
: +- OneRowRelation
+- Range (0, 1, step=1)


-- !query
SELECT * FROM
(
Expand Down
12 changes: 12 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ SELECT (
SELECT * FROM t
);

-- un-referenced CTE in subquery expression: outer reference in CTE relation
SELECT (
WITH unreferenced AS (SELECT id)
SELECT 1
) FROM range(1);

-- un-referenced CTE in subquery expression: outer reference in CTE main query
SELECT (
WITH unreferenced AS (SELECT 1)
SELECT id
) FROM range(1);

-- Make sure CTE in subquery is scoped to that subquery rather than global
-- the 2nd half of the union should fail because the cte is scoped to the first half
SELECT * FROM
Expand Down
22 changes: 22 additions & 0 deletions sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,28 @@ struct<scalarsubquery():int>
1


-- !query
SELECT (
WITH unreferenced AS (SELECT id)
SELECT 1
) FROM range(1)
-- !query schema
struct<scalarsubquery():int>
-- !query output
1


-- !query
SELECT (
WITH unreferenced AS (SELECT 1)
SELECT id
) FROM range(1)
-- !query schema
struct<scalarsubquery(id):bigint>
-- !query output
0


-- !query
SELECT * FROM
(
Expand Down
22 changes: 22 additions & 0 deletions sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,28 @@ struct<scalarsubquery():int>
1


-- !query
SELECT (
WITH unreferenced AS (SELECT id)
SELECT 1
) FROM range(1)
-- !query schema
struct<scalarsubquery(id):int>
-- !query output
1


-- !query
SELECT (
WITH unreferenced AS (SELECT 1)
SELECT id
) FROM range(1)
-- !query schema
struct<scalarsubquery(id):bigint>
-- !query output
0


-- !query
SELECT * FROM
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,28 @@ struct<scalarsubquery():int>
1


-- !query
SELECT (
WITH unreferenced AS (SELECT id)
SELECT 1
) FROM range(1)
-- !query schema
struct<scalarsubquery(id):int>
-- !query output
1


-- !query
SELECT (
WITH unreferenced AS (SELECT 1)
SELECT id
) FROM range(1)
-- !query schema
struct<scalarsubquery(id):bigint>
-- !query output
0


-- !query
SELECT * FROM
(
Expand Down

0 comments on commit d5c33c6

Please sign in to comment.