diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 2c8aaa404d540..928549a46679d 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -524,9 +524,9 @@ querySpecification ; transformClause - : (SELECT kind=TRANSFORM '(' namedExpressionSeq ')' - | kind=MAP namedExpressionSeq - | kind=REDUCE namedExpressionSeq) + : (SELECT kind=TRANSFORM '(' setQuantifier? namedExpressionSeq ')' + | kind=MAP setQuantifier? namedExpressionSeq + | kind=REDUCE setQuantifier? namedExpressionSeq) inRowFormat=rowFormat? (RECORDWRITER recordWriter=STRING)? USING script=STRING diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index f1455d67edfcb..c7af21cabfe8f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -660,6 +660,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg havingClause: HavingClauseContext, windowClause: WindowClauseContext, relation: LogicalPlan): LogicalPlan = withOrigin(ctx) { + if (transformClause.setQuantifier != null) { + throw QueryParsingErrors.transformNotSupportQuantifierError(transformClause.setQuantifier) + } // Create the attributes. val (attributes, schemaLess) = if (transformClause.colTypeList != null) { // Typed return columns. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 56d9aaa32c32d..c51db7497850b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -88,6 +88,10 @@ object QueryParsingErrors { new ParseException("DISTRIBUTE BY is not supported", ctx) } + def transformNotSupportQuantifierError(ctx: ParserRuleContext): Throwable = { + new ParseException("TRANSFORM does not support DISTINCT/ALL in inputs", ctx) + } + def transformWithSerdeUnsupportedError(ctx: ParserRuleContext): Throwable = { new ParseException("TRANSFORM with serde is only supported in hive mode", ctx) } diff --git a/sql/core/src/test/resources/sql-tests/inputs/transform.sql b/sql/core/src/test/resources/sql-tests/inputs/transform.sql index e89404c5f845f..8ee0ddc0a0ba1 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/transform.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/transform.sql @@ -311,7 +311,7 @@ SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false; SET spark.sql.parser.quotedRegexColumnNames=true; SELECT TRANSFORM(`(a|b)?+.+`) - USING 'cat' AS (c) + USING 'cat' AS (c) FROM script_trans; SET spark.sql.parser.quotedRegexColumnNames=false; @@ -320,4 +320,14 @@ SET spark.sql.parser.quotedRegexColumnNames=false; WITH temp AS ( SELECT TRANSFORM(a) USING 'cat' AS (b string) FROM t ) -SELECT t1.b FROM temp t1 JOIN temp t2 ON t1.b = t2.b +SELECT t1.b FROM temp t1 JOIN temp t2 ON t1.b = t2.b; + +SELECT TRANSFORM(DISTINCT b, a, c) + USING 'cat' AS (a, b, c) +FROM script_trans +WHERE a <= 4; + +SELECT TRANSFORM(ALL b, a, c) + USING 'cat' AS (a, b, c) +FROM script_trans +WHERE a <= 4; diff --git a/sql/core/src/test/resources/sql-tests/results/transform.sql.out b/sql/core/src/test/resources/sql-tests/results/transform.sql.out index 1fa165be1a87a..fd879865fd5eb 100644 --- a/sql/core/src/test/resources/sql-tests/results/transform.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/transform.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 42 +-- Number of queries: 44 -- !query @@ -610,7 +610,7 @@ spark.sql.parser.quotedRegexColumnNames true -- !query SELECT TRANSFORM(`(a|b)?+.+`) - USING 'cat' AS (c) + USING 'cat' AS (c) FROM script_trans -- !query schema struct @@ -627,6 +627,7 @@ struct -- !query output spark.sql.parser.quotedRegexColumnNames false + -- !query WITH temp AS ( SELECT TRANSFORM(a) USING 'cat' AS (b string) FROM t @@ -638,3 +639,43 @@ struct 1 2 3 + + +-- !query +SELECT TRANSFORM(DISTINCT b, a, c) + USING 'cat' AS (a, b, c) +FROM script_trans +WHERE a <= 4 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +TRANSFORM does not support DISTINCT/ALL in inputs(line 1, pos 17) + +== SQL == +SELECT TRANSFORM(DISTINCT b, a, c) +-----------------^^^ + USING 'cat' AS (a, b, c) +FROM script_trans +WHERE a <= 4 + + +-- !query +SELECT TRANSFORM(ALL b, a, c) + USING 'cat' AS (a, b, c) +FROM script_trans +WHERE a <= 4 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +TRANSFORM does not support DISTINCT/ALL in inputs(line 1, pos 17) + +== SQL == +SELECT TRANSFORM(ALL b, a, c) +-----------------^^^ + USING 'cat' AS (a, b, c) +FROM script_trans +WHERE a <= 4