From 118f242e60cae053777719d557023d57c87f967e Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Tue, 13 Apr 2021 17:35:27 +0800 Subject: [PATCH 1/3] [SPARK-28227][SQL][FOLLOWUP] Add test case about transform with distinct --- .../resources/sql-tests/inputs/transform.sql | 9 +++++++-- .../sql-tests/results/transform.sql.out | 17 +++++++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/transform.sql b/sql/core/src/test/resources/sql-tests/inputs/transform.sql index e89404c5f845f..6e7b2eb4e1aa3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/transform.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/transform.sql @@ -311,7 +311,7 @@ SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false; SET spark.sql.parser.quotedRegexColumnNames=true; SELECT TRANSFORM(`(a|b)?+.+`) - USING 'cat' AS (c) + USING 'cat' AS (c) FROM script_trans; SET spark.sql.parser.quotedRegexColumnNames=false; @@ -320,4 +320,9 @@ SET spark.sql.parser.quotedRegexColumnNames=false; WITH temp AS ( SELECT TRANSFORM(a) USING 'cat' AS (b string) FROM t ) -SELECT t1.b FROM temp t1 JOIN temp t2 ON t1.b = t2.b +SELECT t1.b FROM temp t1 JOIN temp t2 ON t1.b = t2.b; + +SELECT TRANSFORM(distinct b, a, c) + USING 'cat' AS (a, b, c) +FROM script_trans +WHERE a <= 4; diff --git a/sql/core/src/test/resources/sql-tests/results/transform.sql.out b/sql/core/src/test/resources/sql-tests/results/transform.sql.out index 1fa165be1a87a..c6a1b614be3f6 100644 --- a/sql/core/src/test/resources/sql-tests/results/transform.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/transform.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 42 +-- Number of queries: 43 -- !query @@ -610,7 +610,7 @@ spark.sql.parser.quotedRegexColumnNames true -- !query SELECT TRANSFORM(`(a|b)?+.+`) - USING 'cat' AS (c) + USING 'cat' AS (c) FROM script_trans -- !query schema struct @@ -627,6 +627,7 @@ struct -- 
!query output spark.sql.parser.quotedRegexColumnNames false + -- !query WITH temp AS ( SELECT TRANSFORM(a) USING 'cat' AS (b string) FROM t @@ -638,3 +639,15 @@ struct 1 2 3 + + +-- !query +SELECT TRANSFORM(distinct b, a, c) + USING 'cat' AS (a, b, c) +FROM script_trans +WHERE a <= 4 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'distinct' given input columns: [script_trans.a, script_trans.b, script_trans.c]; line 1 pos 17 From d04c63825c77e6fea6d7cebb9da6778447b6517b Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 14 Apr 2021 16:56:46 +0800 Subject: [PATCH 2/3] follow comment --- .../spark/sql/catalyst/parser/SqlBase.g4 | 6 ++-- .../sql/catalyst/parser/AstBuilder.scala | 3 ++ .../spark/sql/errors/QueryParsingErrors.scala | 5 +++ .../resources/sql-tests/inputs/transform.sql | 7 +++- .../sql-tests/results/transform.sql.out | 36 ++++++++++++++++--- 5 files changed, 49 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 2c8aaa404d540..928549a46679d 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -524,9 +524,9 @@ querySpecification ; transformClause - : (SELECT kind=TRANSFORM '(' namedExpressionSeq ')' - | kind=MAP namedExpressionSeq - | kind=REDUCE namedExpressionSeq) + : (SELECT kind=TRANSFORM '(' setQuantifier? namedExpressionSeq ')' + | kind=MAP setQuantifier? namedExpressionSeq + | kind=REDUCE setQuantifier? namedExpressionSeq) inRowFormat=rowFormat? (RECORDWRITER recordWriter=STRING)? 
USING script=STRING diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index f1455d67edfcb..c7af21cabfe8f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -660,6 +660,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg havingClause: HavingClauseContext, windowClause: WindowClauseContext, relation: LogicalPlan): LogicalPlan = withOrigin(ctx) { + if (transformClause.setQuantifier != null) { + throw QueryParsingErrors.transformNotSupportQuantifierError(transformClause.setQuantifier) + } // Create the attributes. val (attributes, schemaLess) = if (transformClause.colTypeList != null) { // Typed return columns. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 56d9aaa32c32d..922e10086fab1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -88,6 +88,11 @@ object QueryParsingErrors { new ParseException("DISTRIBUTE BY is not supported", ctx) } + def transformNotSupportQuantifierError(ctx: ParserRuleContext): Throwable = { + new ParseException("TRANSFORM not support quantifier DISTINCT/ALL " + + "for input expression seq", ctx) + } + def transformWithSerdeUnsupportedError(ctx: ParserRuleContext): Throwable = { new ParseException("TRANSFORM with serde is only supported in hive mode", ctx) } diff --git a/sql/core/src/test/resources/sql-tests/inputs/transform.sql b/sql/core/src/test/resources/sql-tests/inputs/transform.sql index 6e7b2eb4e1aa3..8ee0ddc0a0ba1 100644 --- 
a/sql/core/src/test/resources/sql-tests/inputs/transform.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/transform.sql @@ -322,7 +322,12 @@ WITH temp AS ( ) SELECT t1.b FROM temp t1 JOIN temp t2 ON t1.b = t2.b; -SELECT TRANSFORM(distinct b, a, c) +SELECT TRANSFORM(DISTINCT b, a, c) + USING 'cat' AS (a, b, c) +FROM script_trans +WHERE a <= 4; + +SELECT TRANSFORM(ALL b, a, c) USING 'cat' AS (a, b, c) FROM script_trans WHERE a <= 4; diff --git a/sql/core/src/test/resources/sql-tests/results/transform.sql.out b/sql/core/src/test/resources/sql-tests/results/transform.sql.out index c6a1b614be3f6..aba988a0be360 100644 --- a/sql/core/src/test/resources/sql-tests/results/transform.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/transform.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 43 +-- Number of queries: 44 -- !query @@ -642,12 +642,40 @@ struct -- !query -SELECT TRANSFORM(distinct b, a, c) +SELECT TRANSFORM(DISTINCT b, a, c) USING 'cat' AS (a, b, c) FROM script_trans WHERE a <= 4 -- !query schema struct<> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve 'distinct' given input columns: [script_trans.a, script_trans.b, script_trans.c]; line 1 pos 17 +org.apache.spark.sql.catalyst.parser.ParseException + +TRANSFORM not support quantifier DISTINCT/ALL for input expression seq(line 1, pos 17) + +== SQL == +SELECT TRANSFORM(DISTINCT b, a, c) +-----------------^^^ + USING 'cat' AS (a, b, c) +FROM script_trans +WHERE a <= 4 + + +-- !query +SELECT TRANSFORM(ALL b, a, c) + USING 'cat' AS (a, b, c) +FROM script_trans +WHERE a <= 4 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +TRANSFORM not support quantifier DISTINCT/ALL for input expression seq(line 1, pos 17) + +== SQL == +SELECT TRANSFORM(ALL b, a, c) +-----------------^^^ + USING 'cat' AS (a, b, c) +FROM script_trans +WHERE a <= 4 From 542aa461394a2cd1b6fa44d51d0c7def5b2a2beb Mon 
Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 14 Apr 2021 17:06:54 +0800 Subject: [PATCH 3/3] follow comment --- .../org/apache/spark/sql/errors/QueryParsingErrors.scala | 3 +-- .../src/test/resources/sql-tests/results/transform.sql.out | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 922e10086fab1..c51db7497850b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -89,8 +89,7 @@ object QueryParsingErrors { } def transformNotSupportQuantifierError(ctx: ParserRuleContext): Throwable = { - new ParseException("TRANSFORM not support quantifier DISTINCT/ALL " + - "for input expression seq", ctx) + new ParseException("TRANSFORM does not support DISTINCT/ALL in inputs", ctx) } def transformWithSerdeUnsupportedError(ctx: ParserRuleContext): Throwable = { diff --git a/sql/core/src/test/resources/sql-tests/results/transform.sql.out b/sql/core/src/test/resources/sql-tests/results/transform.sql.out index aba988a0be360..fd879865fd5eb 100644 --- a/sql/core/src/test/resources/sql-tests/results/transform.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/transform.sql.out @@ -651,7 +651,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -TRANSFORM not support quantifier DISTINCT/ALL for input expression seq(line 1, pos 17) +TRANSFORM does not support DISTINCT/ALL in inputs(line 1, pos 17) == SQL == SELECT TRANSFORM(DISTINCT b, a, c) @@ -671,7 +671,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -TRANSFORM not support quantifier DISTINCT/ALL for input expression seq(line 1, pos 17) +TRANSFORM does not support DISTINCT/ALL in inputs(line 1, pos 17) == SQL == SELECT TRANSFORM(ALL b, a, c)