Skip to content

Commit

Permalink
[SPARK-35069][SQL] TRANSFORM forbids DISTINCT and ALL, also mak…
Browse files Browse the repository at this point in the history
…e the error clear

### What changes were proposed in this pull request?
As suggested in #29087 (comment), add unit tests to `transform.sql`.

It seems that `distinct` is not recognized as a reserved word here, so it is parsed as a column reference and only fails later during analysis:

```
-- !query
explain extended SELECT TRANSFORM(distinct b, a, c)
                   USING 'cat' AS (a, b, c)
                 FROM script_trans
                 WHERE a <= 4
-- !query schema
struct<plan:string>
-- !query output
== Parsed Logical Plan ==
'ScriptTransformation [*], cat, [a#x, b#x, c#x], ScriptInputOutputSchema(List(),List(),None,None,List(),List(),None,None,false)
+- 'Project ['distinct AS b#x, 'a, 'c]
   +- 'Filter ('a <= 4)
      +- 'UnresolvedRelation [script_trans], [], false

== Analyzed Logical Plan ==
org.apache.spark.sql.AnalysisException: cannot resolve 'distinct' given input columns: [script_trans.a, script_trans.b, script_trans.c]; line 1 pos 34;
'ScriptTransformation [*], cat, [a#x, b#x, c#x], ScriptInputOutputSchema(List(),List(),None,None,List(),List(),None,None,false)
+- 'Project ['distinct AS b#x, a#x, c#x]
   +- Filter (a#x <= 4)
      +- SubqueryAlias script_trans
         +- View (`script_trans`, [a#x,b#x,c#x])
            +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x, cast(c#x as int) AS c#x]
               +- Project [a#x, b#x, c#x]
                  +- SubqueryAlias script_trans
                     +- LocalRelation [a#x, b#x, c#x]
```

Hive's error
![image](https://user-images.githubusercontent.com/46485123/114533170-355d8380-9c80-11eb-992f-982f0b296759.png)

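### Why are the changes needed?
Without this change, `TRANSFORM(DISTINCT ...)` is parsed with `distinct` treated as a column name and fails with a confusing "cannot resolve 'distinct'" analysis error. Rejecting DISTINCT/ALL in the parser makes the error clear.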

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Added unit tests.

Closes #32149 from AngersZhuuuu/SPARK-28227-new-followup.

Authored-by: Angerszhuuuu <angers.zhu@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
AngersZhuuuu authored and cloud-fan committed Apr 14, 2021
1 parent a153efa commit 4ca9958
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -524,9 +524,9 @@ querySpecification
;

transformClause
: (SELECT kind=TRANSFORM '(' namedExpressionSeq ')'
| kind=MAP namedExpressionSeq
| kind=REDUCE namedExpressionSeq)
: (SELECT kind=TRANSFORM '(' setQuantifier? namedExpressionSeq ')'
| kind=MAP setQuantifier? namedExpressionSeq
| kind=REDUCE setQuantifier? namedExpressionSeq)
inRowFormat=rowFormat?
(RECORDWRITER recordWriter=STRING)?
USING script=STRING
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
havingClause: HavingClauseContext,
windowClause: WindowClauseContext,
relation: LogicalPlan): LogicalPlan = withOrigin(ctx) {
if (transformClause.setQuantifier != null) {
throw QueryParsingErrors.transformNotSupportQuantifierError(transformClause.setQuantifier)
}
// Create the attributes.
val (attributes, schemaLess) = if (transformClause.colTypeList != null) {
// Typed return columns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ object QueryParsingErrors {
new ParseException("DISTRIBUTE BY is not supported", ctx)
}

/**
 * Builds the parse error reported when a TRANSFORM clause specifies a DISTINCT or ALL
 * set quantifier on its inputs.
 *
 * @param ctx parser context passed to the [[ParseException]] together with the message
 * @return the [[ParseException]] for the caller to throw
 */
def transformNotSupportQuantifierError(ctx: ParserRuleContext): Throwable = {
  val message = "TRANSFORM does not support DISTINCT/ALL in inputs"
  new ParseException(message, ctx)
}

/**
 * Builds the parse error stating that TRANSFORM with a serde is only supported in hive mode.
 *
 * @param ctx parser context passed to the [[ParseException]] together with the message
 * @return the [[ParseException]] describing the unsupported usage
 */
def transformWithSerdeUnsupportedError(ctx: ParserRuleContext): Throwable = {
  val message = "TRANSFORM with serde is only supported in hive mode"
  new ParseException(message, ctx)
}
Expand Down
14 changes: 12 additions & 2 deletions sql/core/src/test/resources/sql-tests/inputs/transform.sql
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false;
SET spark.sql.parser.quotedRegexColumnNames=true;

SELECT TRANSFORM(`(a|b)?+.+`)
USING 'cat' AS (c)
USING 'cat' AS (c)
FROM script_trans;

SET spark.sql.parser.quotedRegexColumnNames=false;
Expand All @@ -320,4 +320,14 @@ SET spark.sql.parser.quotedRegexColumnNames=false;
WITH temp AS (
SELECT TRANSFORM(a) USING 'cat' AS (b string) FROM t
)
SELECT t1.b FROM temp t1 JOIN temp t2 ON t1.b = t2.b
SELECT t1.b FROM temp t1 JOIN temp t2 ON t1.b = t2.b;

SELECT TRANSFORM(DISTINCT b, a, c)
USING 'cat' AS (a, b, c)
FROM script_trans
WHERE a <= 4;

SELECT TRANSFORM(ALL b, a, c)
USING 'cat' AS (a, b, c)
FROM script_trans
WHERE a <= 4;
45 changes: 43 additions & 2 deletions sql/core/src/test/resources/sql-tests/results/transform.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 42
-- Number of queries: 44


-- !query
Expand Down Expand Up @@ -610,7 +610,7 @@ spark.sql.parser.quotedRegexColumnNames true

-- !query
SELECT TRANSFORM(`(a|b)?+.+`)
USING 'cat' AS (c)
USING 'cat' AS (c)
FROM script_trans
-- !query schema
struct<c:string>
Expand All @@ -627,6 +627,7 @@ struct<key:string,value:string>
-- !query output
spark.sql.parser.quotedRegexColumnNames false


-- !query
WITH temp AS (
SELECT TRANSFORM(a) USING 'cat' AS (b string) FROM t
Expand All @@ -638,3 +639,43 @@ struct<b:string>
1
2
3


-- !query
SELECT TRANSFORM(DISTINCT b, a, c)
USING 'cat' AS (a, b, c)
FROM script_trans
WHERE a <= 4
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

TRANSFORM does not support DISTINCT/ALL in inputs(line 1, pos 17)

== SQL ==
SELECT TRANSFORM(DISTINCT b, a, c)
-----------------^^^
USING 'cat' AS (a, b, c)
FROM script_trans
WHERE a <= 4


-- !query
SELECT TRANSFORM(ALL b, a, c)
USING 'cat' AS (a, b, c)
FROM script_trans
WHERE a <= 4
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

TRANSFORM does not support DISTINCT/ALL in inputs(line 1, pos 17)

== SQL ==
SELECT TRANSFORM(ALL b, a, c)
-----------------^^^
USING 'cat' AS (a, b, c)
FROM script_trans
WHERE a <= 4

0 comments on commit 4ca9958

Please sign in to comment.