-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core #29085
Changes from 40 commits
dfcec3c
e53744b
a693722
5bfa669
ec754e2
a2b12a1
c3dc66b
cb19b7b
ce8a0a5
d37ef86
fce25ff
f3e05c6
5c049b5
04684a8
6811721
fc96e1f
ed901af
a6f1e7d
e367c05
e74d04c
4ef4d76
22d223c
72b2155
a3628ac
e16c136
858f4e5
cfecc90
43d0f24
9e18fa8
9537d9b
5227441
670f21b
ce8184a
4615733
08d97c8
33923b6
f5ec656
7916d72
a769aa7
d93f7fa
be80c27
7f3cff8
03d3409
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.expressions._ | |
import org.apache.spark.sql.catalyst.plans._ | ||
import org.apache.spark.sql.catalyst.plans.logical._ | ||
import org.apache.spark.sql.internal.SQLConf | ||
import org.apache.spark.sql.types.IntegerType | ||
import org.apache.spark.sql.types.{IntegerType, LongType, StringType} | ||
|
||
/** | ||
* Parser test cases for rules defined in [[CatalystSqlParser]] / [[AstBuilder]]. | ||
|
@@ -1031,4 +1031,96 @@ class PlanParserSuite extends AnalysisTest { | |
assertEqual("select a, b from db.c;;;", table("db", "c").select('a, 'b)) | ||
assertEqual("select a, b from db.c; ;; ;", table("db", "c").select('a, 'b)) | ||
} | ||
|
||
test("SPARK-32106: TRANSFORM without serde") { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, could you check There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Add UT |
||
// verify schema-less output (no AS clause: output defaults to string columns `key` and `value`) | ||
assertEqual( | ||
""" | ||
|SELECT TRANSFORM(a, b, c) | ||
|USING 'cat' | ||
|FROM testData | ||
""".stripMargin, | ||
ScriptTransformation( | ||
Seq('a, 'b, 'c), | ||
"cat", | ||
Seq(AttributeReference("key", StringType)(), | ||
AttributeReference("value", StringType)()), | ||
UnresolvedRelation(TableIdentifier("testData")), | ||
ScriptInputOutputSchema(List.empty, List.empty, None, None, | ||
List.empty, List.empty, None, None, true)) | ||
) | ||
|
||
// verify output column names given without types (each column defaults to string) | ||
assertEqual( | ||
""" | ||
|SELECT TRANSFORM(a, b, c) | ||
|USING 'cat' AS (a, b, c) | ||
|FROM testData | ||
""".stripMargin, | ||
ScriptTransformation( | ||
Seq('a, 'b, 'c), | ||
"cat", | ||
Seq(AttributeReference("a", StringType)(), | ||
AttributeReference("b", StringType)(), | ||
AttributeReference("c", StringType)()), | ||
UnresolvedRelation(TableIdentifier("testData")), | ||
ScriptInputOutputSchema(List.empty, List.empty, None, None, | ||
List.empty, List.empty, None, None, false))) | ||
|
||
// verify with output schema | ||
assertEqual( | ||
""" | ||
|SELECT TRANSFORM(a, b, c) | ||
|USING 'cat' AS (a int, b string, c long) | ||
|FROM testData | ||
""".stripMargin, | ||
ScriptTransformation( | ||
Seq('a, 'b, 'c), | ||
"cat", | ||
Seq(AttributeReference("a", IntegerType)(), | ||
AttributeReference("b", StringType)(), | ||
AttributeReference("c", LongType)()), | ||
UnresolvedRelation(TableIdentifier("testData")), | ||
ScriptInputOutputSchema(List.empty, List.empty, None, None, | ||
List.empty, List.empty, None, None, false))) | ||
|
||
// verify with ROW FORMAT DELIMITED | ||
assertEqual( | ||
""" | ||
|SELECT TRANSFORM(a, b, c) | ||
|ROW FORMAT DELIMITED | ||
|FIELDS TERMINATED BY '\t' | ||
|COLLECTION ITEMS TERMINATED BY '\u0002' | ||
|MAP KEYS TERMINATED BY '\u0003' | ||
|LINES TERMINATED BY '\n' | ||
|NULL DEFINED AS 'null' | ||
|USING 'cat' AS (a, b, c) | ||
|ROW FORMAT DELIMITED | ||
|FIELDS TERMINATED BY '\t' | ||
|COLLECTION ITEMS TERMINATED BY '\u0004' | ||
|MAP KEYS TERMINATED BY '\u0005' | ||
|LINES TERMINATED BY '\n' | ||
|NULL DEFINED AS 'NULL' | ||
|FROM testData | ||
""".stripMargin, | ||
ScriptTransformation( | ||
Seq('a, 'b, 'c), | ||
"cat", | ||
Seq(AttributeReference("a", StringType)(), | ||
AttributeReference("b", StringType)(), | ||
AttributeReference("c", StringType)()), | ||
UnresolvedRelation(TableIdentifier("testData")), | ||
ScriptInputOutputSchema( | ||
Seq(("TOK_TABLEROWFORMATFIELD", "'\\t'"), | ||
("TOK_TABLEROWFORMATCOLLITEMS", "'\u0002'"), | ||
("TOK_TABLEROWFORMATMAPKEYS", "'\u0003'"), | ||
("TOK_TABLEROWFORMATLINES", "'\\n'"), | ||
("TOK_TABLEROWFORMATNULL", "'null'")), | ||
Seq(("TOK_TABLEROWFORMATFIELD", "'\\t'"), | ||
("TOK_TABLEROWFORMATCOLLITEMS", "'\u0004'"), | ||
("TOK_TABLEROWFORMATMAPKEYS", "'\u0005'"), | ||
("TOK_TABLEROWFORMATLINES", "'\\n'"), | ||
("TOK_TABLEROWFORMATNULL", "'NULL'")), None, None, | ||
List.empty, List.empty, None, None, false))) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`Format` -> `ScriptIOFormat`? Then, could you make the comment above clearer?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done