Skip to content

Commit

Permalink
[SPARK-44834][PYTHON][SQL][TESTS][FOLLOW-UP] Update the analyzer results of the udtf tests
Browse files Browse the repository at this point in the history

### What changes were proposed in this pull request?

This is a follow-up to #42517.
The analyzer results for the UDTF tests need to be regenerated now that #42519 has been merged. This PR also updates PythonUDTFSuite now that #42520 has been merged.

### Why are the changes needed?

To fix test failures

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Test-only change.

Closes #42543 from allisonwang-db/spark-44834-fix.

Authored-by: allisonwang-db <allison.wang@databricks.com>
Signed-off-by: Yuming Wang <yumwang@ebay.com>
(cherry picked from commit bb41cd8)
Signed-off-by: Yuming Wang <yumwang@ebay.com>
  • Loading branch information
allisonwang-db authored and wangyum committed Aug 18, 2023
1 parent 7786d0b commit 41e7234
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,84 +10,49 @@ CreateViewCommand `t1`, VALUES (0, 1), (1, 2) t(c1, c2), false, true, LocalTempV
-- !query
SELECT * FROM udtf(1, 2)
-- !query analysis
Project [x#x, y#x]
+- Generate TestUDTF(1, 2)#x, false, [x#x, y#x]
+- OneRowRelation
[Analyzer test output redacted due to nondeterminism]


-- !query
SELECT * FROM udtf(-1, 0)
-- !query analysis
Project [x#x, y#x]
+- Generate TestUDTF(-1, 0)#x, false, [x#x, y#x]
+- OneRowRelation
[Analyzer test output redacted due to nondeterminism]


-- !query
SELECT * FROM udtf(0, -1)
-- !query analysis
Project [x#x, y#x]
+- Generate TestUDTF(0, -1)#x, false, [x#x, y#x]
+- OneRowRelation
[Analyzer test output redacted due to nondeterminism]


-- !query
SELECT * FROM udtf(0, 0)
-- !query analysis
Project [x#x, y#x]
+- Generate TestUDTF(0, 0)#x, false, [x#x, y#x]
+- OneRowRelation
[Analyzer test output redacted due to nondeterminism]


-- !query
SELECT a, b FROM udtf(1, 2) t(a, b)
-- !query analysis
Project [a#x, b#x]
+- SubqueryAlias t
+- Project [x#x AS a#x, y#x AS b#x]
+- Generate TestUDTF(1, 2)#x, false, [x#x, y#x]
+- OneRowRelation
[Analyzer test output redacted due to nondeterminism]


-- !query
SELECT * FROM t1, LATERAL udtf(c1, c2)
-- !query analysis
Project [c1#x, c2#x, x#x, y#x]
+- LateralJoin lateral-subquery#x [c1#x && c2#x], Inner
: +- Generate TestUDTF(outer(c1#x), outer(c2#x))#x, false, [x#x, y#x]
: +- OneRowRelation
+- SubqueryAlias t1
+- View (`t1`, [c1#x,c2#x])
+- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x]
+- SubqueryAlias t
+- LocalRelation [c1#x, c2#x]
[Analyzer test output redacted due to nondeterminism]


-- !query
SELECT * FROM t1 LEFT JOIN LATERAL udtf(c1, c2)
-- !query analysis
Project [c1#x, c2#x, x#x, y#x]
+- LateralJoin lateral-subquery#x [c1#x && c2#x], LeftOuter
: +- Generate TestUDTF(outer(c1#x), outer(c2#x))#x, false, [x#x, y#x]
: +- OneRowRelation
+- SubqueryAlias t1
+- View (`t1`, [c1#x,c2#x])
+- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x]
+- SubqueryAlias t
+- LocalRelation [c1#x, c2#x]
[Analyzer test output redacted due to nondeterminism]


-- !query
SELECT * FROM udtf(1, 2) t(c1, c2), LATERAL udtf(c1, c2)
-- !query analysis
Project [c1#x, c2#x, x#x, y#x]
+- LateralJoin lateral-subquery#x [c1#x && c2#x], Inner
: +- Generate TestUDTF(outer(c1#x), outer(c2#x))#x, false, [x#x, y#x]
: +- OneRowRelation
+- SubqueryAlias t
+- Project [x#x AS c1#x, y#x AS c2#x]
+- Generate TestUDTF(1, 2)#x, false, [x#x, y#x]
+- OneRowRelation
[Analyzer test output redacted due to nondeterminism]


-- !query
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,28 +39,15 @@ class PythonUDTFSuite extends QueryTest with SharedSparkSession {
| yield a, b, b - a
|""".stripMargin

private val arrowPythonScript: String =
"""
|import pandas as pd
|class VectorizedUDTF:
| def eval(self, a: pd.Series, b: pd.Series):
| data = [
| [a, b, a + b],
| [a, b, a - b],
| [a, b, b - a],
| ]
| yield pd.DataFrame(data)
|""".stripMargin

private val returnType: StructType = StructType.fromDDL("a int, b int, c int")

private val pythonUDTF: UserDefinedPythonTableFunction =
createUserDefinedPythonTableFunction("SimpleUDTF", pythonScript, returnType)

private val arrowPythonUDTF: UserDefinedPythonTableFunction =
createUserDefinedPythonTableFunction(
"VectorizedUDTF",
arrowPythonScript,
"SimpleUDTF",
pythonScript,
returnType,
evalType = PythonEvalType.SQL_ARROW_TABLE_UDF)

Expand Down

0 comments on commit 41e7234

Please sign in to comment.