From cb298e8021965a83d40bc43c9c010361e62e6dbd Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Thu, 14 Sep 2023 19:20:05 +0800 Subject: [PATCH] [SPARK-45119][PYTHON][DOCS] Refine docstring of inline ### What changes were proposed in this pull request? This PR improves the docstring of the function `inline` by adding more examples. ### Why are the changes needed? To improve PySpark documentation. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? doctest ### Was this patch authored or co-authored using generative AI tooling? No Closes #42875 from allisonwang-db/spark-45119-refine-inline. Authored-by: allisonwang-db Signed-off-by: Ruifeng Zheng --- python/pyspark/sql/functions.py | 105 +++++++++++++++++++++++++++++--- 1 file changed, 97 insertions(+), 8 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 2d4194c98e9ce..31936241619e6 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -12471,37 +12471,126 @@ def inline(col: "ColumnOrName") -> Column: """ Explodes an array of structs into a table. + This function takes an input column containing an array of structs and returns a + new column where each struct in the array is exploded into a separate row. + .. versionadded:: 3.4.0 Parameters ---------- col : :class:`~pyspark.sql.Column` or str - input column of values to explode. + Input column of values to explode. Returns ------- :class:`~pyspark.sql.Column` - generator expression with the inline exploded result. + Generator expression with the inline exploded result. See Also -------- - :meth:`explode` - - Notes - ----- - Supports Spark Connect. 
+ :meth:`pyspark.sql.functions.explode` + :meth:`pyspark.sql.functions.inline_outer` Examples -------- + Example 1: Using inline with a single struct array column + + >>> import pyspark.sql.functions as sf + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame([Row(structlist=[Row(a=1, b=2), Row(a=3, b=4)])]) + >>> df.select(sf.inline(df.structlist)).show() + +---+---+ + | a| b| + +---+---+ + | 1| 2| + | 3| 4| + +---+---+ + + Example 2: Using inline with a column name + + >>> import pyspark.sql.functions as sf >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(structlist=[Row(a=1, b=2), Row(a=3, b=4)])]) - >>> df.select(inline(df.structlist)).show() + >>> df.select(sf.inline("structlist")).show() +---+---+ | a| b| +---+---+ | 1| 2| | 3| 4| +---+---+ + + Example 3: Using inline with an alias + + >>> import pyspark.sql.functions as sf + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame([Row(structlist=[Row(a=1, b=2), Row(a=3, b=4)])]) + >>> df.select(sf.inline("structlist").alias("c1", "c2")).show() + +---+---+ + | c1| c2| + +---+---+ + | 1| 2| + | 3| 4| + +---+---+ + + Example 4: Using inline with multiple struct array columns + + >>> import pyspark.sql.functions as sf + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame([ + ... Row(structlist1=[Row(a=1, b=2), Row(a=3, b=4)], + ... structlist2=[Row(c=5, d=6), Row(c=7, d=8)]) + ... ]) + >>> df.select(sf.inline("structlist1"), "structlist2") \\ + ... .select("a", "b", sf.inline("structlist2")).show() + +---+---+---+---+ + | a| b| c| d| + +---+---+---+---+ + | 1| 2| 5| 6| + | 1| 2| 7| 8| + | 3| 4| 5| 6| + | 3| 4| 7| 8| + +---+---+---+---+ + + Example 5: Using inline with a nested struct array column + + >>> import pyspark.sql.functions as sf + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame([ + ... Row(structlist=Row(a=1, b=2, nested=[Row(c=3, d=4), Row(c=5, d=6)])) + ... 
]) + >>> df.select(sf.inline("structlist.nested")).show() + +---+---+ + | c| d| + +---+---+ + | 3| 4| + | 5| 6| + +---+---+ + + Example 6: Using inline with an empty struct array column + + >>> import pyspark.sql.functions as sf + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame( + ... [Row(structlist=[])], "structlist: array<struct<a:int,b:int>>") + >>> df.select(sf.inline(df.structlist)).show() + +---+---+ + | a| b| + +---+---+ + +---+---+ + + Example 7: Using inline with a struct array column containing null values + + >>> import pyspark.sql.functions as sf + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame([Row(structlist=[Row(a=1, b=2), None, Row(a=3, b=4)])]) + >>> df.select(sf.inline(df.structlist)).show() + +----+----+ + | a| b| + +----+----+ + | 1| 2| + |NULL|NULL| + | 3| 4| + +----+----+ """ return _invoke_function_over_columns("inline", col)