From cd1d687ac0e6740a504bc15673d827ae9f1cd1f1 Mon Sep 17 00:00:00 2001
From: allisonwang-db <allison.wang@databricks.com>
Date: Mon, 8 Jul 2024 18:31:11 +0800
Subject: [PATCH] [SPARK-48825][DOCS] Unify the 'See Also' section formatting
 across PySpark docstrings

### What changes were proposed in this pull request?

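This PR unifies the formatting of the 'See Also' sections across PySpark docstrings and fixes invalid references in them: a nonexistent target (`DataFrame.display` in `DataFrame.summary`), wrong module paths (`pyspark.functions.*` instead of `pyspark.sql.functions.*`), and a malformed `:class:` role in the `DataFrame.groupingSets` docstring.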

### Why are the changes needed?

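To improve the PySpark documentation: 'See Also' entries should follow one consistent format and point to targets that actually exist.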

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Doctests. The patch only changes docstring text, so no new tests were added.

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #47240 from allisonwang-db/spark-48825-also-see-docs.

Authored-by: allisonwang-db <allison.wang@databricks.com>
Signed-off-by: Ruifeng Zheng <ruifengz@apache.org>
---
 python/pyspark/sql/dataframe.py         | 17 +++++++++--------
 python/pyspark/sql/functions/builtin.py | 24 ++++++++++++------------
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 8d16604879bfb..d31d8fa85ea14 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -1887,7 +1887,7 @@ def distinct(self) -> "DataFrame":
 
         See Also
         --------
-        DataFrame.dropDuplicates
+        DataFrame.dropDuplicates : Remove duplicate rows from this DataFrame.
 
         Examples
         --------
@@ -2951,7 +2951,7 @@ def describe(self, *cols: Union[str, List[str]]) -> "DataFrame":
 
         See Also
         --------
-        DataFrame.summary
+        DataFrame.summary : Computes summary statistics for numeric and string columns.
         """
         ...
 
@@ -3022,7 +3022,7 @@ def summary(self, *statistics: str) -> "DataFrame":
 
         See Also
         --------
-        DataFrame.display
+        DataFrame.describe : Computes basic statistics for numeric and string columns.
         """
         ...
 
@@ -3790,7 +3790,7 @@ def groupingSets(
         self, groupingSets: Sequence[Sequence["ColumnOrName"]], *cols: "ColumnOrName"
     ) -> "GroupedData":
         """
-        Create multi-dimensional aggregation for the current `class`:DataFrame using the specified
+        Create multi-dimensional aggregation for the current :class:`DataFrame` using the specified
         grouping sets, so we can run aggregation on them.
 
         .. versionadded:: 4.0.0
@@ -3873,7 +3873,7 @@ def groupingSets(
 
         See Also
         --------
-        GroupedData
+        DataFrame.rollup : Compute hierarchical summaries at multiple levels.
         """
         ...
 
@@ -5420,7 +5420,7 @@ def withColumnRenamed(self, existing: str, new: str) -> "DataFrame":
 
         See Also
         --------
-        :meth:`withColumnsRenamed`
+        DataFrame.withColumnsRenamed
 
         Examples
         --------
@@ -5480,7 +5480,7 @@ def withColumnsRenamed(self, colsMap: Dict[str, str]) -> "DataFrame":
 
         See Also
         --------
-        :meth:`withColumnRenamed`
+        DataFrame.withColumnRenamed
 
         Examples
         --------
@@ -6183,6 +6183,7 @@ def mapInPandas(
         See Also
         --------
         pyspark.sql.functions.pandas_udf
+        DataFrame.mapInArrow
         """
         ...
 
@@ -6259,7 +6260,7 @@ def mapInArrow(
         See Also
         --------
         pyspark.sql.functions.pandas_udf
-        pyspark.sql.DataFrame.mapInPandas
+        DataFrame.mapInPandas
         """
         ...
 
diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index a2f4523a3f248..1508b042b61a6 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -14040,8 +14040,8 @@ def element_at(col: "ColumnOrName", extraction: Any) -> Column:
 
     See Also
     --------
-    :meth:`get`
-    :meth:`try_element_at`
+    :meth:`pyspark.sql.functions.get`
+    :meth:`pyspark.sql.functions.try_element_at`
 
     Examples
     --------
@@ -14131,8 +14131,8 @@ def try_element_at(col: "ColumnOrName", extraction: "ColumnOrName") -> Column:
 
     See Also
     --------
-    :meth:`get`
-    :meth:`element_at`
+    :meth:`pyspark.sql.functions.get`
+    :meth:`pyspark.sql.functions.element_at`
 
     Examples
     --------
@@ -14233,7 +14233,7 @@ def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column:
 
     See Also
     --------
-    :meth:`element_at`
+    :meth:`pyspark.sql.functions.element_at`
 
     Examples
     --------
@@ -15153,9 +15153,9 @@ def explode(col: "ColumnOrName") -> Column:
 
     See Also
     --------
-    :meth:`pyspark.functions.posexplode`
-    :meth:`pyspark.functions.explode_outer`
-    :meth:`pyspark.functions.posexplode_outer`
+    :meth:`pyspark.sql.functions.posexplode`
+    :meth:`pyspark.sql.functions.explode_outer`
+    :meth:`pyspark.sql.functions.posexplode_outer`
 
     Notes
     -----
@@ -15342,8 +15342,8 @@ def inline(col: "ColumnOrName") -> Column:
 
     See Also
     --------
-    :meth:`pyspark.functions.explode`
-    :meth:`pyspark.functions.inline_outer`
+    :meth:`pyspark.sql.functions.explode`
+    :meth:`pyspark.sql.functions.inline_outer`
 
     Examples
     --------
@@ -15570,8 +15570,8 @@ def inline_outer(col: "ColumnOrName") -> Column:
 
     See Also
     --------
-    :meth:`explode_outer`
-    :meth:`inline`
+    :meth:`pyspark.sql.functions.explode_outer`
+    :meth:`pyspark.sql.functions.inline`
 
     Notes
     -----