Adjusted comment

itholic · Aug 2, 2023 · 16b8755 · 16b8755
1 parent 76dc06a
commit 16b8755
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 3 deletions.
diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py
@@ -1626,9 +1626,10 @@ def factorize(
         ----------
         sort : bool, default True
         use_na_sentinel : bool, default True
-            If True, the sentinel -1 will be used for NaN values. If False,
-            NaN values will be encoded as non-negative integers and will not drop the
-            NaN from the uniques of the values.
+            If True, the sentinel -1 will be used for NaN values, and NaN will be excluded
+            from the uniques of the values. If False, NaN values will be encoded as a
+            non-negative integer, so NaN is treated as a regular category and is retained
+            in the uniques of the values.
 
         Returns
         -------

diff --git a/python/pyspark/pandas/tests/connect/series/test_parity_compute.py b/python/pyspark/pandas/tests/connect/series/test_parity_compute.py
@@ -28,6 +28,10 @@ class SeriesParityComputeTests(SeriesComputeMixin, PandasOnSparkTestUtils, Reuse
     def test_unstack(self):
         super().test_unstack()
 
+    @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.")
+    def test_factorize(self):
+        super().test_factorize()
+
 
 if __name__ == "__main__":
     from pyspark.pandas.tests.connect.series.test_parity_compute import *  # noqa: F401