apache · zhengruifeng · Jun 7, 2024 · Jun 7, 2024 · Jun 7, 2024 · Jun 7, 2024
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
@@ -1102,6 +1102,8 @@ def __hash__(self):
         "python/pyspark/pandas",
     ],
     python_test_goals=[
+        # unit tests dedicated to Spark Connect
+        "pyspark.pandas.tests.connect.test_connect_plotting",
         # pandas-on-Spark unittests
         "pyspark.pandas.tests.connect.test_parity_categorical",
         "pyspark.pandas.tests.connect.test_parity_config",

diff --git a/python/pyspark/pandas/plot/core.py b/python/pyspark/pandas/plot/core.py
@@ -23,6 +23,7 @@
 from pandas.core.dtypes.inference import is_integer
 
 from pyspark.sql import functions as F
+from pyspark.sql.utils import is_remote
 from pyspark.pandas.missing import unsupported_function
 from pyspark.pandas.config import get_option
 from pyspark.pandas.utils import name_like_string
@@ -571,10 +572,14 @@ def _get_plot_backend(backend=None):
         return module
 
     def __call__(self, kind="line", backend=None, **kwargs):
+        kind = {"density": "kde"}.get(kind, kind)
+
+        if is_remote() and kind in ["hist", "kde"]:
+            return unsupported_function(class_name="pd.DataFrame", method_name=kind)()
+
         plot_backend = PandasOnSparkPlotAccessor._get_plot_backend(backend)
         plot_data = self.data
 
-        kind = {"density": "kde"}.get(kind, kind)
         if hasattr(plot_backend, "plot_pandas_on_spark"):
             # use if there's pandas-on-Spark specific method.
             return plot_backend.plot_pandas_on_spark(plot_data, kind=kind, **kwargs)
@@ -948,6 +953,9 @@ def hist(self, bins=10, **kwds):
             >>> df = ps.from_pandas(df)
             >>> df.plot.hist(bins=12, alpha=0.5)  # doctest: +SKIP
         """
+        if is_remote():
+            return unsupported_function(class_name="pd.DataFrame", method_name="hist")()
+
         return self(kind="hist", bins=bins, **kwds)
 
     def kde(self, bw_method=None, ind=None, **kwargs):
@@ -1023,6 +1031,9 @@ def kde(self, bw_method=None, ind=None, **kwargs):
             ... })
             >>> df.plot.kde(ind=[1, 2, 3, 4, 5, 6], bw_method=0.3)  # doctest: +SKIP
         """
+        if is_remote():
+            return unsupported_function(class_name="pd.DataFrame", method_name="kde")()
+
         return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)
 
     density = kde

diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py
@@ -24,6 +24,10 @@
 class SeriesPlotMatplotlibParityTests(
     SeriesPlotMatplotlibTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase
 ):
+    # Overrides the inherited test only to skip it: plot kind "hist" is rejected
+    # by the plot accessor whenever is_remote() is true.
+    @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.")
+    def test_empty_hist(self):
+        super().test_empty_hist()
+
     @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.")
     def test_hist(self):
         super().test_hist()

diff --git a/python/pyspark/pandas/tests/connect/test_connect_plotting.py b/python/pyspark/pandas/tests/connect/test_connect_plotting.py
@@ -0,0 +1,124 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.pandas.exceptions import PandasNotImplementedError
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
+
+
+class ConnectPlottingTests(PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase):
+    """Check that hist/kde/density plots raise ``PandasNotImplementedError``.
+
+    Each case is exercised on a pandas-on-Spark DataFrame and on its ``shield``
+    Series, once through the accessor methods and once through ``plot(kind=...)``.
+    """
+
+    # (plot kind, extra keyword arguments) pairs, in the order they are checked.
+    _UNSUPPORTED_CASES = (
+        ("hist", {}),
+        ("hist", {"bins": 3}),
+        ("kde", {}),
+        ("kde", {"bw_method": 3}),
+        ("density", {}),
+        ("density", {"bw_method": 3}),
+    )
+
+    @property
+    def pdf1(self):
+        # Three rows, two integer columns; the last index label is None.
+        values = [[1, 2], [4, 5], [7, 8]]
+        labels = ["cobra", "viper", None]
+        return pd.DataFrame(values, index=labels, columns=["max_speed", "shield"])
+
+    @property
+    def psdf1(self):
+        # Built afresh from pdf1 on every access.
+        pandas_frame = self.pdf1
+        return ps.from_pandas(pandas_frame)
+
+    def _plot_targets(self):
+        # Factories rather than objects, so each target is created inside the
+        # assertRaises context of the case that uses it. DataFrame first, then Series.
+        return (lambda: self.psdf1, lambda: self.psdf1.shield)
+
+    def test_unsupported_functions(self):
+        # Accessor-method form: <target>.plot.<kind>(**options)
+        for make_target in self._plot_targets():
+            for kind, options in self._UNSUPPORTED_CASES:
+                with self.assertRaises(PandasNotImplementedError):
+                    getattr(make_target().plot, kind)(**options)
+
+    def test_unsupported_kinds(self):
+        # Call form: <target>.plot(kind=<kind>, **options)
+        for make_target in self._plot_targets():
+            for kind, options in self._UNSUPPORTED_CASES:
+                with self.assertRaises(PandasNotImplementedError):
+                    make_target().plot(kind=kind, **options)
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.connect.test_connect_plotting import *  # noqa: F401
+
+    # testRunner stays None unless xmlrunner can be imported; an ImportError is
+    # tolerated on purpose so the tests still run without XML reporting.
+    testRunner = None
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        pass
+    unittest.main(testRunner=testRunner, verbosity=2)