diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index e182d0c33f16c..b97ec34b53824 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -1102,6 +1102,8 @@ def __hash__(self):
         "python/pyspark/pandas",
     ],
     python_test_goals=[
+        # unittests dedicated for Spark Connect
+        "pyspark.pandas.tests.connect.test_connect_plotting",
         # pandas-on-Spark unittests
         "pyspark.pandas.tests.connect.test_parity_categorical",
         "pyspark.pandas.tests.connect.test_parity_config",
diff --git a/python/pyspark/pandas/plot/core.py b/python/pyspark/pandas/plot/core.py
index 5bd2a67ed39bb..819ac02a51266 100644
--- a/python/pyspark/pandas/plot/core.py
+++ b/python/pyspark/pandas/plot/core.py
@@ -23,6 +23,7 @@
 from pandas.core.dtypes.inference import is_integer
 
 from pyspark.sql import functions as F
+from pyspark.sql.utils import is_remote
 from pyspark.pandas.missing import unsupported_function
 from pyspark.pandas.config import get_option
 from pyspark.pandas.utils import name_like_string
@@ -571,10 +572,17 @@ def _get_plot_backend(backend=None):
         return module
 
     def __call__(self, kind="line", backend=None, **kwargs):
+        kind = {"density": "kde"}.get(kind, kind)
+
+        # Reject these kinds up front under Spark Connect, before any backend is resolved.
+        # The error names the accessor's owner type (e.g. DataFrame or Series).
+        if is_remote() and kind in ("hist", "kde"):
+            class_name = "pd.{}".format(type(self.data).__name__)
+            return unsupported_function(class_name=class_name, method_name=kind)()
+
         plot_backend = PandasOnSparkPlotAccessor._get_plot_backend(backend)
         plot_data = self.data
 
-        kind = {"density": "kde"}.get(kind, kind)
         if hasattr(plot_backend, "plot_pandas_on_spark"):
             # use if there's pandas-on-Spark specific method.
             return plot_backend.plot_pandas_on_spark(plot_data, kind=kind, **kwargs)
diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py
index f093f48b16e9c..abb18d473bf8d 100644
--- a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py
+++ b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py
@@ -24,6 +24,10 @@ class SeriesPlotMatplotlibParityTests(
     SeriesPlotMatplotlibTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase
 ):
+    @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.")
+    def test_empty_hist(self):
+        super().test_empty_hist()
+
     @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.")
     def test_hist(self):
         super().test_hist()
 
diff --git a/python/pyspark/pandas/tests/connect/test_connect_plotting.py b/python/pyspark/pandas/tests/connect/test_connect_plotting.py
new file mode 100644
index 0000000000000..9b7cfebfcd552
--- /dev/null
+++ b/python/pyspark/pandas/tests/connect/test_connect_plotting.py
@@ -0,0 +1,82 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import re
+import unittest
+
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.pandas.exceptions import PandasNotImplementedError
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
+
+
+class ConnectPlottingTests(PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase):
+    """Verify that hist/kde/density plots raise PandasNotImplementedError on Spark Connect."""
+
+    # (accessor method or ``kind`` value, keyword arguments) combinations to reject.
+    UNSUPPORTED_CASES = [
+        ("hist", {}),
+        ("hist", {"bins": 3}),
+        ("kde", {}),
+        ("kde", {"bw_method": 3}),
+        ("density", {}),
+        ("density", {"bw_method": 3}),
+    ]
+
+    @property
+    def pdf1(self):
+        return pd.DataFrame(
+            [[1, 2], [4, 5], [7, 8]],
+            index=["cobra", "viper", None],
+            columns=["max_speed", "shield"],
+        )
+
+    @property
+    def psdf1(self):
+        return ps.from_pandas(self.pdf1)
+
+    def _plot_accessors(self):
+        """Return (class name expected in the error message, plot accessor) pairs."""
+        psdf = self.psdf1
+        return [("pd.DataFrame", psdf.plot), ("pd.Series", psdf.shield.plot)]
+
+    def test_unsupported_functions(self):
+        for class_name, plot in self._plot_accessors():
+            for method, kwargs in self.UNSUPPORTED_CASES:
+                with self.subTest(class_name=class_name, method=method, kwargs=kwargs):
+                    with self.assertRaisesRegex(PandasNotImplementedError, re.escape(class_name)):
+                        getattr(plot, method)(**kwargs)
+
+    def test_unsupported_kinds(self):
+        for class_name, plot in self._plot_accessors():
+            for kind, kwargs in self.UNSUPPORTED_CASES:
+                with self.subTest(class_name=class_name, kind=kind, kwargs=kwargs):
+                    with self.assertRaisesRegex(PandasNotImplementedError, re.escape(class_name)):
+                        plot(kind=kind, **kwargs)
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.connect.test_connect_plotting import *  # noqa: F401
+
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)