From bc54a3f0c2e08893702c3929bfe7a9d543a08cdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20S=C5=82apek?= <28485371+mslapek@users.noreply.github.com> Date: Tue, 28 Dec 2021 10:38:27 +0900 Subject: [PATCH] [SPARK-37730][PYTHON] Replace use of MPLPlot._add_legend_handle with MPLPlot._append_legend_handles_labels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Replace use of MPLPlot._add_legend_handle (removed in pandas) with MPLPlot._append_legend_handles_labels in histogram and KDE plots. Based on: https://github.com/pandas-dev/pandas/commit/029907c9d69a0260401b78a016a6c4515d8f1c40 ### Why are the changes needed? Fix of SPARK-37730. plot.hist and plot.kde don't throw AttributeError for pandas=1.3.5. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? ~~Tested with existing plot test on CI (for older pandas only).~~ (it seems that CI doesn't run matplotlib tests, see https://github.com/apache/spark/pull/35000#issuecomment-1001267197) I've run tests on a local computer, see https://github.com/apache/spark/pull/35000#issuecomment-1001494019 : ``` $ python python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py ``` :question: **QUESTION:** Maybe add plot testing for pandas 1.3.5 on CI? (I've noticed that CI uses `pandas=1.3.4`, maybe update it to `1.3.5`?) Closes #35000 from mslapek/fixpythonplot. Authored-by: Michał Słapek <28485371+mslapek@users.noreply.github.com> Signed-off-by: Hyukjin Kwon (cherry picked from commit 371e307686debc4f7b44a37d2345a1a512f3fdcc) Signed-off-by: Dongjoon Hyun --- python/pyspark/pandas/plot/matplotlib.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/python/pyspark/pandas/plot/matplotlib.py b/python/pyspark/pandas/plot/matplotlib.py index 91387805c421f..030623605e513 100644 --- a/python/pyspark/pandas/plot/matplotlib.py +++ b/python/pyspark/pandas/plot/matplotlib.py @@ -392,6 +392,12 @@ def _make_plot(self): kwds = self.kwds.copy() label = pprint_thing(label if len(label) > 1 else label[0]) + # `if hasattr(...)` makes plotting compatible with pandas < 1.3, see pandas-dev/pandas#40078. + label = ( + self._mark_right_label(label, index=i) + if hasattr(self, "_mark_right_label") + else label + ) kwds["label"] = label style, kwds = self._apply_style_colors(colors, kwds, i, label) @@ -400,7 +406,10 @@ def _make_plot(self): kwds = self._make_plot_keywords(kwds, y) artists = self._plot(ax, y, column_num=i, stacking_id=stacking_id, **kwds) - self._add_legend_handle(artists[0], label, index=i) + # `if hasattr(...)` makes plotting compatible with pandas < 1.3, see pandas-dev/pandas#40078. + self._append_legend_handles_labels(artists[0], label) if hasattr( + self, "_append_legend_handles_labels" + ) else self._add_legend_handle(artists[0], label, index=i) @classmethod def _plot(cls, ax, y, style=None, bins=None, bottom=0, column_num=0, stacking_id=None, **kwds): @@ -483,6 +492,12 @@ def _make_plot(self): kwds = self.kwds.copy() label = pprint_thing(label if len(label) > 1 else label[0]) + # `if hasattr(...)` makes plotting compatible with pandas < 1.3, see pandas-dev/pandas#40078. + label = ( + self._mark_right_label(label, index=i) + if hasattr(self, "_mark_right_label") + else label + ) kwds["label"] = label style, kwds = self._apply_style_colors(colors, kwds, i, label) @@ -491,7 +506,10 @@ def _make_plot(self): kwds = self._make_plot_keywords(kwds, y) artists = self._plot(ax, y, column_num=i, stacking_id=stacking_id, **kwds) - self._add_legend_handle(artists[0], label, index=i) + # `if hasattr(...)` makes plotting compatible with pandas < 1.3, see pandas-dev/pandas#40078. + self._append_legend_handles_labels(artists[0], label) if hasattr( + self, "_append_legend_handles_labels" + ) else self._add_legend_handle(artists[0], label, index=i) def _get_ind(self, y): return KdePlotBase.get_ind(y, self.ind)