diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 33e9bd0c2732a..3ce2bb0978385 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -507,6 +507,8 @@ Plotting - Bug in :meth:`DataFrame.plot` was rotating xticklabels when ``subplots=True``, even if the x-axis wasn't an irregular time series (:issue:`29460`) - Bug in :meth:`DataFrame.plot` where a marker letter in the ``style`` keyword sometimes causes a ``ValueError`` (:issue:`21003`) - Twinned axes were losing their tick labels which should only happen to all but the last row or column of 'externally' shared axes (:issue:`33819`) +- Bug in :meth:`DataFrameGroupBy.boxplot` where a ``KeyError`` was raised when ``subplots=False`` and ``column`` was specified (:issue:`16748`) + Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 8ceba22b1f7a4..3d0e30f8b9234 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -9,6 +9,7 @@ from pandas.core.dtypes.missing import remove_na_arraylike import pandas as pd +import pandas.core.common as com from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib.core import LinePlot, MPLPlot @@ -443,6 +444,15 @@ def boxplot_frame_groupby( df = frames[0].join(frames[1::]) else: df = frames[0] + + # GH 16748: DataFrameGroupBy.boxplot fails when subplots=False and `column` + # is given. In this case `df` becomes MI (MultiIndex) after groupby, so we + # need to couple the keys (grouped values) with column (original df + # column) to build the keys used to search for the subset to plot + if column is not None: + column = com.convert_to_list_like(column) + multi_key = pd.MultiIndex.from_product([keys, column]) + column = list(multi_key.values) ret = df.boxplot( column=column, fontsize=fontsize, diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 
dc2e9e1e8d15f..9e1a8d473b9d6 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -454,3 +454,76 @@ def test_fontsize(self): self._check_ticks_props( df.boxplot("a", by="b", fontsize=16), xlabelsize=16, ylabelsize=16 ) + + @pytest.mark.parametrize( + "col, expected_xticklabel", + [ + ("v", ["(a, v)", "(b, v)", "(c, v)", "(d, v)", "(e, v)"]), + (["v"], ["(a, v)", "(b, v)", "(c, v)", "(d, v)", "(e, v)"]), + ("v1", ["(a, v1)", "(b, v1)", "(c, v1)", "(d, v1)", "(e, v1)"]), + ( + ["v", "v1"], + [ + "(a, v)", + "(a, v1)", + "(b, v)", + "(b, v1)", + "(c, v)", + "(c, v1)", + "(d, v)", + "(d, v1)", + "(e, v)", + "(e, v1)", + ], + ), + ( + None, + [ + "(a, v)", + "(a, v1)", + "(b, v)", + "(b, v1)", + "(c, v)", + "(c, v1)", + "(d, v)", + "(d, v1)", + "(e, v)", + "(e, v1)", + ], + ), + ], + ) + def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel): + # GH 16748 + df = DataFrame( + { + "cat": np.random.choice(list("abcde"), 100), + "v": np.random.rand(100), + "v1": np.random.rand(100), + } + ) + grouped = df.groupby("cat") + + axes = _check_plot_works( + grouped.boxplot, subplots=False, column=col, return_type="axes" + ) + + result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] + assert expected_xticklabel == result_xticklabel + + def test_boxplot_multiindex_column(self): + # GH 16748 + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = list(zip(*arrays)) + index = MultiIndex.from_tuples(tuples, names=["first", "second"]) + df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index) + + col = [("bar", "one"), ("bar", "two")] + axes = _check_plot_works(df.boxplot, column=col, return_type="axes") + + expected_xticklabel = ["(bar, one)", "(bar, two)"] + result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] + assert expected_xticklabel == result_xticklabel