From 4c8e273934ae53f440b91945456906c1596ba390 Mon Sep 17 00:00:00 2001 From: Jiaxiang Date: Sat, 28 Dec 2019 00:16:49 +0800 Subject: [PATCH] Bug groupby quantile listlike q and int columns (#30485) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/groupby/groupby.py | 25 ++++++++++++----------- pandas/tests/groupby/test_function.py | 29 +++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index f52ae726e9eba..597b49f53a250 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -849,6 +849,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`) - Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`) - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) +- Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 529d123d256e8..227547daf3668 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1937,21 +1937,22 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: # >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :] # but this hits https://github.com/pandas-dev/pandas/issues/10710 # which doesn't reorder the list-like `q` on the inner level. - order = np.roll(list(range(result.index.nlevels)), -1) - result = result.reorder_levels(order) - result = result.reindex(q, level=-1) + order = list(range(1, result.index.nlevels)) + [0] + + # temporarily saves the index names + index_names = np.array(result.index.names) - # fix order. - hi = len(q) * self.ngroups - arr = np.arange(0, hi, self.ngroups) - arrays = [] + # set index names to positions to avoid confusion + result.index.names = np.arange(len(index_names)) + + # place quantiles on the inside + result = result.reorder_levels(order) - for i in range(self.ngroups): - arr2 = arr + i - arrays.append(arr2) + # restore the index names in order + result.index.names = index_names[order] - indices = np.concatenate(arrays) - assert len(indices) == len(result) + # reorder rows to keep things sorted + indices = np.arange(len(result)).reshape([len(q), self.ngroups]).T.flatten() return result.take(indices) @Substitution(name="groupby") diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 4ca23c61ba920..c41c9b4db053a 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1398,6 +1398,35 @@ def test_quantile_array_multiple_levels(): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize("frame_size", [(2, 3), (100, 10)]) +@pytest.mark.parametrize("groupby", [[0], [0, 1]]) +@pytest.mark.parametrize("q", [[0.5, 0.6]]) +def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby, q): + # GH30289 + nrow, ncol = frame_size + df = pd.DataFrame( + np.array([ncol * [_ % 4] for _ in range(nrow)]), columns=range(ncol) + ) + + idx_levels = [list(range(min(nrow, 4)))] * len(groupby) + [q] + idx_codes = [[x for x in range(min(nrow, 4)) for _ in q]] * len(groupby) + [ + list(range(len(q))) * min(nrow, 4) + ] + expected_index = pd.MultiIndex( + levels=idx_levels, codes=idx_codes, names=groupby + [None] + ) + expected_values = [ + [float(x)] * (ncol - len(groupby)) for x in range(min(nrow, 4)) for _ in q + ] + expected_columns = [x for x in range(ncol) if x not in groupby] + expected = pd.DataFrame( + expected_values, index=expected_index, columns=expected_columns + ) + result = df.groupby(groupby).quantile(q) + + tm.assert_frame_equal(result, expected) + + def test_quantile_raises(): df = pd.DataFrame( [["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]