diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 9023cf2ab1b4f..6ef43142fa386 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -848,6 +848,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`) - Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`) - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) +- Bug in :meth:`GroupBy.quantile` when ``q`` is list-like with multiple values and the column names are integers (:issue:`30289`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 529d123d256e8..d8b5a9ed67959 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1937,21 +1937,18 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: # >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :] # but this hits https://github.com/pandas-dev/pandas/issues/10710 # which doesn't reorder the list-like `q` on the inner level. - order = np.roll(list(range(result.index.nlevels)), -1) + order = np.append(np.arange(1, result.index.nlevels), 0) + # temporarily saves the index names + index_names = np.array(result.index.names) + # set index names to positions to avoid confusion + result.index.names = np.arange(len(index_names)) + # place quantiles on the inside result = result.reorder_levels(order) - result = result.reindex(q, level=-1) + # restore the index names in order + result.index.names = index_names[order] - # fix order. 
- hi = len(q) * self.ngroups - arr = np.arange(0, hi, self.ngroups) - arrays = [] - - for i in range(self.ngroups): - arr2 = arr + i - arrays.append(arr2) - - indices = np.concatenate(arrays) - assert len(indices) == len(result) + # reorder rows to keep things sorted + indices = np.arange(len(result)).reshape([len(q), self.ngroups]).T.flatten() return result.take(indices) @Substitution(name="groupby") diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 4ca23c61ba920..a2423374a4e8f 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1398,6 +1398,22 @@ def test_quantile_array_multiple_levels(): tm.assert_frame_equal(result, expected) +def test_groupby_quantile_with_arraylike_q_and_int_columns(): + # GH30289 + df = pd.DataFrame(np.array([2 * [_ % 4] for _ in range(10)]), columns=[0, 1]) + + quantiles = [0.5, 0.6] + expected_index = pd.MultiIndex.from_product( + [[0, 1, 2, 3], [0.5, 0.6]], names=[0, None] + ) + + expected_values = [float(x) for x in [0, 0, 1, 1, 2, 2, 3, 3]] + expected = pd.DataFrame(expected_values, index=expected_index, columns=[1]) + result = df.groupby(0).quantile(quantiles) + + tm.assert_frame_equal(result, expected) + + def test_quantile_raises(): df = pd.DataFrame( [["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]