CLN: groupby assorted (#41379)

pandas-dev · May 10, 2021
commit 562235d (1 parent: 58accd7)
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 26 deletions.
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -89,7 +89,6 @@
     MultiIndex,
     all_indexes_same,
 )
-import pandas.core.indexes.base as ibase
 from pandas.core.series import Series
 from pandas.core.util.numba_ import maybe_use_numba
 
@@ -481,14 +480,13 @@ def _get_index() -> Index:
         if isinstance(values[0], dict):
             # GH #823 #24880
             index = _get_index()
-            result: FrameOrSeriesUnion = self._reindex_output(
-                self.obj._constructor_expanddim(values, index=index)
-            )
+            res_df = self.obj._constructor_expanddim(values, index=index)
+            res_df = self._reindex_output(res_df)
             # if self.observed is False,
             # keep all-NaN rows created while re-indexing
-            result = result.stack(dropna=self.observed)
-            result.name = self._selection_name
-            return result
+            res_ser = res_df.stack(dropna=self.observed)
+            res_ser.name = self._selection_name
+            return res_ser
         elif isinstance(values[0], (Series, DataFrame)):
             return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
         else:
@@ -1019,13 +1017,18 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
 
             # grouper specific aggregations
             if self.grouper.nkeys > 1:
+                # test_groupby_as_index_series_scalar gets here with 'not self.as_index'
                 return self._python_agg_general(func, *args, **kwargs)
             elif args or kwargs:
+                # test_pass_args_kwargs gets here (with and without as_index)
+                # can't return early
                 result = self._aggregate_frame(func, *args, **kwargs)
 
             elif self.axis == 1:
                 # _aggregate_multiple_funcs does not allow self.axis == 1
+                # Note: axis == 1 precludes 'not self.as_index', see __init__
                 result = self._aggregate_frame(func)
+                return result
 
             else:
 
@@ -1055,7 +1058,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
 
         if not self.as_index:
             self._insert_inaxis_grouper_inplace(result)
-            result.index = np.arange(len(result))
+            result.index = Index(range(len(result)))
 
         return result._convert(datetime=True)
 
@@ -1181,7 +1184,9 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same=False):
             if self.as_index:
                 return self.obj._constructor_sliced(values, index=key_index)
             else:
-                result = DataFrame(values, index=key_index, columns=[self._selection])
+                result = self.obj._constructor(
+                    values, index=key_index, columns=[self._selection]
+                )
                 self._insert_inaxis_grouper_inplace(result)
                 return result
         else:
@@ -1664,8 +1669,8 @@ def _wrap_transformed_output(
 
     def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
         if not self.as_index:
-            index = np.arange(mgr.shape[1])
-            mgr.set_axis(1, ibase.Index(index))
+            index = Index(range(mgr.shape[1]))
+            mgr.set_axis(1, index)
             result = self.obj._constructor(mgr)
 
             self._insert_inaxis_grouper_inplace(result)
@@ -1793,7 +1798,7 @@ def nunique(self, dropna: bool = True) -> DataFrame:
         results.columns.names = obj.columns.names  # TODO: do at higher level?
 
         if not self.as_index:
-            results.index = ibase.default_index(len(results))
+            results.index = Index(range(len(results)))
             self._insert_inaxis_grouper_inplace(results)
 
         return results

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -889,9 +889,8 @@ def codes_info(self) -> np.ndarray:
 
     @final
     def _get_compressed_codes(self) -> tuple[np.ndarray, np.ndarray]:
-        all_codes = self.codes
-        if len(all_codes) > 1:
-            group_index = get_group_index(all_codes, self.shape, sort=True, xnull=True)
+        if len(self.groupings) > 1:
+            group_index = get_group_index(self.codes, self.shape, sort=True, xnull=True)
             return compress_group_index(group_index, sort=self.sort)
 
         ping = self.groupings[0]
@@ -1111,6 +1110,7 @@ def groups(self):
 
     @property
     def nkeys(self) -> int:
+        # still matches len(self.groupings), but we can hard-code
         return 1
 
     def _get_grouper(self):

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -234,17 +234,18 @@ def f(x, q=None, axis=0):
     tm.assert_series_equal(trans_result, trans_expected)
 
     # DataFrame
-    df_grouped = tsframe.groupby(lambda x: x.month)
-    agg_result = df_grouped.agg(np.percentile, 80, axis=0)
-    apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
-    expected = df_grouped.quantile(0.8)
-    tm.assert_frame_equal(apply_result, expected, check_names=False)
-    tm.assert_frame_equal(agg_result, expected)
-
-    agg_result = df_grouped.agg(f, q=80)
-    apply_result = df_grouped.apply(DataFrame.quantile, q=0.8)
-    tm.assert_frame_equal(agg_result, expected)
-    tm.assert_frame_equal(apply_result, expected, check_names=False)
+    for as_index in [True, False]:
+        df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index)
+        agg_result = df_grouped.agg(np.percentile, 80, axis=0)
+        apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
+        expected = df_grouped.quantile(0.8)
+        tm.assert_frame_equal(apply_result, expected, check_names=False)
+        tm.assert_frame_equal(agg_result, expected)
+
+        agg_result = df_grouped.agg(f, q=80)
+        apply_result = df_grouped.apply(DataFrame.quantile, q=0.8)
+        tm.assert_frame_equal(agg_result, expected)
+        tm.assert_frame_equal(apply_result, expected, check_names=False)
 
 
 def test_len():