Skip to content

Commit

Permalink
CLN: groupby assorted (#41379)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jbrockmendel committed May 10, 2021
1 parent 58accd7 commit 562235d
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 26 deletions.
29 changes: 17 additions & 12 deletions pandas/core/groupby/generic.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -89,7 +89,6 @@
MultiIndex,
all_indexes_same,
)
import pandas.core.indexes.base as ibase
from pandas.core.series import Series
from pandas.core.util.numba_ import maybe_use_numba

Expand Down Expand Up @@ -481,14 +480,13 @@ def _get_index() -> Index:
if isinstance(values[0], dict):
# GH #823 #24880
index = _get_index()
result: FrameOrSeriesUnion = self._reindex_output(
self.obj._constructor_expanddim(values, index=index)
)
res_df = self.obj._constructor_expanddim(values, index=index)
res_df = self._reindex_output(res_df)
# if self.observed is False,
# keep all-NaN rows created while re-indexing
result = result.stack(dropna=self.observed)
result.name = self._selection_name
return result
res_ser = res_df.stack(dropna=self.observed)
res_ser.name = self._selection_name
return res_ser
elif isinstance(values[0], (Series, DataFrame)):
return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
else:
Expand Down Expand Up @@ -1019,13 +1017,18 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)

# grouper specific aggregations
if self.grouper.nkeys > 1:
# test_groupby_as_index_series_scalar gets here with 'not self.as_index'
return self._python_agg_general(func, *args, **kwargs)
elif args or kwargs:
# test_pass_args_kwargs gets here (with and without as_index)
# can't return early
result = self._aggregate_frame(func, *args, **kwargs)

elif self.axis == 1:
# _aggregate_multiple_funcs does not allow self.axis == 1
# Note: axis == 1 precludes 'not self.as_index', see __init__
result = self._aggregate_frame(func)
return result

else:

Expand Down Expand Up @@ -1055,7 +1058,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)

if not self.as_index:
self._insert_inaxis_grouper_inplace(result)
result.index = np.arange(len(result))
result.index = Index(range(len(result)))

return result._convert(datetime=True)

Expand Down Expand Up @@ -1181,7 +1184,9 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same=False):
if self.as_index:
return self.obj._constructor_sliced(values, index=key_index)
else:
result = DataFrame(values, index=key_index, columns=[self._selection])
result = self.obj._constructor(
values, index=key_index, columns=[self._selection]
)
self._insert_inaxis_grouper_inplace(result)
return result
else:
Expand Down Expand Up @@ -1664,8 +1669,8 @@ def _wrap_transformed_output(

def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
if not self.as_index:
index = np.arange(mgr.shape[1])
mgr.set_axis(1, ibase.Index(index))
index = Index(range(mgr.shape[1]))
mgr.set_axis(1, index)
result = self.obj._constructor(mgr)

self._insert_inaxis_grouper_inplace(result)
Expand Down Expand Up @@ -1793,7 +1798,7 @@ def nunique(self, dropna: bool = True) -> DataFrame:
results.columns.names = obj.columns.names # TODO: do at higher level?

if not self.as_index:
results.index = ibase.default_index(len(results))
results.index = Index(range(len(results)))
self._insert_inaxis_grouper_inplace(results)

return results
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/groupby/ops.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -889,9 +889,8 @@ def codes_info(self) -> np.ndarray:

@final
def _get_compressed_codes(self) -> tuple[np.ndarray, np.ndarray]:
all_codes = self.codes
if len(all_codes) > 1:
group_index = get_group_index(all_codes, self.shape, sort=True, xnull=True)
if len(self.groupings) > 1:
group_index = get_group_index(self.codes, self.shape, sort=True, xnull=True)
return compress_group_index(group_index, sort=self.sort)

ping = self.groupings[0]
Expand Down Expand Up @@ -1111,6 +1110,7 @@ def groups(self):

@property
def nkeys(self) -> int:
# still matches len(self.groupings), but we can hard-code
return 1

def _get_grouper(self):
Expand Down
23 changes: 12 additions & 11 deletions pandas/tests/groupby/test_groupby.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -234,17 +234,18 @@ def f(x, q=None, axis=0):
tm.assert_series_equal(trans_result, trans_expected)

# DataFrame
df_grouped = tsframe.groupby(lambda x: x.month)
agg_result = df_grouped.agg(np.percentile, 80, axis=0)
apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
expected = df_grouped.quantile(0.8)
tm.assert_frame_equal(apply_result, expected, check_names=False)
tm.assert_frame_equal(agg_result, expected)

agg_result = df_grouped.agg(f, q=80)
apply_result = df_grouped.apply(DataFrame.quantile, q=0.8)
tm.assert_frame_equal(agg_result, expected)
tm.assert_frame_equal(apply_result, expected, check_names=False)
for as_index in [True, False]:
df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index)
agg_result = df_grouped.agg(np.percentile, 80, axis=0)
apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
expected = df_grouped.quantile(0.8)
tm.assert_frame_equal(apply_result, expected, check_names=False)
tm.assert_frame_equal(agg_result, expected)

agg_result = df_grouped.agg(f, q=80)
apply_result = df_grouped.apply(DataFrame.quantile, q=0.8)
tm.assert_frame_equal(agg_result, expected)
tm.assert_frame_equal(apply_result, expected, check_names=False)


def test_len():
Expand Down

0 comments on commit 562235d

Please sign in to comment.