diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 53a2d1d1a9685..caf2365a54ec8 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -133,32 +133,30 @@ def test_agg_python_multiindex(self): [lambda x: x.month, lambda x: x.weekday()], ]) def test_aggregate_str_func(self, groupbyfunc): - def _check_results(grouped): - # single series - result = grouped['A'].agg('std') - expected = grouped['A'].std() - tm.assert_series_equal(result, expected) - - # group frame by function name - result = grouped.aggregate('var') - expected = grouped.var() - tm.assert_frame_equal(result, expected) - - # group frame by function dict - result = grouped.agg(OrderedDict([['A', 'var'], - ['B', 'std'], - ['C', 'mean'], - ['D', 'sem']])) - expected = DataFrame(OrderedDict([['A', grouped['A'].var()], - ['B', grouped['B'].std()], - ['C', grouped['C'].mean()], - ['D', grouped['D'].sem()]])) - tm.assert_frame_equal(result, expected) - - _check_results(self.tsframe.groupby(groupbyfunc)) + grouped = self.tsframe.groupby(groupbyfunc) - def test_aggregate_item_by_item(self): + # single series + result = grouped['A'].agg('std') + expected = grouped['A'].std() + tm.assert_series_equal(result, expected) + + # group frame by function name + result = grouped.aggregate('var') + expected = grouped.var() + tm.assert_frame_equal(result, expected) + + # group frame by function dict + result = grouped.agg(OrderedDict([['A', 'var'], + ['B', 'std'], + ['C', 'mean'], + ['D', 'sem']])) + expected = DataFrame(OrderedDict([['A', grouped['A'].var()], + ['B', grouped['B'].std()], + ['C', grouped['C'].mean()], + ['D', grouped['D'].sem()]])) + tm.assert_frame_equal(result, expected) + def test_aggregate_item_by_item(self): df = self.df.copy() df['E'] = ['a'] * len(self.df) grouped = self.df.groupby('A') @@ -210,7 +208,6 @@ def test_agg_multiple_functions_maintain_order(self): def test_multiple_functions_tuples_and_non_tuples(self): # #1359 - funcs = [('foo', 'mean'), 'std'] ex_funcs = [('foo', 'mean'), ('std', 'std')] @@ -231,7 +228,6 @@ def test_agg_multiple_functions_too_many_lambdas(self): grouped.agg(funcs) def test_more_flexible_frame_multi_function(self): - grouped = self.df.groupby('A') exmean = grouped.agg(OrderedDict([['C', np.mean], ['D', np.mean]])) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 186f31a6ca212..c8ee05ddbb74f 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -17,7 +17,7 @@ import pandas.util.testing as tm -@pytest.mark.parametrize('op', [ +@pytest.mark.parametrize('op_name', [ 'count', 'sum', 'std', @@ -29,41 +29,37 @@ 'min', 'max', ]) -def test_cythonized_aggers(op): +def test_cythonized_aggers(op_name): data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan], 'B': ['A', 'B'] * 6, 'C': np.random.randn(12)} df = DataFrame(data) df.loc[2:10:2, 'C'] = nan - def _testit(name): + op = lambda x: getattr(x, op_name)() - op = lambda x: getattr(x, name)() + # single column + grouped = df.drop(['B'], axis=1).groupby('A') + exp = {} + for cat, group in grouped: + exp[cat] = op(group['C']) + exp = DataFrame({'C': exp}) + exp.index.name = 'A' + result = op(grouped) + tm.assert_frame_equal(result, exp) - # single column - grouped = df.drop(['B'], axis=1).groupby('A') - exp = {} - for cat, group in grouped: - exp[cat] = op(group['C']) - exp = DataFrame({'C': exp}) - exp.index.name = 'A' - result = op(grouped) - tm.assert_frame_equal(result, exp) + # multiple columns + grouped = df.groupby(['A', 'B']) + expd = {} + for (cat1, cat2), group in grouped: + expd.setdefault(cat1, {})[cat2] = op(group['C']) + exp = DataFrame(expd).T.stack(dropna=False) + exp.index.names = ['A', 'B'] + exp.name = 'C' - # multiple columns - grouped = df.groupby(['A', 'B']) - expd = {} - for (cat1, cat2), group in grouped: - expd.setdefault(cat1, {})[cat2] = op(group['C']) - exp = DataFrame(expd).T.stack(dropna=False) - exp.index.names = ['A', 'B'] - exp.name = 'C' - - result = op(grouped)['C'] - if name in ['sum', 'prod']: - tm.assert_series_equal(result, exp) - - _testit(op) + result = op(grouped)['C'] + if op_name in ['sum', 'prod']: + tm.assert_series_equal(result, exp) def test_cython_agg_boolean(): @@ -151,11 +147,7 @@ def test__cython_agg_general(op, targop): result = df.groupby(labels)._cython_agg_general(op) expected = df.groupby(labels).agg(targop) - try: - tm.assert_frame_equal(result, expected) - except BaseException as exc: - exc.args += ('operation: %s' % op, ) - raise + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize('op, targop', [ @@ -173,11 +165,7 @@ def test_cython_agg_empty_buckets(op, targop): # which sets different values for min_count, so do that here. result = df.groupby(pd.cut(df[0], grps))._cython_agg_general(op) expected = df.groupby(pd.cut(df[0], grps)).agg(lambda x: targop(x)) - try: - tm.assert_frame_equal(result, expected) - except BaseException as exc: - exc.args += ('operation: %s' % op,) - raise + tm.assert_frame_equal(result, expected) def test_cython_agg_empty_buckets_nanops(): diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 92b8999a7bdc7..f8e44b1548819 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -158,7 +158,6 @@ def test_aggregate_api_consistency(): # GH 9052 # make sure that the aggregates via dict # are consistent - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], 'B': ['one', 'one', 'two', 'two', @@ -230,7 +229,6 @@ def test_agg_dict_renaming_deprecation(): def test_agg_compat(): # GH 12334 - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], 'B': ['one', 'one', 'two', 'two', @@ -257,7 +255,6 @@ def test_agg_compat(): def test_agg_nested_dicts(): # API change for disallowing these types of nested dicts - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], 'B': ['one', 'one', 'two', 'two', @@ -341,7 +338,6 @@ def bad(x): def test_agg_consistency(): # agg with ([]) and () not consistent # GH 6715 - def P1(a): try: return np.percentile(a.dropna(), q=1) @@ -435,7 +431,6 @@ def test_agg_timezone_round_trip(): def test_sum_uint64_overflow(): # see gh-14758 - # Convert to uint64 and don't overflow df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object) df = df + 9223372036854775807 @@ -492,7 +487,7 @@ def test_agg_structs_series(structure, expected): @pytest.mark.xfail(reason="GH-18869: agg func not called on empty groups.") -def test_agg_category_nansum(self): +def test_agg_category_nansum(): categories = ['a', 'b', 'c'] df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'], categories=categories),