From dfd192e1993dd2a0dc749b19b234df0f1d855035 Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Wed, 27 Dec 2017 11:01:52 -0600 Subject: [PATCH] Boyscouting -- cleaning up code --- .../tests/groupby/aggregate/test_aggregate.py | 80 ++++++------- pandas/tests/groupby/aggregate/test_cython.py | 18 ++- pandas/tests/groupby/aggregate/test_other.py | 109 +++++++----------- 3 files changed, 88 insertions(+), 119 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 6a598c3de55c9d..35973974da1362 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1,8 +1,7 @@ # -*- coding: utf-8 -*- """ -we test .agg behavior / note that .apply is tested -generally in test_groupby.py +test .agg behavior / note that .apply is tested generally in test_groupby.py """ import numpy as np @@ -34,11 +33,10 @@ def setup_method(self, method): {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], 'C': np.random.randn(8), - 'D': np.array( - np.random.randn(8), dtype='float32')}) + 'D': np.array(np.random.randn(8), dtype='float32')}) - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 'three']], + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) @@ -86,22 +84,21 @@ def test_agg_apply_corner(self): assert self.ts.dtype == np.float64 # groupby float64 values results in Float64Index - exp = Series([], - dtype=np.float64, + exp = Series([], dtype=np.float64, index=pd.Index([], dtype=np.float64)) tm.assert_series_equal(grouped.sum(), exp) tm.assert_series_equal(grouped.agg(np.sum), exp) - tm.assert_series_equal(grouped.apply(np.sum), exp, check_index_type=False) + tm.assert_series_equal(grouped.apply(np.sum), exp, + check_index_type=False) # DataFrame grouped = self.tsframe.groupby(self.tsframe['A'] * np.nan) - exp_df = DataFrame(columns=self.tsframe.columns, - dtype=float, + exp_df = DataFrame(columns=self.tsframe.columns, dtype=float, index=pd.Index([], dtype=np.float64)) tm.assert_frame_equal(grouped.sum(), exp_df, check_names=False) tm.assert_frame_equal(grouped.agg(np.sum), exp_df, check_names=False) tm.assert_frame_equal(grouped.apply(np.sum), exp_df.iloc[:, :0], - check_names=False) + check_names=False) def test_agg_grouping_is_list_tuple(self): from pandas.core.groupby import Grouping @@ -142,11 +139,14 @@ def _check_results(grouped): tm.assert_frame_equal(result, expected) # group frame by function dict - result = grouped.agg(OrderedDict([['A', 'var'], ['B', 'std'], - ['C', 'mean'], ['D', 'sem']])) - expected = DataFrame(OrderedDict([['A', grouped['A'].var( - )], ['B', grouped['B'].std()], ['C', grouped['C'].mean()], - ['D', grouped['D'].sem()]])) + result = grouped.agg(OrderedDict([['A', 'var'], + ['B', 'std'], + ['C', 'mean'], + ['D', 'sem']])) + expected = DataFrame(OrderedDict([['A', grouped['A'].var()], + ['B', grouped['B'].std()], + ['C', grouped['C'].mean()], + ['D', grouped['D'].sem()]])) tm.assert_frame_equal(result, expected) by_weekday = self.tsframe.groupby(lambda x: x.weekday()) @@ -264,10 +264,10 @@ def bar(x): return np.std(x, ddof=1) # this uses column selection & renaming - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - d = OrderedDict([['C', np.mean], ['D', OrderedDict( - [['foo', np.mean], ['bar', np.std]])]]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + d = OrderedDict([['C', np.mean], + ['D', OrderedDict([['foo', np.mean], + ['bar', np.std]])]]) result = grouped.aggregate(d) d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]]) @@ -279,31 +279,25 @@ def test_multi_function_flexible_mix(self): # GH #1268 grouped = self.df.groupby('A') - d = OrderedDict([['C', OrderedDict([['foo', 'mean'], - ['bar', 'std']])], ['D', 'sum']]) - + # Expected + d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])], + ['D', {'sum': 'sum'}]]) # this uses column selection & renaming - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = grouped.aggregate(d) - - d2 = OrderedDict([['C', OrderedDict([['foo', 'mean'], - ['bar', 'std']])], - ['D', ['sum']]]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + expected = grouped.aggregate(d) + # Test 1 + d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])], + ['D', 'sum']]) # this uses column selection & renaming - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result2 = grouped.aggregate(d2) - - d3 = OrderedDict([['C', OrderedDict([['foo', 'mean'], - ['bar', 'std']])], - ['D', {'sum': 'sum'}]]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = grouped.aggregate(d) + tm.assert_frame_equal(result, expected) + # Test 2 + d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])], + ['D', ['sum']]]) # this uses column selection & renaming - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - expected = grouped.aggregate(d3) - + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = grouped.aggregate(d) tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(result2, expected) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 909346c342047a..494081cc5986f5 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -1,8 +1,7 @@ # -*- coding: utf-8 -*- """ -we test .agg behavior / note that .apply is tested -generally in test_groupby.py +test cython .agg behavior """ from __future__ import print_function @@ -74,14 +73,14 @@ def test_cython_agg_boolean(self): def test_cython_agg_nothing_to_agg(self): frame = DataFrame({'a': np.random.randint(0, 5, 50), 'b': ['foo', 'bar'] * 25}) - with tm.assert_raises_regex(DataError, - "No numeric types to aggregate"): + msg = "No numeric types to aggregate" + + with tm.assert_raises_regex(DataError, msg): frame.groupby('a')['b'].mean() frame = DataFrame({'a': np.random.randint(0, 5, 50), 'b': ['foo', 'bar'] * 25}) - with tm.assert_raises_regex(DataError, - "No numeric types to aggregate"): + with tm.assert_raises_regex(DataError, msg): frame[['b']].groupby(frame['a']).mean() def test_cython_agg_nothing_to_agg_with_dates(self): @@ -89,8 +88,8 @@ def test_cython_agg_nothing_to_agg_with_dates(self): 'b': ['foo', 'bar'] * 25, 'dates': pd.date_range('now', periods=50, freq='T')}) - with tm.assert_raises_regex(DataError, - "No numeric types to aggregate"): + msg = "No numeric types to aggregate" + with tm.assert_raises_regex(DataError, msg): frame.groupby('b').dates.mean() def test_cython_agg_frame_columns(self): @@ -110,8 +109,7 @@ def test_cython_agg_return_dict(self): 'C': np.random.randn(8), 'D': np.random.randn(8)}) - ts = df.groupby('A')['B'].agg( - lambda x: x.value_counts().to_dict()) + ts = df.groupby('A')['B'].agg(lambda x: x.value_counts().to_dict()) expected = Series([{'two': 1, 'one': 1, 'three': 1}, {'two': 2, 'one': 2, 'three': 1}], index=Index(['bar', 'foo'], name='A'), diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 23c3e9fbc4cd81..76059e2d658544 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -1,8 +1,7 @@ # -*- coding: utf-8 -*- """ -we test .agg behavior / note that .apply is tested -generally in test_groupby.py +test all other .agg behavior """ from __future__ import print_function @@ -42,14 +41,18 @@ def peak_to_peak(arr): def test_agg_datetimes_mixed(): - data = [[1, '2012-01-01', 1.0], [2, '2012-01-02', 2.0], [3, None, 3.0]] + data = [[1, '2012-01-01', 1.0], + [2, '2012-01-02', 2.0], + [3, None, 3.0]] df1 = DataFrame({'key': [x[0] for x in data], 'date': [x[1] for x in data], 'value': [x[2] for x in data]}) - data = [[row[0], datetime.strptime(row[1], '%Y-%m-%d').date() if row[1] - else None, row[2]] for row in data] + data = [[row[0], + datetime.strptime(row[1], '%Y-%m-%d').date() if row[1] else None, + row[2]] + for row in data] df2 = DataFrame({'key': [x[0] for x in data], 'date': [x[1] for x in data], @@ -84,9 +87,8 @@ def test_agg_period_index(): def test_agg_dict_parameter_cast_result_dtypes(): # GH 12821 - df = DataFrame( - {'class': ['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'], - 'time': date_range('1/1/2011', periods=8, freq='H')}) + df = DataFrame({'class': ['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'], + 'time': date_range('1/1/2011', periods=8, freq='H')}) df.loc[[0, 1, 2, 5], 'time'] = None # test for `first` function @@ -138,15 +140,13 @@ def test_aggregate_float64_no_int64(): "b": [1, 2, 2, 4, 5], "c": [1, 2, 3, 4, 5]}) - expected = DataFrame({"a": [1, 2.5, 4, 5]}, - index=[1, 2, 4, 5]) + expected = DataFrame({"a": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5]) expected.index.name = "b" result = df.groupby("b")[["a"]].mean() tm.assert_frame_equal(result, expected) - expected = DataFrame({"a": [1, 2.5, 4, 5], - "c": [1, 2.5, 4, 5]}, + expected = DataFrame({"a": [1, 2.5, 4, 5], "c": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5]) expected.index.name = "b" @@ -173,56 +173,36 @@ def test_aggregate_api_consistency(): d_sum = grouped['D'].sum() result = grouped['D'].agg(['sum', 'mean']) - expected = pd.concat([d_sum, d_mean], - axis=1) + expected = pd.concat([d_sum, d_mean], axis=1) expected.columns = ['sum', 'mean'] tm.assert_frame_equal(result, expected, check_like=True) result = grouped.agg([np.sum, np.mean]) - expected = pd.concat([c_sum, - c_mean, - d_sum, - d_mean], - axis=1) + expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1) expected.columns = MultiIndex.from_product([['C', 'D'], ['sum', 'mean']]) tm.assert_frame_equal(result, expected, check_like=True) result = grouped[['D', 'C']].agg([np.sum, np.mean]) - expected = pd.concat([d_sum, - d_mean, - c_sum, - c_mean], - axis=1) + expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1) expected.columns = MultiIndex.from_product([['D', 'C'], ['sum', 'mean']]) tm.assert_frame_equal(result, expected, check_like=True) result = grouped.agg({'C': 'mean', 'D': 'sum'}) - expected = pd.concat([d_sum, - c_mean], - axis=1) + expected = pd.concat([d_sum, c_mean], axis=1) tm.assert_frame_equal(result, expected, check_like=True) result = grouped.agg({'C': ['mean', 'sum'], 'D': ['mean', 'sum']}) - expected = pd.concat([c_mean, - c_sum, - d_mean, - d_sum], - axis=1) + expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1) expected.columns = MultiIndex.from_product([['C', 'D'], ['mean', 'sum']]) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = grouped[['D', 'C']].agg({'r': np.sum, 'r2': np.mean}) - expected = pd.concat([d_sum, - c_sum, - d_mean, - c_mean], - axis=1) + expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1) expected.columns = MultiIndex.from_product([['r', 'r2'], ['D', 'C']]) tm.assert_frame_equal(result, expected, check_like=True) @@ -240,8 +220,7 @@ def test_agg_dict_renaming_deprecation(): 'C': {'bar': ['count', 'min']}}) assert "using a dict with renaming" in str(w[0].message) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): df.groupby('A')[['B', 'C']].agg({'ma': 'max'}) with tm.assert_produces_warning(FutureWarning) as w: @@ -261,23 +240,17 @@ def test_agg_compat(): g = df.groupby(['A', 'B']) - expected = pd.concat([g['D'].sum(), - g['D'].std()], - axis=1) + expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1) expected.columns = MultiIndex.from_tuples([('C', 'sum'), ('C', 'std')]) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = g['D'].agg({'C': ['sum', 'std']}) tm.assert_frame_equal(result, expected, check_like=True) - expected = pd.concat([g['D'].sum(), - g['D'].std()], - axis=1) + expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1) expected.columns = ['C', 'D'] - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = g['D'].agg({'C': 'sum', 'D': 'std'}) tm.assert_frame_equal(result, expected, check_like=True) @@ -299,8 +272,7 @@ def test_agg_nested_dicts(): g.aggregate({'r1': {'C': ['mean', 'sum']}, 'r2': {'D': ['mean', 'sum']}}) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = g.agg({'C': {'ra': ['mean', 'std']}, 'D': {'rb': ['mean', 'std']}}) expected = pd.concat([g['C'].mean(), g['C'].std(), @@ -313,13 +285,11 @@ def test_agg_nested_dicts(): # same name as the original column # GH9052 - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): expected = g['D'].agg({'result1': np.sum, 'result2': np.mean}) expected = expected.rename(columns={'result1': 'D'}) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = g['D'].agg({'D': np.sum, 'result2': np.mean}) tm.assert_frame_equal(result, expected, check_like=True) @@ -402,8 +372,12 @@ class fn_class(object): def __call__(self, x): return sum(x) - equiv_callables = [sum, np.sum, lambda x: sum(x), lambda x: x.sum(), - partial(sum), fn_class()] + equiv_callables = [sum, + np.sum, + lambda x: sum(x), + lambda x: x.sum(), + partial(sum), + fn_class(), ] expected = df.groupby("foo").agg(sum) for ecall in equiv_callables: @@ -432,8 +406,8 @@ def test_agg_over_numpy_arrays(): def test_agg_timezone_round_trip(): # GH 15426 ts = pd.Timestamp("2016-01-01 12:00:00", tz='US/Pacific') - df = pd.DataFrame({'a': 1, 'b': [ts + timedelta(minutes=nn) - for nn in range(10)]}) + df = pd.DataFrame({'a': 1, + 'b': [ts + timedelta(minutes=nn) for nn in range(10)]}) result1 = df.groupby('a')['b'].agg(np.min).iloc[0] result2 = df.groupby('a')['b'].agg(lambda x: np.min(x)).iloc[0] @@ -463,14 +437,17 @@ def test_sum_uint64_overflow(): # see gh-14758 # Convert to uint64 and don't overflow - df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], - dtype=object) + 9223372036854775807 + df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object) + df = df + 9223372036854775807 - index = pd.Index([9223372036854775808, 9223372036854775810, - 9223372036854775812], dtype=np.uint64) + index = pd.Index([9223372036854775808, + 9223372036854775810, + 9223372036854775812], + dtype=np.uint64) expected = pd.DataFrame({1: [9223372036854775809, 9223372036854775811, - 9223372036854775813]}, index=index) + 9223372036854775813]}, + index=index) expected.index.name = 0 result = df.groupby(0).sum()