From dfd192e1993dd2a0dc749b19b234df0f1d855035 Mon Sep 17 00:00:00 2001
From: Aly Sivji <alysivji@gmail.com>
Date: Wed, 27 Dec 2017 11:01:52 -0600
Subject: [PATCH] Boyscouting -- clean up groupby aggregate tests

---
 .../tests/groupby/aggregate/test_aggregate.py |  80 ++++++-------
 pandas/tests/groupby/aggregate/test_cython.py |  18 ++-
 pandas/tests/groupby/aggregate/test_other.py  | 109 +++++++-----------
 3 files changed, 88 insertions(+), 119 deletions(-)

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 6a598c3de55c9d..35973974da1362 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
 
 """
-we test .agg behavior / note that .apply is tested
-generally in test_groupby.py
+test .agg behavior / note that .apply is tested generally in test_groupby.py
 """
 
 import numpy as np
@@ -34,11 +33,10 @@ def setup_method(self, method):
             {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
              'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
              'C': np.random.randn(8),
-             'D': np.array(
-                 np.random.randn(8), dtype='float32')})
+             'D': np.array(np.random.randn(8), dtype='float32')})
 
-        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
-                                                                  'three']],
+        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
+                                   ['one', 'two', 'three']],
                            labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                    [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                            names=['first', 'second'])
@@ -86,22 +84,21 @@ def test_agg_apply_corner(self):
         assert self.ts.dtype == np.float64
 
         # groupby float64 values results in Float64Index
-        exp = Series([],
-                     dtype=np.float64,
+        exp = Series([], dtype=np.float64,
                      index=pd.Index([], dtype=np.float64))
         tm.assert_series_equal(grouped.sum(), exp)
         tm.assert_series_equal(grouped.agg(np.sum), exp)
-        tm.assert_series_equal(grouped.apply(np.sum), exp, check_index_type=False)
+        tm.assert_series_equal(grouped.apply(np.sum), exp,
+                               check_index_type=False)
 
         # DataFrame
         grouped = self.tsframe.groupby(self.tsframe['A'] * np.nan)
-        exp_df = DataFrame(columns=self.tsframe.columns,
-                           dtype=float,
+        exp_df = DataFrame(columns=self.tsframe.columns, dtype=float,
                            index=pd.Index([], dtype=np.float64))
         tm.assert_frame_equal(grouped.sum(), exp_df, check_names=False)
         tm.assert_frame_equal(grouped.agg(np.sum), exp_df, check_names=False)
         tm.assert_frame_equal(grouped.apply(np.sum), exp_df.iloc[:, :0],
-                           check_names=False)
+                              check_names=False)
 
     def test_agg_grouping_is_list_tuple(self):
         from pandas.core.groupby import Grouping
@@ -142,11 +139,14 @@ def _check_results(grouped):
             tm.assert_frame_equal(result, expected)
 
             # group frame by function dict
-            result = grouped.agg(OrderedDict([['A', 'var'], ['B', 'std'],
-                                              ['C', 'mean'], ['D', 'sem']]))
-            expected = DataFrame(OrderedDict([['A', grouped['A'].var(
-            )], ['B', grouped['B'].std()], ['C', grouped['C'].mean()],
-                ['D', grouped['D'].sem()]]))
+            result = grouped.agg(OrderedDict([['A', 'var'],
+                                              ['B', 'std'],
+                                              ['C', 'mean'],
+                                              ['D', 'sem']]))
+            expected = DataFrame(OrderedDict([['A', grouped['A'].var()],
+                                              ['B', grouped['B'].std()],
+                                              ['C', grouped['C'].mean()],
+                                              ['D', grouped['D'].sem()]]))
             tm.assert_frame_equal(result, expected)
 
         by_weekday = self.tsframe.groupby(lambda x: x.weekday())
@@ -264,10 +264,10 @@ def bar(x):
             return np.std(x, ddof=1)
 
         # this uses column selection & renaming
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            d = OrderedDict([['C', np.mean], ['D', OrderedDict(
-                [['foo', np.mean], ['bar', np.std]])]])
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            d = OrderedDict([['C', np.mean],
+                             ['D', OrderedDict([['foo', np.mean],
+                                                ['bar', np.std]])]])
             result = grouped.aggregate(d)
 
         d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]])
@@ -279,31 +279,25 @@ def test_multi_function_flexible_mix(self):
         # GH #1268
         grouped = self.df.groupby('A')
 
-        d = OrderedDict([['C', OrderedDict([['foo', 'mean'],
-                                            ['bar', 'std']])], ['D', 'sum']])
-
+        # Expected: column D aggregated via an explicit renaming dict
+        d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
+                         ['D', {'sum': 'sum'}]])
         # this uses column selection & renaming
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            result = grouped.aggregate(d)
-
-        d2 = OrderedDict([['C', OrderedDict([['foo', 'mean'],
-                                             ['bar', 'std']])],
-                          ['D', ['sum']]])
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            expected = grouped.aggregate(d)
 
+        # Test 1: column D aggregated via a plain function-name string
+        d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
+                         ['D', 'sum']])
         # this uses column selection & renaming
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            result2 = grouped.aggregate(d2)
-
-        d3 = OrderedDict([['C', OrderedDict([['foo', 'mean'],
-                                             ['bar', 'std']])],
-                          ['D', {'sum': 'sum'}]])
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            result = grouped.aggregate(d)
+        tm.assert_frame_equal(result, expected)
 
+        # Test 2: column D aggregated via a one-element list of names
+        d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
+                         ['D', ['sum']]])
         # this uses column selection & renaming
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            expected = grouped.aggregate(d3)
-
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            result = grouped.aggregate(d)
         tm.assert_frame_equal(result, expected)
-        tm.assert_frame_equal(result2, expected)
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index 909346c342047a..494081cc5986f5 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
 
 """
-we test .agg behavior / note that .apply is tested
-generally in test_groupby.py
+test cython .agg behavior
 """
 
 from __future__ import print_function
@@ -74,14 +73,14 @@ def test_cython_agg_boolean(self):
     def test_cython_agg_nothing_to_agg(self):
         frame = DataFrame({'a': np.random.randint(0, 5, 50),
                            'b': ['foo', 'bar'] * 25})
-        with tm.assert_raises_regex(DataError,
-                                    "No numeric types to aggregate"):
+        msg = "No numeric types to aggregate"
+
+        with tm.assert_raises_regex(DataError, msg):
             frame.groupby('a')['b'].mean()
 
         frame = DataFrame({'a': np.random.randint(0, 5, 50),
                            'b': ['foo', 'bar'] * 25})
-        with tm.assert_raises_regex(DataError,
-                                    "No numeric types to aggregate"):
+        with tm.assert_raises_regex(DataError, msg):
             frame[['b']].groupby(frame['a']).mean()
 
     def test_cython_agg_nothing_to_agg_with_dates(self):
@@ -89,8 +88,8 @@ def test_cython_agg_nothing_to_agg_with_dates(self):
                            'b': ['foo', 'bar'] * 25,
                            'dates': pd.date_range('now', periods=50,
                                                   freq='T')})
-        with tm.assert_raises_regex(DataError,
-                                    "No numeric types to aggregate"):
+        msg = "No numeric types to aggregate"
+        with tm.assert_raises_regex(DataError, msg):
             frame.groupby('b').dates.mean()
 
     def test_cython_agg_frame_columns(self):
@@ -110,8 +109,7 @@ def test_cython_agg_return_dict(self):
              'C': np.random.randn(8),
              'D': np.random.randn(8)})
 
-        ts = df.groupby('A')['B'].agg(
-            lambda x: x.value_counts().to_dict())
+        ts = df.groupby('A')['B'].agg(lambda x: x.value_counts().to_dict())
         expected = Series([{'two': 1, 'one': 1, 'three': 1},
                            {'two': 2, 'one': 2, 'three': 1}],
                           index=Index(['bar', 'foo'], name='A'),
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 23c3e9fbc4cd81..76059e2d658544 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
 
 """
-we test .agg behavior / note that .apply is tested
-generally in test_groupby.py
+test all other .agg behavior
 """
 
 from __future__ import print_function
@@ -42,14 +41,18 @@ def peak_to_peak(arr):
 
 
 def test_agg_datetimes_mixed():
-    data = [[1, '2012-01-01', 1.0], [2, '2012-01-02', 2.0], [3, None, 3.0]]
+    data = [[1, '2012-01-01', 1.0],
+            [2, '2012-01-02', 2.0],
+            [3, None, 3.0]]
 
     df1 = DataFrame({'key': [x[0] for x in data],
                      'date': [x[1] for x in data],
                      'value': [x[2] for x in data]})
 
-    data = [[row[0], datetime.strptime(row[1], '%Y-%m-%d').date() if row[1]
-             else None, row[2]] for row in data]
+    data = [[row[0],
+             datetime.strptime(row[1], '%Y-%m-%d').date() if row[1] else None,
+             row[2]]
+            for row in data]
 
     df2 = DataFrame({'key': [x[0] for x in data],
                      'date': [x[1] for x in data],
@@ -84,9 +87,8 @@ def test_agg_period_index():
 def test_agg_dict_parameter_cast_result_dtypes():
     # GH 12821
 
-    df = DataFrame(
-        {'class': ['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'],
-         'time': date_range('1/1/2011', periods=8, freq='H')})
+    df = DataFrame({'class': ['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'],
+                    'time': date_range('1/1/2011', periods=8, freq='H')})
     df.loc[[0, 1, 2, 5], 'time'] = None
 
     # test for `first` function
@@ -138,15 +140,13 @@ def test_aggregate_float64_no_int64():
                     "b": [1, 2, 2, 4, 5],
                     "c": [1, 2, 3, 4, 5]})
 
-    expected = DataFrame({"a": [1, 2.5, 4, 5]},
-                         index=[1, 2, 4, 5])
+    expected = DataFrame({"a": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5])
     expected.index.name = "b"
 
     result = df.groupby("b")[["a"]].mean()
     tm.assert_frame_equal(result, expected)
 
-    expected = DataFrame({"a": [1, 2.5, 4, 5],
-                          "c": [1, 2.5, 4, 5]},
+    expected = DataFrame({"a": [1, 2.5, 4, 5], "c": [1, 2.5, 4, 5]},
                          index=[1, 2, 4, 5])
     expected.index.name = "b"
 
@@ -173,56 +173,36 @@ def test_aggregate_api_consistency():
     d_sum = grouped['D'].sum()
 
     result = grouped['D'].agg(['sum', 'mean'])
-    expected = pd.concat([d_sum, d_mean],
-                         axis=1)
+    expected = pd.concat([d_sum, d_mean], axis=1)
     expected.columns = ['sum', 'mean']
     tm.assert_frame_equal(result, expected, check_like=True)
 
     result = grouped.agg([np.sum, np.mean])
-    expected = pd.concat([c_sum,
-                          c_mean,
-                          d_sum,
-                          d_mean],
-                         axis=1)
+    expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1)
     expected.columns = MultiIndex.from_product([['C', 'D'],
                                                 ['sum', 'mean']])
     tm.assert_frame_equal(result, expected, check_like=True)
 
     result = grouped[['D', 'C']].agg([np.sum, np.mean])
-    expected = pd.concat([d_sum,
-                          d_mean,
-                          c_sum,
-                          c_mean],
-                         axis=1)
+    expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1)
     expected.columns = MultiIndex.from_product([['D', 'C'],
                                                 ['sum', 'mean']])
     tm.assert_frame_equal(result, expected, check_like=True)
 
     result = grouped.agg({'C': 'mean', 'D': 'sum'})
-    expected = pd.concat([d_sum,
-                          c_mean],
-                         axis=1)
+    expected = pd.concat([d_sum, c_mean], axis=1)
     tm.assert_frame_equal(result, expected, check_like=True)
 
     result = grouped.agg({'C': ['mean', 'sum'],
                           'D': ['mean', 'sum']})
-    expected = pd.concat([c_mean,
-                          c_sum,
-                          d_mean,
-                          d_sum],
-                         axis=1)
+    expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1)
     expected.columns = MultiIndex.from_product([['C', 'D'],
                                                 ['mean', 'sum']])
 
-    with tm.assert_produces_warning(FutureWarning,
-                                    check_stacklevel=False):
+    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         result = grouped[['D', 'C']].agg({'r': np.sum,
                                           'r2': np.mean})
-    expected = pd.concat([d_sum,
-                          c_sum,
-                          d_mean,
-                          c_mean],
-                         axis=1)
+    expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1)
     expected.columns = MultiIndex.from_product([['r', 'r2'],
                                                 ['D', 'C']])
     tm.assert_frame_equal(result, expected, check_like=True)
@@ -240,8 +220,7 @@ def test_agg_dict_renaming_deprecation():
                              'C': {'bar': ['count', 'min']}})
         assert "using a dict with renaming" in str(w[0].message)
 
-    with tm.assert_produces_warning(FutureWarning,
-                                    check_stacklevel=False):
+    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         df.groupby('A')[['B', 'C']].agg({'ma': 'max'})
 
     with tm.assert_produces_warning(FutureWarning) as w:
@@ -261,23 +240,17 @@ def test_agg_compat():
 
     g = df.groupby(['A', 'B'])
 
-    expected = pd.concat([g['D'].sum(),
-                          g['D'].std()],
-                         axis=1)
+    expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1)
     expected.columns = MultiIndex.from_tuples([('C', 'sum'),
                                                ('C', 'std')])
-    with tm.assert_produces_warning(FutureWarning,
-                                    check_stacklevel=False):
+    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         result = g['D'].agg({'C': ['sum', 'std']})
     tm.assert_frame_equal(result, expected, check_like=True)
 
-    expected = pd.concat([g['D'].sum(),
-                          g['D'].std()],
-                         axis=1)
+    expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1)
     expected.columns = ['C', 'D']
 
-    with tm.assert_produces_warning(FutureWarning,
-                                    check_stacklevel=False):
+    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         result = g['D'].agg({'C': 'sum', 'D': 'std'})
     tm.assert_frame_equal(result, expected, check_like=True)
 
@@ -299,8 +272,7 @@ def test_agg_nested_dicts():
         g.aggregate({'r1': {'C': ['mean', 'sum']},
                      'r2': {'D': ['mean', 'sum']}})
 
-    with tm.assert_produces_warning(FutureWarning,
-                                    check_stacklevel=False):
+    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         result = g.agg({'C': {'ra': ['mean', 'std']},
                         'D': {'rb': ['mean', 'std']}})
     expected = pd.concat([g['C'].mean(), g['C'].std(),
@@ -313,13 +285,11 @@ def test_agg_nested_dicts():
 
     # same name as the original column
     # GH9052
-    with tm.assert_produces_warning(FutureWarning,
-                                    check_stacklevel=False):
+    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         expected = g['D'].agg({'result1': np.sum, 'result2': np.mean})
     expected = expected.rename(columns={'result1': 'D'})
 
-    with tm.assert_produces_warning(FutureWarning,
-                                    check_stacklevel=False):
+    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         result = g['D'].agg({'D': np.sum, 'result2': np.mean})
     tm.assert_frame_equal(result, expected, check_like=True)
 
@@ -402,8 +372,12 @@ class fn_class(object):
         def __call__(self, x):
             return sum(x)
 
-    equiv_callables = [sum, np.sum, lambda x: sum(x), lambda x: x.sum(),
-                       partial(sum), fn_class()]
+    equiv_callables = [sum,
+                       np.sum,
+                       lambda x: sum(x),
+                       lambda x: x.sum(),
+                       partial(sum),
+                       fn_class(), ]
 
     expected = df.groupby("foo").agg(sum)
     for ecall in equiv_callables:
@@ -432,8 +406,8 @@ def test_agg_over_numpy_arrays():
 def test_agg_timezone_round_trip():
     # GH 15426
     ts = pd.Timestamp("2016-01-01 12:00:00", tz='US/Pacific')
-    df = pd.DataFrame({'a': 1, 'b': [ts + timedelta(minutes=nn)
-                                     for nn in range(10)]})
+    df = pd.DataFrame({'a': 1,
+                       'b': [ts + timedelta(minutes=nn) for nn in range(10)]})
 
     result1 = df.groupby('a')['b'].agg(np.min).iloc[0]
     result2 = df.groupby('a')['b'].agg(lambda x: np.min(x)).iloc[0]
@@ -463,14 +437,17 @@ def test_sum_uint64_overflow():
     # see gh-14758
 
     # Convert to uint64 and don't overflow
-    df = pd.DataFrame([[1, 2], [3, 4], [5, 6]],
-                      dtype=object) + 9223372036854775807
+    df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object)
+    df = df + 9223372036854775807
 
-    index = pd.Index([9223372036854775808, 9223372036854775810,
-                      9223372036854775812], dtype=np.uint64)
+    index = pd.Index([9223372036854775808,
+                      9223372036854775810,
+                      9223372036854775812],
+                     dtype=np.uint64)
     expected = pd.DataFrame({1: [9223372036854775809,
                                  9223372036854775811,
-                                 9223372036854775813]}, index=index)
+                                 9223372036854775813]},
+                            index=index)
 
     expected.index.name = 0
     result = df.groupby(0).sum()