Boyscouting -- cleaning up code

pandas-dev · Dec 27, 2017 · dfd192e
1 parent b083ef9
commit dfd192e
Show file tree

Hide file tree

Showing 3 changed files with 88 additions and 119 deletions.
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
 
 """
-we test .agg behavior / note that .apply is tested
-generally in test_groupby.py
+test .agg behavior / note that .apply is tested generally in test_groupby.py
 """
 
 import numpy as np
@@ -34,11 +33,10 @@ def setup_method(self, method):
             {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
              'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
              'C': np.random.randn(8),
-             'D': np.array(
-                 np.random.randn(8), dtype='float32')})
+             'D': np.array(np.random.randn(8), dtype='float32')})
 
-        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
-                                                                  'three']],
+        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
+                                   ['one', 'two', 'three']],
                            labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                    [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                            names=['first', 'second'])
@@ -86,22 +84,21 @@ def test_agg_apply_corner(self):
         assert self.ts.dtype == np.float64
 
         # groupby float64 values results in Float64Index
-        exp = Series([],
-                     dtype=np.float64,
+        exp = Series([], dtype=np.float64,
                      index=pd.Index([], dtype=np.float64))
         tm.assert_series_equal(grouped.sum(), exp)
         tm.assert_series_equal(grouped.agg(np.sum), exp)
-        tm.assert_series_equal(grouped.apply(np.sum), exp, check_index_type=False)
+        tm.assert_series_equal(grouped.apply(np.sum), exp,
+                               check_index_type=False)
 
         # DataFrame
         grouped = self.tsframe.groupby(self.tsframe['A'] * np.nan)
-        exp_df = DataFrame(columns=self.tsframe.columns,
-                           dtype=float,
+        exp_df = DataFrame(columns=self.tsframe.columns, dtype=float,
                            index=pd.Index([], dtype=np.float64))
         tm.assert_frame_equal(grouped.sum(), exp_df, check_names=False)
         tm.assert_frame_equal(grouped.agg(np.sum), exp_df, check_names=False)
         tm.assert_frame_equal(grouped.apply(np.sum), exp_df.iloc[:, :0],
-                           check_names=False)
+                              check_names=False)
 
     def test_agg_grouping_is_list_tuple(self):
         from pandas.core.groupby import Grouping
@@ -142,11 +139,14 @@ def _check_results(grouped):
             tm.assert_frame_equal(result, expected)
 
             # group frame by function dict
-            result = grouped.agg(OrderedDict([['A', 'var'], ['B', 'std'],
-                                              ['C', 'mean'], ['D', 'sem']]))
-            expected = DataFrame(OrderedDict([['A', grouped['A'].var(
-            )], ['B', grouped['B'].std()], ['C', grouped['C'].mean()],
-                ['D', grouped['D'].sem()]]))
+            result = grouped.agg(OrderedDict([['A', 'var'],
+                                              ['B', 'std'],
+                                              ['C', 'mean'],
+                                              ['D', 'sem']]))
+            expected = DataFrame(OrderedDict([['A', grouped['A'].var()],
+                                              ['B', grouped['B'].std()],
+                                              ['C', grouped['C'].mean()],
+                                              ['D', grouped['D'].sem()]]))
             tm.assert_frame_equal(result, expected)
 
         by_weekday = self.tsframe.groupby(lambda x: x.weekday())
@@ -264,10 +264,10 @@ def bar(x):
             return np.std(x, ddof=1)
 
         # this uses column selection & renaming
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            d = OrderedDict([['C', np.mean], ['D', OrderedDict(
-                [['foo', np.mean], ['bar', np.std]])]])
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            d = OrderedDict([['C', np.mean],
+                             ['D', OrderedDict([['foo', np.mean],
+                                                ['bar', np.std]])]])
             result = grouped.aggregate(d)
 
         d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]])
@@ -279,31 +279,25 @@ def test_multi_function_flexible_mix(self):
         # GH #1268
         grouped = self.df.groupby('A')
 
-        d = OrderedDict([['C', OrderedDict([['foo', 'mean'],
-                                            ['bar', 'std']])], ['D', 'sum']])
-
+        # Expected
+        d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
+                         ['D', {'sum': 'sum'}]])
         # this uses column selection & renaming
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            result = grouped.aggregate(d)
-
-        d2 = OrderedDict([['C', OrderedDict([['foo', 'mean'],
-                                             ['bar', 'std']])],
-                          ['D', ['sum']]])
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            expected = grouped.aggregate(d)
 
+        # Test 1
+        d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
+                         ['D', 'sum']])
         # this uses column selection & renaming
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            result2 = grouped.aggregate(d2)
-
-        d3 = OrderedDict([['C', OrderedDict([['foo', 'mean'],
-                                             ['bar', 'std']])],
-                          ['D', {'sum': 'sum'}]])
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            result = grouped.aggregate(d)
+        tm.assert_frame_equal(result, expected)
 
+        # Test 2
+        d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
+                         ['D', ['sum']]])
         # this uses column selection & renaming
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            expected = grouped.aggregate(d3)
-
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            result = grouped.aggregate(d)
         tm.assert_frame_equal(result, expected)
-        tm.assert_frame_equal(result2, expected)
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
 
 """
-we test .agg behavior / note that .apply is tested
-generally in test_groupby.py
+test cython .agg behavior
 """
 
 from __future__ import print_function
@@ -74,23 +73,23 @@ def test_cython_agg_boolean(self):
     def test_cython_agg_nothing_to_agg(self):
         frame = DataFrame({'a': np.random.randint(0, 5, 50),
                            'b': ['foo', 'bar'] * 25})
-        with tm.assert_raises_regex(DataError,
-                                    "No numeric types to aggregate"):
+        msg = "No numeric types to aggregate"
+
+        with tm.assert_raises_regex(DataError, msg):
             frame.groupby('a')['b'].mean()
 
         frame = DataFrame({'a': np.random.randint(0, 5, 50),
                            'b': ['foo', 'bar'] * 25})
-        with tm.assert_raises_regex(DataError,
-                                    "No numeric types to aggregate"):
+        with tm.assert_raises_regex(DataError, msg):
             frame[['b']].groupby(frame['a']).mean()
 
     def test_cython_agg_nothing_to_agg_with_dates(self):
         frame = DataFrame({'a': np.random.randint(0, 5, 50),
                            'b': ['foo', 'bar'] * 25,
                            'dates': pd.date_range('now', periods=50,
                                                   freq='T')})
-        with tm.assert_raises_regex(DataError,
-                                    "No numeric types to aggregate"):
+        msg = "No numeric types to aggregate"
+        with tm.assert_raises_regex(DataError, msg):
             frame.groupby('b').dates.mean()
 
     def test_cython_agg_frame_columns(self):
@@ -110,8 +109,7 @@ def test_cython_agg_return_dict(self):
              'C': np.random.randn(8),
              'D': np.random.randn(8)})
 
-        ts = df.groupby('A')['B'].agg(
-            lambda x: x.value_counts().to_dict())
+        ts = df.groupby('A')['B'].agg(lambda x: x.value_counts().to_dict())
         expected = Series([{'two': 1, 'one': 1, 'three': 1},
                            {'two': 2, 'one': 2, 'three': 1}],
                           index=Index(['bar', 'foo'], name='A'),