Skip to content

Commit

Permalink
Boyscouting -- cleaning up code
Browse files Browse the repository at this point in the history
  • Loading branch information
alysivji committed Dec 27, 2017
1 parent b083ef9 commit dfd192e
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 119 deletions.
80 changes: 37 additions & 43 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# -*- coding: utf-8 -*-

"""
-we test .agg behavior / note that .apply is tested
-generally in test_groupby.py
+test .agg behavior / note that .apply is tested generally in test_groupby.py
"""

import numpy as np
Expand Down Expand Up @@ -34,11 +33,10 @@ def setup_method(self, method):
{'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
'C': np.random.randn(8),
'D': np.array(
np.random.randn(8), dtype='float32')})
'D': np.array(np.random.randn(8), dtype='float32')})

index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
'three']],
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
['one', 'two', 'three']],
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
Expand Down Expand Up @@ -86,22 +84,21 @@ def test_agg_apply_corner(self):
assert self.ts.dtype == np.float64

# groupby float64 values results in Float64Index
exp = Series([],
dtype=np.float64,
exp = Series([], dtype=np.float64,
index=pd.Index([], dtype=np.float64))
tm.assert_series_equal(grouped.sum(), exp)
tm.assert_series_equal(grouped.agg(np.sum), exp)
tm.assert_series_equal(grouped.apply(np.sum), exp, check_index_type=False)
tm.assert_series_equal(grouped.apply(np.sum), exp,
check_index_type=False)

# DataFrame
grouped = self.tsframe.groupby(self.tsframe['A'] * np.nan)
exp_df = DataFrame(columns=self.tsframe.columns,
dtype=float,
exp_df = DataFrame(columns=self.tsframe.columns, dtype=float,
index=pd.Index([], dtype=np.float64))
tm.assert_frame_equal(grouped.sum(), exp_df, check_names=False)
tm.assert_frame_equal(grouped.agg(np.sum), exp_df, check_names=False)
tm.assert_frame_equal(grouped.apply(np.sum), exp_df.iloc[:, :0],
check_names=False)
check_names=False)

def test_agg_grouping_is_list_tuple(self):
from pandas.core.groupby import Grouping
Expand Down Expand Up @@ -142,11 +139,14 @@ def _check_results(grouped):
tm.assert_frame_equal(result, expected)

# group frame by function dict
result = grouped.agg(OrderedDict([['A', 'var'], ['B', 'std'],
['C', 'mean'], ['D', 'sem']]))
expected = DataFrame(OrderedDict([['A', grouped['A'].var(
)], ['B', grouped['B'].std()], ['C', grouped['C'].mean()],
['D', grouped['D'].sem()]]))
result = grouped.agg(OrderedDict([['A', 'var'],
['B', 'std'],
['C', 'mean'],
['D', 'sem']]))
expected = DataFrame(OrderedDict([['A', grouped['A'].var()],
['B', grouped['B'].std()],
['C', grouped['C'].mean()],
['D', grouped['D'].sem()]]))
tm.assert_frame_equal(result, expected)

by_weekday = self.tsframe.groupby(lambda x: x.weekday())
Expand Down Expand Up @@ -264,10 +264,10 @@ def bar(x):
return np.std(x, ddof=1)

# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
d = OrderedDict([['C', np.mean], ['D', OrderedDict(
[['foo', np.mean], ['bar', np.std]])]])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
d = OrderedDict([['C', np.mean],
['D', OrderedDict([['foo', np.mean],
['bar', np.std]])]])
result = grouped.aggregate(d)

d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]])
Expand All @@ -279,31 +279,25 @@ def test_multi_function_flexible_mix(self):
# GH #1268
grouped = self.df.groupby('A')

d = OrderedDict([['C', OrderedDict([['foo', 'mean'],
['bar', 'std']])], ['D', 'sum']])

# Expected
d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
['D', {'sum': 'sum'}]])
# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = grouped.aggregate(d)

d2 = OrderedDict([['C', OrderedDict([['foo', 'mean'],
['bar', 'std']])],
['D', ['sum']]])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
expected = grouped.aggregate(d)

# Test 1
d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
['D', 'sum']])
# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result2 = grouped.aggregate(d2)

d3 = OrderedDict([['C', OrderedDict([['foo', 'mean'],
['bar', 'std']])],
['D', {'sum': 'sum'}]])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = grouped.aggregate(d)
tm.assert_frame_equal(result, expected)

# Test 2
d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
['D', ['sum']]])
# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
expected = grouped.aggregate(d3)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = grouped.aggregate(d)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result2, expected)
18 changes: 8 additions & 10 deletions pandas/tests/groupby/aggregate/test_cython.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# -*- coding: utf-8 -*-

"""
-we test .agg behavior / note that .apply is tested
-generally in test_groupby.py
+test cython .agg behavior
"""

from __future__ import print_function
Expand Down Expand Up @@ -74,23 +73,23 @@ def test_cython_agg_boolean(self):
def test_cython_agg_nothing_to_agg(self):
frame = DataFrame({'a': np.random.randint(0, 5, 50),
'b': ['foo', 'bar'] * 25})
with tm.assert_raises_regex(DataError,
"No numeric types to aggregate"):
msg = "No numeric types to aggregate"

with tm.assert_raises_regex(DataError, msg):
frame.groupby('a')['b'].mean()

frame = DataFrame({'a': np.random.randint(0, 5, 50),
'b': ['foo', 'bar'] * 25})
with tm.assert_raises_regex(DataError,
"No numeric types to aggregate"):
with tm.assert_raises_regex(DataError, msg):
frame[['b']].groupby(frame['a']).mean()

def test_cython_agg_nothing_to_agg_with_dates(self):
frame = DataFrame({'a': np.random.randint(0, 5, 50),
'b': ['foo', 'bar'] * 25,
'dates': pd.date_range('now', periods=50,
freq='T')})
with tm.assert_raises_regex(DataError,
"No numeric types to aggregate"):
msg = "No numeric types to aggregate"
with tm.assert_raises_regex(DataError, msg):
frame.groupby('b').dates.mean()

def test_cython_agg_frame_columns(self):
Expand All @@ -110,8 +109,7 @@ def test_cython_agg_return_dict(self):
'C': np.random.randn(8),
'D': np.random.randn(8)})

ts = df.groupby('A')['B'].agg(
lambda x: x.value_counts().to_dict())
ts = df.groupby('A')['B'].agg(lambda x: x.value_counts().to_dict())
expected = Series([{'two': 1, 'one': 1, 'three': 1},
{'two': 2, 'one': 2, 'three': 1}],
index=Index(['bar', 'foo'], name='A'),
Expand Down
Loading

0 comments on commit dfd192e

Please sign in to comment.