Skip to content

Commit

Permalink
Delete cython class, all tests are grouped by module
Browse files Browse the repository at this point in the history
  • Loading branch information
alysivji committed Dec 30, 2017
1 parent d48a149 commit 1add9dd
Showing 1 changed file with 180 additions and 174 deletions.
354 changes: 180 additions & 174 deletions pandas/tests/groupby/aggregate/test_cython.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,179 +17,185 @@
import pandas.util.testing as tm


class TestGroupByAggregateCython(object):
    """Checks of groupby aggregations against independently built results."""

    @pytest.mark.parametrize('op', [
        'count',
        'sum',
        'std',
        'var',
        'sem',
        'mean',
        'median',
        'prod',
        'min',
        'max',
    ])
    def test_cythonized_aggers(self, op):
        """Compare the groupby method named by ``op`` with per-group results.

        ``op`` is the name of the aggregation method; it is looked up with
        ``getattr`` on both the groupby object and each group's column.
        """
        data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan],
                'B': ['A', 'B'] * 6,
                'C': np.random.randn(12)}
        df = DataFrame(data)
        # Put missing values into column C at labels 2, 4, 6, 8 and 10.
        df.loc[2:10:2, 'C'] = nan

        def _testit(name):

            op = lambda x: getattr(x, name)()

            # single column
            grouped = df.drop(['B'], axis=1).groupby('A')
            exp = {}
            for cat, group in grouped:
                exp[cat] = op(group['C'])
            exp = DataFrame({'C': exp})
            exp.index.name = 'A'
            result = op(grouped)
            tm.assert_frame_equal(result, exp)

            # multiple columns
            grouped = df.groupby(['A', 'B'])
            expd = {}
            for (cat1, cat2), group in grouped:
                expd.setdefault(cat1, {})[cat2] = op(group['C'])
            exp = DataFrame(expd).T.stack(dropna=False)
            exp.index.names = ['A', 'B']
            exp.name = 'C'

            result = op(grouped)['C']
            # Only 'sum' and 'prod' are compared for the two-key grouping;
            # for other names the aggregation is merely executed.
            if name in ['sum', 'prod']:
                tm.assert_series_equal(result, exp)

        _testit(op)

    def test_cython_agg_boolean(self):
        """``mean`` of a boolean column must equal ``agg(np.mean)``."""
        frame = DataFrame({'a': np.random.randint(0, 5, 50),
                           'b': np.random.randint(0, 2, 50).astype('bool')})
        result = frame.groupby('a')['b'].mean()
        expected = frame.groupby('a')['b'].agg(np.mean)

        tm.assert_series_equal(result, expected)

    def test_cython_agg_nothing_to_agg(self):
        """``mean`` over a string-only column must raise ``DataError``."""
        frame = DataFrame({'a': np.random.randint(0, 5, 50),
                           'b': ['foo', 'bar'] * 25})
        msg = "No numeric types to aggregate"

        with tm.assert_raises_regex(DataError, msg):
            frame.groupby('a')['b'].mean()

        frame = DataFrame({'a': np.random.randint(0, 5, 50),
                           'b': ['foo', 'bar'] * 25})
        with tm.assert_raises_regex(DataError, msg):
            frame[['b']].groupby(frame['a']).mean()

    def test_cython_agg_nothing_to_agg_with_dates(self):
        """``mean`` of a datetime column must raise ``DataError``."""
        frame = DataFrame({'a': np.random.randint(0, 5, 50),
                           'b': ['foo', 'bar'] * 25,
                           'dates': pd.date_range('now', periods=50,
                                                  freq='T')})
        msg = "No numeric types to aggregate"
        with tm.assert_raises_regex(DataError, msg):
            frame.groupby('b').dates.mean()

    def test_cython_agg_frame_columns(self):
        """Smoke test: repeated column-wise ``mean`` calls must not raise."""
        # #2113
        df = DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]})

        df.groupby(level=0, axis='columns').mean()
        df.groupby(level=0, axis='columns').mean()
        df.groupby(level=0, axis='columns').mean()
        df.groupby(level=0, axis='columns').mean()

    def test_cython_agg_return_dict(self):
        """An aggregating function may return a dict for each group."""
        # GH 16741
        df = DataFrame(
            {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
             'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one',
                   'three'],
             'C': np.random.randn(8),
             'D': np.random.randn(8)})

        ts = df.groupby('A')['B'].agg(lambda x: x.value_counts().to_dict())
        expected = Series([{'two': 1, 'one': 1, 'three': 1},
                           {'two': 2, 'one': 2, 'three': 1}],
                          index=Index(['bar', 'foo'], name='A'),
                          name='B')
        tm.assert_series_equal(ts, expected)

    def test_cython_fail_agg(self):
        """``sum`` over string values must equal ``agg(np.sum)``."""
        dr = bdate_range('1/1/2000', periods=50)
        ts = Series(['A', 'B', 'C', 'D', 'E'] * 10, index=dr)

        grouped = ts.groupby(lambda x: x.month)
        summed = grouped.sum()
        expected = grouped.agg(np.sum)
        tm.assert_series_equal(summed, expected)

    @pytest.mark.parametrize('op, targop', [
        ('mean', np.mean),
        ('median', np.median),
        ('var', np.var),
        ('add', np.sum),
        ('prod', np.prod),
        ('min', np.min),
        ('max', np.max),
        ('first', lambda x: x.iloc[0]),
        ('last', lambda x: x.iloc[-1]),
    ])
    def test__cython_agg_general(self, op, targop):
        """``_cython_agg_general(op)`` must match ``agg(targop)``."""
        df = DataFrame(np.random.randn(1000))
        labels = np.random.randint(0, 50, size=1000).astype(float)

        result = df.groupby(labels)._cython_agg_general(op)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            # Tag the failure with the operation before re-raising.
            exc.args += ('operation: %s' % op, )
            raise

    @pytest.mark.parametrize('op, targop', [
        ('mean', np.mean),
        ('median', lambda x: np.median(x) if len(x) > 0 else np.nan),
        ('var', lambda x: np.var(x, ddof=1)),
        ('min', np.min),
        ('max', np.max), ]
    )
    def test_cython_agg_empty_buckets(self, op, targop):
        """Same comparison as above, grouping by bins that are mostly empty."""
        df = pd.DataFrame([11, 12, 13])
        grps = range(0, 55, 5)

        # calling _cython_agg_general directly, instead of via the user API
        # which sets different values for min_count, so do that here.
        result = df.groupby(pd.cut(df[0], grps))._cython_agg_general(op)
        expected = df.groupby(pd.cut(df[0], grps)).agg(lambda x: targop(x))
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            exc.args += ('operation: %s' % op,)
            raise

    def test_cython_agg_empty_buckets_nanops(self):
        """Check 'add' and 'prod' over binned data against fixed values."""
        # GH-18869 can't call nanops on empty groups, so hardcode expected
        # for these
        df = pd.DataFrame([11, 12, 13], columns=['a'])
        grps = range(0, 25, 5)
        # add / sum
        result = df.groupby(pd.cut(df['a'], grps))._cython_agg_general('add')
        intervals = pd.interval_range(0, 20, freq=5)
        expected = pd.DataFrame(
            {"a": [0, 0, 36, 0]},
            index=pd.CategoricalIndex(intervals, name='a', ordered=True))
        tm.assert_frame_equal(result, expected)

        # prod
        result = df.groupby(pd.cut(df['a'], grps))._cython_agg_general('prod')
        expected = pd.DataFrame(
            {"a": [1, 1, 1716, 1]},
            index=pd.CategoricalIndex(intervals, name='a', ordered=True))
        tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize('op', [
    'count',
    'sum',
    'std',
    'var',
    'sem',
    'mean',
    'median',
    'prod',
    'min',
    'max',
])
def test_cythonized_aggers(op):
    """Compare the groupby method named by ``op`` with per-group results."""
    df = DataFrame({'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan],
                    'B': ['A', 'B'] * 6,
                    'C': np.random.randn(12)})
    # Put missing values into column C at labels 2, 4, 6, 8 and 10.
    df.loc[2:10:2, 'C'] = nan

    def aggregate(obj):
        # Call the method named by ``op`` on a groupby object or a Series.
        return getattr(obj, op)()

    # --- single grouping key -------------------------------------------
    by_a = df.drop(['B'], axis=1).groupby('A')
    per_group = {key: aggregate(chunk['C']) for key, chunk in by_a}
    expected_frame = DataFrame({'C': per_group})
    expected_frame.index.name = 'A'
    tm.assert_frame_equal(aggregate(by_a), expected_frame)

    # --- two grouping keys ---------------------------------------------
    by_a_b = df.groupby(['A', 'B'])
    nested = {}
    for (outer, inner), chunk in by_a_b:
        nested.setdefault(outer, {})[inner] = aggregate(chunk['C'])
    expected_series = DataFrame(nested).T.stack(dropna=False)
    expected_series.index.names = ['A', 'B']
    expected_series.name = 'C'

    # The aggregation is always executed, but only 'sum' and 'prod' are
    # compared against the hand-built expectation.
    actual_series = aggregate(by_a_b)['C']
    if op in ('sum', 'prod'):
        tm.assert_series_equal(actual_series, expected_series)


def test_cython_agg_boolean():
    """``mean`` of a boolean column must equal ``agg(np.mean)``."""
    keys = np.random.randint(0, 5, 50)
    flags = np.random.randint(0, 2, 50).astype('bool')
    frame = DataFrame({'a': keys, 'b': flags})

    via_method = frame.groupby('a')['b'].mean()
    via_agg = frame.groupby('a')['b'].agg(np.mean)

    tm.assert_series_equal(via_method, via_agg)


def test_cython_agg_nothing_to_agg():
    """``mean`` over a string-only column must raise ``DataError``."""
    expected_msg = "No numeric types to aggregate"

    def build_frame():
        # Random integer keys next to a purely string-valued column.
        return DataFrame({'a': np.random.randint(0, 5, 50),
                          'b': ['foo', 'bar'] * 25})

    # Case 1: pick the string column out of the groupby object.
    strings = build_frame()
    with tm.assert_raises_regex(DataError, expected_msg):
        strings.groupby('a')['b'].mean()

    # Case 2: group a one-column frame by an external key Series.
    strings = build_frame()
    with tm.assert_raises_regex(DataError, expected_msg):
        strings[['b']].groupby(strings['a']).mean()


def test_cython_agg_nothing_to_agg_with_dates():
    """``mean`` of a datetime column must raise ``DataError``."""
    columns = {
        'a': np.random.randint(0, 5, 50),
        'b': ['foo', 'bar'] * 25,
        'dates': pd.date_range('now', periods=50, freq='T'),
    }
    frame = DataFrame(columns)

    expected_msg = "No numeric types to aggregate"
    with tm.assert_raises_regex(DataError, expected_msg):
        frame.groupby('b').dates.mean()


def test_cython_agg_frame_columns():
    """Smoke test: repeated column-wise ``mean`` calls must not raise."""
    # #2113
    df = DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]})

    # Nothing is asserted; the same aggregation is simply run four times,
    # each time on a freshly built groupby object.
    for _ in range(4):
        df.groupby(level=0, axis='columns').mean()


def test_cython_agg_return_dict():
    """An aggregating function may return a dict for each group."""
    # GH 16741
    frame = DataFrame({
        'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
        'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
        'C': np.random.randn(8),
        'D': np.random.randn(8),
    })

    counts_per_group = frame.groupby('A')['B'].agg(
        lambda x: x.value_counts().to_dict())

    group_index = Index(['bar', 'foo'], name='A')
    bar_counts = {'two': 1, 'one': 1, 'three': 1}
    foo_counts = {'two': 2, 'one': 2, 'three': 1}
    expected = Series([bar_counts, foo_counts], index=group_index, name='B')

    tm.assert_series_equal(counts_per_group, expected)


def test_cython_fail_agg():
    """``sum`` over string values must equal ``agg(np.sum)``."""
    business_days = bdate_range('1/1/2000', periods=50)
    letters = Series(['A', 'B', 'C', 'D', 'E'] * 10, index=business_days)

    by_month = letters.groupby(lambda stamp: stamp.month)

    # Arguments evaluate left to right, so ``sum`` still runs before ``agg``.
    tm.assert_series_equal(by_month.sum(), by_month.agg(np.sum))


@pytest.mark.parametrize('op, targop', [
    ('mean', np.mean),
    ('median', np.median),
    ('var', np.var),
    ('add', np.sum),
    ('prod', np.prod),
    ('min', np.min),
    ('max', np.max),
    ('first', lambda x: x.iloc[0]),
    ('last', lambda x: x.iloc[-1]),
])
def test__cython_agg_general(op, targop):
    """``_cython_agg_general(op)`` must match ``agg(targop)``.

    ``op`` is the name handed to ``_cython_agg_general``; ``targop`` is the
    callable handed to ``agg`` to build the reference frame.
    """
    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    result = df.groupby(labels)._cython_agg_general(op)
    expected = df.groupby(labels).agg(targop)
    # The try body holds only the comparison. The stray 'prod' bucket
    # statements that used to follow it referenced names this function never
    # defines (column 'a', ``grps``, ``intervals``) and were removed.
    try:
        tm.assert_frame_equal(result, expected)
    except BaseException as exc:
        # Tag the failure with the operation before re-raising.
        exc.args += ('operation: %s' % op, )
        raise


@pytest.mark.parametrize('op, targop', [
    ('mean', np.mean),
    ('median', lambda x: np.median(x) if len(x) > 0 else np.nan),
    ('var', lambda x: np.var(x, ddof=1)),
    ('min', np.min),
    ('max', np.max), ]
)
def test_cython_agg_empty_buckets(op, targop):
    """``_cython_agg_general(op)`` must match ``agg`` when bins are empty."""
    df = pd.DataFrame([11, 12, 13])
    bin_edges = range(0, 55, 5)

    # calling _cython_agg_general directly, instead of via the user API
    # which sets different values for min_count, so do that here.
    binned_for_cython = df.groupby(pd.cut(df[0], bin_edges))
    actual = binned_for_cython._cython_agg_general(op)

    # ``targop`` is deliberately wrapped in a lambda, exactly as before,
    # rather than being passed to ``agg`` directly.
    binned_for_reference = df.groupby(pd.cut(df[0], bin_edges))
    reference = binned_for_reference.agg(lambda x: targop(x))

    try:
        tm.assert_frame_equal(actual, reference)
    except BaseException as exc:
        # Tag the failure with the operation before re-raising.
        exc.args += ('operation: %s' % op,)
        raise


def test_cython_agg_empty_buckets_nanops():
    """Check 'add' and 'prod' over binned data against fixed values."""
    # GH-18869 can't call nanops on empty groups, so hardcode expected
    # for these
    df = pd.DataFrame([11, 12, 13], columns=['a'])
    bin_edges = range(0, 25, 5)
    intervals = pd.interval_range(0, 20, freq=5)

    # (operation name, hardcoded value per bin): add / sum first, then prod.
    cases = [
        ('add', [0, 0, 36, 0]),
        ('prod', [1, 1, 1716, 1]),
    ]
    for how, per_bin in cases:
        binned = df.groupby(pd.cut(df['a'], bin_edges))
        actual = binned._cython_agg_general(how)
        wanted = pd.DataFrame(
            {"a": per_bin},
            index=pd.CategoricalIndex(intervals, name='a', ordered=True))
        tm.assert_frame_equal(actual, wanted)

0 comments on commit 1add9dd

Please sign in to comment.