Skip to content

Commit

Permalink
TST: Parameterized groupby sort parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
Licht-T committed Sep 22, 2017
1 parent 1e13713 commit 5567ac1
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 25 deletions.
40 changes: 23 additions & 17 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1791,18 +1791,19 @@ def aggfun(ser):
agged2 = df.groupby(keys).aggregate(aggfun)
assert len(agged2.columns) + 1 == len(df.columns)

def test_groupby_level(self):
@pytest.mark.parametrize('sort', [True, False])
def test_groupby_level(self, sort):
frame = self.mframe
deleveled = frame.reset_index()

result0 = frame.groupby(level=0, sort=False).sum()
result1 = frame.groupby(level=1, sort=False).sum()
result0 = frame.groupby(level=0, sort=sort).sum()
result1 = frame.groupby(level=1, sort=sort).sum()

expected0 = frame.groupby(deleveled['first'].values).sum()
expected1 = frame.groupby(deleveled['second'].values).sum()
expected0 = frame.groupby(deleveled['first'].values, sort=sort).sum()
expected1 = frame.groupby(deleveled['second'].values, sort=sort).sum()

expected0 = expected0.reindex(frame.index.levels[0])
expected1 = expected1.reindex(frame.index.levels[1])
expected0.index.name = 'first'
expected1.index.name = 'second'

assert result0.index.name == 'first'
assert result1.index.name == 'second'
Expand All @@ -1813,15 +1814,15 @@ def test_groupby_level(self):
assert result1.index.name == frame.index.names[1]

# groupby level name
result0 = frame.groupby(level='first', sort=False).sum()
result1 = frame.groupby(level='second', sort=False).sum()
result0 = frame.groupby(level='first', sort=sort).sum()
result1 = frame.groupby(level='second', sort=sort).sum()
assert_frame_equal(result0, expected0)
assert_frame_equal(result1, expected1)

# axis=1

result0 = frame.T.groupby(level=0, axis=1, sort=False).sum()
result1 = frame.T.groupby(level=1, axis=1, sort=False).sum()
result0 = frame.T.groupby(level=0, axis=1, sort=sort).sum()
result1 = frame.T.groupby(level=1, axis=1, sort=sort).sum()
assert_frame_equal(result0, expected0.T)
assert_frame_equal(result1, expected1.T)

Expand All @@ -1835,14 +1836,15 @@ def test_groupby_level_index_names(self):
df.groupby(level='exp')
pytest.raises(ValueError, df.groupby, level='foo')

def test_groupby_level_with_nas(self):
@pytest.mark.parametrize('sort', [True, False])
def test_groupby_level_with_nas(self, sort):
index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]],
labels=[[1, 1, 1, 1, 0, 0, 0, 0], [0, 1, 2, 3, 0, 1,
2, 3]])

# factorizing doesn't confuse things
s = Series(np.arange(8.), index=index)
result = s.groupby(level=0).sum()
result = s.groupby(level=0, sort=sort).sum()
expected = Series([6., 22.], index=[0, 1])
assert_series_equal(result, expected)

Expand All @@ -1852,7 +1854,7 @@ def test_groupby_level_with_nas(self):

# factorizing doesn't confuse things
s = Series(np.arange(8.), index=index)
result = s.groupby(level=0).sum()
result = s.groupby(level=0, sort=sort).sum()
expected = Series([6., 18.], index=[0.0, 1.0])
assert_series_equal(result, expected)

Expand Down Expand Up @@ -1936,9 +1938,13 @@ def test_groupby_complex(self):
result = a.sum(level=0)
assert_series_equal(result, expected)

def test_level_preserve_order(self):
grouped = self.mframe.groupby(level=0, sort=False)
exp_labels = np.array([0, 0, 0, 1, 1, 2, 2, 3, 3, 3], np.intp)
@pytest.mark.parametrize('sort,labels', [
[True, [2, 2, 2, 0, 0, 1, 1, 3, 3, 3]],
[False, [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]]
])
def test_level_preserve_order(self, sort, labels):
grouped = self.mframe.groupby(level=0, sort=sort)
exp_labels = np.array(labels, np.intp)
assert_almost_equal(grouped.grouper.labels[0], exp_labels)

def test_grouping_labels(self):
Expand Down
15 changes: 11 additions & 4 deletions pandas/tests/groupby/test_whitelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,14 @@ def raw_frame():


@pytest.mark.parametrize(
"op, level, axis, skipna",
"op, level, axis, skipna, sort",
product(AGG_FUNCTIONS,
lrange(2), lrange(2),
[True, False],
[True, False]))
def test_regression_whitelist_methods(raw_frame, op, level, axis, skipna):
def test_regression_whitelist_methods(
raw_frame, op, level,
axis, skipna, sort):
# GH6944
# explicitly test the whitelist methods

Expand All @@ -188,15 +191,19 @@ def test_regression_whitelist_methods(raw_frame, op, level, axis, skipna):
frame = raw_frame.T

if op in AGG_FUNCTIONS_WITH_SKIPNA:
grouped = frame.groupby(level=level, axis=axis, sort=False)
grouped = frame.groupby(level=level, axis=axis, sort=sort)
result = getattr(grouped, op)(skipna=skipna)
expected = getattr(frame, op)(level=level, axis=axis,
skipna=skipna)
if sort:
expected = expected.sort_index(axis=axis, level=level)
tm.assert_frame_equal(result, expected)
else:
grouped = frame.groupby(level=level, axis=axis, sort=False)
grouped = frame.groupby(level=level, axis=axis, sort=sort)
result = getattr(grouped, op)()
expected = getattr(frame, op)(level=level, axis=axis)
if sort:
expected = expected.sort_index(axis=axis, level=level)
tm.assert_frame_equal(result, expected)


Expand Down
15 changes: 11 additions & 4 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1392,17 +1392,21 @@ def test_count(self):
AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew',
'mad', 'std', 'var', 'sem']

def test_series_group_min_max(self):
@pytest.mark.parametrize('sort', [True, False])
def test_series_group_min_max(self, sort):
for op, level, skipna in cart_product(self.AGG_FUNCTIONS, lrange(2),
[False, True]):
grouped = self.series.groupby(level=level, sort=False)
grouped = self.series.groupby(level=level, sort=sort)
aggf = lambda x: getattr(x, op)(skipna=skipna)
# skipna=True
leftside = grouped.agg(aggf)
rightside = getattr(self.series, op)(level=level, skipna=skipna)
if sort:
rightside = rightside.sort_index(level=level)
tm.assert_series_equal(leftside, rightside)

def test_frame_group_ops(self):
@pytest.mark.parametrize('sort', [True, False])
def test_frame_group_ops(self, sort):
self.frame.iloc[1, [1, 2]] = np.nan
self.frame.iloc[7, [0, 1]] = np.nan

Expand All @@ -1415,7 +1419,7 @@ def test_frame_group_ops(self):
else:
frame = self.frame.T

grouped = frame.groupby(level=level, axis=axis, sort=False)
grouped = frame.groupby(level=level, axis=axis, sort=sort)

pieces = []

Expand All @@ -1426,6 +1430,9 @@ def aggf(x):
leftside = grouped.agg(aggf)
rightside = getattr(frame, op)(level=level, axis=axis,
skipna=skipna)
if sort:
rightside = rightside.sort_index(level=level, axis=axis)
frame = frame.sort_index(level=level, axis=axis)

# for good measure, groupby detail
level_index = frame._get_axis(axis).levels[level]
Expand Down

0 comments on commit 5567ac1

Please sign in to comment.