Skip to content

Commit

Permalink
make sort=None for groupby
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Mar 8, 2015
1 parent fa981f1 commit 3625dfe
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 12 deletions.
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2861,7 +2861,7 @@ def clip_lower(self, threshold):

return self.where((self >= threshold) | isnull(self), threshold)

def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
def groupby(self, by=None, axis=0, level=None, as_index=True, sort=None,
group_keys=True, squeeze=False):
"""
Group series using mapper (dict or key function, apply given function
Expand Down
44 changes: 34 additions & 10 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,19 +199,25 @@ def __new__(cls, *args, **kwargs):
cls = TimeGrouper
return super(Grouper, cls).__new__(cls)

def __init__(self, key=None, level=None, freq=None, axis=0, sort=False):
def __init__(self, key=None, level=None, freq=None, axis=0, sort=None):
self.key=key
self.level=level
self.freq=freq
self.axis=axis
self.sort=sort
self._sort=sort

self.grouper=None
self.obj=None
self.indexer=None
self.binner=None
self.grouper=None

@property
def sort(self):
    """Return the effective sort flag; an unset (``None``) ``_sort`` counts as True."""
    explicit = self._sort
    return True if explicit is None else explicit

@property
def ax(self):
return self.grouper
Expand All @@ -233,7 +239,7 @@ def _get_grouper(self, obj):
level=self.level, sort=self.sort)
return self.binner, self.grouper, self.obj

def _set_grouper(self, obj, sort=False):
def _set_grouper(self, obj, sort=None):
"""
given an object and the specifications, set up the internal grouper for this particular specification
Expand Down Expand Up @@ -359,7 +365,7 @@ class GroupBy(PandasObject):

def __init__(self, obj, keys=None, axis=0, level=None,
grouper=None, exclusions=None, selection=None, as_index=True,
sort=True, group_keys=True, squeeze=False):
sort=None, group_keys=True, squeeze=False):
self._selection = selection

if isinstance(obj, NDFrame):
Expand All @@ -375,7 +381,7 @@ def __init__(self, obj, keys=None, axis=0, level=None,

self.as_index = as_index
self.keys = keys
self.sort = sort
self._sort = sort
self.group_keys = group_keys
self.squeeze = squeeze

Expand All @@ -388,6 +394,12 @@ def __init__(self, obj, keys=None, axis=0, level=None,
self.grouper = grouper
self.exclusions = set(exclusions) if exclusions else set()

@property
def sort(self):
    """Whether groups are sorted: the stored ``_sort``, or True when it is ``None``."""
    if self._sort is not None:
        return self._sort
    return True

def __len__(self):
return len(self.indices)

Expand Down Expand Up @@ -1214,11 +1226,17 @@ class BaseGrouper(object):
This is an internal Grouper class, which actually holds the generated groups
"""

def __init__(self, axis, groupings, sort=True, group_keys=True):
def __init__(self, axis, groupings, sort=None, group_keys=True):
self._filter_empty_groups = self.compressed = len(groupings) != 1
self.axis, self.groupings, self.sort, self.group_keys = \
self.axis, self.groupings, self._sort, self.group_keys = \
axis, groupings, sort, group_keys

@property
def sort(self):
    """Resolve the sort setting, treating a ``None`` value in ``_sort`` as True."""
    requested = self._sort
    if requested is None:
        requested = True
    return requested

@property
def shape(self):
return tuple(ping.ngroups for ping in self.groupings)
Expand Down Expand Up @@ -1857,13 +1875,13 @@ class Grouping(object):
"""

def __init__(self, index, grouper=None, obj=None, name=None, level=None,
sort=True, in_axis=False):
sort=None, in_axis=False):

self.name = name
self.level = level
self.grouper = _convert_grouper(index, grouper)
self.index = index
self.sort = sort
self._sort = sort
self.obj = obj
self.in_axis = in_axis

Expand Down Expand Up @@ -1926,7 +1944,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,

# must have an ordered categorical
if self.sort:
if not self.grouper.ordered:
if self._sort and not self.grouper.ordered:

This comment has been minimized.

Copy link
@jankatins

jankatins Mar 9, 2015

Maybe a comment would be nice here to prevent an accidental removal of one of the checks in the future

# check directly, so that only a user-supplied `sort=True` raises, but the default
# `sort=None` simply takes the order of the categories, even if they are
# considered 'not ordered'
if self._sort is True and not self.grouper.ordered:
    raise...
raise ValueError("cannot sort by an unordered Categorical in the grouper\n"
"you can set sort=False in the groupby expression or\n"
"make the categorical ordered by using .set_ordered(True)\n")
Expand Down Expand Up @@ -1972,6 +1990,12 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
from pandas import to_timedelta
self.grouper = to_timedelta(self.grouper)

@property
def sort(self):
    """Return ``_sort`` as given, substituting True when it was left as ``None``."""
    return self._sort if self._sort is not None else True

def __repr__(self):
return 'Grouping(%s)' % self.name

Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -4756,7 +4756,9 @@ def test_groupby_blacklist(self):

def test_tab_completion(self):
grp = self.mframe.groupby(level='second')
results = set([v for v in dir(grp) if not v.startswith('_')])

# sort is an accessor here
results = set([v for v in dir(grp) if not v.startswith('_')])-set(['sort'])
expected = set(['A','B','C',
'agg','aggregate','apply','boxplot','filter','first','get_group',
'groups','hist','indices','last','max','mean','median',
Expand Down

0 comments on commit 3625dfe

Please sign in to comment.