Skip to content

Commit

Permalink
PERF: Correct signature for group_nth / group_object (pandas-dev#19579)
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger authored and harisbal committed Feb 28, 2018
1 parent 6c88f53 commit e30498a
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 2 deletions.
16 changes: 16 additions & 0 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,22 @@ def time_series_nth(self, df):
df[1].groupby(df[0]).nth(0)


# Benchmark class timing groupby ``nth`` and ``last`` on a frame that carries
# a column of Python strings next to the integer grouping column.
class NthObject(object):

# NOTE(review): presumably the asv per-benchmark target time in seconds -- confirm against the asv docs.
goal_time = 0.2

# Build the fixture once: 10000 rows, grouping column 'g' filled with random
# integers drawn from [1, 100), plus a string column 'obj' (5000 'a' then 5000 'b').
# The frame is returned; presumably asv passes it to each time_* method as ``df`` -- confirm.
def setup_cache(self):
df = DataFrame(np.random.randint(1, 100, (10000,)), columns=['g'])
df['obj'] = ['a'] * 5000 + ['b'] * 5000
return df

# Time selecting the row at position 5 of each 'g' group.
def time_nth(self, df):
df.groupby('g').nth(5)

# Time ``GroupBy.last`` on the same grouping (despite the "nth" in the name,
# this calls ``last()``, not ``nth(-1)``).
def time_nth_last(self, df):
df.groupby('g').last()


class DateAttributes(object):

goal_time = 0.2
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,7 @@ Groupby/Resample/Rolling
- Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`)
- Bug in :func:`DataFrame.groupby` when passing the ``on=`` kwarg and subsequently using ``.apply()`` (:issue:`17813`)
- Bug in :func:`DataFrame.resample().aggregate` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`)
- Fixed a performance regression for ``GroupBy.nth`` and ``GroupBy.last`` with some object columns (:issue:`19283`)

Sparse
^^^^^^
Expand Down
10 changes: 8 additions & 2 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ def group_nth_object(ndarray[object, ndim=2] out,
ndarray[int64_t] counts,
ndarray[object, ndim=2] values,
ndarray[int64_t] labels,
int64_t rank):
int64_t rank,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
"""
Expand All @@ -47,6 +48,8 @@ def group_nth_object(ndarray[object, ndim=2] out,
ndarray[int64_t, ndim=2] nobs
ndarray[object, ndim=2] resx

assert min_count == -1, "'min_count' only used in add and prod"

nobs = np.zeros((<object> out).shape, dtype=np.int64)
resx = np.empty((<object> out).shape, dtype=object)

Expand Down Expand Up @@ -80,7 +83,8 @@ def group_nth_object(ndarray[object, ndim=2] out,
def group_last_object(ndarray[object, ndim=2] out,
ndarray[int64_t] counts,
ndarray[object, ndim=2] values,
ndarray[int64_t] labels):
ndarray[int64_t] labels,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
"""
Expand All @@ -91,6 +95,8 @@ def group_last_object(ndarray[object, ndim=2] out,
ndarray[object, ndim=2] resx
ndarray[int64_t, ndim=2] nobs

assert min_count == -1, "'min_count' only used in add and prod"

nobs = np.zeros((<object> out).shape, dtype=np.int64)
resx = np.empty((<object> out).shape, dtype=object)

Expand Down

0 comments on commit e30498a

Please sign in to comment.