Skip to content

Commit

Permalink
BUG: resample fixes
Browse files Browse the repository at this point in the history
make sure .resample(...).plot() warns and returns a correct plotting object
make sure that .groupby(...).resample(....) is hitting warnings when appropriate

closes #12448
  • Loading branch information
jreback committed Mar 8, 2016
1 parent 07c84d5 commit 4a49f1b
Show file tree
Hide file tree
Showing 8 changed files with 245 additions and 81 deletions.
25 changes: 24 additions & 1 deletion doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,7 @@ other anchored offsets like ``MonthBegin`` and ``YearBegin``.
Resample API
^^^^^^^^^^^^

Like the change in the window functions API :ref:`above <whatsnew_0180.enhancements.moments>`, ``.resample(...)`` is changing to have a more groupby-like API. (:issue:`11732`, :issue:`12702`, :issue:`12202`, :issue:`12332`, :issue:`12334`, :issue:`12348`).
Like the change in the window functions API :ref:`above <whatsnew_0180.enhancements.moments>`, ``.resample(...)`` is changing to have a more groupby-like API. (:issue:`11732`, :issue:`12702`, :issue:`12202`, :issue:`12332`, :issue:`12334`, :issue:`12348`, :issue:`12448`).

.. ipython:: python

Expand Down Expand Up @@ -774,6 +774,29 @@ You could also specify a ``how`` directly
use .resample(...).mean() instead of .resample(...)
assignment will have no effect as you are working on a copy

There is one situation where the new API cannot perform all of the operations that the original code did.
This code is intended to resample every 2s, take the ``mean`` AND then take the ``min`` of those results.

.. code-block:: python

In [4]: df.resample('2s').min()
Out[4]:
A 0.433985
B 0.314582
C 0.357096
D 0.531096
dtype: float64

The new API will:

.. ipython:: python

df.resample('2s').min()

The good news is that the return dimensions will differ (between the new API and the old API), so this should loudly raise
an exception.


**New API**:

Now, you can write ``.resample`` as a 2-stage operation like groupby, which
Expand Down
55 changes: 8 additions & 47 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3932,57 +3932,18 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
Freq: 3T, dtype: int64
"""
from pandas.tseries.resample import resample
from pandas.tseries.resample import (resample,
_maybe_process_deprecations)

axis = self._get_axis_number(axis)
r = resample(self, freq=rule, label=label, closed=closed,
axis=axis, kind=kind, loffset=loffset,
fill_method=fill_method, convention=convention,
limit=limit, base=base)

# deprecation warnings
# but call methods anyhow

if how is not None:

# .resample(..., how='sum')
if isinstance(how, compat.string_types):
method = "{0}()".format(how)

# .resample(..., how=lambda x: ....)
else:
method = ".apply(<func>)"

# if we have both a how and fill_method, then show
# the following warning
if fill_method is None:
warnings.warn("how in .resample() is deprecated\n"
"the new syntax is "
".resample(...).{method}".format(
method=method),
FutureWarning, stacklevel=2)
r = r.aggregate(how)

if fill_method is not None:

# show the prior function call
method = '.' + method if how is not None else ''

args = "limit={0}".format(limit) if limit is not None else ""
warnings.warn("fill_method is deprecated to .resample()\n"
"the new syntax is .resample(...){method}"
".{fill_method}({args})".format(
method=method,
fill_method=fill_method,
args=args),
FutureWarning, stacklevel=2)

if how is not None:
r = getattr(r, fill_method)(limit=limit)
else:
r = r.aggregate(fill_method, limit=limit)

return r
convention=convention,
base=base)
return _maybe_process_deprecations(r,
how=how,
fill_method=fill_method,
limit=limit)

def first(self, offset):
"""
Expand Down
66 changes: 55 additions & 11 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1044,27 +1044,71 @@ def ohlc(self):

@Substitution(name='groupby')
@Appender(_doc_template)
def resample(self, rule, **kwargs):
def resample(self, rule, how=None, fill_method=None, limit=None, **kwargs):
"""
Provide resampling when using a TimeGrouper
Return a new grouper with our resampler appended
"""
from pandas.tseries.resample import TimeGrouper
from pandas.tseries.resample import (TimeGrouper,
_maybe_process_deprecations)
gpr = TimeGrouper(axis=self.axis, freq=rule, **kwargs)

# we by definition have at least 1 key as we are already a grouper
groupings = list(self.grouper.groupings)
groupings.append(gpr)

return self.__class__(self.obj,
keys=groupings,
axis=self.axis,
level=self.level,
as_index=self.as_index,
sort=self.sort,
group_keys=self.group_keys,
squeeze=self.squeeze,
selection=self._selection)
result = self.__class__(self.obj,
keys=groupings,
axis=self.axis,
level=self.level,
as_index=self.as_index,
sort=self.sort,
group_keys=self.group_keys,
squeeze=self.squeeze,
selection=self._selection)

return _maybe_process_deprecations(result,
how=how,
fill_method=fill_method,
limit=limit)

@Substitution(name='groupby')
@Appender(_doc_template)
def pad(self, limit=None):
    """
    Forward fill the values

    The fill is carried out by handing a forward-fill callable to
    ``self.apply``.

    Parameters
    ----------
    limit : integer, optional
        limit of how many values to fill

    See Also
    --------
    Series.fillna
    DataFrame.fillna
    """
    def _forward_fill(chunk):
        # ``limit`` is closed over so every chunk is filled the same way
        return chunk.ffill(limit=limit)

    return self.apply(_forward_fill)


# ``ffill`` is an alias bound to the very same function object
ffill = pad

@Substitution(name='groupby')
@Appender(_doc_template)
def backfill(self, limit=None):
    """
    Backward fill the values

    The fill is carried out by handing a backward-fill callable to
    ``self.apply``.

    Parameters
    ----------
    limit : integer, optional
        limit of how many values to fill

    See Also
    --------
    Series.fillna
    DataFrame.fillna
    """
    def _backward_fill(chunk):
        # ``limit`` is closed over so every chunk is filled the same way
        return chunk.bfill(limit=limit)

    return self.apply(_backward_fill)


# ``bfill`` is an alias bound to the very same function object
bfill = backfill

@Substitution(name='groupby')
@Appender(_doc_template)
Expand Down
19 changes: 3 additions & 16 deletions pandas/tests/test_graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
from pandas.util.decorators import cache_readonly
import pandas.core.common as com
import pandas.util.testing as tm
from pandas.util.testing import ensure_clean
from pandas.util.testing import (ensure_clean,
assert_is_valid_plot_return_object)

from pandas.core.config import set_option

import numpy as np
Expand Down Expand Up @@ -3916,21 +3918,6 @@ def test_plot_kwargs(self):
self.assertEqual(len(res['a'].collections), 1)


def assert_is_valid_plot_return_object(objs):
    """
    Assert that ``objs`` looks like something a plotting call returns.

    An ``np.ndarray`` must contain only matplotlib ``Axes`` instances
    (checked element by element over ``objs.flat``); anything else must be
    a matplotlib ``Artist``, a tuple, or a dict.

    Raises
    ------
    AssertionError
        if ``objs`` does not satisfy the rule above
    """
    # imported lazily so that merely importing this module does not
    # pull in matplotlib
    import matplotlib.pyplot as plt

    if not isinstance(objs, np.ndarray):
        assert isinstance(objs, (plt.Artist, tuple, dict)), \
            ('objs is neither an ndarray of Artist instances nor a '
             'single Artist instance, tuple, or dict, "objs" is a {0!r} '
             ''.format(objs.__class__.__name__))
        return

    for el in objs.flat:
        assert isinstance(el, plt.Axes), ('one of \'objs\' is not a '
                                          'matplotlib Axes instance, '
                                          'type encountered {0!r}'
                                          ''.format(el.__class__.__name__))


def _check_plot_works(f, filterwarnings='always', **kwargs):
import matplotlib.pyplot as plt
ret = None
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -5610,7 +5610,8 @@ def test_tab_completion(self):
'cumprod', 'tail', 'resample', 'cummin', 'fillna', 'cumsum',
'cumcount', 'all', 'shift', 'skew', 'bfill', 'ffill', 'take',
'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', 'cov',
'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin'])
'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin',
'ffill', 'bfill', 'pad', 'backfill'])
self.assertEqual(results, expected)

def test_lexsort_indexer(self):
Expand Down
68 changes: 64 additions & 4 deletions pandas/tseries/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def _typ(self):
def _deprecated(self):
warnings.warn(".resample() is now a deferred operation\n"
"use .resample(...).mean() instead of .resample(...)",
FutureWarning, stacklevel=2)
FutureWarning, stacklevel=3)
return self.mean()

def _make_deprecated_binop(op):
Expand Down Expand Up @@ -154,9 +154,7 @@ def __getattr__(self, attr):
if attr in self._deprecated_invalids:
raise ValueError(".resample() is now a deferred operation\n"
"\tuse .resample(...).mean() instead of "
".resample(...)\n"
"\tassignment will have no effect as you "
"are working on a copy")
".resample(...)")
if attr not in self._deprecated_valids:
self = self._deprecated()
return object.__getattribute__(self, attr)
Expand All @@ -167,6 +165,17 @@ def __setattr__(self, attr, value):
self.__class__.__name__))
object.__setattr__(self, attr, value)

def __getitem__(self, key):
    """
    Select ``key`` via the parent class, with a deprecated fallback.

    If the parent ``__getitem__`` raises ``KeyError`` or
    ``com.AbstractMethodError`` and ``self.obj`` is a Series, the lookup is
    retried on the result of ``self._deprecated()``; otherwise the original
    exception propagates.
    """
    try:
        return super(Resampler, self).__getitem__(key)
    except (KeyError, com.AbstractMethodError):

        # only a Series gets the deprecated compat path; everything
        # else re-raises the exception we just caught
        if not isinstance(self.obj, com.ABCSeries):
            raise

        # compat for deprecated
        return self._deprecated()[key]

def __setitem__(self, attr, value):
raise ValueError("cannot set items on {0}".format(
self.__class__.__name__))
Expand Down Expand Up @@ -208,6 +217,11 @@ def _assure_grouper(self):
""" make sure that we are creating our binner & grouper """
self._set_binner()

def plot(self, *args, **kwargs):
    """
    Plot the object returned by ``self._deprecated()``.

    All positional and keyword arguments are forwarded unchanged to that
    object's ``plot`` and its return value is passed back to the caller.
    """
    # for compat with prior versions, we want to
    # have the warnings shown here and just have this work
    evaluated = self._deprecated()
    return evaluated.plot(*args, **kwargs)

def aggregate(self, arg, *args, **kwargs):
"""
Apply aggregation function or functions to resampled groups, yielding
Expand Down Expand Up @@ -468,6 +482,52 @@ def f(self, _method=method):
setattr(Resampler, method, f)


def _maybe_process_deprecations(r, how=None, fill_method=None, limit=None):
    """
    Potentially we might have a deprecation warning, show it
    but call the appropriate methods anyhow

    Parameters
    ----------
    r : object
        the deferred object to operate on; must provide ``aggregate`` and,
        when both ``how`` and ``fill_method`` are passed, a method named
        after ``fill_method``
    how : string or callable, optional
        deprecated aggregation; applied through ``r.aggregate(how)``
    fill_method : string, optional
        deprecated fill; applied after ``how`` when both are passed
    limit : integer, optional
        forwarded as ``limit=`` to the fill call

    Returns
    -------
    ``r`` itself when neither ``how`` nor ``fill_method`` is passed,
    otherwise the result of the aggregation and / or fill
    """

    if how is not None:

        # .resample(..., how='sum')
        if isinstance(how, compat.string_types):
            method = "{0}()".format(how)

        # .resample(..., how=lambda x: ....)
        else:
            # no leading dot here: both message templates below supply
            # the separating '.', so adding one would render as
            # ``.resample(...)..apply(<func>)``
            method = "apply(<func>)"

        # if we have both a how and fill_method, then only the
        # fill_method warning (which also mentions how) is shown
        if fill_method is None:
            warnings.warn("how in .resample() is deprecated\n"
                          "the new syntax is "
                          ".resample(...).{method}".format(
                              method=method),
                          FutureWarning, stacklevel=3)
        r = r.aggregate(how)

    if fill_method is not None:

        # show the prior function call
        method = '.' + method if how is not None else ''

        args = "limit={0}".format(limit) if limit is not None else ""
        warnings.warn("fill_method is deprecated to .resample()\n"
                      "the new syntax is .resample(...){method}"
                      ".{fill_method}({args})".format(
                          method=method,
                          fill_method=fill_method,
                          args=args),
                      FutureWarning, stacklevel=3)

        if how is not None:
            # r was already aggregated above, so fill that result
            r = getattr(r, fill_method)(limit=limit)
        else:
            r = r.aggregate(fill_method, limit=limit)

    return r


class DatetimeIndexResampler(Resampler):

def _get_binner_for_time(self):
Expand Down
Loading

0 comments on commit 4a49f1b

Please sign in to comment.