From 4a49f1b6a955b1620ecc20ee321a7bd384a11751 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 25 Feb 2016 20:29:22 -0500 Subject: [PATCH] BUG: resample fixes make sure .resample(...).plot() warns and returns a correct plotting object make sure that .groupby(...).resample(....) is hitting warnings when appropriate closes #12448 --- doc/source/whatsnew/v0.18.0.txt | 25 +++++++++- pandas/core/generic.py | 55 ++++------------------ pandas/core/groupby.py | 66 +++++++++++++++++++++----- pandas/tests/test_graphics.py | 19 ++------ pandas/tests/test_groupby.py | 3 +- pandas/tseries/resample.py | 68 +++++++++++++++++++++++++-- pandas/tseries/tests/test_resample.py | 68 +++++++++++++++++++++++++++ pandas/util/testing.py | 22 ++++++++- 8 files changed, 245 insertions(+), 81 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index bdc20d964a06a..4dde6bcdb5038 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -705,7 +705,7 @@ other anchored offsets like ``MonthBegin`` and ``YearBegin``. Resample API ^^^^^^^^^^^^ -Like the change in the window functions API :ref:`above `, ``.resample(...)`` is changing to have a more groupby-like API. (:issue:`11732`, :issue:`12702`, :issue:`12202`, :issue:`12332`, :issue:`12334`, :issue:`12348`). +Like the change in the window functions API :ref:`above `, ``.resample(...)`` is changing to have a more groupby-like API. (:issue:`11732`, :issue:`12702`, :issue:`12202`, :issue:`12332`, :issue:`12334`, :issue:`12348`, :issue:`12448`). .. ipython:: python @@ -774,6 +774,29 @@ You could also specify a ``how`` directly use .resample(...).mean() instead of .resample(...) assignment will have no effect as you are working on a copy + There is a situation where the new API cannot perform all the operations when using original code. + This code is intending to resample every 2s, take the ``mean`` AND then take the ``min`` of those results. + + ..
code-block:: python + + In [4]: df.resample('2s').min() + Out[4]: + A 0.433985 + B 0.314582 + C 0.357096 + D 0.531096 + dtype: float64 + + The new API will: + + .. ipython:: python + + df.resample('2s').min() + + Good news is the return dimensions will differ (between the new API and the old API), so this should loudly raise + an exception. + + **New API**: Now, you can write ``.resample`` as a 2-stage operation like groupby, which diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1684768eec2c4..bc92733e7c2fd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3932,57 +3932,18 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, Freq: 3T, dtype: int64 """ - from pandas.tseries.resample import resample + from pandas.tseries.resample import (resample, + _maybe_process_deprecations) axis = self._get_axis_number(axis) r = resample(self, freq=rule, label=label, closed=closed, axis=axis, kind=kind, loffset=loffset, - fill_method=fill_method, convention=convention, - limit=limit, base=base) - - # deprecation warnings - # but call methods anyhow - - if how is not None: - - # .resample(..., how='sum') - if isinstance(how, compat.string_types): - method = "{0}()".format(how) - - # .resample(..., how=lambda x: ....) - else: - method = ".apply()" - - # if we have both a how and fill_method, then show - # the following warning - if fill_method is None: - warnings.warn("how in .resample() is deprecated\n" - "the new syntax is " - ".resample(...).{method}".format( - method=method), - FutureWarning, stacklevel=2) - r = r.aggregate(how) - - if fill_method is not None: - - # show the prior function call - method = '.'
+ method if how is not None else '' - - args = "limit={0}".format(limit) if limit is not None else "" - warnings.warn("fill_method is deprecated to .resample()\n" - "the new syntax is .resample(...){method}" - ".{fill_method}({args})".format( - method=method, - fill_method=fill_method, - args=args), - FutureWarning, stacklevel=2) - - if how is not None: - r = getattr(r, fill_method)(limit=limit) - else: - r = r.aggregate(fill_method, limit=limit) - - return r + convention=convention, + base=base) + return _maybe_process_deprecations(r, + how=how, + fill_method=fill_method, + limit=limit) def first(self, offset): """ diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 442f2132847ee..c8598639d9fad 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1044,27 +1044,71 @@ def ohlc(self): @Substitution(name='groupby') @Appender(_doc_template) - def resample(self, rule, **kwargs): + def resample(self, rule, how=None, fill_method=None, limit=None, **kwargs): """ Provide resampling when using a TimeGrouper Return a new grouper with our resampler appended """ - from pandas.tseries.resample import TimeGrouper + from pandas.tseries.resample import (TimeGrouper, + _maybe_process_deprecations) gpr = TimeGrouper(axis=self.axis, freq=rule, **kwargs) # we by definition have at least 1 key as we are already a grouper groupings = list(self.grouper.groupings) groupings.append(gpr) - return self.__class__(self.obj, - keys=groupings, - axis=self.axis, - level=self.level, - as_index=self.as_index, - sort=self.sort, - group_keys=self.group_keys, - squeeze=self.squeeze, - selection=self._selection) + result = self.__class__(self.obj, + keys=groupings, + axis=self.axis, + level=self.level, + as_index=self.as_index, + sort=self.sort, + group_keys=self.group_keys, + squeeze=self.squeeze, + selection=self._selection) + + return _maybe_process_deprecations(result, + how=how, + fill_method=fill_method, + limit=limit) + + @Substitution(name='groupby') + 
@Appender(_doc_template) + def pad(self, limit=None): + """ + Forward fill the values + + Parameters + ---------- + limit : integer, optional + limit of how many values to fill + + See Also + -------- + Series.fillna + DataFrame.fillna + """ + return self.apply(lambda x: x.ffill(limit=limit)) + ffill = pad + + @Substitution(name='groupby') + @Appender(_doc_template) + def backfill(self, limit=None): + """ + Backward fill the values + + Parameters + ---------- + limit : integer, optional + limit of how many values to fill + + See Also + -------- + Series.fillna + DataFrame.fillna + """ + return self.apply(lambda x: x.bfill(limit=limit)) + bfill = backfill @Substitution(name='groupby') @Appender(_doc_template) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 2fdfc7ccc37ef..45d3fd0dad855 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -17,7 +17,9 @@ from pandas.util.decorators import cache_readonly import pandas.core.common as com import pandas.util.testing as tm -from pandas.util.testing import ensure_clean +from pandas.util.testing import (ensure_clean, + assert_is_valid_plot_return_object) + from pandas.core.config import set_option import numpy as np @@ -3916,21 +3918,6 @@ def test_plot_kwargs(self): self.assertEqual(len(res['a'].collections), 1) -def assert_is_valid_plot_return_object(objs): - import matplotlib.pyplot as plt - if isinstance(objs, np.ndarray): - for el in objs.flat: - assert isinstance(el, plt.Axes), ('one of \'objs\' is not a ' - 'matplotlib Axes instance, ' - 'type encountered {0!r}' - ''.format(el.__class__.__name__)) - else: - assert isinstance(objs, (plt.Artist, tuple, dict)), \ - ('objs is neither an ndarray of Artist instances nor a ' - 'single Artist instance, tuple, or dict, "objs" is a {0!r} ' - ''.format(objs.__class__.__name__)) - - def _check_plot_works(f, filterwarnings='always', **kwargs): import matplotlib.pyplot as plt ret = None diff --git 
a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index d301016aa1316..947daab2017d3 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -5610,7 +5610,8 @@ def test_tab_completion(self): 'cumprod', 'tail', 'resample', 'cummin', 'fillna', 'cumsum', 'cumcount', 'all', 'shift', 'skew', 'bfill', 'ffill', 'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', 'cov', - 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin']) + 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin', + 'ffill', 'bfill', 'pad', 'backfill']) self.assertEqual(results, expected) def test_lexsort_indexer(self): diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index ba2eb3463d169..4e1ca42710cc5 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -102,7 +102,7 @@ def _typ(self): def _deprecated(self): warnings.warn(".resample() is now a deferred operation\n" "use .resample(...).mean() instead of .resample(...)", - FutureWarning, stacklevel=2) + FutureWarning, stacklevel=3) return self.mean() def _make_deprecated_binop(op): @@ -154,9 +154,7 @@ def __getattr__(self, attr): if attr in self._deprecated_invalids: raise ValueError(".resample() is now a deferred operation\n" "\tuse .resample(...).mean() instead of " - ".resample(...)\n" - "\tassignment will have no effect as you " - "are working on a copy") + ".resample(...)") if attr not in self._deprecated_valids: self = self._deprecated() return object.__getattribute__(self, attr) @@ -167,6 +165,17 @@ def __setattr__(self, attr, value): self.__class__.__name__)) object.__setattr__(self, attr, value) + def __getitem__(self, key): + try: + return super(Resampler, self).__getitem__(key) + except (KeyError, com.AbstractMethodError): + + # compat for deprecated + if isinstance(self.obj, com.ABCSeries): + return self._deprecated()[key] + + raise + def __setitem__(self, attr, value): raise ValueError("cannot set items on {0}".format( self.__class__.__name__)) @@ -208,6 +217,11 @@ def 
_assure_grouper(self): """ make sure that we are creating our binner & grouper """ self._set_binner() + def plot(self, *args, **kwargs): + # for compat with prior versions, we want to + # have the warnings shown here and just have this work + return self._deprecated().plot(*args, **kwargs) + def aggregate(self, arg, *args, **kwargs): """ Apply aggregation function or functions to resampled groups, yielding @@ -468,6 +482,52 @@ def f(self, _method=method): setattr(Resampler, method, f) +def _maybe_process_deprecations(r, how=None, fill_method=None, limit=None): + """ potentially we might have a deprecation warning, show it + but call the appropriate methods anyhow """ + + if how is not None: + + # .resample(..., how='sum') + if isinstance(how, compat.string_types): + method = "{0}()".format(how) + + # .resample(..., how=lambda x: ....) + else: + method = ".apply()" + + # if we have both a how and fill_method, then show + # the following warning + if fill_method is None: + warnings.warn("how in .resample() is deprecated\n" + "the new syntax is " + ".resample(...).{method}".format( + method=method), + FutureWarning, stacklevel=3) + r = r.aggregate(how) + + if fill_method is not None: + + # show the prior function call + method = '.' 
+ method if how is not None else '' + + args = "limit={0}".format(limit) if limit is not None else "" + warnings.warn("fill_method is deprecated to .resample()\n" + "the new syntax is .resample(...){method}" + ".{fill_method}({args})".format( + method=method, + fill_method=fill_method, + args=args), + FutureWarning, stacklevel=3) + + if how is not None: + r = getattr(r, fill_method)(limit=limit) + else: + r = r.aggregate(fill_method, limit=limit) + + return r + + class DatetimeIndexResampler(Resampler): def _get_binner_for_time(self): diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index b0e315ead2acb..4ddfc6ac573e4 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -151,6 +151,74 @@ def f(): check_stacklevel=False): self.assertIsInstance(getattr(r, op)(2), pd.Series) + # getitem compat + df = self.series.to_frame('foo') + + # same as prior versions for DataFrame + self.assertRaises(KeyError, lambda: df.resample('H')[0]) + + # compat for Series + # but we cannot be sure that we need a warning here + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = self.series.resample('H')[0] + expected = self.series.resample('H').mean()[0] + self.assertEqual(result, expected) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = self.series.resample('H')['2005-01-09 23:00:00'] + expected = self.series.resample('H').mean()['2005-01-09 23:00:00'] + self.assertEqual(result, expected) + + def test_groupby_resample_api(self): + + # GH 12448 + # .groupby(...).resample(...) 
hitting warnings + # when appropriate + df = DataFrame({'date': pd.date_range(start='2016-01-01', + periods=4, + freq='W'), + 'group': [1, 1, 2, 2], + 'val': [5, 6, 7, 8]}).set_index('date') + + # replication step + i = pd.date_range('2016-01-03', periods=8).tolist() + \ + pd.date_range('2016-01-17', periods=8).tolist() + index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i], + names=['group', 'date']) + expected = DataFrame({'val': [5] * 7 + [6] + [7] * 7 + [8]}, + index=index) + result = df.groupby('group').apply( + lambda x: x.resample('1D').ffill())[['val']] + assert_frame_equal(result, expected) + + # deferred operations are currently disabled + # GH 12486 + # + # with tm.assert_produces_warning(FutureWarning, + # check_stacklevel=False): + # result = df.groupby('group').resample('1D').ffill() + # assert_frame_equal(result, expected) + + def test_plot_api(self): + tm._skip_if_no_mpl() + + # .resample(....).plot(...) + # hitting warnings + # GH 12448 + s = Series(np.random.randn(60), + index=date_range('2016-01-01', periods=60, freq='1min')) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s.resample('15min').plot() + tm.assert_is_valid_plot_return_object(result) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s.resample('15min', how='sum').plot() + tm.assert_is_valid_plot_return_object(result) + def test_getitem(self): r = self.frame.resample('H') diff --git a/pandas/util/testing.py b/pandas/util/testing.py index c32239daaaf12..ba869efbc5837 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -201,6 +201,12 @@ def setUpClass(cls): cls.setUpClass = setUpClass return cls +def _skip_if_no_mpl(): + try: + import matplotlib + except ImportError: + import nose + raise nose.SkipTest("matplotlib not installed") def _skip_if_mpl_1_5(): import matplotlib @@ -209,7 +215,6 @@ def _skip_if_mpl_1_5(): import nose raise nose.SkipTest("matplotlib 1.5") - def _skip_if_no_scipy(): 
try: import scipy.stats @@ -767,6 +772,21 @@ def assert_attr_equal(attr, left, right, obj='Attributes'): left_attr, right_attr) +def assert_is_valid_plot_return_object(objs): + import matplotlib.pyplot as plt + if isinstance(objs, np.ndarray): + for el in objs.flat: + assert isinstance(el, plt.Axes), ('one of \'objs\' is not a ' + 'matplotlib Axes instance, ' + 'type encountered {0!r}' + ''.format(el.__class__.__name__)) + else: + assert isinstance(objs, (plt.Artist, tuple, dict)), \ + ('objs is neither an ndarray of Artist instances nor a ' + 'single Artist instance, tuple, or dict, "objs" is a {0!r} ' + ''.format(objs.__class__.__name__)) + + def isiterable(obj): return hasattr(obj, '__iter__')