From de0f2636dd9a9c88d45988228c614bae6b5cd166 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 3 Jun 2020 18:33:06 +0800 Subject: [PATCH 1/6] deprecated tshift and integrated it to shift (GH11631) --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/generic.py | 184 +++++++++++++++------- pandas/tests/frame/methods/test_shift.py | 54 ++++++- pandas/tests/series/methods/test_shift.py | 52 +++++- 4 files changed, 228 insertions(+), 63 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 91948d6f287d9..efffd96fcc4af 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -737,6 +737,7 @@ Deprecations - :meth:`DatetimeIndex.week` and `DatetimeIndex.weekofyear` are deprecated and will be removed in a future version, use :meth:`DatetimeIndex.isocalendar().week` instead (:issue:`33595`) - :meth:`DatetimeArray.week` and `DatetimeArray.weekofyear` are deprecated and will be removed in a future version, use :meth:`DatetimeArray.isocalendar().week` instead (:issue:`33595`) - :meth:`DateOffset.__call__` is deprecated and will be removed in a future version, use ``offset + other`` instead (:issue:`34171`) +- :meth:`DataFrame.tshift` and :meth:`Series.tshift` are deprecated and will be removed in a future version, use :meth:`DataFrame.shift` and :meth:`Series.shift` instead (:issue:`11631`) - Indexing an :class:`Index` object with a float key is deprecated, and will raise an ``IndexError`` in the future. You can manually convert to an integer key instead (:issue:`34191`). diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 197696f8ed4fe..7994491cf198c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9120,6 +9120,60 @@ def mask( errors=errors, ) + def _tshift( + self: FrameOrSeries, periods: int = 1, freq="infer", axis: Axis = 0 + ) -> FrameOrSeries: + """ + Shift the time index, using the index's frequency if available. 
+ + Parameters + ---------- + periods : int + Number of periods to move, can be positive or negative. + freq : DateOffset, timedelta, or str, default None + Increment to use from the tseries module + or time rule expressed as a string (e.g. 'EOM'). + axis : {0 or ‘index’, 1 or ‘columns’, None}, default 0 + Corresponds to the axis that contains the Index. + + Returns + ------- + shifted : Series/DataFrame + """ + if periods == 0: + return self + + index = self._get_axis(axis) + if freq == "infer": + freq = getattr(index, "freq", None) + + if freq is None: + freq = getattr(index, "inferred_freq", None) + + if freq is None: + msg = "Freq was not set in the index hence cannot be inferred" + raise ValueError(msg) + + if isinstance(freq, str): + freq = to_offset(freq) + + axis = self._get_axis_number(axis) + if isinstance(index, PeriodIndex): + orig_freq = to_offset(index.freq) + if freq != orig_freq: + assert orig_freq is not None # for mypy + raise ValueError( + f"Given freq {freq.rule_code} does not match " + f"PeriodIndex freq {orig_freq.rule_code}" + ) + new_ax = index.shift(periods) + else: + new_ax = index.shift(periods, freq) + + result = self.copy() + result.set_axis(new_ax, axis, inplace=True) + return result.__finalize__(self, method="_tshift") + @doc(klass=_shared_doc_kwargs["klass"]) def shift( self: FrameOrSeries, periods=1, freq=None, axis=0, fill_value=None @@ -9130,7 +9184,9 @@ def shift( When `freq` is not passed, shift the index without realigning the data. If `freq` is passed (in this case, the index must be date or datetime, or it will raise a `NotImplementedError`), the index will be - increased using the periods and the `freq`. + increased using the periods and the `freq`. `freq` can be inferred + when specified as "infer" as long as either freq or inferred_freq + attribute is set in the index. Parameters ---------- @@ -9141,6 +9197,9 @@ def shift( If `freq` is specified then the index values are shifted but the data is not realigned. 
That is, use `freq` if you would like to extend the index when shifting and preserve the original data. + If `freq` is specified as "infer" then it will be inferred from + the freq or inferred_freq attributes of the index. If neither of + those attributes exist, a ValueError is thrown axis : {{0 or 'index', 1 or 'columns', None}}, default None Shift direction. fill_value : object, optional @@ -9150,7 +9209,7 @@ def shift( For datetime, timedelta, or period data, etc. :attr:`NaT` is used. For extension dtypes, ``self.dtype.na_value`` is used. - .. versionchanged:: 0.24.0 + .. versionchanged:: 1.1.0 Returns ------- @@ -9167,45 +9226,68 @@ def shift( Examples -------- - >>> df = pd.DataFrame({{'Col1': [10, 20, 15, 30, 45], - ... 'Col2': [13, 23, 18, 33, 48], - ... 'Col3': [17, 27, 22, 37, 52]}}) + >>> df = pd.DataFrame({{"Col1": [10, 20, 15, 30, 45], + ... "Col2": [13, 23, 18, 33, 48], + ... "Col3": [17, 27, 22, 37, 52]}}, + ... index=pd.date_range("2020-01-01", "2020-01-05")) + >>> df + Col1 Col2 Col3 + 2020-01-01 10 13 17 + 2020-01-02 20 23 27 + 2020-01-03 15 18 22 + 2020-01-04 30 33 37 + 2020-01-05 45 48 52 >>> df.shift(periods=3) - Col1 Col2 Col3 - 0 NaN NaN NaN - 1 NaN NaN NaN - 2 NaN NaN NaN - 3 10.0 13.0 17.0 - 4 20.0 23.0 27.0 - - >>> df.shift(periods=1, axis='columns') - Col1 Col2 Col3 - 0 NaN 10.0 13.0 - 1 NaN 20.0 23.0 - 2 NaN 15.0 18.0 - 3 NaN 30.0 33.0 - 4 NaN 45.0 48.0 + Col1 Col2 Col3 + 2020-01-01 NaN NaN NaN + 2020-01-02 NaN NaN NaN + 2020-01-03 NaN NaN NaN + 2020-01-04 10.0 13.0 17.0 + 2020-01-05 20.0 23.0 27.0 + + >>> df.shift(periods=1, axis="columns") + Col1 Col2 Col3 + 2020-01-01 NaN 10.0 13.0 + 2020-01-02 NaN 20.0 23.0 + 2020-01-03 NaN 15.0 18.0 + 2020-01-04 NaN 30.0 33.0 + 2020-01-05 NaN 45.0 48.0 >>> df.shift(periods=3, fill_value=0) - Col1 Col2 Col3 - 0 0 0 0 - 1 0 0 0 - 2 0 0 0 - 3 10 13 17 - 4 20 23 27 + Col1 Col2 Col3 + 2020-01-01 0 0 0 + 2020-01-02 0 0 0 + 2020-01-03 0 0 0 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + + >>> 
df.shift(periods=3, freq="D") + Col1 Col2 Col3 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + 2020-01-06 15 18 22 + 2020-01-07 30 33 37 + 2020-01-08 45 48 52 + + >>> df.shift(periods=3, freq="infer") + Col1 Col2 Col3 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + 2020-01-06 15 18 22 + 2020-01-07 30 33 37 + 2020-01-08 45 48 52 """ if periods == 0: return self.copy() block_axis = self._get_block_manager_axis(axis) - if freq is None: - new_data = self._mgr.shift( - periods=periods, axis=block_axis, fill_value=fill_value - ) - else: - return self.tshift(periods, freq) + if freq is not None: + return self._tshift(periods, freq, axis) + new_data = self._mgr.shift( + periods=periods, axis=block_axis, fill_value=fill_value + ) return self._constructor(new_data).__finalize__(self, method="shift") def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries: @@ -9271,39 +9353,19 @@ def tshift( attributes of the index. If neither of those attributes exist, a ValueError is thrown """ - index = self._get_axis(axis) - if freq is None: - freq = getattr(index, "freq", None) - - if freq is None: - freq = getattr(index, "inferred_freq", None) + warnings.warn( + ( + "tshift is deprecated and will be removed in a future version. " + "Please use shift instead." 
+ ), + FutureWarning, + stacklevel=2, + ) if freq is None: - msg = "Freq was not given and was not set in the index" - raise ValueError(msg) + freq = "infer" - if periods == 0: - return self - - if isinstance(freq, str): - freq = to_offset(freq) - - axis = self._get_axis_number(axis) - if isinstance(index, PeriodIndex): - orig_freq = to_offset(index.freq) - if freq != orig_freq: - assert orig_freq is not None # for mypy - raise ValueError( - f"Given freq {freq.rule_code} does not match " - f"PeriodIndex freq {orig_freq.rule_code}" - ) - new_ax = index.shift(periods) - else: - new_ax = index.shift(periods, freq) - - result = self.copy() - result.set_axis(new_ax, axis, inplace=True) - return result.__finalize__(self, method="tshift") + return self._tshift(periods, freq, axis) def truncate( self: FrameOrSeries, before=None, after=None, axis=None, copy: bool_t = True diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 95f9fd9d7caf3..fd23d18923681 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -146,6 +146,8 @@ def test_shift_duplicate_columns(self): tm.assert_frame_equal(shifted[0], shifted[2]) def test_tshift(self, datetime_frame): + # TODO: remove this test when tshift deprecation is enforced + # PeriodIndex ps = tm.makePeriodFrame() shifted = ps.tshift(1) @@ -186,10 +188,60 @@ def test_tshift(self, datetime_frame): tm.assert_frame_equal(unshifted, inferred_ts) no_freq = datetime_frame.iloc[[0, 5, 7], :] - msg = "Freq was not given and was not set in the index" + msg = "Freq was not set in the index hence cannot be inferred" with pytest.raises(ValueError, match=msg): no_freq.tshift() + def test_tshift_deprecated(self, datetime_frame): + # GH#11631 + with tm.assert_produces_warning(FutureWarning): + datetime_frame.tshift() + + def test_shift_with_freq(self, datetime_frame): + # PeriodIndex + ps = tm.makePeriodFrame() + shifted = ps.shift(1, freq="infer") + 
unshifted = shifted.shift(-1, freq="infer") + + tm.assert_frame_equal(unshifted, ps) + + shifted2 = ps.shift(freq="B") + tm.assert_frame_equal(shifted, shifted2) + + shifted3 = ps.tshift(freq=offsets.BDay()) + tm.assert_frame_equal(shifted, shifted3) + + with pytest.raises(ValueError, match="does not match"): + ps.tshift(freq="M") + + # DatetimeIndex + shifted = datetime_frame.shift(1, freq="infer") + unshifted = shifted.shift(-1, freq="infer") + + tm.assert_frame_equal(datetime_frame, unshifted) + + shifted2 = datetime_frame.tshift(freq=datetime_frame.index.freq) + tm.assert_frame_equal(shifted, shifted2) + + inferred_ts = DataFrame( + datetime_frame.values, + Index(np.asarray(datetime_frame.index)), + columns=datetime_frame.columns, + ) + shifted = inferred_ts.shift(1, freq="infer") + + expected = datetime_frame.shift(1, freq="infer") + expected.index = expected.index._with_freq(None) + tm.assert_frame_equal(shifted, expected) + + unshifted = shifted.shift(-1, freq="infer") + tm.assert_frame_equal(unshifted, inferred_ts) + + no_freq = datetime_frame.iloc[[0, 5, 7], :] + msg = "Freq was not set in the index hence cannot be inferred" + with pytest.raises(ValueError, match=msg): + no_freq.shift(freq="infer") + def test_shift_dt64values_int_fill_deprecated(self): # GH#31971 ser = pd.Series([pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")]) diff --git a/pandas/tests/series/methods/test_shift.py b/pandas/tests/series/methods/test_shift.py index 686e66162fe0b..3bbb5dfeaf532 100644 --- a/pandas/tests/series/methods/test_shift.py +++ b/pandas/tests/series/methods/test_shift.py @@ -182,6 +182,8 @@ def test_shift_dst(self): assert res.dtype == "datetime64[ns, US/Eastern]" def test_tshift(self, datetime_series): + # TODO: remove this test when tshift deprecation is enforced + # PeriodIndex ps = tm.makePeriodSeries() shifted = ps.tshift(1) @@ -220,10 +222,58 @@ def test_tshift(self, datetime_series): tm.assert_series_equal(unshifted, inferred_ts) no_freq = 
datetime_series[[0, 5, 7]] - msg = "Freq was not given and was not set in the index" + msg = "Freq was not set in the index hence cannot be inferred" with pytest.raises(ValueError, match=msg): no_freq.tshift() + def test_tshift_deprecated(self, datetime_series): + # GH#11631 + with tm.assert_produces_warning(FutureWarning): + datetime_series.tshift() + + def test_shift_with_freq(self, datetime_series): + # PeriodIndex + ps = tm.makePeriodSeries() + shifted = ps.shift(1, freq="infer") + unshifted = shifted.shift(-1, freq="infer") + + tm.assert_series_equal(unshifted, ps) + + shifted2 = ps.tshift(freq="B") + tm.assert_series_equal(shifted, shifted2) + + shifted3 = ps.tshift(freq=BDay()) + tm.assert_series_equal(shifted, shifted3) + + msg = "Given freq M does not match PeriodIndex freq B" + with pytest.raises(ValueError, match=msg): + ps.tshift(freq="M") + + # DatetimeIndex + shifted = datetime_series.shift(1, freq="infer") + unshifted = shifted.shift(-1, freq="infer") + + tm.assert_series_equal(datetime_series, unshifted) + + shifted2 = datetime_series.tshift(freq=datetime_series.index.freq) + tm.assert_series_equal(shifted, shifted2) + + inferred_ts = Series( + datetime_series.values, Index(np.asarray(datetime_series.index)), name="ts" + ) + shifted = inferred_ts.shift(1, freq="infer") + expected = datetime_series.shift(1, freq="infer") + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(shifted, expected) + + unshifted = shifted.shift(-1, freq="infer") + tm.assert_series_equal(unshifted, inferred_ts) + + no_freq = datetime_series[[0, 5, 7]] + msg = "Freq was not set in the index hence cannot be inferred" + with pytest.raises(ValueError, match=msg): + no_freq.tshift(freq="infer") + def test_shift_int(self, datetime_series): ts = datetime_series.astype(int) shifted = ts.shift(1) From 51c6aae15aff39ed13f8b54af8161dbfc55c57ae Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 3 Jun 2020 20:11:53 +0800 Subject: [PATCH 2/6] added deprecated 
sphinx directive --- pandas/core/generic.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7994491cf198c..0cdfb13b22e7e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9333,6 +9333,9 @@ def tshift( """ Shift the time index, using the index's frequency if available. + .. deprecated:: 1.1.0 + Use `shift` instead. + Parameters ---------- periods : int From a62b0ea42bd409ffb2efebf98114570d449f8d5b Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 3 Jun 2020 23:19:04 +0800 Subject: [PATCH 3/6] removed mention of tshift in timeseries user guide --- doc/source/user_guide/timeseries.rst | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 5351c3ee6b624..648d93a45d210 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -516,7 +516,7 @@ The ``DatetimeIndex`` class contains many time series related optimizations: * A large range of dates for various offsets are pre-computed and cached under the hood in order to make generating subsequent date ranges very fast (just have to grab a slice). -* Fast shifting using the ``shift`` and ``tshift`` method on pandas objects. +* Fast shifting using the ``shift`` method on pandas objects. * Unioning of overlapping ``DatetimeIndex`` objects with the same frequency is very fast (important for fast data alignment). * Quick access to date fields via properties such as ``year``, ``month``, etc. @@ -1462,23 +1462,19 @@ the pandas objects. The ``shift`` method accepts an ``freq`` argument which can accept a ``DateOffset`` class or other ``timedelta``-like object or also an -:ref:`offset alias `: +:ref:`offset alias `. + +When ``freq`` is specified, ``shift`` method changes all the dates in the index +rather than changing the alignment of the data and the index: .. 
ipython:: python + ts.shift(5, freq='D') ts.shift(5, freq=pd.offsets.BDay()) ts.shift(5, freq='BM') -Rather than changing the alignment of the data and the index, ``DataFrame`` and -``Series`` objects also have a :meth:`~Series.tshift` convenience method that -changes all the dates in the index by a specified number of offsets: - -.. ipython:: python - - ts.tshift(5, freq='D') - -Note that with ``tshift``, the leading entry is no longer NaN because the data -is not being realigned. +Note that when ``freq`` is specified, the leading entry is no longer NaN +because the data is not being realigned. Frequency conversion ~~~~~~~~~~~~~~~~~~~~ From 159b2a68238b0b6e3b90aaaa662493c34e1064d2 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Thu, 4 Jun 2020 11:00:31 +0800 Subject: [PATCH 4/6] adjusted tests, added filterwarnings mark where applicable --- pandas/core/generic.py | 2 +- pandas/tests/frame/methods/test_shift.py | 7 ++++--- pandas/tests/generic/test_finalize.py | 20 ++++++++++++++----- pandas/tests/groupby/test_groupby.py | 1 + pandas/tests/groupby/test_groupby_subclass.py | 1 + pandas/tests/groupby/test_whitelist.py | 1 + pandas/tests/resample/test_datetime_index.py | 4 ++-- pandas/tests/series/methods/test_shift.py | 11 +++++----- 8 files changed, 31 insertions(+), 16 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0cdfb13b22e7e..b4ae991d48a10 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -188,7 +188,7 @@ class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin): ] _internal_names_set: Set[str] = set(_internal_names) _accessors: Set[str] = set() - _deprecations: FrozenSet[str] = frozenset(["get_values"]) + _deprecations: FrozenSet[str] = frozenset(["get_values", "tshift"]) _metadata: List[str] = [] _is_copy = None _mgr: BlockManager diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index fd23d18923681..6760464babbb7 100644 --- 
a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -145,6 +145,7 @@ def test_shift_duplicate_columns(self): tm.assert_frame_equal(shifted[0], shifted[1]) tm.assert_frame_equal(shifted[0], shifted[2]) + @pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") def test_tshift(self, datetime_frame): # TODO: remove this test when tshift deprecation is enforced @@ -208,11 +209,11 @@ def test_shift_with_freq(self, datetime_frame): shifted2 = ps.shift(freq="B") tm.assert_frame_equal(shifted, shifted2) - shifted3 = ps.tshift(freq=offsets.BDay()) + shifted3 = ps.shift(freq=offsets.BDay()) tm.assert_frame_equal(shifted, shifted3) with pytest.raises(ValueError, match="does not match"): - ps.tshift(freq="M") + ps.shift(freq="M") # DatetimeIndex shifted = datetime_frame.shift(1, freq="infer") @@ -220,7 +221,7 @@ def test_shift_with_freq(self, datetime_frame): tm.assert_frame_equal(datetime_frame, unshifted) - shifted2 = datetime_frame.tshift(freq=datetime_frame.index.freq) + shifted2 = datetime_frame.shift(freq=datetime_frame.index.freq) tm.assert_frame_equal(shifted, shifted2) inferred_ts = DataFrame( diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index d307eef8beb62..a152bc203721f 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -438,11 +438,21 @@ (pd.DataFrame, frame_data, operator.methodcaller("mask", np.array([[True]]))), (pd.Series, ([1, 2],), operator.methodcaller("slice_shift")), (pd.DataFrame, frame_data, operator.methodcaller("slice_shift")), - (pd.Series, (1, pd.date_range("2000", periods=4)), operator.methodcaller("tshift")), - ( - pd.DataFrame, - ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), - operator.methodcaller("tshift"), + pytest.param( + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("tshift"), + ), + marks=pytest.mark.filterwarnings("ignore::FutureWarning"), + ), + 
pytest.param( + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("tshift"), + ), + marks=pytest.mark.filterwarnings("ignore::FutureWarning"), ), (pd.Series, ([1, 2],), operator.methodcaller("truncate", before=0)), (pd.DataFrame, frame_data, operator.methodcaller("truncate", before=0)), diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 80f34bb91cdfd..9cb7e4acfbf2a 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1979,6 +1979,7 @@ def test_bool_aggs_dup_column_labels(bool_agg_func): @pytest.mark.parametrize( "idx", [pd.Index(["a", "a"]), pd.MultiIndex.from_tuples((("a", "a"), ("a", "a")))] ) +@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") def test_dup_labels_output_shape(groupby_func, idx): if groupby_func in {"size", "ngroup", "cumcount"}: pytest.skip("Not applicable") diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index 6adae19005c3a..7271911c5f80f 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -14,6 +14,7 @@ tm.SubclassedSeries(np.arange(0, 10), name="A"), ], ) +@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") def test_groupby_preserves_subclass(obj, groupby_func): # GH28330 -- preserve subclass through groupby operations diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 6b33049a664de..8f5f88094d362 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -339,6 +339,7 @@ def test_groupby_function_rename(mframe): assert f.__name__ == name +@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") def test_groupby_selection_with_methods(df): # some methods which require DatetimeIndex rng = date_range("2014", periods=len(df)) diff --git 
a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 9909e554aa14d..1fb519f1f6e9b 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1067,7 +1067,7 @@ def test_resample_anchored_intraday(simple_date_range_series): tm.assert_frame_equal(result, expected) result = df.resample("M", closed="left").mean() - exp = df.tshift(1, freq="D").resample("M", kind="period").mean() + exp = df.shift(1, freq="D").resample("M", kind="period").mean() exp = exp.to_timestamp(how="end") exp.index = exp.index + Timedelta(1, "ns") - Timedelta(1, "D") @@ -1086,7 +1086,7 @@ def test_resample_anchored_intraday(simple_date_range_series): tm.assert_frame_equal(result, expected) result = df.resample("Q", closed="left").mean() - expected = df.tshift(1, freq="D").resample("Q", kind="period", closed="left").mean() + expected = df.shift(1, freq="D").resample("Q", kind="period", closed="left").mean() expected = expected.to_timestamp(how="end") expected.index += Timedelta(1, "ns") - Timedelta(1, "D") expected.index._data.freq = "Q" diff --git a/pandas/tests/series/methods/test_shift.py b/pandas/tests/series/methods/test_shift.py index 3bbb5dfeaf532..750a6653509fe 100644 --- a/pandas/tests/series/methods/test_shift.py +++ b/pandas/tests/series/methods/test_shift.py @@ -181,6 +181,7 @@ def test_shift_dst(self): tm.assert_series_equal(res, exp) assert res.dtype == "datetime64[ns, US/Eastern]" + @pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") def test_tshift(self, datetime_series): # TODO: remove this test when tshift deprecation is enforced @@ -239,15 +240,15 @@ def test_shift_with_freq(self, datetime_series): tm.assert_series_equal(unshifted, ps) - shifted2 = ps.tshift(freq="B") + shifted2 = ps.shift(freq="B") tm.assert_series_equal(shifted, shifted2) - shifted3 = ps.tshift(freq=BDay()) + shifted3 = ps.shift(freq=BDay()) tm.assert_series_equal(shifted, shifted3) msg = 
"Given freq M does not match PeriodIndex freq B" with pytest.raises(ValueError, match=msg): - ps.tshift(freq="M") + ps.shift(freq="M") # DatetimeIndex shifted = datetime_series.shift(1, freq="infer") @@ -255,7 +256,7 @@ def test_shift_with_freq(self, datetime_series): tm.assert_series_equal(datetime_series, unshifted) - shifted2 = datetime_series.tshift(freq=datetime_series.index.freq) + shifted2 = datetime_series.shift(freq=datetime_series.index.freq) tm.assert_series_equal(shifted, shifted2) inferred_ts = Series( @@ -272,7 +273,7 @@ def test_shift_with_freq(self, datetime_series): no_freq = datetime_series[[0, 5, 7]] msg = "Freq was not set in the index hence cannot be inferred" with pytest.raises(ValueError, match=msg): - no_freq.tshift(freq="infer") + no_freq.shift(freq="infer") def test_shift_int(self, datetime_series): ts = datetime_series.astype(int) From b8cf1569be81d8aea64d6a8a7f209cac78d9e923 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Fri, 5 Jun 2020 10:55:53 +0800 Subject: [PATCH 5/6] refactored and simplified shift and tshift --- pandas/core/generic.py | 100 ++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 62 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b4ae991d48a10..47b31369b2315 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9120,60 +9120,6 @@ def mask( errors=errors, ) - def _tshift( - self: FrameOrSeries, periods: int = 1, freq="infer", axis: Axis = 0 - ) -> FrameOrSeries: - """ - Shift the time index, using the index's frequency if available. - - Parameters - ---------- - periods : int - Number of periods to move, can be positive or negative. - freq : DateOffset, timedelta, or str, default None - Increment to use from the tseries module - or time rule expressed as a string (e.g. 'EOM'). - axis : {0 or ‘index’, 1 or ‘columns’, None}, default 0 - Corresponds to the axis that contains the Index. 
- - Returns - ------- - shifted : Series/DataFrame - """ - if periods == 0: - return self - - index = self._get_axis(axis) - if freq == "infer": - freq = getattr(index, "freq", None) - - if freq is None: - freq = getattr(index, "inferred_freq", None) - - if freq is None: - msg = "Freq was not set in the index hence cannot be inferred" - raise ValueError(msg) - - if isinstance(freq, str): - freq = to_offset(freq) - - axis = self._get_axis_number(axis) - if isinstance(index, PeriodIndex): - orig_freq = to_offset(index.freq) - if freq != orig_freq: - assert orig_freq is not None # for mypy - raise ValueError( - f"Given freq {freq.rule_code} does not match " - f"PeriodIndex freq {orig_freq.rule_code}" - ) - new_ax = index.shift(periods) - else: - new_ax = index.shift(periods, freq) - - result = self.copy() - result.set_axis(new_ax, axis, inplace=True) - return result.__finalize__(self, method="_tshift") - @doc(klass=_shared_doc_kwargs["klass"]) def shift( self: FrameOrSeries, periods=1, freq=None, axis=0, fill_value=None @@ -9281,14 +9227,44 @@ def shift( if periods == 0: return self.copy() - block_axis = self._get_block_manager_axis(axis) - if freq is not None: - return self._tshift(periods, freq, axis) + if freq is None: + # when freq is None, data is shifted, index is not + block_axis = self._get_block_manager_axis(axis) + new_data = self._mgr.shift( + periods=periods, axis=block_axis, fill_value=fill_value + ) + return self._constructor(new_data).__finalize__(self, method="shift") - new_data = self._mgr.shift( - periods=periods, axis=block_axis, fill_value=fill_value - ) - return self._constructor(new_data).__finalize__(self, method="shift") + # when freq is given, index is shifted, data is not + index = self._get_axis(axis) + + if freq == "infer": + freq = getattr(index, "freq", None) + + if freq is None: + freq = getattr(index, "inferred_freq", None) + + if freq is None: + msg = "Freq was not set in the index hence cannot be inferred" + raise ValueError(msg) + + 
elif isinstance(freq, str): + freq = to_offset(freq) + + if isinstance(index, PeriodIndex): + orig_freq = to_offset(index.freq) + if freq != orig_freq: + assert orig_freq is not None # for mypy + raise ValueError( + f"Given freq {freq.rule_code} does not match " + f"PeriodIndex freq {orig_freq.rule_code}" + ) + new_ax = index.shift(periods) + else: + new_ax = index.shift(periods, freq) + + result = self.set_axis(new_ax, axis) + return result.__finalize__(self, method="shift") def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries: """ @@ -9368,7 +9344,7 @@ def tshift( if freq is None: freq = "infer" - return self._tshift(periods, freq, axis) + return self.shift(periods, freq, axis) def truncate( self: FrameOrSeries, before=None, after=None, axis=None, copy: bool_t = True From 0aef60e7bdf01315733c8d1bf65ffcb3f8dd4a0f Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Mon, 15 Jun 2020 20:18:10 +0800 Subject: [PATCH 6/6] logically split up some tests --- pandas/tests/frame/methods/test_shift.py | 22 ++++++++++++---------- pandas/tests/series/methods/test_shift.py | 19 ++++++++++--------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 6760464babbb7..9ec029a6c4304 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -162,7 +162,8 @@ def test_tshift(self, datetime_frame): shifted3 = ps.tshift(freq=offsets.BDay()) tm.assert_frame_equal(shifted, shifted3) - with pytest.raises(ValueError, match="does not match"): + msg = "Given freq M does not match PeriodIndex freq B" + with pytest.raises(ValueError, match=msg): ps.tshift(freq="M") # DatetimeIndex @@ -198,12 +199,11 @@ def test_tshift_deprecated(self, datetime_frame): with tm.assert_produces_warning(FutureWarning): datetime_frame.tshift() - def test_shift_with_freq(self, datetime_frame): - # PeriodIndex + def 
test_period_index_frame_shift_with_freq(self): ps = tm.makePeriodFrame() + shifted = ps.shift(1, freq="infer") unshifted = shifted.shift(-1, freq="infer") - tm.assert_frame_equal(unshifted, ps) shifted2 = ps.shift(freq="B") @@ -212,13 +212,9 @@ def test_shift_with_freq(self, datetime_frame): shifted3 = ps.shift(freq=offsets.BDay()) tm.assert_frame_equal(shifted, shifted3) - with pytest.raises(ValueError, match="does not match"): - ps.shift(freq="M") - - # DatetimeIndex + def test_datetime_frame_shift_with_freq(self, datetime_frame): shifted = datetime_frame.shift(1, freq="infer") unshifted = shifted.shift(-1, freq="infer") - tm.assert_frame_equal(datetime_frame, unshifted) shifted2 = datetime_frame.shift(freq=datetime_frame.index.freq) @@ -230,7 +226,6 @@ def test_shift_with_freq(self, datetime_frame): columns=datetime_frame.columns, ) shifted = inferred_ts.shift(1, freq="infer") - expected = datetime_frame.shift(1, freq="infer") expected.index = expected.index._with_freq(None) tm.assert_frame_equal(shifted, expected) @@ -238,6 +233,13 @@ def test_shift_with_freq(self, datetime_frame): unshifted = shifted.shift(-1, freq="infer") tm.assert_frame_equal(unshifted, inferred_ts) + def test_period_index_frame_shift_with_freq_error(self): + ps = tm.makePeriodFrame() + msg = "Given freq M does not match PeriodIndex freq B" + with pytest.raises(ValueError, match=msg): + ps.shift(freq="M") + + def test_datetime_frame_shift_with_freq_error(self, datetime_frame): no_freq = datetime_frame.iloc[[0, 5, 7], :] msg = "Freq was not set in the index hence cannot be inferred" with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/series/methods/test_shift.py b/pandas/tests/series/methods/test_shift.py index 7b26fc3e7a2db..6257eecf4fc08 100644 --- a/pandas/tests/series/methods/test_shift.py +++ b/pandas/tests/series/methods/test_shift.py @@ -232,12 +232,11 @@ def test_tshift_deprecated(self, datetime_series): with tm.assert_produces_warning(FutureWarning): 
datetime_series.tshift() - def test_shift_with_freq(self, datetime_series): - # PeriodIndex + def test_period_index_series_shift_with_freq(self): ps = tm.makePeriodSeries() + shifted = ps.shift(1, freq="infer") unshifted = shifted.shift(-1, freq="infer") - tm.assert_series_equal(unshifted, ps) shifted2 = ps.shift(freq="B") @@ -246,14 +245,9 @@ def test_shift_with_freq(self, datetime_series): shifted3 = ps.shift(freq=BDay()) tm.assert_series_equal(shifted, shifted3) - msg = "Given freq M does not match PeriodIndex freq B" - with pytest.raises(ValueError, match=msg): - ps.shift(freq="M") - - # DatetimeIndex + def test_datetime_series_shift_with_freq(self, datetime_series): shifted = datetime_series.shift(1, freq="infer") unshifted = shifted.shift(-1, freq="infer") - tm.assert_series_equal(datetime_series, unshifted) shifted2 = datetime_series.shift(freq=datetime_series.index.freq) @@ -270,6 +264,13 @@ def test_shift_with_freq(self, datetime_series): unshifted = shifted.shift(-1, freq="infer") tm.assert_series_equal(unshifted, inferred_ts) + def test_period_index_series_shift_with_freq_error(self): + ps = tm.makePeriodSeries() + msg = "Given freq M does not match PeriodIndex freq B" + with pytest.raises(ValueError, match=msg): + ps.shift(freq="M") + + def test_datetime_series_shift_with_freq_error(self, datetime_series): no_freq = datetime_series[[0, 5, 7]] msg = "Freq was not set in the index hence cannot be inferred" with pytest.raises(ValueError, match=msg):