Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix PeriodIndex +/- TimedeltaIndex #23031

Merged
merged 14 commits into from
Oct 15, 2018
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,7 @@ Datetimelike
- Bug in :class:`DatetimeIndex` where frequency was being set if original frequency was ``None`` (:issue:`22150`)
- Bug in rounding methods of :class:`DatetimeIndex` (:meth:`~DatetimeIndex.round`, :meth:`~DatetimeIndex.ceil`, :meth:`~DatetimeIndex.floor`) and :class:`Timestamp` (:meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, :meth:`~Timestamp.floor`) could give rise to loss of precision (:issue:`22591`)
- Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`)
- Bug in :class:`PeriodIndex` where adding or subtracting a :class:`timedelta` or :class:`Tick` object produced incorrect results (:issue:`22988`)

Timedelta
^^^^^^^^^
Expand Down
13 changes: 12 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,11 @@ def _add_delta_tdi(self, other):
if not len(self) == len(other):
raise ValueError("cannot add indices of unequal length")

if isinstance(other, np.ndarray):
# ndarray[timedelta64]; wrap in TimedeltaIndex for op
from pandas import TimedeltaIndex
other = TimedeltaIndex(other)

self_i8 = self.asi8
other_i8 = other.asi8
new_values = checked_add_with_arr(self_i8, other_i8,
Expand Down Expand Up @@ -632,11 +637,17 @@ def __add__(self, other):
return self._add_datelike(other)
elif is_integer_dtype(other):
result = self._addsub_int_array(other, operator.add)
elif is_float_dtype(other) or is_period_dtype(other):
elif is_float_dtype(other):
# Explicitly catch invalid dtypes
raise TypeError("cannot add {dtype}-dtype to {cls}"
.format(dtype=other.dtype,
cls=type(self).__name__))
elif is_period_dtype(other):
# if self is a TimedeltaArray and other is a PeriodArray with
# a timedelta-like (i.e. Tick) freq, this operation is valid.
# Defer to the PeriodArray implementation.
# In remaining cases, this will end up raising TypeError.
return NotImplemented
elif is_extension_array_dtype(other):
# Categorical op will raise; defer explicitly
return NotImplemented
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ def _add_delta(self, delta):
Parameters
----------
delta : {timedelta, np.timedelta64, DateOffset,
TimedelaIndex, ndarray[timedelta64]}
TimedeltaIndex, ndarray[timedelta64]}

Returns
-------
Expand Down
124 changes: 101 additions & 23 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
from datetime import timedelta
import operator
import warnings

import numpy as np
Expand All @@ -17,8 +18,8 @@
from pandas.util._decorators import (cache_readonly, deprecate_kwarg)

from pandas.core.dtypes.common import (
is_integer_dtype, is_float_dtype, is_period_dtype,
is_datetime64_dtype)
is_integer_dtype, is_float_dtype, is_period_dtype, is_timedelta64_dtype,
is_datetime64_dtype, _TD_DTYPE)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCSeries

Expand Down Expand Up @@ -355,24 +356,54 @@ def _add_offset(self, other):
return self._time_shift(other.n)

def _add_delta_td(self, other):
assert isinstance(self.freq, Tick) # checked by calling function
assert isinstance(other, (timedelta, np.timedelta64, Tick))
nanos = delta_to_nanoseconds(other)
own_offset = frequencies.to_offset(self.freq.rule_code)

if isinstance(own_offset, Tick):
offset_nanos = delta_to_nanoseconds(own_offset)
if np.all(nanos % offset_nanos == 0):
return self._time_shift(nanos // offset_nanos)
delta = self._check_timedeltalike_freq_compat(other)

# raise when input doesn't have freq
raise IncompatibleFrequency("Input has different freq from "
"{cls}(freq={freqstr})"
.format(cls=type(self).__name__,
freqstr=self.freqstr))
# Note: when calling parent class's _add_delta_td, it will call
# delta_to_nanoseconds(delta). Because delta here is an integer,
# delta_to_nanoseconds will return it unchanged.
return DatetimeLikeArrayMixin._add_delta_td(self, delta)

def _add_delta_tdi(self, other):
assert isinstance(self.freq, Tick) # checked by calling function

delta = self._check_timedeltalike_freq_compat(other)
return self._addsub_int_array(delta, operator.add)

def _add_delta(self, other):
ordinal_delta = self._maybe_convert_timedelta(other)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Getting the _maybe_convert_timedelta call out of the arithmetic ops makes everything simpler

return self._time_shift(ordinal_delta)
"""
Add a timedelta-like, Tick, or TimedeltaIndex-like object
to self.

Parameters
----------
other : {timedelta, np.timedelta64, Tick,
TimedeltaIndex, ndarray[timedelta64]}

Returns
-------
result : same type as self
"""
if not isinstance(self.freq, Tick):
# We cannot add timedelta-like to non-tick PeriodArray
raise IncompatibleFrequency("Input has different freq from "
"{cls}(freq={freqstr})"
.format(cls=type(self).__name__,
freqstr=self.freqstr))

# TODO: standardize across datetimelike subclasses whether to return
# i8 view or _shallow_copy
if isinstance(other, (Tick, timedelta, np.timedelta64)):
new_values = self._add_delta_td(other)
return self._shallow_copy(new_values)
elif is_timedelta64_dtype(other):
# ndarray[timedelta64] or TimedeltaArray/index
new_values = self._add_delta_tdi(other)
return self._shallow_copy(new_values)
else: # pragma: no cover
raise TypeError(type(other).__name__)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at this and some of the other methods in the datetime/timedelta/period array mixins, I think some de-duplication can be done on the next pass


@deprecate_kwarg(old_arg_name='n', new_arg_name='periods')
def shift(self, periods):
Expand Down Expand Up @@ -428,14 +459,9 @@ def _maybe_convert_timedelta(self, other):
other, (timedelta, np.timedelta64, Tick, np.ndarray)):
offset = frequencies.to_offset(self.freq.rule_code)
if isinstance(offset, Tick):
if isinstance(other, np.ndarray):
nanos = np.vectorize(delta_to_nanoseconds)(other)
else:
nanos = delta_to_nanoseconds(other)
offset_nanos = delta_to_nanoseconds(offset)
check = np.all(nanos % offset_nanos == 0)
if check:
return nanos // offset_nanos
# _check_timedeltalike_freq_compat will raise if incompatible
delta = self._check_timedeltalike_freq_compat(other)
return delta
elif isinstance(other, DateOffset):
freqstr = other.rule_code
base = frequencies.get_base_alias(freqstr)
Expand All @@ -454,6 +480,58 @@ def _maybe_convert_timedelta(self, other):
raise IncompatibleFrequency(msg.format(cls=type(self).__name__,
freqstr=self.freqstr))

def _check_timedeltalike_freq_compat(self, other):
"""
Arithmetic operations with timedelta-like scalars or array `other`
are only valid if `other` is an integer multiple of `self.freq`.
If the operation is valid, find that integer multiple. Otherwise,
raise because the operation is invalid.

Parameters
----------
other : timedelta, np.timedelta64, Tick,
ndarray[timedelta64], TimedeltaArray, TimedeltaIndex

Returns
-------
multiple : int or ndarray[int64]

Raises
------
IncompatibleFrequency
"""
assert isinstance(self.freq, Tick) # checked by calling function
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can u add a doc string here

own_offset = frequencies.to_offset(self.freq.rule_code)
base_nanos = delta_to_nanoseconds(own_offset)

if isinstance(other, (timedelta, np.timedelta64, Tick)):
nanos = delta_to_nanoseconds(other)

elif isinstance(other, np.ndarray):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can u comments these cases a bit

# numpy timedelta64 array; all entries must be compatible
assert other.dtype.kind == 'm'
if other.dtype != _TD_DTYPE:
# i.e. non-nano unit
# TODO: disallow unit-less timedelta64
other = other.astype(_TD_DTYPE)
nanos = other.view('i8')
else:
# TimedeltaArray/Index
nanos = other.asi8

if np.all(nanos % base_nanos == 0):
# nanos being added is an integer multiple of the
# base-frequency to self.freq
delta = nanos // base_nanos
# delta is the integer (or integer-array) number of periods
# by which will be added to self.
return delta

raise IncompatibleFrequency("Input has different freq from "
"{cls}(freq={freqstr})"
.format(cls=type(self).__name__,
freqstr=self.freqstr))


PeriodArrayMixin._add_comparison_ops()
PeriodArrayMixin._add_datetimelike_methods()
Expand Down
70 changes: 65 additions & 5 deletions pandas/tests/arithmetic/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,26 +446,36 @@ def test_pi_add_sub_td64_array_non_tick_raises(self):
with pytest.raises(period.IncompatibleFrequency):
tdarr - rng

@pytest.mark.xfail(reason='op with TimedeltaIndex raises, with ndarray OK',
strict=True)
def test_pi_add_sub_td64_array_tick(self):
rng = pd.period_range('1/1/2000', freq='Q', periods=3)
# PeriodIndex + Timedelta-like is allowed only with
# tick-like frequencies
rng = pd.period_range('1/1/2000', freq='90D', periods=3)
tdi = pd.TimedeltaIndex(['-1 Day', '-1 Day', '-1 Day'])
tdarr = tdi.values

expected = rng + tdi
expected = pd.period_range('12/31/1999', freq='90D', periods=3)
result = rng + tdi
tm.assert_index_equal(result, expected)
result = rng + tdarr
tm.assert_index_equal(result, expected)
result = tdi + rng
tm.assert_index_equal(result, expected)
result = tdarr + rng
tm.assert_index_equal(result, expected)

expected = rng - tdi
expected = pd.period_range('1/2/2000', freq='90D', periods=3)

result = rng - tdi
tm.assert_index_equal(result, expected)
result = rng - tdarr
tm.assert_index_equal(result, expected)

with pytest.raises(TypeError):
tdarr - rng

with pytest.raises(TypeError):
tdi - rng

# -----------------------------------------------------------------
# operations with array/Index of DateOffset objects

Expand Down Expand Up @@ -596,6 +606,56 @@ def test_pi_sub_intarray(self, box):
# Timedelta-like (timedelta, timedelta64, Timedelta, Tick)
# TODO: Some of these are misnomers because of non-Tick DateOffsets

def test_pi_add_timedeltalike_minute_gt1(self, three_days):
# GH#23031 adding a time-delta-like offset to a PeriodArray that has
# minute frequency with n != 1. A more general case is tested below
# in test_pi_add_timedeltalike_tick_gt1, but here we write out the
# expected result more explicitly.
other = three_days
rng = pd.period_range('2014-05-01', periods=3, freq='2D')

expected = pd.PeriodIndex(['2014-05-04', '2014-05-06', '2014-05-08'],
freq='2D')

result = rng + other
tm.assert_index_equal(result, expected)

result = other + rng
tm.assert_index_equal(result, expected)

# subtraction
expected = pd.PeriodIndex(['2014-04-28', '2014-04-30', '2014-05-02'],
freq='2D')
result = rng - other
tm.assert_index_equal(result, expected)

with pytest.raises(TypeError):
other - rng

@pytest.mark.parametrize('freqstr', ['5ns', '5us', '5ms',
'5s', '5T', '5h', '5d'])
def test_pi_add_timedeltalike_tick_gt1(self, three_days, freqstr):
# GH#23031 adding a time-delta-like offset to a PeriodArray that has
# tick-like frequency with n != 1
other = three_days
rng = pd.period_range('2014-05-01', periods=6, freq=freqstr)

expected = pd.period_range(rng[0] + other, periods=6, freq=freqstr)

result = rng + other
tm.assert_index_equal(result, expected)

result = other + rng
tm.assert_index_equal(result, expected)

# subtraction
expected = pd.period_range(rng[0] - other, periods=6, freq=freqstr)
result = rng - other
tm.assert_index_equal(result, expected)

with pytest.raises(TypeError):
other - rng

def test_pi_add_iadd_timedeltalike_daily(self, three_days):
# Tick
other = three_days
Expand Down
3 changes: 3 additions & 0 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,7 @@ def assert_index_equal(left, right, exact='equiv', check_names=True,
Specify object name being compared, internally used to show appropriate
assertion message
"""
__tracebackhide__ = True
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #22643


def _check_types(l, r, obj='Index'):
if exact:
Expand Down Expand Up @@ -1048,6 +1049,8 @@ def assert_interval_array_equal(left, right, exact='equiv',


def raise_assert_detail(obj, message, left, right, diff=None):
__tracebackhide__ = True

if isinstance(left, np.ndarray):
left = pprint_thing(left)
elif is_categorical_dtype(left):
Expand Down