Skip to content

Commit

Permalink
Squashed commit of the following:
Browse files Browse the repository at this point in the history
commit 23e5cfc
Author: Tom Augspurger <tom.w.augspurger@gmail.com>
Date:   Tue Oct 2 13:10:41 2018 -0500

    Use ._tshift internally for datetimelike ops

    In preparation for PeriodArray / DatetimeArray / TimedeltaArray.

    Index.shift has a different meaning from ExtensionArray.shift.

    - Index.shift pointwise shifts each element by some amount
    - ExtensionArray.shift shifts the *position* of each value in the array
      padding the end with NA

    This is going to get confusing. This PR tries to avoid some of that by
    internally using a new `_tshift` method (time-shift) when we want to do pointwise
    shifting of each value. Places that know they want that behavior (like in the
    datetimelike ops) should use that.

commit 1d9f76c
Author: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date:   Tue Oct 2 17:11:11 2018 +0200

    CLN: remove Index._to_embed (pandas-dev#22879)

    * CLN: remove Index._to_embed

    * pep8

commit 6247da0
Author: Tom Augspurger <TomAugspurger@users.noreply.github.com>
Date:   Tue Oct 2 08:50:41 2018 -0500

    Provide default implementation for `data_repeated` (pandas-dev#22935)

commit 5ce06b5
Author: Matthew Roeschke <emailformattr@gmail.com>
Date:   Mon Oct 1 14:22:20 2018 -0700

     BUG: to_datetime preserves name of Index argument in the result (pandas-dev#22918)

    * BUG: to_datetime preserves name of Index argument in the result

    * correct test
  • Loading branch information
TomAugspurger committed Oct 2, 2018
1 parent 9d17fd2 commit 959cd72
Show file tree
Hide file tree
Showing 13 changed files with 81 additions and 88 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,7 @@ Datetimelike
- Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`)
- Bug in :class:`DatetimeIndex` where frequency was being set if original frequency was ``None`` (:issue:`22150`)
- Bug in rounding methods of :class:`DatetimeIndex` (:meth:`~DatetimeIndex.round`, :meth:`~DatetimeIndex.ceil`, :meth:`~DatetimeIndex.floor`) and :class:`Timestamp` (:meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, :meth:`~Timestamp.floor`) could give rise to loss of precision (:issue:`22591`)
- Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`)

Timedelta
^^^^^^^^^
Expand Down
16 changes: 15 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ def _sub_period_array(self, other):
def _addsub_int_array(self, other, op):
"""
Add or subtract array-like of integers equivalent to applying
`shift` pointwise.
`_tshift` pointwise.
Parameters
----------
Expand Down Expand Up @@ -555,6 +555,20 @@ def shift(self, periods, freq=None):
return self._tshift(periods, freq=freq)

def _tshift(self, periods, freq=None):
"""
Shift each value by `periods`.
Note this is different from ExtensionArray.shift, which
shifts the *position* of each element, padding the end with
missing values.
Parameters
----------
periods : int
Number of periods to shift by.
freq : pandas.DateOffset, pandas.Timedelta, or string
Frequency increment to shift by.
"""
if freq is not None and freq != self.freq:
if isinstance(freq, compat.string_types):
freq = frequencies.to_offset(freq)
Expand Down
26 changes: 15 additions & 11 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,22 +572,26 @@ def shift(self, periods=1):
-------
shifted : Period Array/Index
"""
# We have two kinds of shift.
# 1. ExtensionArray.shift: move positions of each value,
# fill NA on the end
# 2. Datelike.tshift: move each value through time
# Each Datelike array will implement both. It's up to the
# caller to call the correct one.
return self._ea_shift(periods=periods)

def _ea_shift(self, periods=1):
# TODO: remove from DatetimeLikeArrayMixin
# TODO(DatetimeArray): remove from DatetimeLikeArrayMixin
# The semantics for Index.shift differ from EA.shift
# then just call super.
return ExtensionArray.shift(self, periods)

def _tshift(self, n, freq=None):
# TODO: docs
"""
Shift each value by `periods`.
Note this is different from ExtensionArray.shift, which
shifts the *position* of each element, padding the end with
missing values.
Parameters
----------
periods : int
Number of periods to shift by.
freq : pandas.DateOffset, pandas.Timedelta, or string
Frequency increment to shift by.
"""
values = self.values + n * self.freq.n
if self.hasnans:
values[self._isnan] = iNaT
Expand Down
14 changes: 1 addition & 13 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1124,7 +1124,7 @@ def to_series(self, index=None, name=None):
if name is None:
name = self.name

return Series(self._to_embed(), index=index, name=name)
return Series(self.values.copy(), index=index, name=name)

def to_frame(self, index=True, name=None):
"""
Expand Down Expand Up @@ -1187,18 +1187,6 @@ def to_frame(self, index=True, name=None):
result.index = self
return result

def _to_embed(self, keep_tz=False, dtype=None):
"""
*this is an internal non-public method*
return an array repr of this object, potentially casting to object
"""
if dtype is not None:
return self.astype(dtype)._to_embed(keep_tz=keep_tz)

return self.values.copy()

_index_shared_docs['astype'] = """
Create an Index with values cast to dtypes. The class of a new Index
is determined by dtype. When conversion is impossible, a ValueError
Expand Down
18 changes: 4 additions & 14 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,23 +665,13 @@ def to_series(self, keep_tz=False, index=None, name=None):
if name is None:
name = self.name

return Series(self._to_embed(keep_tz), index=index, name=name)

def _to_embed(self, keep_tz=False, dtype=None):
"""
return an array repr of this object, potentially casting to object
This is for internal compat
"""
if dtype is not None:
return self.astype(dtype)._to_embed(keep_tz=keep_tz)

if keep_tz and self.tz is not None:

# preserve the tz & copy
return self.copy(deep=True)
values = self.copy(deep=True)
else:
values = self.values.copy()

return self.values.copy()
return Series(values, index=index, name=name)

def to_period(self, freq=None):
"""
Expand Down
10 changes: 0 additions & 10 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,16 +412,6 @@ def __array_wrap__(self, result, context=None):
# cannot pass _simple_new as it is
return self._shallow_copy(result, freq=self.freq, name=self.name)

def _to_embed(self, keep_tz=False, dtype=None):
"""
return an array repr of this object, potentially casting to object
"""

if dtype is not None:
return self.astype(dtype)._to_embed(keep_tz=keep_tz)

return self.astype(object).values

@property
def _formatter_func(self):
return lambda x: "'%s'" % x
Expand Down
13 changes: 8 additions & 5 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,13 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None):
result = Series(arg).map(cache_array)
if box:
if errors == 'ignore':
return Index(result)
return Index(result, name=name)
else:
return DatetimeIndex(result, name=name)
return result.values


def _return_parsed_timezone_results(result, timezones, box, tz):
def _return_parsed_timezone_results(result, timezones, box, tz, name):
"""
Return results from array_strptime if a %z or %Z directive was passed.
Expand All @@ -119,6 +119,9 @@ def _return_parsed_timezone_results(result, timezones, box, tz):
True boxes result as an Index-like, False returns an ndarray
tz : object
None or pytz timezone object
name : string, default None
Name for a DatetimeIndex
Returns
-------
tz_result : ndarray of parsed dates with timezone
Expand All @@ -136,7 +139,7 @@ def _return_parsed_timezone_results(result, timezones, box, tz):
in zip(result, timezones)])
if box:
from pandas import Index
return Index(tz_results)
return Index(tz_results, name=name)
return tz_results


Expand Down Expand Up @@ -209,7 +212,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
if box:
if errors == 'ignore':
from pandas import Index
return Index(result)
return Index(result, name=name)

return DatetimeIndex(result, tz=tz, name=name)
return result
Expand Down Expand Up @@ -252,7 +255,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
arg, format, exact=exact, errors=errors)
if '%Z' in format or '%z' in format:
return _return_parsed_timezone_results(
result, timezones, box, tz)
result, timezones, box, tz, name)
except tslibs.OutOfBoundsDatetime:
if errors == 'raise':
raise
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/extension/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,31 @@ def all_data(request, data, data_missing):

@pytest.fixture
def data_repeated(data):
    """
    Generate many datasets.

    Parameters
    ----------
    data : fixture implementing `data`

    Returns
    -------
    Callable[[int], Generator]:
        A callable that takes a `count` argument and
        returns a generator yielding `count` datasets.
    """
    # NOTE: the merge conflict between HEAD and `datetimelike-tshift` is
    # resolved in favour of the branch side; the inner generator is the
    # same on both sides, only the docstring and `yield gen` vs
    # `return gen` differed.
    def gen(count):
        # The same `data` fixture value is yielded `count` times.
        for _ in range(count):
            yield data
    return gen


@pytest.fixture
Expand Down
8 changes: 0 additions & 8 deletions pandas/tests/extension/decimal/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,6 @@ def data_missing():
return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)])


@pytest.fixture
def data_repeated():
def gen(count):
for _ in range(count):
yield DecimalArray(make_data())
yield gen


@pytest.fixture
def data_for_sorting():
return DecimalArray([decimal.Decimal('1'),
Expand Down
9 changes: 0 additions & 9 deletions pandas/tests/extension/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,6 @@ def data_missing():
return Categorical([np.nan, 'A'])


@pytest.fixture
def data_repeated():
"""Return different versions of data for count times"""
def gen(count):
for _ in range(count):
yield Categorical(make_data())
yield gen


@pytest.fixture
def data_for_sorting():
return Categorical(['A', 'B', 'C'], categories=['C', 'A', 'B'],
Expand Down
8 changes: 0 additions & 8 deletions pandas/tests/extension/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,6 @@ def data_missing(dtype):
return integer_array([np.nan, 1], dtype=dtype)


@pytest.fixture
def data_repeated(data):
def gen(count):
for _ in range(count):
yield data
yield gen


@pytest.fixture
def data_for_sorting(dtype):
return integer_array([1, 2, 0], dtype=dtype)
Expand Down
9 changes: 0 additions & 9 deletions pandas/tests/extension/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,6 @@ def data_missing():
return IntervalArray.from_tuples([None, (0, 1)])


@pytest.fixture
def data_repeated():
"""Return different versions of data for count times"""
def gen(count):
for _ in range(count):
yield IntervalArray(make_data())
yield gen


@pytest.fixture
def data_for_sorting():
return IntervalArray.from_tuples([(1, 2), (2, 3), (0, 1)])
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,15 @@ def test_to_datetime_parse_timezone_malformed(self, offset):
with pytest.raises(ValueError):
pd.to_datetime([date], format=fmt)

def test_to_datetime_parse_timezone_keeps_name(self):
# GH 21697
fmt = '%Y-%m-%d %H:%M:%S %z'
arg = pd.Index(['2010-01-01 12:00:00 Z'], name='foo')
result = pd.to_datetime(arg, format=fmt)
expected = pd.DatetimeIndex(['2010-01-01 12:00:00'], tz='UTC',
name='foo')
tm.assert_index_equal(result, expected)


class TestToDatetime(object):
def test_to_datetime_pydatetime(self):
Expand Down Expand Up @@ -765,6 +774,14 @@ def test_unit_rounding(self, cache):
expected = pd.Timestamp('2015-06-19 19:55:31.877000093')
assert result == expected

@pytest.mark.parametrize('cache', [True, False])
def test_unit_ignore_keeps_name(self, cache):
# GH 21697
expected = pd.Index([15e9] * 2, name='name')
result = pd.to_datetime(expected, errors='ignore', box=True, unit='s',
cache=cache)
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize('cache', [True, False])
def test_dataframe(self, cache):

Expand Down

0 comments on commit 959cd72

Please sign in to comment.