Squashed commit of the following:

commit 23e5cfc Author: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Tue Oct 2 13:10:41 2018 -0500 Use ._tshift internally for datetimelike ops In preparation for PeriodArray / DatetimeArray / TimedeltaArray. Index.shift has a different meaning from ExtensionArray.shift. - Index.shift pointwise shifts each element by some amount - ExtensionArray.shift shifts the *position* of each value in the array, padding the end with NA This is going to get confusing. This PR tries to avoid some of that by internally using a new `_tshift` method (time-shift) when we want to do pointwise shifting of each value. Places that know they want that behavior (like in the datetimelike ops) should use that. commit 1d9f76c Author: Joris Van den Bossche <jorisvandenbossche@gmail.com> Date: Tue Oct 2 17:11:11 2018 +0200 CLN: remove Index._to_embed (pandas-dev#22879) * CLN: remove Index._to_embed * pep8 commit 6247da0 Author: Tom Augspurger <TomAugspurger@users.noreply.github.com> Date: Tue Oct 2 08:50:41 2018 -0500 Provide default implementation for `data_repeated` (pandas-dev#22935) commit 5ce06b5 Author: Matthew Roeschke <emailformattr@gmail.com> Date: Mon Oct 1 14:22:20 2018 -0700 BUG: to_datetime preserves name of Index argument in the result (pandas-dev#22918) * BUG: to_datetime preserves name of Index argument in the result * correct test
TomAugspurger · Oct 2, 2018 · 959cd72 · 959cd72
1 parent 9d17fd2
commit 959cd72
Show file tree

Hide file tree

Showing 13 changed files with 81 additions and 88 deletions.
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -655,6 +655,7 @@ Datetimelike
 - Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`)
 - Bug in :class:`DatetimeIndex` where frequency was being set if original frequency was ``None`` (:issue:`22150`)
 - Bug in rounding methods of :class:`DatetimeIndex` (:meth:`~DatetimeIndex.round`, :meth:`~DatetimeIndex.ceil`, :meth:`~DatetimeIndex.floor`) and :class:`Timestamp` (:meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, :meth:`~Timestamp.floor`) could give rise to loss of precision (:issue:`22591`)
+- Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`)
 
 Timedelta
 ^^^^^^^^^

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -455,7 +455,7 @@ def _sub_period_array(self, other):
     def _addsub_int_array(self, other, op):
         """
         Add or subtract array-like of integers equivalent to applying
-        `shift` pointwise.
+        `_tshift` pointwise.
 
         Parameters
         ----------
@@ -555,6 +555,20 @@ def shift(self, periods, freq=None):
         return self._tshift(periods, freq=freq)
 
     def _tshift(self, periods, freq=None):
+        """
+        Shift each value by `periods`.
+
+        Note this is different from ExtensionArray.shift, which
+        shifts the *position* of each element, padding the end with
+        missing values.
+
+        Parameters
+        ----------
+        periods : int
+            Number of periods to shift by.
+        freq : pandas.DateOffset, pandas.Timedelta, or string
+            Frequency increment to shift by.
+        """
         if freq is not None and freq != self.freq:
             if isinstance(freq, compat.string_types):
                 freq = frequencies.to_offset(freq)

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -572,22 +572,26 @@ def shift(self, periods=1):
         -------
         shifted : Period Array/Index
         """
-        # We have two kinds of shift.
-        # 1. ExtensionArray.shift: move positions of each value,
-        #    fill NA on the end
-        # 2. Datelike.tshift: move each value through time
-        # Each Datelike array will implement both. It's up to the
-        # caller to call the correct one.
-        return self._ea_shift(periods=periods)
-
-    def _ea_shift(self, periods=1):
-        # TODO: remove from DatetimeLikeArrayMixin
+        # TODO(DatetimeArray): remove from DatetimeLikeArrayMixin
         # The semantics for Index.shift differ from EA.shift
         # then just call super.
         return ExtensionArray.shift(self, periods)
 
     def _tshift(self, n, freq=None):
-        # TODO: docs
+        """
+        Shift each value by `n` periods.
+
+        Note this is different from ExtensionArray.shift, which
+        shifts the *position* of each element, padding the end with
+        missing values.
+
+        Parameters
+        ----------
+        n : int
+            Number of periods to shift by.
+        freq : pandas.DateOffset, pandas.Timedelta, or string
+            Frequency increment to shift by.
+        """
         values = self.values + n * self.freq.n
         if self.hasnans:
             values[self._isnan] = iNaT

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -1124,7 +1124,7 @@ def to_series(self, index=None, name=None):
         if name is None:
             name = self.name
 
-        return Series(self._to_embed(), index=index, name=name)
+        return Series(self.values.copy(), index=index, name=name)
 
     def to_frame(self, index=True, name=None):
         """
@@ -1187,18 +1187,6 @@ def to_frame(self, index=True, name=None):
             result.index = self
         return result
 
-    def _to_embed(self, keep_tz=False, dtype=None):
-        """
-        *this is an internal non-public method*
-
-        return an array repr of this object, potentially casting to object
-
-        """
-        if dtype is not None:
-            return self.astype(dtype)._to_embed(keep_tz=keep_tz)
-
-        return self.values.copy()
-
     _index_shared_docs['astype'] = """
         Create an Index with values cast to dtypes. The class of a new Index
         is determined by dtype. When conversion is impossible, a ValueError

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -665,23 +665,13 @@ def to_series(self, keep_tz=False, index=None, name=None):
         if name is None:
             name = self.name
 
-        return Series(self._to_embed(keep_tz), index=index, name=name)
-
-    def _to_embed(self, keep_tz=False, dtype=None):
-        """
-        return an array repr of this object, potentially casting to object
-
-        This is for internal compat
-        """
-        if dtype is not None:
-            return self.astype(dtype)._to_embed(keep_tz=keep_tz)
-
         if keep_tz and self.tz is not None:
-
             # preserve the tz & copy
-            return self.copy(deep=True)
+            values = self.copy(deep=True)
+        else:
+            values = self.values.copy()
 
-        return self.values.copy()
+        return Series(values, index=index, name=name)
 
     def to_period(self, freq=None):
         """

diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
@@ -412,16 +412,6 @@ def __array_wrap__(self, result, context=None):
         # cannot pass _simple_new as it is
         return self._shallow_copy(result, freq=self.freq, name=self.name)
 
-    def _to_embed(self, keep_tz=False, dtype=None):
-        """
-        return an array repr of this object, potentially casting to object
-        """
-
-        if dtype is not None:
-            return self.astype(dtype)._to_embed(keep_tz=keep_tz)
-
-        return self.astype(object).values
-
     @property
     def _formatter_func(self):
         return lambda x: "'%s'" % x

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
@@ -99,13 +99,13 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None):
     result = Series(arg).map(cache_array)
     if box:
         if errors == 'ignore':
-            return Index(result)
+            return Index(result, name=name)
         else:
             return DatetimeIndex(result, name=name)
     return result.values
 
 
-def _return_parsed_timezone_results(result, timezones, box, tz):
+def _return_parsed_timezone_results(result, timezones, box, tz, name):
     """
     Return results from array_strptime if a %z or %Z directive was passed.
 
@@ -119,6 +119,9 @@ def _return_parsed_timezone_results(result, timezones, box, tz):
         True boxes result as an Index-like, False returns an ndarray
     tz : object
         None or pytz timezone object
+    name : string or None
+        Name for the returned Index (used only when ``box`` is True)
+
     Returns
     -------
     tz_result : ndarray of parsed dates with timezone
@@ -136,7 +139,7 @@ def _return_parsed_timezone_results(result, timezones, box, tz):
                            in zip(result, timezones)])
     if box:
         from pandas import Index
-        return Index(tz_results)
+        return Index(tz_results, name=name)
     return tz_results
 
 
@@ -209,7 +212,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
         if box:
             if errors == 'ignore':
                 from pandas import Index
-                return Index(result)
+                return Index(result, name=name)
 
             return DatetimeIndex(result, tz=tz, name=name)
         return result
@@ -252,7 +255,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
                         arg, format, exact=exact, errors=errors)
                     if '%Z' in format or '%z' in format:
                         return _return_parsed_timezone_results(
-                            result, timezones, box, tz)
+                            result, timezones, box, tz, name)
                 except tslibs.OutOfBoundsDatetime:
                     if errors == 'raise':
                         raise

diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py
@@ -32,11 +32,31 @@ def all_data(request, data, data_missing):
 
 @pytest.fixture
 def data_repeated(data):
+<<<<<<< HEAD
     """Return different versions of data for count times"""
     def gen(count):
         for _ in range(count):
             yield data
     yield gen
+=======
+    """
+    Generate many datasets.
+
+    Parameters
+    ----------
+    data : fixture implementing `data`
+
+    Returns
+    -------
+    Callable[[int], Generator]:
+        A callable that takes a `count` argument and
+        returns a generator yielding `count` datasets.
+    """
+    def gen(count):
+        for _ in range(count):
+            yield data
+    return gen
+>>>>>>> datetimelike-tshift
 
 
 @pytest.fixture

diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
@@ -30,14 +30,6 @@ def data_missing():
     return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)])
 
 
-@pytest.fixture
-def data_repeated():
-    def gen(count):
-        for _ in range(count):
-            yield DecimalArray(make_data())
-    yield gen
-
-
 @pytest.fixture
 def data_for_sorting():
     return DecimalArray([decimal.Decimal('1'),

diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
@@ -45,15 +45,6 @@ def data_missing():
     return Categorical([np.nan, 'A'])
 
 
-@pytest.fixture
-def data_repeated():
-    """Return different versions of data for count times"""
-    def gen(count):
-        for _ in range(count):
-            yield Categorical(make_data())
-    yield gen
-
-
 @pytest.fixture
 def data_for_sorting():
     return Categorical(['A', 'B', 'C'], categories=['C', 'A', 'B'],

diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
@@ -47,14 +47,6 @@ def data_missing(dtype):
     return integer_array([np.nan, 1], dtype=dtype)
 
 
-@pytest.fixture
-def data_repeated(data):
-    def gen(count):
-        for _ in range(count):
-            yield data
-    yield gen
-
-
 @pytest.fixture
 def data_for_sorting(dtype):
     return integer_array([1, 2, 0], dtype=dtype)

diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py
@@ -47,15 +47,6 @@ def data_missing():
     return IntervalArray.from_tuples([None, (0, 1)])
 
 
-@pytest.fixture
-def data_repeated():
-    """Return different versions of data for count times"""
-    def gen(count):
-        for _ in range(count):
-            yield IntervalArray(make_data())
-    yield gen
-
-
 @pytest.fixture
 def data_for_sorting():
     return IntervalArray.from_tuples([(1, 2), (2, 3), (0, 1)])

diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py
@@ -233,6 +233,15 @@ def test_to_datetime_parse_timezone_malformed(self, offset):
         with pytest.raises(ValueError):
             pd.to_datetime([date], format=fmt)
 
+    def test_to_datetime_parse_timezone_keeps_name(self):
+        # GH 21697
+        fmt = '%Y-%m-%d %H:%M:%S %z'
+        arg = pd.Index(['2010-01-01 12:00:00 Z'], name='foo')
+        result = pd.to_datetime(arg, format=fmt)
+        expected = pd.DatetimeIndex(['2010-01-01 12:00:00'], tz='UTC',
+                                    name='foo')
+        tm.assert_index_equal(result, expected)
+
 
 class TestToDatetime(object):
     def test_to_datetime_pydatetime(self):
@@ -765,6 +774,14 @@ def test_unit_rounding(self, cache):
         expected = pd.Timestamp('2015-06-19 19:55:31.877000093')
         assert result == expected
 
+    @pytest.mark.parametrize('cache', [True, False])
+    def test_unit_ignore_keeps_name(self, cache):
+        # GH 21697
+        expected = pd.Index([15e9] * 2, name='name')
+        result = pd.to_datetime(expected, errors='ignore', box=True, unit='s',
+                                cache=cache)
+        tm.assert_index_equal(result, expected)
+
     @pytest.mark.parametrize('cache', [True, False])
     def test_dataframe(self, cache):