From 5ce06b5bdb8c44043c6463bf8ce3da758800a189 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 1 Oct 2018 14:22:20 -0700 Subject: [PATCH 1/3] BUG: to_datetime preserves name of Index argument in the result (#22918) * BUG: to_datetime preserves name of Index argument in the result * correct test --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/tools/datetimes.py | 13 ++++++++----- pandas/tests/indexes/datetimes/test_tools.py | 17 +++++++++++++++++ 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index b71edcf1f6f51..851c1a3fbd6e9 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -655,6 +655,7 @@ Datetimelike - Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`) - Bug in :class:`DatetimeIndex` where frequency was being set if original frequency was ``None`` (:issue:`22150`) - Bug in rounding methods of :class:`DatetimeIndex` (:meth:`~DatetimeIndex.round`, :meth:`~DatetimeIndex.ceil`, :meth:`~DatetimeIndex.floor`) and :class:`Timestamp` (:meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, :meth:`~Timestamp.floor`) could give rise to loss of precision (:issue:`22591`) +- Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 57387b9ea870a..4a5290a90313d 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -99,13 +99,13 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None): result = Series(arg).map(cache_array) if box: if errors == 'ignore': - return Index(result) + return Index(result, name=name) else: return DatetimeIndex(result, name=name) return result.values -def _return_parsed_timezone_results(result, timezones, box, tz): +def _return_parsed_timezone_results(result, timezones, box, tz, name): """ Return results from array_strptime if a %z or %Z directive was passed. @@ -119,6 +119,9 @@ def _return_parsed_timezone_results(result, timezones, box, tz): True boxes result as an Index-like, False returns an ndarray tz : object None or pytz timezone object + name : string, default None + Name for a DatetimeIndex + Returns ------- tz_result : ndarray of parsed dates with timezone @@ -136,7 +139,7 @@ def _return_parsed_timezone_results(result, timezones, box, tz): in zip(result, timezones)]) if box: from pandas import Index - return Index(tz_results) + return Index(tz_results, name=name) return tz_results @@ -209,7 +212,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, if box: if errors == 'ignore': from pandas import Index - return Index(result) + return Index(result, name=name) return DatetimeIndex(result, tz=tz, name=name) return result @@ -252,7 +255,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, arg, format, exact=exact, errors=errors) if '%Z' in format or '%z' in format: return _return_parsed_timezone_results( - result, timezones, box, tz) + result, timezones, box, tz, name) except tslibs.OutOfBoundsDatetime: if errors == 'raise': raise diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index cc6db8f5854c8..3b7d6a709230b 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -233,6 +233,15 @@ def test_to_datetime_parse_timezone_malformed(self, offset): with pytest.raises(ValueError): pd.to_datetime([date], format=fmt) + def test_to_datetime_parse_timezone_keeps_name(self): + # GH 21697 + fmt = '%Y-%m-%d %H:%M:%S %z' + arg = pd.Index(['2010-01-01 12:00:00 Z'], name='foo') + result = pd.to_datetime(arg, format=fmt) + expected = pd.DatetimeIndex(['2010-01-01 12:00:00'], tz='UTC', + name='foo') + tm.assert_index_equal(result, expected) + class TestToDatetime(object): def test_to_datetime_pydatetime(self): @@ -765,6 +774,14 @@ def test_unit_rounding(self, cache): expected = pd.Timestamp('2015-06-19 19:55:31.877000093') assert result == expected + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_ignore_keeps_name(self, cache): + # GH 21697 + expected = pd.Index([15e9] * 2, name='name') + result = pd.to_datetime(expected, errors='ignore', box=True, unit='s', + cache=cache) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('cache', [True, False]) def test_dataframe(self, cache): From 6247da0db4835ff723126640145b4fad3ce17343 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Oct 2018 08:50:41 -0500 Subject: [PATCH 2/3] Provide default implementation for `data_repated` (#22935) --- pandas/tests/extension/conftest.py | 20 +++++++++++++++---- .../tests/extension/decimal/test_decimal.py | 8 -------- pandas/tests/extension/test_categorical.py | 9 --------- pandas/tests/extension/test_integer.py | 8 -------- pandas/tests/extension/test_interval.py | 9 --------- 5 files changed, 16 insertions(+), 38 deletions(-) diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index 4bbbb7df2f399..8e397d228a5b6 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -31,12 +31,24 @@ def all_data(request, data, data_missing): @pytest.fixture -def data_repeated(): - """Return different versions of data for count times""" +def data_repeated(data): + """ + Generate many datasets. + + Parameters + ---------- + data : fixture implementing `data` + + Returns + ------- + Callable[[int], Generator]: + A callable that takes a `count` argument and + returns a generator yielding `count` datasets. + """ def gen(count): for _ in range(count): - yield NotImplementedError - yield gen + yield data + return gen @pytest.fixture diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 03fdd25826b79..93b8ea786ef5b 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -30,14 +30,6 @@ def data_missing(): return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)]) -@pytest.fixture -def data_repeated(): - def gen(count): - for _ in range(count): - yield DecimalArray(make_data()) - yield gen - - @pytest.fixture def data_for_sorting(): return DecimalArray([decimal.Decimal('1'), diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 6c6cf80c16da6..ff66f53eab6f6 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -45,15 +45,6 @@ def data_missing(): return Categorical([np.nan, 'A']) -@pytest.fixture -def data_repeated(): - """Return different versions of data for count times""" - def gen(count): - for _ in range(count): - yield Categorical(make_data()) - yield gen - - @pytest.fixture def data_for_sorting(): return Categorical(['A', 'B', 'C'], categories=['C', 'A', 'B'], diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 57e0922a0b7d9..7aa33006dadda 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -47,14 +47,6 @@ def data_missing(dtype): return integer_array([np.nan, 1], dtype=dtype) -@pytest.fixture -def data_repeated(data): - def gen(count): - for _ in range(count): - yield data - yield gen - - @pytest.fixture def data_for_sorting(dtype): return integer_array([1, 2, 0], dtype=dtype) diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 34b98f590df0d..7302c5757d144 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -47,15 +47,6 @@ def data_missing(): return IntervalArray.from_tuples([None, (0, 1)]) -@pytest.fixture -def data_repeated(): - """Return different versions of data for count times""" - def gen(count): - for _ in range(count): - yield IntervalArray(make_data()) - yield gen - - @pytest.fixture def data_for_sorting(): return IntervalArray.from_tuples([(1, 2), (2, 3), (0, 1)]) From 1d9f76c5055d1ef31ce76134e88b5568a119f498 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 2 Oct 2018 17:11:11 +0200 Subject: [PATCH 3/3] CLN: remove Index._to_embed (#22879) * CLN: remove Index._to_embed * pep8 --- pandas/core/indexes/base.py | 14 +------------- pandas/core/indexes/datetimes.py | 18 ++++-------------- pandas/core/indexes/period.py | 10 ---------- 3 files changed, 5 insertions(+), 37 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b42bbdafcab45..af04a846ed787 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1114,7 +1114,7 @@ def to_series(self, index=None, name=None): if name is None: name = self.name - return Series(self._to_embed(), index=index, name=name) + return Series(self.values.copy(), index=index, name=name) def to_frame(self, index=True, name=None): """ @@ -1177,18 +1177,6 @@ def to_frame(self, index=True, name=None): result.index = self return result - def _to_embed(self, keep_tz=False, dtype=None): - """ - *this is an internal non-public method* - - return an array repr of this object, potentially casting to object - - """ - if dtype is not None: - return self.astype(dtype)._to_embed(keep_tz=keep_tz) - - return self.values.copy() - _index_shared_docs['astype'] = """ Create an Index with values cast to dtypes. The class of a new Index is determined by dtype. When conversion is impossible, a ValueError diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 9b00f21668bf5..a6cdaa0c2163a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -665,23 +665,13 @@ def to_series(self, keep_tz=False, index=None, name=None): if name is None: name = self.name - return Series(self._to_embed(keep_tz), index=index, name=name) - - def _to_embed(self, keep_tz=False, dtype=None): - """ - return an array repr of this object, potentially casting to object - - This is for internal compat - """ - if dtype is not None: - return self.astype(dtype)._to_embed(keep_tz=keep_tz) - if keep_tz and self.tz is not None: - # preserve the tz & copy - return self.copy(deep=True) + values = self.copy(deep=True) + else: + values = self.values.copy() - return self.values.copy() + return Series(values, index=index, name=name) def to_period(self, freq=None): """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 0f86e18103e3c..969391569ce50 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -365,16 +365,6 @@ def __array_wrap__(self, result, context=None): # cannot pass _simple_new as it is return self._shallow_copy(result, freq=self.freq, name=self.name) - def _to_embed(self, keep_tz=False, dtype=None): - """ - return an array repr of this object, potentially casting to object - """ - - if dtype is not None: - return self.astype(dtype)._to_embed(keep_tz=keep_tz) - - return self.astype(object).values - @property def size(self): # Avoid materializing self._values