From 0277ee7821fad9300a87200e3a5e193c492b21cd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 28 Dec 2018 12:31:28 -0800 Subject: [PATCH] implement astype portion of #24024 (#24405) --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/arrays/datetimelike.py | 59 +++++++++++++++++-- pandas/core/arrays/datetimes.py | 36 ++++++++++- pandas/core/arrays/period.py | 43 ++------------ pandas/core/arrays/timedeltas.py | 36 ++++++++++- pandas/core/dtypes/missing.py | 6 +- pandas/core/indexes/base.py | 5 +- pandas/core/indexes/datetimelike.py | 35 +++++------ pandas/core/indexes/datetimes.py | 22 ++----- pandas/core/indexes/period.py | 9 +-- pandas/core/indexes/timedeltas.py | 14 ++--- pandas/tests/arrays/test_datetimes.py | 32 ++++++++++ pandas/tests/arrays/test_period.py | 26 +++++--- pandas/tests/arrays/test_timedeltas.py | 17 ++++++ pandas/tests/indexes/datetimes/test_astype.py | 24 +++++++- pandas/tests/indexes/period/test_astype.py | 6 ++ .../tests/indexes/timedeltas/test_astype.py | 24 +++++++- 17 files changed, 280 insertions(+), 115 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 16f327a9006da6..b2e52bbb3cb8fc 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1328,6 +1328,7 @@ Datetimelike - Bug in :func:`to_datetime` where ``box`` and ``utc`` arguments were ignored when passing a :class:`DataFrame` or ``dict`` of unit mappings (:issue:`23760`) - Bug in :attr:`Series.dt` where the cache would not update properly after an in-place operation (:issue:`24408`) - Bug in :class:`PeriodIndex` where comparisons against an array-like object with length 1 failed to raise ``ValueError`` (:issue:`23078`) +- Bug in :meth:`DatetimeIndex.astype`, :meth:`PeriodIndex.astype` and :meth:`TimedeltaIndex.astype` ignoring the sign of the ``dtype`` for unsigned integer dtypes (:issue:`24405`). Timedelta ^^^^^^^^^ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 149bb07d232547..2acb08b6965066 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -17,10 +17,12 @@ from pandas.util._decorators import Appender, Substitution, deprecate_kwarg from pandas.core.dtypes.common import ( - is_bool_dtype, is_datetime64_any_dtype, is_datetime64_dtype, - is_datetime64tz_dtype, is_extension_array_dtype, is_float_dtype, - is_integer_dtype, is_list_like, is_object_dtype, is_offsetlike, - is_period_dtype, is_timedelta64_dtype, needs_i8_conversion) + is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype, + is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, + is_dtype_equal, is_extension_array_dtype, is_float_dtype, is_integer_dtype, + is_list_like, is_object_dtype, is_offsetlike, is_period_dtype, + is_string_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype, + needs_i8_conversion, pandas_dtype) from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna @@ -315,7 +317,7 @@ def _ndarray_values(self): # ---------------------------------------------------------------- # Rendering Methods - def _format_native_types(self, na_rep=u'NaT', date_format=None): + def _format_native_types(self, na_rep='NaT', date_format=None): """ Helper method for astype when converting to strings. @@ -403,9 +405,54 @@ def __getitem__(self, key): return self._simple_new(result, **attribs) def astype(self, dtype, copy=True): + # Some notes on cases we don't have to handle here in the base class: + # 1. PeriodArray.astype handles period -> period + # 2. DatetimeArray.astype handles conversion between tz. + # 3. DatetimeArray.astype handles datetime -> period + from pandas import Categorical + dtype = pandas_dtype(dtype) + if is_object_dtype(dtype): return self._box_values(self.asi8) - return super(DatetimeLikeArrayMixin, self).astype(dtype, copy) + elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): + return self._format_native_types() + elif is_integer_dtype(dtype): + # we deliberately ignore int32 vs. int64 here. + # See https://github.com/pandas-dev/pandas/issues/24381 for more. + values = self.asi8 + + if is_unsigned_integer_dtype(dtype): + # Again, we ignore int32 vs. int64 + values = values.view("uint64") + + if copy: + values = values.copy() + return values + elif (is_datetime_or_timedelta_dtype(dtype) and + not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype): + # disallow conversion between datetime/timedelta, + # and conversions for any datetimelike to float + msg = 'Cannot cast {name} to dtype {dtype}' + raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) + elif is_categorical_dtype(dtype): + return Categorical(self, dtype=dtype) + else: + return np.asarray(self, dtype=dtype) + + def view(self, dtype=None): + """ + New view on this array with the same data. + + Parameters + ---------- + dtype : numpy dtype, optional + + Returns + ------- + ndarray + With the specified `dtype`. + """ + return self._data.view(dtype=dtype) # ------------------------------------------------------------------ # ExtensionArray Interface diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index f7a8bdb201bfdd..966511d048421c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -15,8 +15,9 @@ from pandas.core.dtypes.common import ( _INT64_DTYPE, _NS_DTYPE, is_categorical_dtype, is_datetime64_dtype, - is_datetime64tz_dtype, is_extension_type, is_float_dtype, is_int64_dtype, - is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype) + is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal, + is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype, + is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna @@ -473,6 +474,35 @@ def __iter__(self): for v in converted: yield v + def astype(self, dtype, copy=True): + # We handle + # --> datetime + # --> period + # DatetimeLikeArrayMixin Super handles the rest. + dtype = pandas_dtype(dtype) + + if (is_datetime64_ns_dtype(dtype) and + not is_dtype_equal(dtype, self.dtype)): + # GH#18951: datetime64_ns dtype but not equal means different tz + new_tz = getattr(dtype, 'tz', None) + if getattr(self.dtype, 'tz', None) is None: + return self.tz_localize(new_tz) + result = self.tz_convert(new_tz) + if new_tz is None: + # Do we want .astype('datetime64[ns]') to be an ndarray. + # The astype in Block._astype expects this to return an + # ndarray, but we could maybe work around it there. + result = result._data + return result + elif is_datetime64tz_dtype(self.dtype) and is_dtype_equal(self.dtype, + dtype): + if copy: + return self.copy() + return self + elif is_period_dtype(dtype): + return self.to_period(freq=dtype.freq) + return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy) + # ---------------------------------------------------------------- # ExtensionArray Interface @@ -495,7 +525,7 @@ def _validate_fill_value(self, fill_value): # ----------------------------------------------------------------- # Rendering Methods - def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): + def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): from pandas.io.formats.format import _get_format_datetime64_from_values fmt = _get_format_datetime64_from_values(self, date_format) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 2fcc6ab4cc3bfe..d45cd9402d45bd 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -15,10 +15,8 @@ from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.common import ( - _TD_DTYPE, ensure_object, is_array_like, is_categorical_dtype, - is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal, - is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, - is_period_dtype, is_string_dtype, pandas_dtype) + _TD_DTYPE, ensure_object, is_array_like, is_datetime64_dtype, + is_float_dtype, is_list_like, is_period_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.generic import ABCIndexClass, ABCPeriodIndex, ABCSeries from pandas.core.dtypes.missing import isna, notna @@ -599,42 +597,13 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): # ------------------------------------------------------------------ def astype(self, dtype, copy=True): - # TODO: Figure out something better here... - # We have DatetimeLikeArrayMixin -> - # super(...), which ends up being... DatetimeIndexOpsMixin? - # this is complicated. - # need a pandas_astype(arr, dtype). - from pandas import Categorical - + # We handle Period[T] -> Period[U] + # Our parent handles everything else. dtype = pandas_dtype(dtype) - if is_object_dtype(dtype): - return np.asarray(self, dtype=object) - elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): - return self._format_native_types() - elif is_integer_dtype(dtype): - values = self._data - - if values.dtype != dtype: - # int32 vs. int64 - values = values.astype(dtype) - - elif copy: - values = values.copy() - - return values - elif (is_datetime_or_timedelta_dtype(dtype) and - not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype): - # disallow conversion between datetime/timedelta, - # and conversions for any datetimelike to float - msg = 'Cannot cast {name} to dtype {dtype}' - raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) - elif is_categorical_dtype(dtype): - return Categorical(self, dtype=dtype) - elif is_period_dtype(dtype): + if is_period_dtype(dtype): return self.asfreq(dtype.freq) - else: - return np.asarray(self, dtype=dtype) + return super(PeriodArray, self).astype(dtype, copy=copy) @property def flags(self): diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 8721e0ce3ace54..2c7187f85517fc 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -17,7 +17,8 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar, - is_string_dtype, is_timedelta64_dtype) + is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, + pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex) @@ -234,6 +235,32 @@ def _validate_fill_value(self, fill_value): "Got '{got}'.".format(got=fill_value)) return fill_value + def astype(self, dtype, copy=True): + # We handle + # --> timedelta64[ns] + # --> timedelta64 + # DatetimeLikeArrayMixin super call handles other cases + dtype = pandas_dtype(dtype) + + if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype): + # by pandas convention, converting to non-nano timedelta64 + # returns an int64-dtyped array with ints representing multiples + # of the desired timedelta unit. This is essentially division + if self._hasnans: + # avoid double-copying + result = self._data.astype(dtype, copy=False) + values = self._maybe_mask_results(result, + fill_value=None, + convert='float64') + return values + result = self._data.astype(dtype, copy=copy) + return result.astype('i8') + elif is_timedelta64_ns_dtype(dtype): + if copy: + return self.copy() + return self + return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy) + # ---------------------------------------------------------------- # Rendering Methods @@ -241,8 +268,11 @@ def _formatter(self, boxed=False): from pandas.io.formats.format import _get_format_timedelta64 return _get_format_timedelta64(self, box=True) - def _format_native_types(self): - return self.astype(object) + def _format_native_types(self, na_rep='NaT', date_format=None): + from pandas.io.formats.format import _get_format_timedelta64 + + formatter = _get_format_timedelta64(self._data, na_rep) + return np.array([formatter(x) for x in self._data]) # ---------------------------------------------------------------- # Arithmetic Methods diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 809dcbd054ea0a..1d0ea034559492 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -14,7 +14,8 @@ is_period_dtype, is_scalar, is_string_dtype, is_string_like_dtype, is_timedelta64_dtype, needs_i8_conversion, pandas_dtype) from .generic import ( - ABCExtensionArray, ABCGeneric, ABCIndexClass, ABCMultiIndex, ABCSeries) + ABCDatetimeArray, ABCExtensionArray, ABCGeneric, ABCIndexClass, + ABCMultiIndex, ABCSeries, ABCTimedeltaArray) from .inference import is_list_like isposinf_scalar = libmissing.isposinf_scalar @@ -108,7 +109,8 @@ def _isna_new(obj): elif isinstance(obj, ABCMultiIndex): raise NotImplementedError("isna is not defined for MultiIndex") elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, - ABCExtensionArray)): + ABCExtensionArray, + ABCDatetimeArray, ABCTimedeltaArray)): return _isna_ndarraylike(obj) elif isinstance(obj, ABCGeneric): return obj._constructor(obj._data.isna(func=isna)) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 71fb8c7179109c..36bf4ba5d98511 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -742,8 +742,9 @@ def view(self, cls=None): Parameters ---------- dtype : numpy dtype or pandas type - Note that any integer `dtype` is treated as ``'int64'``, - regardless of the sign and size. + Note that any signed integer `dtype` is treated as ``'int64'``, + and any unsigned integer `dtype` is treated as ``'uint64'``, + regardless of the size. copy : bool, default True By default, astype always returns a newly allocated object. If copy is set to False and internal requirements on dtype are diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 2338fbe896eb0c..8178f7e9c6469b 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -13,10 +13,8 @@ from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( - ensure_int64, is_bool_dtype, is_categorical_dtype, - is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype, - is_integer, is_integer_dtype, is_list_like, is_object_dtype, - is_period_dtype, is_scalar, is_string_dtype) + ensure_int64, is_bool_dtype, is_dtype_equal, is_float, is_integer, + is_list_like, is_period_dtype, is_scalar) from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries from pandas.core import algorithms, ops @@ -39,6 +37,7 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): # override DatetimeLikeArrayMixin method copy = Index.copy + view = Index.view # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are # properties there. They can be made into cache_readonly for Index @@ -550,24 +549,18 @@ def _maybe_box_as_values(self, values, **attribs): # - sort_values return values + @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): - if is_object_dtype(dtype): - return self._box_values_as_index() - elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): - return Index(self.format(), name=self.name, dtype=object) - elif is_integer_dtype(dtype): - # TODO(DatetimeArray): use self._values here. - # Can't use ._values currently, because that returns a - # DatetimeIndex, which throws us in an infinite loop. - return Index(self.values.astype('i8', copy=copy), name=self.name, - dtype='i8') - elif (is_datetime_or_timedelta_dtype(dtype) and - not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype): - # disallow conversion between datetime/timedelta, - # and conversions for any datetimelike to float - msg = 'Cannot cast {name} to dtype {dtype}' - raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) - return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy) + if is_dtype_equal(self.dtype, dtype) and copy is False: + # Ensure that self.astype(self.dtype) is self + return self + + new_values = self._eadata.astype(dtype, copy=copy) + + # pass copy=False because any copying will be done in the + # _eadata.astype call above + return Index(new_values, + dtype=new_values.dtype, name=self.name, copy=False) @Appender(DatetimeLikeArrayMixin._time_shift.__doc__) def _time_shift(self, periods, freq=None): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 4c51b112252def..1e6daabcc04456 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -14,9 +14,8 @@ from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.core.dtypes.common import ( - _NS_DTYPE, ensure_int64, is_datetime64_ns_dtype, is_dtype_equal, is_float, - is_integer, is_list_like, is_period_dtype, is_scalar, is_string_like, - pandas_dtype) + _NS_DTYPE, ensure_int64, is_float, is_integer, is_list_like, is_scalar, + is_string_like) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.missing import isna @@ -25,7 +24,7 @@ DatetimeArrayMixin as DatetimeArray, _to_m8) from pandas.core.base import _shared_docs import pandas.core.common as com -from pandas.core.indexes.base import Index, _index_shared_docs +from pandas.core.indexes.base import Index from pandas.core.indexes.datetimelike import ( DatetimeIndexOpsMixin, DatetimelikeDelegateMixin) from pandas.core.indexes.numeric import Int64Index @@ -668,20 +667,6 @@ def intersection(self, other): # -------------------------------------------------------------------- - @Appender(_index_shared_docs['astype']) - def astype(self, dtype, copy=True): - dtype = pandas_dtype(dtype) - if (is_datetime64_ns_dtype(dtype) and - not is_dtype_equal(dtype, self.dtype)): - # GH 18951: datetime64_ns dtype but not equal means different tz - new_tz = getattr(dtype, 'tz', None) - if getattr(self.dtype, 'tz', None) is None: - return self.tz_localize(new_tz) - return self.tz_convert(new_tz) - elif is_period_dtype(dtype): - return self.to_period(freq=dtype.freq) - return super(DatetimeIndex, self).astype(dtype, copy=copy) - def _get_time_micros(self): values = self.asi8 if self.tz is not None and not timezones.is_utc(self.tz): @@ -1154,6 +1139,7 @@ def _eadata(self): _is_monotonic_increasing = Index.is_monotonic_increasing _is_monotonic_decreasing = Index.is_monotonic_decreasing _is_unique = Index.is_unique + astype = DatetimeIndexOpsMixin.astype _timezone = cache_readonly(DatetimeArray._timezone.fget) is_normalized = cache_readonly(DatetimeArray.is_normalized.fget) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index cdbbb6bb9127c9..051c5ef3262ef1 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -580,16 +580,13 @@ def asof_locs(self, where, mask): def astype(self, dtype, copy=True, how='start'): dtype = pandas_dtype(dtype) - # We have a few special-cases for `dtype`. - # Failing those, we fall back to astyping the values - if is_datetime64_any_dtype(dtype): - # 'how' is index-speicifc, isn't part of the EA interface. + # 'how' is index-specific, isn't part of the EA interface. tz = getattr(dtype, 'tz', None) return self.to_timestamp(how=how).tz_localize(tz) - result = self._data.astype(dtype, copy=copy) - return Index(result, name=self.name, dtype=dtype, copy=False) + # TODO: should probably raise on `how` here, so we don't ignore it. + return super(PeriodIndex, self).astype(dtype, copy=copy) @Substitution(klass='PeriodIndex') @Appender(_shared_docs['searchsorted']) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 2f63a486d7b98a..aa0e1edf06af03 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -272,7 +272,7 @@ def _formatter_func(self): from pandas.io.formats.format import _get_format_timedelta64 return _get_format_timedelta64(self, box=True) - def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): + def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): from pandas.io.formats.format import Timedelta64Formatter return Timedelta64Formatter(values=self, nat_rep=na_rep, @@ -310,14 +310,14 @@ def _eadata(self): def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype): - # return an index (essentially this is division) - result = self.values.astype(dtype, copy=copy) + # Have to repeat the check for 'timedelta64' (not ns) dtype + # so that we can return a numeric index, since pandas will return + # a TimedeltaIndex when dtype='timedelta' + result = self._eadata.astype(dtype, copy=copy) if self.hasnans: - values = self._maybe_mask_results(result, fill_value=None, - convert='float64') - return Index(values, name=self.name) + return Index(result, name=self.name) return Index(result.astype('i8'), name=self.name) - return super(TimedeltaIndex, self).astype(dtype, copy=copy) + return DatetimeIndexOpsMixin.astype(self, dtype, copy=copy) def union(self, other): """ diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 2b630b98b69a21..871bc440825bf0 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -5,6 +5,9 @@ import operator import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray @@ -42,3 +45,32 @@ def test_cmp_dt64_arraylike_tznaive(self, all_compare_operators): result = op(other, arr) tm.assert_numpy_array_equal(result, expected) + + +class TestDatetimeArray(object): + def test_astype_to_same(self): + arr = DatetimeArray._from_sequence(['2000'], tz='US/Central') + result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False) + assert result is arr + + @pytest.mark.parametrize("dtype", [ + int, np.int32, np.int64, 'uint32', 'uint64', + ]) + def test_astype_int(self, dtype): + arr = DatetimeArray._from_sequence([pd.Timestamp('2000'), + pd.Timestamp('2001')]) + result = arr.astype(dtype) + + if np.dtype(dtype).kind == 'u': + expected_dtype = np.dtype('uint64') + else: + expected_dtype = np.dtype('int64') + expected = arr.astype(expected_dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) + + def test_tz_setter_raises(self): + arr = DatetimeArray._from_sequence(['2000'], tz='US/Central') + with pytest.raises(AttributeError, match='tz_localize'): + arr.tz = 'UTC' diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 4425cc8eb1139c..7fb88640e5fb43 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -4,7 +4,6 @@ from pandas._libs.tslibs import iNaT from pandas._libs.tslibs.period import IncompatibleFrequency -from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.dtypes import PeriodDtype import pandas as pd @@ -88,24 +87,35 @@ def test_take_raises(): arr.take([0, -1], allow_fill=True, fill_value='foo') -@pytest.mark.parametrize('dtype', [int, np.int32, np.int64]) +@pytest.mark.parametrize('dtype', [ + int, np.int32, np.int64, 'uint32', 'uint64', +]) def test_astype(dtype): - # Need to ensure ordinals are astyped correctly for both - # int32 and 64 + # We choose to ignore the sign and size of integers for + # Period/Datetime/Timedelta astype arr = period_array(['2000', '2001', None], freq='D') result = arr.astype(dtype) - # need pandas_dtype to handle int32 vs. int64 correctly - expected = pandas_dtype(dtype) - assert result.dtype == expected + + if np.dtype(dtype).kind == 'u': + expected_dtype = np.dtype('uint64') + else: + expected_dtype = np.dtype('int64') + expected = arr.astype(expected_dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) def test_astype_copies(): arr = period_array(['2000', '2001', None], freq='D') result = arr.astype(np.int64, copy=False) - assert result is arr._data + # Add the `.base`, since we now use `.asi8` which returns a view. + # We could maybe override it in PeriodArray to return ._data directly. + assert result.base is arr._data result = arr.astype(np.int64, copy=True) assert result is not arr._data + tm.assert_numpy_array_equal(result, arr._data.view('i8')) def test_astype_categorical(): diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 97ac3fce070880..287079165284b5 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -55,3 +55,20 @@ def test_neg_freq(self): result = -arr tm.assert_timedelta_array_equal(result, expected) + + @pytest.mark.parametrize("dtype", [ + int, np.int32, np.int64, 'uint32', 'uint64', + ]) + def test_astype_int(self, dtype): + arr = TimedeltaArray._from_sequence([pd.Timedelta('1H'), + pd.Timedelta('2H')]) + result = arr.astype(dtype) + + if np.dtype(dtype).kind == 'u': + expected_dtype = np.dtype('uint64') + else: + expected_dtype = np.dtype('int64') + expected = arr.astype(expected_dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index d47d1016ee6539..cda7a005c40c73 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -33,6 +33,15 @@ def test_astype(self): tm.assert_index_equal(result, Index(rng.asi8)) tm.assert_numpy_array_equal(result.values, rng.asi8) + def test_astype_uint(self): + arr = date_range('2000', periods=2) + expected = pd.UInt64Index( + np.array([946684800000000000, 946771200000000000], dtype="uint64") + ) + + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) + def test_astype_with_tz(self): # with tz @@ -168,7 +177,7 @@ def test_astype_object_with_nat(self): def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) - msg = 'Cannot cast DatetimeIndex to dtype' + msg = 'Cannot cast DatetimeArrayMixin to dtype' with pytest.raises(TypeError, match=msg): idx.astype(dtype) @@ -301,6 +310,19 @@ def test_to_period_nofreq(self): assert idx.freqstr is None tm.assert_index_equal(idx.to_period(), expected) + @pytest.mark.parametrize('tz', [None, 'US/Central']) + def test_astype_category(self, tz): + obj = pd.date_range("2000", periods=2, tz=tz) + result = obj.astype('category') + expected = pd.CategoricalIndex([pd.Timestamp('2000-01-01', tz=tz), + pd.Timestamp('2000-01-02', tz=tz)]) + tm.assert_index_equal(result, expected) + + # TODO: use \._data following composition changeover + result = obj._eadata.astype('category') + expected = expected.values + tm.assert_categorical_equal(result, expected) + @pytest.mark.parametrize('tz', [None, 'US/Central']) def test_astype_array_fallback(self, tz): obj = pd.date_range("2000", periods=2, tz=tz) diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py index 68c338c6cb6884..6abdf5962d6cf8 100644 --- a/pandas/tests/indexes/period/test_astype.py +++ b/pandas/tests/indexes/period/test_astype.py @@ -41,6 +41,12 @@ def test_astype_conversion(self): tm.assert_index_equal(result, Index(idx.asi8)) tm.assert_numpy_array_equal(result.values, idx.asi8) + def test_astype_uint(self): + arr = period_range('2000', periods=2) + expected = pd.UInt64Index(np.array([10957, 10958], dtype='uint64')) + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) + def test_astype_object(self): idx = pd.PeriodIndex([], freq='M') diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index 6afbe9cff42c23..088322d9f9a977 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -54,6 +54,15 @@ def test_astype(self): tm.assert_index_equal(result, Index(rng.asi8)) tm.assert_numpy_array_equal(rng.asi8, result.values) + def test_astype_uint(self): + arr = timedelta_range('1H', periods=2) + expected = pd.UInt64Index( + np.array([3600000000000, 90000000000000], dtype="uint64") + ) + + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) + def test_astype_timedelta64(self): # GH 13149, GH 13209 idx = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN]) @@ -75,10 +84,23 @@ def test_astype_timedelta64(self): def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN]) - msg = 'Cannot cast TimedeltaIndex to dtype' + msg = 'Cannot cast TimedeltaArrayMixin to dtype' with pytest.raises(TypeError, match=msg): idx.astype(dtype) + @pytest.mark.parametrize('tz', [None, 'US/Central']) + def test_astype_category(self, tz): + obj = pd.date_range("2000", periods=2, tz=tz) + result = obj.astype('category') + expected = pd.CategoricalIndex([pd.Timestamp('2000-01-01', tz=tz), + pd.Timestamp('2000-01-02', tz=tz)]) + tm.assert_index_equal(result, expected) + + # TODO: Use \._data following composition changeover + result = obj._eadata.astype('category') + expected = expected.values + tm.assert_categorical_equal(result, expected) + def test_astype_array_fallback(self): obj = pd.timedelta_range("1H", periods=2) result = obj.astype(bool)