implement astype portion of pandas-dev#24024 (pandas-dev#24405)

Pingviinituutti · Feb 28, 2019 · 0277ee7 · 0277ee7
1 parent e819d4a
commit 0277ee7
Show file tree

Hide file tree

Showing 17 changed files with 280 additions and 115 deletions.
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1328,6 +1328,7 @@ Datetimelike
 - Bug in :func:`to_datetime` where ``box`` and ``utc`` arguments were ignored when passing a :class:`DataFrame` or ``dict`` of unit mappings (:issue:`23760`)
 - Bug in :attr:`Series.dt` where the cache would not update properly after an in-place operation (:issue:`24408`)
 - Bug in :class:`PeriodIndex` where comparisons against an array-like object with length 1 failed to raise ``ValueError`` (:issue:`23078`)
+- Bug in :meth:`DatetimeIndex.astype`, :meth:`PeriodIndex.astype` and :meth:`TimedeltaIndex.astype` ignoring the sign of the ``dtype`` for unsigned integer dtypes (:issue:`24405`).
 
 Timedelta
 ^^^^^^^^^

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -17,10 +17,12 @@
 from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
 
 from pandas.core.dtypes.common import (
-    is_bool_dtype, is_datetime64_any_dtype, is_datetime64_dtype,
-    is_datetime64tz_dtype, is_extension_array_dtype, is_float_dtype,
-    is_integer_dtype, is_list_like, is_object_dtype, is_offsetlike,
-    is_period_dtype, is_timedelta64_dtype, needs_i8_conversion)
+    is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype,
+    is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype,
+    is_dtype_equal, is_extension_array_dtype, is_float_dtype, is_integer_dtype,
+    is_list_like, is_object_dtype, is_offsetlike, is_period_dtype,
+    is_string_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype,
+    needs_i8_conversion, pandas_dtype)
 from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
 from pandas.core.dtypes.missing import isna
 
@@ -315,7 +317,7 @@ def _ndarray_values(self):
     # ----------------------------------------------------------------
     # Rendering Methods
 
-    def _format_native_types(self, na_rep=u'NaT', date_format=None):
+    def _format_native_types(self, na_rep='NaT', date_format=None):
         """
         Helper method for astype when converting to strings.
 
@@ -403,9 +405,54 @@ def __getitem__(self, key):
         return self._simple_new(result, **attribs)
 
     def astype(self, dtype, copy=True):
+        # Some notes on cases we don't have to handle here in the base class:
+        #   1. PeriodArray.astype handles period -> period
+        #   2. DatetimeArray.astype handles conversion between tz.
+        #   3. DatetimeArray.astype handles datetime -> period
+        from pandas import Categorical
+        dtype = pandas_dtype(dtype)
+
         if is_object_dtype(dtype):
             return self._box_values(self.asi8)
-        return super(DatetimeLikeArrayMixin, self).astype(dtype, copy)
+        elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
+            return self._format_native_types()
+        elif is_integer_dtype(dtype):
+            # we deliberately ignore int32 vs. int64 here.
+            # See https://github.com/pandas-dev/pandas/issues/24381 for more.
+            values = self.asi8
+
+            if is_unsigned_integer_dtype(dtype):
+                # Again, we ignore uint32 vs. uint64
+                values = values.view("uint64")
+
+            if copy:
+                values = values.copy()
+            return values
+        elif (is_datetime_or_timedelta_dtype(dtype) and
+              not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
+            # disallow conversion between datetime/timedelta,
+            # and conversions for any datetimelike to float
+            msg = 'Cannot cast {name} to dtype {dtype}'
+            raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
+        elif is_categorical_dtype(dtype):
+            return Categorical(self, dtype=dtype)
+        else:
+            return np.asarray(self, dtype=dtype)
+
+    def view(self, dtype=None):
+        """
+        New view on this array with the same data.
+
+        Parameters
+        ----------
+        dtype : numpy dtype, optional
+
+        Returns
+        -------
+        ndarray
+            With the specified `dtype`.
+        """
+        return self._data.view(dtype=dtype)
 
     # ------------------------------------------------------------------
     # ExtensionArray Interface

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -15,8 +15,9 @@
 
 from pandas.core.dtypes.common import (
     _INT64_DTYPE, _NS_DTYPE, is_categorical_dtype, is_datetime64_dtype,
-    is_datetime64tz_dtype, is_extension_type, is_float_dtype, is_int64_dtype,
-    is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype)
+    is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal,
+    is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype,
+    is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype)
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
 from pandas.core.dtypes.missing import isna
@@ -473,6 +474,35 @@ def __iter__(self):
             for v in converted:
                 yield v
 
+    def astype(self, dtype, copy=True):
+        # We handle
+        #   --> datetime
+        #   --> period
+        # DatetimeLikeArrayMixin.astype handles the rest.
+        dtype = pandas_dtype(dtype)
+
+        if (is_datetime64_ns_dtype(dtype) and
+                not is_dtype_equal(dtype, self.dtype)):
+            # GH#18951: datetime64_ns dtype but not equal means different tz
+            new_tz = getattr(dtype, 'tz', None)
+            if getattr(self.dtype, 'tz', None) is None:
+                return self.tz_localize(new_tz)
+            result = self.tz_convert(new_tz)
+            if new_tz is None:
+                # Do we want .astype('datetime64[ns]') to be an ndarray?
+                # The astype in Block._astype expects this to return an
+                # ndarray, but we could maybe work around it there.
+                result = result._data
+            return result
+        elif is_datetime64tz_dtype(self.dtype) and is_dtype_equal(self.dtype,
+                                                                  dtype):
+            if copy:
+                return self.copy()
+            return self
+        elif is_period_dtype(dtype):
+            return self.to_period(freq=dtype.freq)
+        return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
+
     # ----------------------------------------------------------------
     # ExtensionArray Interface
 
@@ -495,7 +525,7 @@ def _validate_fill_value(self, fill_value):
     # -----------------------------------------------------------------
     # Rendering Methods
 
-    def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
+    def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs):
         from pandas.io.formats.format import _get_format_datetime64_from_values
         fmt = _get_format_datetime64_from_values(self, date_format)
 

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -15,10 +15,8 @@
 from pandas.util._validators import validate_fillna_kwargs
 
 from pandas.core.dtypes.common import (
-    _TD_DTYPE, ensure_object, is_array_like, is_categorical_dtype,
-    is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal,
-    is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype,
-    is_period_dtype, is_string_dtype, pandas_dtype)
+    _TD_DTYPE, ensure_object, is_array_like, is_datetime64_dtype,
+    is_float_dtype, is_list_like, is_period_dtype, pandas_dtype)
 from pandas.core.dtypes.dtypes import PeriodDtype
 from pandas.core.dtypes.generic import ABCIndexClass, ABCPeriodIndex, ABCSeries
 from pandas.core.dtypes.missing import isna, notna
@@ -599,42 +597,13 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
     # ------------------------------------------------------------------
 
     def astype(self, dtype, copy=True):
-        # TODO: Figure out something better here...
-        # We have DatetimeLikeArrayMixin ->
-        #     super(...), which ends up being... DatetimeIndexOpsMixin?
-        # this is complicated.
-        # need a pandas_astype(arr, dtype).
-        from pandas import Categorical
-
+        # We handle Period[T] -> Period[U]
+        # Our parent handles everything else.
         dtype = pandas_dtype(dtype)
 
-        if is_object_dtype(dtype):
-            return np.asarray(self, dtype=object)
-        elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
-            return self._format_native_types()
-        elif is_integer_dtype(dtype):
-            values = self._data
-
-            if values.dtype != dtype:
-                # int32 vs. int64
-                values = values.astype(dtype)
-
-            elif copy:
-                values = values.copy()
-
-            return values
-        elif (is_datetime_or_timedelta_dtype(dtype) and
-              not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
-            # disallow conversion between datetime/timedelta,
-            # and conversions for any datetimelike to float
-            msg = 'Cannot cast {name} to dtype {dtype}'
-            raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
-        elif is_categorical_dtype(dtype):
-            return Categorical(self, dtype=dtype)
-        elif is_period_dtype(dtype):
+        if is_period_dtype(dtype):
             return self.asfreq(dtype.freq)
-        else:
-            return np.asarray(self, dtype=dtype)
+        return super(PeriodArray, self).astype(dtype, copy=copy)
 
     @property
     def flags(self):

diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -17,7 +17,8 @@
 from pandas.core.dtypes.common import (
     _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype,
     is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
-    is_string_dtype, is_timedelta64_dtype)
+    is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype,
+    pandas_dtype)
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.generic import (
     ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex)
@@ -234,15 +235,44 @@ def _validate_fill_value(self, fill_value):
                              "Got '{got}'.".format(got=fill_value))
         return fill_value
 
+    def astype(self, dtype, copy=True):
+        # We handle
+        #   --> timedelta64[ns]
+        #   --> timedelta64 (non-nanosecond units)
+        # DatetimeLikeArrayMixin super call handles other cases
+        dtype = pandas_dtype(dtype)
+
+        if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
+            # by pandas convention, converting to non-nano timedelta64
+            #  returns an int64-dtyped array with ints representing multiples
+            #  of the desired timedelta unit.  This is essentially division
+            if self._hasnans:
+                # avoid double-copying
+                result = self._data.astype(dtype, copy=False)
+                values = self._maybe_mask_results(result,
+                                                  fill_value=None,
+                                                  convert='float64')
+                return values
+            result = self._data.astype(dtype, copy=copy)
+            return result.astype('i8')
+        elif is_timedelta64_ns_dtype(dtype):
+            if copy:
+                return self.copy()
+            return self
+        return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)
+
     # ----------------------------------------------------------------
     # Rendering Methods
 
     def _formatter(self, boxed=False):
         from pandas.io.formats.format import _get_format_timedelta64
         return _get_format_timedelta64(self, box=True)
 
-    def _format_native_types(self):
-        return self.astype(object)
+    def _format_native_types(self, na_rep='NaT', date_format=None):
+        from pandas.io.formats.format import _get_format_timedelta64
+
+        formatter = _get_format_timedelta64(self._data, na_rep)
+        return np.array([formatter(x) for x in self._data])
 
     # ----------------------------------------------------------------
     # Arithmetic Methods

diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
@@ -14,7 +14,8 @@
     is_period_dtype, is_scalar, is_string_dtype, is_string_like_dtype,
     is_timedelta64_dtype, needs_i8_conversion, pandas_dtype)
 from .generic import (
-    ABCExtensionArray, ABCGeneric, ABCIndexClass, ABCMultiIndex, ABCSeries)
+    ABCDatetimeArray, ABCExtensionArray, ABCGeneric, ABCIndexClass,
+    ABCMultiIndex, ABCSeries, ABCTimedeltaArray)
 from .inference import is_list_like
 
 isposinf_scalar = libmissing.isposinf_scalar
@@ -108,7 +109,8 @@ def _isna_new(obj):
     elif isinstance(obj, ABCMultiIndex):
         raise NotImplementedError("isna is not defined for MultiIndex")
     elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass,
-                          ABCExtensionArray)):
+                          ABCExtensionArray,
+                          ABCDatetimeArray, ABCTimedeltaArray)):
         return _isna_ndarraylike(obj)
     elif isinstance(obj, ABCGeneric):
         return obj._constructor(obj._data.isna(func=isna))

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -742,8 +742,9 @@ def view(self, cls=None):
         Parameters
         ----------
         dtype : numpy dtype or pandas type
-            Note that any integer `dtype` is treated as ``'int64'``,
-            regardless of the sign and size.
+            Note that any signed integer `dtype` is treated as ``'int64'``,
+            and any unsigned integer `dtype` is treated as ``'uint64'``,
+            regardless of the size.
         copy : bool, default True
             By default, astype always returns a newly allocated object.
             If copy is set to False and internal requirements on dtype are

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -13,10 +13,8 @@
 from pandas.util._decorators import Appender, cache_readonly
 
 from pandas.core.dtypes.common import (
-    ensure_int64, is_bool_dtype, is_categorical_dtype,
-    is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype,
-    is_integer, is_integer_dtype, is_list_like, is_object_dtype,
-    is_period_dtype, is_scalar, is_string_dtype)
+    ensure_int64, is_bool_dtype, is_dtype_equal, is_float, is_integer,
+    is_list_like, is_period_dtype, is_scalar)
 from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
 
 from pandas.core import algorithms, ops
@@ -39,6 +37,7 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
 
     # override DatetimeLikeArrayMixin method
     copy = Index.copy
+    view = Index.view
 
     # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
     # properties there.  They can be made into cache_readonly for Index
@@ -550,24 +549,18 @@ def _maybe_box_as_values(self, values, **attribs):
         # - sort_values
         return values
 
+    @Appender(_index_shared_docs['astype'])
     def astype(self, dtype, copy=True):
-        if is_object_dtype(dtype):
-            return self._box_values_as_index()
-        elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
-            return Index(self.format(), name=self.name, dtype=object)
-        elif is_integer_dtype(dtype):
-            # TODO(DatetimeArray): use self._values here.
-            # Can't use ._values currently, because that returns a
-            # DatetimeIndex, which throws us in an infinite loop.
-            return Index(self.values.astype('i8', copy=copy), name=self.name,
-                         dtype='i8')
-        elif (is_datetime_or_timedelta_dtype(dtype) and
-              not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
-            # disallow conversion between datetime/timedelta,
-            # and conversions for any datetimelike to float
-            msg = 'Cannot cast {name} to dtype {dtype}'
-            raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
-        return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
+        if is_dtype_equal(self.dtype, dtype) and copy is False:
+            # Ensure that self.astype(self.dtype, copy=False) is self
+            return self
+
+        new_values = self._eadata.astype(dtype, copy=copy)
+
+        # pass copy=False because any copying will be done in the
+        #  _eadata.astype call above
+        return Index(new_values,
+                     dtype=new_values.dtype, name=self.name, copy=False)
 
     @Appender(DatetimeLikeArrayMixin._time_shift.__doc__)
     def _time_shift(self, periods, freq=None):