Skip to content

Commit

Permalink
implement astype portion of pandas-dev#24024 (pandas-dev#24405)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and Pingviinituutti committed Feb 28, 2019
1 parent e819d4a commit 0277ee7
Show file tree
Hide file tree
Showing 17 changed files with 280 additions and 115 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1328,6 +1328,7 @@ Datetimelike
- Bug in :func:`to_datetime` where ``box`` and ``utc`` arguments were ignored when passing a :class:`DataFrame` or ``dict`` of unit mappings (:issue:`23760`)
- Bug in :attr:`Series.dt` where the cache would not update properly after an in-place operation (:issue:`24408`)
- Bug in :class:`PeriodIndex` where comparisons against an array-like object with length 1 failed to raise ``ValueError`` (:issue:`23078`)
- Bug in :meth:`DatetimeIndex.astype`, :meth:`PeriodIndex.astype` and :meth:`TimedeltaIndex.astype` ignoring the sign of the ``dtype`` for unsigned integer dtypes (:issue:`24405`).

Timedelta
^^^^^^^^^
Expand Down
59 changes: 53 additions & 6 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg

from pandas.core.dtypes.common import (
is_bool_dtype, is_datetime64_any_dtype, is_datetime64_dtype,
is_datetime64tz_dtype, is_extension_array_dtype, is_float_dtype,
is_integer_dtype, is_list_like, is_object_dtype, is_offsetlike,
is_period_dtype, is_timedelta64_dtype, needs_i8_conversion)
is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype,
is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype,
is_dtype_equal, is_extension_array_dtype, is_float_dtype, is_integer_dtype,
is_list_like, is_object_dtype, is_offsetlike, is_period_dtype,
is_string_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype,
needs_i8_conversion, pandas_dtype)
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna

Expand Down Expand Up @@ -315,7 +317,7 @@ def _ndarray_values(self):
# ----------------------------------------------------------------
# Rendering Methods

def _format_native_types(self, na_rep=u'NaT', date_format=None):
def _format_native_types(self, na_rep='NaT', date_format=None):
"""
Helper method for astype when converting to strings.
Expand Down Expand Up @@ -403,9 +405,54 @@ def __getitem__(self, key):
return self._simple_new(result, **attribs)

def astype(self, dtype, copy=True):
    """
    Cast the array to ``dtype``.

    Parameters
    ----------
    dtype : str or dtype
        Target dtype; it is normalized through ``pandas_dtype``.
    copy : bool, default True
        Only consulted on the integer path, where it decides whether the
        ``asi8`` values are copied before being returned.

    Returns
    -------
    object
        Depends on ``dtype``: the result of ``self._box_values(self.asi8)``
        for object dtype, ``self._format_native_types()`` for
        (non-categorical) string dtypes, the ``asi8`` values (viewed as
        ``uint64`` for unsigned targets) for integer dtypes, a
        ``Categorical`` for categorical dtypes, and
        ``np.asarray(self, dtype=dtype)`` otherwise.

    Raises
    ------
    TypeError
        If ``dtype`` is a float dtype, or a datetime/timedelta dtype that
        is not equal to ``self.dtype``.
    """
    # Some notes on cases we don't have to handle here in the base class:
    #   1. PeriodArray.astype handles period -> period
    #   2. DatetimeArray.astype handles conversion between tz.
    #   3. DatetimeArray.astype handles datetime -> period
    from pandas import Categorical
    dtype = pandas_dtype(dtype)

    if is_object_dtype(dtype):
        return self._box_values(self.asi8)
    elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
        return self._format_native_types()
    elif is_integer_dtype(dtype):
        # we deliberately ignore int32 vs. int64 here.
        # See https://github.com/pandas-dev/pandas/issues/24381 for more.
        values = self.asi8

        if is_unsigned_integer_dtype(dtype):
            # Again, we ignore int32 vs. int64; only the sign is honoured,
            # by reinterpreting the same buffer as uint64.
            values = values.view("uint64")

        if copy:
            values = values.copy()
        return values
    elif (is_datetime_or_timedelta_dtype(dtype) and
          not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
        # disallow conversion between datetime/timedelta,
        # and conversions for any datetimelike to float
        msg = 'Cannot cast {name} to dtype {dtype}'
        raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
    elif is_categorical_dtype(dtype):
        return Categorical(self, dtype=dtype)
    else:
        return np.asarray(self, dtype=dtype)

def view(self, dtype=None):
    """
    Return a view of the backing ``_data`` interpreted as ``dtype``.

    Parameters
    ----------
    dtype : numpy dtype, optional
        Forwarded unchanged to ``self._data.view``.

    Returns
    -------
    ndarray
        With the specified `dtype`, as produced by ``self._data.view``.
    """
    backing = self._data
    return backing.view(dtype=dtype)

# ------------------------------------------------------------------
# ExtensionArray Interface
Expand Down
36 changes: 33 additions & 3 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@

from pandas.core.dtypes.common import (
_INT64_DTYPE, _NS_DTYPE, is_categorical_dtype, is_datetime64_dtype,
is_datetime64tz_dtype, is_extension_type, is_float_dtype, is_int64_dtype,
is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype)
is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal,
is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype,
is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna
Expand Down Expand Up @@ -473,6 +474,35 @@ def __iter__(self):
for v in converted:
yield v

def astype(self, dtype, copy=True):
    """
    Cast to ``dtype``, handling the datetime-specific targets here.

    This method deals with datetime64[ns] targets whose dtype differs
    from ours (i.e. a different tz), the same tz-aware dtype, and period
    dtypes. Every other target is forwarded to
    ``DatetimeLikeArrayMixin.astype``.

    Parameters
    ----------
    dtype : str or dtype
        Target dtype; it is normalized through ``pandas_dtype``.
    copy : bool, default True
        Consulted when the target equals our tz-aware dtype, and passed
        along to the mixin for the remaining cases.
    """
    dtype = pandas_dtype(dtype)

    changes_tz = (is_datetime64_ns_dtype(dtype) and
                  not is_dtype_equal(dtype, self.dtype))
    if changes_tz:
        # GH#18951: datetime64_ns dtype but not equal means different tz
        target_tz = getattr(dtype, 'tz', None)
        current_tz = getattr(self.dtype, 'tz', None)
        if current_tz is None:
            return self.tz_localize(target_tz)

        converted = self.tz_convert(target_tz)
        # Open question: do we want .astype('datetime64[ns]') to be an
        # ndarray? The astype in Block._astype expects this to return an
        # ndarray, but we could maybe work around it there.
        return converted._data if target_tz is None else converted

    same_tz_dtype = (is_datetime64tz_dtype(self.dtype) and
                     is_dtype_equal(self.dtype, dtype))
    if same_tz_dtype:
        return self.copy() if copy else self

    if is_period_dtype(dtype):
        return self.to_period(freq=dtype.freq)

    # Everything else is the mixin's responsibility.
    return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)

# ----------------------------------------------------------------
# ExtensionArray Interface

Expand All @@ -495,7 +525,7 @@ def _validate_fill_value(self, fill_value):
# -----------------------------------------------------------------
# Rendering Methods

def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs):
from pandas.io.formats.format import _get_format_datetime64_from_values
fmt = _get_format_datetime64_from_values(self, date_format)

Expand Down
43 changes: 6 additions & 37 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@
from pandas.util._validators import validate_fillna_kwargs

from pandas.core.dtypes.common import (
_TD_DTYPE, ensure_object, is_array_like, is_categorical_dtype,
is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal,
is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype,
is_period_dtype, is_string_dtype, pandas_dtype)
_TD_DTYPE, ensure_object, is_array_like, is_datetime64_dtype,
is_float_dtype, is_list_like, is_period_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCIndexClass, ABCPeriodIndex, ABCSeries
from pandas.core.dtypes.missing import isna, notna
Expand Down Expand Up @@ -599,42 +597,13 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
# ------------------------------------------------------------------

def astype(self, dtype, copy=True):
    """
    Cast to ``dtype``.

    Only Period[T] -> Period[U] is handled here; every other target is
    delegated to the parent class ``astype``.

    Parameters
    ----------
    dtype : str or dtype
        Target dtype; it is normalized through ``pandas_dtype``.
    copy : bool, default True
        Forwarded to the parent ``astype``; not consulted for
        period -> period conversion.
    """
    # We handle Period[T] -> Period[U]
    # Our parent handles everything else.
    dtype = pandas_dtype(dtype)

    if is_period_dtype(dtype):
        return self.asfreq(dtype.freq)
    return super(PeriodArray, self).astype(dtype, copy=copy)

@property
def flags(self):
Expand Down
36 changes: 33 additions & 3 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
from pandas.core.dtypes.common import (
_NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype,
is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
is_string_dtype, is_timedelta64_dtype)
is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype,
pandas_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex)
Expand Down Expand Up @@ -234,15 +235,44 @@ def _validate_fill_value(self, fill_value):
"Got '{got}'.".format(got=fill_value))
return fill_value

def astype(self, dtype, copy=True):
    """
    Cast to ``dtype``, handling the timedelta64 targets here.

    timedelta64[ns] and non-nano timedelta64 targets are dealt with in
    this method; everything else is forwarded to
    ``DatetimeLikeArrayMixin.astype``.

    Parameters
    ----------
    dtype : str or dtype
        Target dtype; it is normalized through ``pandas_dtype``.
    copy : bool, default True
        Whether to copy when the target is timedelta64[ns] or when
        converting NaN-free data; also passed along to the mixin.
    """
    dtype = pandas_dtype(dtype)

    if is_timedelta64_ns_dtype(dtype):
        # Same resolution as ours: nothing to convert.
        return self.copy() if copy else self

    if is_timedelta64_dtype(dtype):
        # by pandas convention, converting to non-nano timedelta64
        # returns an int64-dtyped array with ints representing multiples
        # of the desired timedelta unit.  This is essentially division
        if not self._hasnans:
            converted = self._data.astype(dtype, copy=copy)
            return converted.astype('i8')

        # avoid double-copying: the masking step below is asked to
        # convert to float64 (presumably so missing entries can be
        # represented -- confirm against _maybe_mask_results)
        converted = self._data.astype(dtype, copy=False)
        return self._maybe_mask_results(converted,
                                        fill_value=None,
                                        convert='float64')

    return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)

# ----------------------------------------------------------------
# Rendering Methods

def _formatter(self, boxed=False):
    """
    Return the timedelta64 formatter built for this array.

    ``boxed`` is accepted but not consulted here; ``box=True`` is always
    passed to the formatter factory.
    """
    from pandas.io.formats.format import (
        _get_format_timedelta64 as build_formatter)
    return build_formatter(self, box=True)

def _format_native_types(self, na_rep='NaT', date_format=None):
    """
    Format each element of ``self._data`` and collect the results.

    Parameters
    ----------
    na_rep : str, default 'NaT'
        Passed to ``_get_format_timedelta64`` when building the
        per-element formatter.
    date_format : optional
        Accepted but not consulted in this method.

    Returns
    -------
    ndarray
        ``np.array`` built from the formatter's output for every element.
    """
    from pandas.io.formats.format import _get_format_timedelta64

    formatter = _get_format_timedelta64(self._data, na_rep)
    return np.array([formatter(x) for x in self._data])

# ----------------------------------------------------------------
# Arithmetic Methods
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
is_period_dtype, is_scalar, is_string_dtype, is_string_like_dtype,
is_timedelta64_dtype, needs_i8_conversion, pandas_dtype)
from .generic import (
ABCExtensionArray, ABCGeneric, ABCIndexClass, ABCMultiIndex, ABCSeries)
ABCDatetimeArray, ABCExtensionArray, ABCGeneric, ABCIndexClass,
ABCMultiIndex, ABCSeries, ABCTimedeltaArray)
from .inference import is_list_like

isposinf_scalar = libmissing.isposinf_scalar
Expand Down Expand Up @@ -108,7 +109,8 @@ def _isna_new(obj):
elif isinstance(obj, ABCMultiIndex):
raise NotImplementedError("isna is not defined for MultiIndex")
elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass,
ABCExtensionArray)):
ABCExtensionArray,
ABCDatetimeArray, ABCTimedeltaArray)):
return _isna_ndarraylike(obj)
elif isinstance(obj, ABCGeneric):
return obj._constructor(obj._data.isna(func=isna))
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,8 +742,9 @@ def view(self, cls=None):
Parameters
----------
dtype : numpy dtype or pandas type
Note that any integer `dtype` is treated as ``'int64'``,
regardless of the sign and size.
Note that any signed integer `dtype` is treated as ``'int64'``,
and any unsigned integer `dtype` is treated as ``'uint64'``,
regardless of the size.
copy : bool, default True
By default, astype always returns a newly allocated object.
If copy is set to False and internal requirements on dtype are
Expand Down
35 changes: 14 additions & 21 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,8 @@
from pandas.util._decorators import Appender, cache_readonly

from pandas.core.dtypes.common import (
ensure_int64, is_bool_dtype, is_categorical_dtype,
is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype,
is_integer, is_integer_dtype, is_list_like, is_object_dtype,
is_period_dtype, is_scalar, is_string_dtype)
ensure_int64, is_bool_dtype, is_dtype_equal, is_float, is_integer,
is_list_like, is_period_dtype, is_scalar)
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries

from pandas.core import algorithms, ops
Expand All @@ -39,6 +37,7 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):

# override DatetimeLikeArrayMixin method
copy = Index.copy
view = Index.view

# DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
# properties there. They can be made into cache_readonly for Index
Expand Down Expand Up @@ -550,24 +549,18 @@ def _maybe_box_as_values(self, values, **attribs):
# - sort_values
return values

@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
    # NOTE: no docstring here on purpose -- @Appender supplies the shared
    # 'astype' documentation.
    if is_dtype_equal(self.dtype, dtype) and copy is False:
        # Ensure that self.astype(self.dtype) is self
        return self

    # All dtype-specific casting is delegated to the backing array.
    new_values = self._eadata.astype(dtype, copy=copy)

    # pass copy=False because any copying will be done in the
    # _eadata.astype call above
    return Index(new_values,
                 dtype=new_values.dtype, name=self.name, copy=False)

@Appender(DatetimeLikeArrayMixin._time_shift.__doc__)
def _time_shift(self, periods, freq=None):
Expand Down
Loading

0 comments on commit 0277ee7

Please sign in to comment.