Skip to content

Commit

Permalink
CLN/BUG: Consolidate Index.astype and fix tz aware bugs (pandas-dev#1…
Browse files Browse the repository at this point in the history
  • Loading branch information
jschendel authored and hexgnu committed Dec 28, 2017
1 parent e248340 commit e577e2f
Show file tree
Hide file tree
Showing 13 changed files with 133 additions and 141 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ Conversion
- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`)
- Bug in :class:`Timestamp` where comparison with an array of ``Timestamp`` objects would result in a ``RecursionError`` (:issue:`15183`)
- Bug in :class:`WeekOfMonth` and :class:`Week` where addition and subtraction did not roll correctly (:issue:`18510`, :issue:`18672`, :issue:`18864`)
- Bug in :meth:`DatetimeIndex.astype` when converting between timezone aware dtypes, and converting from timezone aware to naive (:issue:`18951`)


Indexing
Expand Down
12 changes: 9 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1065,12 +1065,18 @@ def _to_embed(self, keep_tz=False, dtype=None):

@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
if is_categorical_dtype(dtype):
if is_dtype_equal(self.dtype, dtype):
return self.copy() if copy else self
elif is_categorical_dtype(dtype):
from .category import CategoricalIndex
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
copy=copy)
return Index(self.values.astype(dtype, copy=copy), name=self.name,
dtype=dtype)
try:
return Index(self.values.astype(dtype, copy=copy), name=self.name,
dtype=dtype)
except (TypeError, ValueError):
msg = 'Cannot cast {name} to dtype {dtype}'
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))

def _to_safe_for_reshape(self):
""" convert to object if we are a categorical """
Expand Down
37 changes: 31 additions & 6 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,22 @@

import numpy as np
from pandas.core.dtypes.common import (
is_integer, is_float,
is_bool_dtype, _ensure_int64,
is_scalar, is_dtype_equal,
is_list_like, is_timedelta64_dtype)
_ensure_int64,
is_dtype_equal,
is_float,
is_integer,
is_list_like,
is_scalar,
is_bool_dtype,
is_categorical_dtype,
is_datetime_or_timedelta_dtype,
is_float_dtype,
is_integer_dtype,
is_object_dtype,
is_string_dtype,
is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
ABCIndex, ABCSeries,
ABCPeriodIndex, ABCIndexClass)
ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass)
from pandas.core.dtypes.missing import isna
from pandas.core import common as com, algorithms
from pandas.core.algorithms import checked_add_with_arr
Expand Down Expand Up @@ -859,6 +868,22 @@ def _concat_same_dtype(self, to_concat, name):
new_data = np.concatenate([c.asi8 for c in to_concat])
return self._simple_new(new_data, **attribs)

def astype(self, dtype, copy=True):
if is_object_dtype(dtype):
return self._box_values_as_index()
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
return Index(self.format(), name=self.name, dtype=object)
elif is_integer_dtype(dtype):
return Index(self.values.astype('i8', copy=copy), name=self.name,
dtype='i8')
elif (is_datetime_or_timedelta_dtype(dtype) and
not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
# disallow conversion between datetime/timedelta,
# and conversions for any datetimelike to float
msg = 'Cannot cast {name} to dtype {dtype}'
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)


def _ensure_datetimelike_to_i8(other):
""" helper for coercing an input scalar or array to i8 """
Expand Down
48 changes: 19 additions & 29 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,19 @@
from pandas.core.base import _shared_docs

from pandas.core.dtypes.common import (
_NS_DTYPE, _INT64_DTYPE,
is_object_dtype, is_datetime64_dtype,
is_datetimetz, is_dtype_equal,
_INT64_DTYPE,
_NS_DTYPE,
is_object_dtype,
is_datetime64_dtype,
is_datetimetz,
is_dtype_equal,
is_timedelta64_dtype,
is_integer, is_float,
is_integer,
is_float,
is_integer_dtype,
is_datetime64_ns_dtype,
is_period_dtype,
is_bool_dtype,
is_string_dtype,
is_categorical_dtype,
is_string_like,
is_list_like,
is_scalar,
Expand All @@ -36,20 +38,17 @@
from pandas.core.algorithms import checked_add_with_arr

from pandas.core.indexes.base import Index, _index_shared_docs
from pandas.core.indexes.category import CategoricalIndex
from pandas.core.indexes.numeric import Int64Index, Float64Index
import pandas.compat as compat
from pandas.tseries.frequencies import (
to_offset, get_period_alias,
Resolution)
from pandas.tseries.frequencies import to_offset, get_period_alias, Resolution
from pandas.core.indexes.datetimelike import (
DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin)
from pandas.tseries.offsets import (
DateOffset, generate_range, Tick, CDay, prefix_mapping)

from pandas.core.tools.timedeltas import to_timedelta
from pandas.util._decorators import (Appender, cache_readonly,
deprecate_kwarg, Substitution)
from pandas.util._decorators import (
Appender, cache_readonly, deprecate_kwarg, Substitution)
import pandas.core.common as com
import pandas.tseries.offsets as offsets
import pandas.core.tools.datetimes as tools
Expand Down Expand Up @@ -906,25 +905,16 @@ def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs):
@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
dtype = pandas_dtype(dtype)
if is_object_dtype(dtype):
return self._box_values_as_index()
elif is_integer_dtype(dtype):
return Index(self.values.astype('i8', copy=copy), name=self.name,
dtype='i8')
elif is_datetime64_ns_dtype(dtype):
if self.tz is not None:
return self.tz_convert('UTC').tz_localize(None)
elif copy is True:
return self.copy()
return self
elif is_categorical_dtype(dtype):
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
copy=copy)
elif is_string_dtype(dtype):
return Index(self.format(), name=self.name, dtype=object)
if (is_datetime64_ns_dtype(dtype) and
not is_dtype_equal(dtype, self.dtype)):
# GH 18951: datetime64_ns dtype but not equal means different tz
new_tz = getattr(dtype, 'tz', None)
if getattr(self.dtype, 'tz', None) is None:
return self.tz_localize(new_tz)
return self.tz_convert(new_tz)
elif is_period_dtype(dtype):
return self.to_period(freq=dtype.freq)
raise TypeError('Cannot cast DatetimeIndex to dtype %s' % dtype)
return super(DatetimeIndex, self).astype(dtype, copy=copy)

def _get_time_micros(self):
values = self.asi8
Expand Down
15 changes: 2 additions & 13 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
is_datetime_or_timedelta_dtype,
is_datetime64tz_dtype,
is_integer_dtype,
is_object_dtype,
is_categorical_dtype,
is_float_dtype,
is_interval_dtype,
is_scalar,
Expand All @@ -29,7 +27,6 @@
Interval, IntervalMixin, IntervalTree,
intervals_to_interval_bounds)

from pandas.core.indexes.category import CategoricalIndex
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.timedeltas import timedelta_range
from pandas.core.indexes.multi import MultiIndex
Expand Down Expand Up @@ -671,16 +668,8 @@ def copy(self, deep=False, name=None):
@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
if is_interval_dtype(dtype):
if copy:
self = self.copy()
return self
elif is_object_dtype(dtype):
return Index(self.values, dtype=object)
elif is_categorical_dtype(dtype):
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
copy=copy)
raise ValueError('Cannot cast IntervalIndex to dtype {dtype}'
.format(dtype=dtype))
return self.copy() if copy else self
return super(IntervalIndex, self).astype(dtype, copy=copy)

@cache_readonly
def dtype(self):
Expand Down
29 changes: 9 additions & 20 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@
from pandas.core.dtypes.common import (
is_dtype_equal,
pandas_dtype,
is_float_dtype,
is_object_dtype,
needs_i8_conversion,
is_integer_dtype,
is_categorical_dtype,
is_bool,
is_bool_dtype,
is_scalar)
Expand All @@ -17,7 +15,6 @@
from pandas.core import algorithms
from pandas.core.indexes.base import (
Index, InvalidIndexError, _index_shared_docs)
from pandas.core.indexes.category import CategoricalIndex
from pandas.util._decorators import Appender, cache_readonly
import pandas.core.dtypes.concat as _concat
import pandas.core.indexes.base as ibase
Expand Down Expand Up @@ -315,22 +312,14 @@ def inferred_type(self):
@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
dtype = pandas_dtype(dtype)
if is_float_dtype(dtype):
values = self._values.astype(dtype, copy=copy)
elif is_integer_dtype(dtype):
if self.hasnans:
raise ValueError('cannot convert float NaN to integer')
values = self._values.astype(dtype, copy=copy)
elif is_object_dtype(dtype):
values = self._values.astype('object', copy=copy)
elif is_categorical_dtype(dtype):
return CategoricalIndex(self, name=self.name, dtype=dtype,
copy=copy)
else:
raise TypeError('Setting {cls} dtype to anything other than '
'float64, object, or category is not supported'
.format(cls=self.__class__))
return Index(values, name=self.name, dtype=dtype)
if needs_i8_conversion(dtype):
msg = ('Cannot convert Float64Index to dtype {dtype}; integer '
'values are required for conversion').format(dtype=dtype)
raise TypeError(msg)
elif is_integer_dtype(dtype) and self.hasnans:
# GH 13149
raise ValueError('Cannot convert NA to integer')
return super(Float64Index, self).astype(dtype, copy=copy)

@Appender(_index_shared_docs['_convert_scalar_indexer'])
def _convert_scalar_indexer(self, key, kind=None):
Expand Down
26 changes: 7 additions & 19 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,21 @@
from pandas.core.dtypes.common import (
is_integer,
is_float,
is_object_dtype,
is_integer_dtype,
is_float_dtype,
is_scalar,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_datetime64_any_dtype,
is_timedelta64_dtype,
is_period_dtype,
is_bool_dtype,
is_categorical_dtype,
pandas_dtype,
_ensure_object)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCSeries

import pandas.tseries.frequencies as frequencies
from pandas.tseries.frequencies import get_freq_code as _gfc
from pandas.core.indexes.category import CategoricalIndex
from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index
from pandas.core.indexes.timedeltas import TimedeltaIndex
from pandas.core.indexes.datetimelike import DatelikeOps, DatetimeIndexOpsMixin
Expand Down Expand Up @@ -506,23 +503,14 @@ def asof_locs(self, where, mask):
@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True, how='start'):
dtype = pandas_dtype(dtype)
if is_object_dtype(dtype):
return self._box_values_as_index()
elif is_integer_dtype(dtype):
if copy:
return self._int64index.copy()
else:
return self._int64index
elif is_datetime64_dtype(dtype):
return self.to_timestamp(how=how)
elif is_datetime64tz_dtype(dtype):
return self.to_timestamp(how=how).tz_localize(dtype.tz)
if is_integer_dtype(dtype):
return self._int64index.copy() if copy else self._int64index
elif is_datetime64_any_dtype(dtype):
tz = getattr(dtype, 'tz', None)
return self.to_timestamp(how=how).tz_localize(tz)
elif is_period_dtype(dtype):
return self.asfreq(freq=dtype.freq)
elif is_categorical_dtype(dtype):
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
copy=copy)
raise TypeError('Cannot cast PeriodIndex to dtype %s' % dtype)
return super(PeriodIndex, self).astype(dtype, copy=copy)

@Substitution(klass='PeriodIndex')
@Appender(_shared_docs['searchsorted'])
Expand Down
29 changes: 6 additions & 23 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,20 @@
import numpy as np
from pandas.core.dtypes.common import (
_TD_DTYPE,
is_integer, is_float,
is_integer,
is_float,
is_bool_dtype,
is_list_like,
is_scalar,
is_integer_dtype,
is_object_dtype,
is_timedelta64_dtype,
is_timedelta64_ns_dtype,
is_categorical_dtype,
pandas_dtype,
_ensure_int64)
from pandas.core.dtypes.missing import isna
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.common import _maybe_box, _values_from_object

from pandas.core.indexes.base import Index
from pandas.core.indexes.category import CategoricalIndex
from pandas.core.indexes.numeric import Int64Index
import pandas.compat as compat
from pandas.compat import u
Expand Down Expand Up @@ -483,28 +480,14 @@ def to_pytimedelta(self):
@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
dtype = pandas_dtype(dtype)

if is_object_dtype(dtype):
return self._box_values_as_index()
elif is_timedelta64_ns_dtype(dtype):
if copy is True:
return self.copy()
return self
elif is_timedelta64_dtype(dtype):
if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
# return an index (essentially this is division)
result = self.values.astype(dtype, copy=copy)
if self.hasnans:
return Index(self._maybe_mask_results(result,
convert='float64'),
name=self.name)
values = self._maybe_mask_results(result, convert='float64')
return Index(values, name=self.name)
return Index(result.astype('i8'), name=self.name)
elif is_integer_dtype(dtype):
return Index(self.values.astype('i8', copy=copy), dtype='i8',
name=self.name)
elif is_categorical_dtype(dtype):
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
copy=copy)
raise TypeError('Cannot cast TimedeltaIndex to dtype %s' % dtype)
return super(TimedeltaIndex, self).astype(dtype, copy=copy)

def union(self, other):
"""
Expand Down
Loading

0 comments on commit e577e2f

Please sign in to comment.