From b812e6a8b4aa630c9314378cbee8f740068092b1 Mon Sep 17 00:00:00 2001 From: jschendel Date: Sun, 24 Dec 2017 17:46:07 -0700 Subject: [PATCH] CLN: Consolidate Index.astype --- pandas/core/indexes/base.py | 8 +++- pandas/core/indexes/datetimelike.py | 34 ++++++++++++--- pandas/core/indexes/datetimes.py | 41 +++++++------------ pandas/core/indexes/interval.py | 15 +------ pandas/core/indexes/numeric.py | 29 ++++--------- pandas/core/indexes/period.py | 26 ++++-------- pandas/core/indexes/timedeltas.py | 29 ++++--------- pandas/tests/indexes/datetimes/test_astype.py | 14 +++---- pandas/tests/indexes/period/test_period.py | 16 +++++--- pandas/tests/indexes/test_interval.py | 16 ++++---- .../tests/indexes/timedeltas/test_astype.py | 17 ++++---- 11 files changed, 109 insertions(+), 136 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 79de63b0caeb6..8491495dd38cf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1069,8 +1069,12 @@ def astype(self, dtype, copy=True): from .category import CategoricalIndex return CategoricalIndex(self.values, name=self.name, dtype=dtype, copy=copy) - return Index(self.values.astype(dtype, copy=copy), name=self.name, - dtype=dtype) + try: + return Index(self.values.astype(dtype, copy=copy), name=self.name, + dtype=dtype) + except (TypeError, ValueError): + msg = 'Cannot cast {name} to dtype {dtype}' + raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) def _to_safe_for_reshape(self): """ convert to object if we are a categorical """ diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 8cc996285fbbd..24d0d11041503 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -11,13 +11,22 @@ import numpy as np from pandas.core.dtypes.common import ( - is_integer, is_float, - is_bool_dtype, _ensure_int64, - is_scalar, is_dtype_equal, - is_list_like, is_timedelta64_dtype) + _ensure_int64, + is_dtype_equal, + is_float, + is_integer, + is_list_like, + is_scalar, + is_bool_dtype, + is_categorical_dtype, + is_datetime_or_timedelta_dtype, + is_float_dtype, + is_integer_dtype, + is_object_dtype, + is_string_dtype, + is_timedelta64_dtype) from pandas.core.dtypes.generic import ( - ABCIndex, ABCSeries, - ABCPeriodIndex, ABCIndexClass) + ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass) from pandas.core.dtypes.missing import isna from pandas.core import common as com, algorithms from pandas.core.algorithms import checked_add_with_arr @@ -859,6 +868,19 @@ def _concat_same_dtype(self, to_concat, name): new_data = np.concatenate([c.asi8 for c in to_concat]) return self._simple_new(new_data, **attribs) + def astype(self, dtype, copy=True): + if is_object_dtype(dtype): + return self._box_values_as_index() + elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): + return Index(self.format(), name=self.name, dtype=object) + elif is_integer_dtype(dtype): + return Index(self.values.astype('i8', copy=copy), name=self.name, + dtype='i8') + elif is_float_dtype(dtype) or is_datetime_or_timedelta_dtype(dtype): + msg = 'Cannot cast {name} to dtype {dtype}' + raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) + return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy) + def _ensure_datetimelike_to_i8(other): """ helper for coercing an input scalar or array to i8 """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index bec26ef72d63a..ae4a58602c492 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -10,17 +10,19 @@ from pandas.core.base import _shared_docs from pandas.core.dtypes.common import ( - _NS_DTYPE, _INT64_DTYPE, - is_object_dtype, is_datetime64_dtype, - is_datetimetz, is_dtype_equal, + _INT64_DTYPE, + _NS_DTYPE, + is_object_dtype, + is_datetime64_dtype, + is_datetimetz, + is_dtype_equal, is_timedelta64_dtype, - is_integer, is_float, + is_integer, + is_float, is_integer_dtype, is_datetime64_ns_dtype, is_period_dtype, is_bool_dtype, - is_string_dtype, - is_categorical_dtype, is_string_like, is_list_like, is_scalar, @@ -36,20 +38,17 @@ from pandas.core.algorithms import checked_add_with_arr from pandas.core.indexes.base import Index, _index_shared_docs -from pandas.core.indexes.category import CategoricalIndex from pandas.core.indexes.numeric import Int64Index, Float64Index import pandas.compat as compat -from pandas.tseries.frequencies import ( - to_offset, get_period_alias, - Resolution) +from pandas.tseries.frequencies import to_offset, get_period_alias, Resolution from pandas.core.indexes.datetimelike import ( DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin) from pandas.tseries.offsets import ( DateOffset, generate_range, Tick, CDay, prefix_mapping) from pandas.core.tools.timedeltas import to_timedelta -from pandas.util._decorators import (Appender, cache_readonly, - deprecate_kwarg, Substitution) +from pandas.util._decorators import ( + Appender, cache_readonly, deprecate_kwarg, Substitution) import pandas.core.common as com import pandas.tseries.offsets as offsets import pandas.core.tools.datetimes as tools @@ -906,25 +905,13 @@ def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) - if is_object_dtype(dtype): - return self._box_values_as_index() - elif is_integer_dtype(dtype): - return Index(self.values.astype('i8', copy=copy), name=self.name, - dtype='i8') - elif is_datetime64_ns_dtype(dtype): + if is_datetime64_ns_dtype(dtype): if self.tz is not None: return self.tz_convert('UTC').tz_localize(None) - elif copy is True: - return self.copy() - return self - elif is_categorical_dtype(dtype): - return CategoricalIndex(self.values, name=self.name, dtype=dtype, - copy=copy) - elif is_string_dtype(dtype): - return Index(self.format(), name=self.name, dtype=object) + return self.copy() if copy else self elif is_period_dtype(dtype): return self.to_period(freq=dtype.freq) - raise TypeError('Cannot cast DatetimeIndex to dtype %s' % dtype) + return super(DatetimeIndex, self).astype(dtype, copy=copy) def _get_time_micros(self): values = self.asi8 diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 49e574dcbae45..2a132f683c519 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -12,8 +12,6 @@ is_datetime_or_timedelta_dtype, is_datetime64tz_dtype, is_integer_dtype, - is_object_dtype, - is_categorical_dtype, is_float_dtype, is_interval_dtype, is_scalar, @@ -29,7 +27,6 @@ Interval, IntervalMixin, IntervalTree, intervals_to_interval_bounds) -from pandas.core.indexes.category import CategoricalIndex from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.indexes.multi import MultiIndex @@ -671,16 +668,8 @@ def copy(self, deep=False, name=None): @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): if is_interval_dtype(dtype): - if copy: - self = self.copy() - return self - elif is_object_dtype(dtype): - return Index(self.values, dtype=object) - elif is_categorical_dtype(dtype): - return CategoricalIndex(self.values, name=self.name, dtype=dtype, - copy=copy) - raise ValueError('Cannot cast IntervalIndex to dtype {dtype}' - .format(dtype=dtype)) + return self.copy() if copy else self + return super(IntervalIndex, self).astype(dtype, copy=copy) @cache_readonly def dtype(self): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 5fc9cb47362d6..5995b9fc7674c 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -4,10 +4,8 @@ from pandas.core.dtypes.common import ( is_dtype_equal, pandas_dtype, - is_float_dtype, - is_object_dtype, + needs_i8_conversion, is_integer_dtype, - is_categorical_dtype, is_bool, is_bool_dtype, is_scalar) @@ -17,7 +15,6 @@ from pandas.core import algorithms from pandas.core.indexes.base import ( Index, InvalidIndexError, _index_shared_docs) -from pandas.core.indexes.category import CategoricalIndex from pandas.util._decorators import Appender, cache_readonly import pandas.core.dtypes.concat as _concat import pandas.core.indexes.base as ibase @@ -315,22 +312,14 @@ def inferred_type(self): @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) - if is_float_dtype(dtype): - values = self._values.astype(dtype, copy=copy) - elif is_integer_dtype(dtype): - if self.hasnans: - raise ValueError('cannot convert float NaN to integer') - values = self._values.astype(dtype, copy=copy) - elif is_object_dtype(dtype): - values = self._values.astype('object', copy=copy) - elif is_categorical_dtype(dtype): - return CategoricalIndex(self, name=self.name, dtype=dtype, - copy=copy) - else: - raise TypeError('Setting {cls} dtype to anything other than ' - 'float64, object, or category is not supported' - .format(cls=self.__class__)) - return Index(values, name=self.name, dtype=dtype) + if needs_i8_conversion(dtype): + msg = ('Cannot convert Float64Index to dtype {dtype}; integer ' + 'values are required for conversion').format(dtype=dtype) + raise TypeError(msg) + elif is_integer_dtype(dtype) and self.hasnans: + # GH 13149 + raise ValueError('Cannot convert NA to integer') + return super(Float64Index, self).astype(dtype, copy=copy) @Appender(_index_shared_docs['_convert_scalar_indexer']) def _convert_scalar_indexer(self, key, kind=None): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 64756906d8a63..8b35b1a231551 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -7,16 +7,14 @@ from pandas.core.dtypes.common import ( is_integer, is_float, - is_object_dtype, is_integer_dtype, is_float_dtype, is_scalar, is_datetime64_dtype, - is_datetime64tz_dtype, + is_datetime64_any_dtype, is_timedelta64_dtype, is_period_dtype, is_bool_dtype, - is_categorical_dtype, pandas_dtype, _ensure_object) from pandas.core.dtypes.dtypes import PeriodDtype @@ -24,7 +22,6 @@ import pandas.tseries.frequencies as frequencies from pandas.tseries.frequencies import get_freq_code as _gfc -from pandas.core.indexes.category import CategoricalIndex from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexes.datetimelike import DatelikeOps, DatetimeIndexOpsMixin @@ -506,23 +503,14 @@ def asof_locs(self, where, mask): @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True, how='start'): dtype = pandas_dtype(dtype) - if is_object_dtype(dtype): - return self._box_values_as_index() - elif is_integer_dtype(dtype): - if copy: - return self._int64index.copy() - else: - return self._int64index - elif is_datetime64_dtype(dtype): - return self.to_timestamp(how=how) - elif is_datetime64tz_dtype(dtype): - return self.to_timestamp(how=how).tz_localize(dtype.tz) + if is_integer_dtype(dtype): + return self._int64index.copy() if copy else self._int64index + elif is_datetime64_any_dtype(dtype): + tz = getattr(dtype, 'tz', None) + return self.to_timestamp(how=how).tz_localize(tz) elif is_period_dtype(dtype): return self.asfreq(freq=dtype.freq) - elif is_categorical_dtype(dtype): - return CategoricalIndex(self.values, name=self.name, dtype=dtype, - copy=copy) - raise TypeError('Cannot cast PeriodIndex to dtype %s' % dtype) + return super(PeriodIndex, self).astype(dtype, copy=copy) @Substitution(klass='PeriodIndex') @Appender(_shared_docs['searchsorted']) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 25c764b138465..3f19527dda13f 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -4,15 +4,13 @@ import numpy as np from pandas.core.dtypes.common import ( _TD_DTYPE, - is_integer, is_float, + is_integer, + is_float, is_bool_dtype, is_list_like, is_scalar, - is_integer_dtype, - is_object_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, - is_categorical_dtype, pandas_dtype, _ensure_int64) from pandas.core.dtypes.missing import isna @@ -20,7 +18,6 @@ from pandas.core.common import _maybe_box, _values_from_object from pandas.core.indexes.base import Index -from pandas.core.indexes.category import CategoricalIndex from pandas.core.indexes.numeric import Int64Index import pandas.compat as compat from pandas.compat import u @@ -483,28 +480,16 @@ def to_pytimedelta(self): @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) - - if is_object_dtype(dtype): - return self._box_values_as_index() - elif is_timedelta64_ns_dtype(dtype): - if copy is True: - return self.copy() - return self + if is_timedelta64_ns_dtype(dtype): + return self.copy() if copy else self elif is_timedelta64_dtype(dtype): # return an index (essentially this is division) result = self.values.astype(dtype, copy=copy) if self.hasnans: - return Index(self._maybe_mask_results(result, - convert='float64'), - name=self.name) + values = self._maybe_mask_results(result, convert='float64') + return Index(values, name=self.name) return Index(result.astype('i8'), name=self.name) - elif is_integer_dtype(dtype): - return Index(self.values.astype('i8', copy=copy), dtype='i8', - name=self.name) - elif is_categorical_dtype(dtype): - return CategoricalIndex(self.values, name=self.name, dtype=dtype, - copy=copy) - raise TypeError('Cannot cast TimedeltaIndex to dtype %s' % dtype) + return super(TimedeltaIndex, self).astype(dtype, copy=copy) def union(self, other): """ diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index e211807b6a3e4..d40efe34d0182 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -126,15 +126,15 @@ def test_astype_object(self): tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_)) assert casted.tolist() == exp_values - def test_astype_raises(self): + @pytest.mark.parametrize('dtype', [ + float, 'timedelta64', 'timedelta64[ns]', 'datetime64', + 'datetime64[D]']) + def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) - - pytest.raises(TypeError, idx.astype, float) - pytest.raises(TypeError, idx.astype, 'timedelta64') - pytest.raises(TypeError, idx.astype, 'timedelta64[ns]') - pytest.raises(TypeError, idx.astype, 'datetime64') - pytest.raises(TypeError, idx.astype, 'datetime64[D]') + msg = 'Cannot cast DatetimeIndex to dtype' + with tm.assert_raises_regex(TypeError, msg): + idx.astype(dtype) def test_index_convert_to_datetime_array(self): def _check_rng(rng): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 599f6efd16f74..ab341b70dfe91 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -39,19 +39,23 @@ def test_astype_conversion(self): dtype=np.int64) tm.assert_index_equal(result, expected) + result = idx.astype(str) + expected = Index(str(x) for x in idx) + tm.assert_index_equal(result, expected) + idx = period_range('1990', '2009', freq='A') result = idx.astype('i8') tm.assert_index_equal(result, Index(idx.asi8)) tm.assert_numpy_array_equal(result.values, idx.asi8) - def test_astype_raises(self): + @pytest.mark.parametrize('dtype', [ + float, 'timedelta64', 'timedelta64[ns]']) + def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') - - pytest.raises(TypeError, idx.astype, str) - pytest.raises(TypeError, idx.astype, float) - pytest.raises(TypeError, idx.astype, 'timedelta64') - pytest.raises(TypeError, idx.astype, 'timedelta64[ns]') + msg = 'Cannot cast PeriodIndex to dtype' + with tm.assert_raises_regex(TypeError, msg): + idx.astype(dtype) def test_pickle_compat_construction(self): pass diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 74446af8b77f6..4169c93809059 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -390,14 +390,7 @@ def test_equals(self, closed): assert not expected.equals(expected_other_closed) def test_astype(self, closed): - idx = self.create_index(closed=closed) - - for dtype in [np.int64, np.float64, 'datetime64[ns]', - 'datetime64[ns, US/Eastern]', 'timedelta64', - 'period[M]']: - pytest.raises(ValueError, idx.astype, dtype) - result = idx.astype(object) tm.assert_index_equal(result, Index(idx.values, dtype='object')) assert not idx.equals(result) @@ -407,6 +400,15 @@ def test_astype(self, closed): tm.assert_index_equal(result, idx) assert result.equals(idx) + @pytest.mark.parametrize('dtype', [ + np.int64, np.float64, 'period[M]', 'timedelta64', 'datetime64[ns]', + 'datetime64[ns, US/Eastern]']) + def test_astype_errors(self, closed, dtype): + idx = self.create_index(closed=closed) + msg = 'Cannot cast IntervalIndex to dtype' + with tm.assert_raises_regex(TypeError, msg): + idx.astype(dtype) + @pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series]) def test_where(self, closed, klass): idx = self.create_index(closed=closed) diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index 0fa0e036096d0..af16fe71edcf3 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -40,8 +40,11 @@ def test_astype(self): dtype=np.int64) tm.assert_index_equal(result, expected) - rng = timedelta_range('1 days', periods=10) + result = idx.astype(str) + expected = Index(str(x) for x in idx) + tm.assert_index_equal(result, expected) + rng = timedelta_range('1 days', periods=10) result = rng.astype('i8') tm.assert_index_equal(result, Index(rng.asi8)) tm.assert_numpy_array_equal(rng.asi8, result.values) @@ -62,14 +65,14 @@ def test_astype_timedelta64(self): tm.assert_index_equal(result, idx) assert result is idx - def test_astype_raises(self): + @pytest.mark.parametrize('dtype', [ + float, 'datetime64', 'datetime64[ns]']) + def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) - - pytest.raises(TypeError, idx.astype, float) - pytest.raises(TypeError, idx.astype, str) - pytest.raises(TypeError, idx.astype, 'datetime64') - pytest.raises(TypeError, idx.astype, 'datetime64[ns]') + msg = 'Cannot cast TimedeltaIndex to dtype' + with tm.assert_raises_regex(TypeError, msg): + idx.astype(dtype) def test_pickle_compat_construction(self): pass