diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 0463a30cd11351..f82004747f0d0e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -296,9 +296,22 @@ def __iter__(self): @property def asi8(self): + # type: () -> ndarray + """ + Integer representation of the values. + + Returns + ------- + ndarray + An ndarray with int64 dtype. + """ # do not cache or you'll create a memory leak return self._data.view('i8') + @property + def _ndarray_values(self): + return self._data + # ---------------------------------------------------------------- # Rendering Methods @@ -469,7 +482,7 @@ def _isnan(self): return (self.asi8 == iNaT) @property # NB: override with cache_readonly in immutable subclasses - def hasnans(self): + def _hasnans(self): """ return if I have any nans; enables various perf speedups """ @@ -493,7 +506,7 @@ def _maybe_mask_results(self, result, fill_value=iNaT, convert=None): This is an internal routine """ - if self.hasnans: + if self._hasnans: if convert: result = result.astype(convert) if fill_value is None: @@ -696,7 +709,7 @@ def _add_delta_tdi(self, other): new_values = checked_add_with_arr(self_i8, other_i8, arr_mask=self._isnan, b_mask=other._isnan) - if self.hasnans or other.hasnans: + if self._hasnans or other._hasnans: mask = (self._isnan) | (other._isnan) new_values[mask] = iNaT return new_values.view('i8') @@ -764,7 +777,7 @@ def _sub_period_array(self, other): b_mask=other._isnan) new_values = np.array([self.freq.base * x for x in new_values]) - if self.hasnans or other.hasnans: + if self._hasnans or other._hasnans: mask = (self._isnan) | (other._isnan) new_values[mask] = NaT return new_values @@ -1085,7 +1098,7 @@ def _evaluate_compare(self, other, op): elif lib.is_scalar(lib.item_from_zerodim(other)): # ndarray scalar other = [other.item()] - other = type(self)(other) + other = type(self)._from_sequence(other) # compare result = op(self.asi8, other.asi8) diff --git 
a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 59e9fe49f650ab..2d1330dd871523 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -115,7 +115,7 @@ def wrapper(self, other): else: if isinstance(other, list): try: - other = type(self)(other) + other = type(self)._from_sequence(other) except ValueError: other = np.array(other, dtype=np.object_) elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries, @@ -147,7 +147,7 @@ def wrapper(self, other): if o_mask.any(): result[o_mask] = nat_result - if self.hasnans: + if self._hasnans: result[self._isnan] = nat_result return result @@ -349,6 +349,19 @@ def _box_func(self): @property def dtype(self): + # type: () -> Union[np.dtype, DatetimeTZDtype] + """ + The dtype for the DatetimeArray. + + Returns + ------- + numpy.dtype or DatetimeTZDtype + If the values are tz-naive, then ``np.dtype('datetime64[ns]')`` + is returned. + + If the values are tz-aware, then the ``DatetimeTZDtype`` + is returned. + """ if self.tz is None: return _NS_DTYPE return DatetimeTZDtype('ns', self.tz) @@ -356,7 +369,12 @@ def dtype(self): @property def tz(self): """ - Return timezone. + Return timezone, if any. + + Returns + ------- + datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None + Returns None when the array is tz-naive. 
""" # GH 18595 return self._tz @@ -534,7 +552,7 @@ def _sub_datetime_arraylike(self, other): other_i8 = other.asi8 new_values = checked_add_with_arr(self_i8, -other_i8, arr_mask=self._isnan) - if self.hasnans or other.hasnans: + if self._hasnans or other._hasnans: mask = (self._isnan) | (other._isnan) new_values[mask] = iNaT return new_values.view('timedelta64[ns]') diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 6fd98bb25380a8..16951275707ccb 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -84,7 +84,7 @@ def wrapper(self, other): other = Period(other, freq=self.freq) result = op(other.ordinal) - if self.hasnans: + if self._hasnans: result[self._isnan] = nat_result return result @@ -499,7 +499,7 @@ def _time_shift(self, n, freq=None): "{cls}._time_shift" .format(cls=type(self).__name__)) values = self.asi8 + n * self.freq.n - if self.hasnans: + if self._hasnans: values[self._isnan] = iNaT return type(self)(values, freq=self.freq) @@ -561,7 +561,7 @@ def asfreq(self, freq=None, how='E'): new_data = period_asfreq_arr(ordinal, base1, base2, end) - if self.hasnans: + if self._hasnans: new_data[self._isnan] = iNaT return type(self)(new_data, freq=freq) @@ -581,7 +581,7 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): else: formatter = lambda dt: u'%s' % dt - if self.hasnans: + if self._hasnans: mask = self._isnan values[mask] = na_rep imask = ~mask @@ -668,7 +668,7 @@ def _sub_period(self, other): new_data = asi8 - other.ordinal new_data = np.array([self.freq * x for x in new_data]) - if self.hasnans: + if self._hasnans: new_data[self._isnan] = NaT return new_data @@ -983,7 +983,7 @@ def dt64arr_to_periodarr(data, freq, tz=None): """ if data.dtype != np.dtype('M8[ns]'): - raise ValueError('Wrong dtype: %s' % data.dtype) + raise ValueError('Wrong dtype: {dtype}'.format(dtype=data.dtype)) if freq is None: if isinstance(data, ABCIndexClass): diff --git 
a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 314a3948f1032e..06a9627a290c6f 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -54,7 +54,7 @@ def _field_accessor(name, alias, docstring=None): def f(self): values = self.asi8 result = get_timedelta_field(values, alias) - if self.hasnans: + if self._hasnans: result = self._maybe_mask_results(result, fill_value=None, convert='float64') @@ -102,7 +102,7 @@ def wrapper(self, other): if o_mask.any(): result[o_mask] = nat_result - if self.hasnans: + if self._hasnans: result[self._isnan] = nat_result return result @@ -714,7 +714,7 @@ def components(self): columns = ['days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds', 'nanoseconds'] - hasnans = self.hasnans + hasnans = self._hasnans if hasnans: def f(x): if isna(x): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a2cf88fa9cb1a2..478902fe53e581 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -682,7 +682,7 @@ def __array__(self, dtype=None): """ The array interface, return my values. """ - return self._data.view(np.ndarray) + return np.asarray(self._data, dtype=dtype) def __array_wrap__(self, result, context=None): """ @@ -739,6 +739,8 @@ def view(self, cls=None): Parameters ---------- dtype : numpy dtype or pandas type + Note that any integer `dtype` is treated as ``'int64'``, + regardless of the sign and size. copy : bool, default True By default, astype always returns a newly allocated object. 
If copy is set to False and internal requirements on dtype are diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index db0cb88b06b2b0..86fa7f785914f0 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -40,17 +40,22 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): # override DatetimeLikeArrayMixin method copy = Index.copy unique = Index.unique - take = Index.take # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are # properties there. They can be made into cache_readonly for Index # subclasses bc they are immutable inferred_freq = cache_readonly(DatetimeLikeArrayMixin.inferred_freq.fget) _isnan = cache_readonly(DatetimeLikeArrayMixin._isnan.fget) - hasnans = cache_readonly(DatetimeLikeArrayMixin.hasnans.fget) + hasnans = cache_readonly(DatetimeLikeArrayMixin._hasnans.fget) + _hasnans = hasnans # for index/array-agnostic code _resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget) resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget) + # A few methods that are shared + _maybe_mask_results = DatetimeLikeArrayMixin._maybe_mask_results + + # ------------------------------------------------------------------------ + def equals(self, other): """ Determines if two Index objects contain the same elements. diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 0e4132524045c5..09e741af363dac 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -227,11 +227,11 @@ def __new__(cls, data=None, "endpoints is deprecated.
Use " "`pandas.date_range` instead.", FutureWarning, stacklevel=2) - result = cls._generate_range(start, end, periods, - freq=freq, tz=tz, normalize=normalize, - closed=closed, ambiguous=ambiguous) - result.name = name - return result + dtarr = DatetimeArray._generate_range( + start, end, periods, + freq=freq, tz=tz, normalize=normalize, + closed=closed, ambiguous=ambiguous) + return cls(dtarr, name=name) if is_scalar(data): raise TypeError("{cls}() must be called with a " @@ -1473,12 +1473,12 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None, if freq is None and com._any_none(periods, start, end): freq = 'D' - result = DatetimeIndex._generate_range( + dtarr = DatetimeArray._generate_range( start=start, end=end, periods=periods, freq=freq, tz=tz, normalize=normalize, closed=closed, **kwargs) - result.name = name + result = DatetimeIndex(dtarr, name=name) return result diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index e6c714683979fa..47f7f7cf860fc7 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -125,15 +125,6 @@ def _join_i8_wrapper(joinf, **kwargs): _left_indexer_unique = _join_i8_wrapper( libjoin.left_join_indexer_unique_int64, with_indexers=False) - # define my properties & methods for delegation - _other_ops = [] - _bool_ops = [] - _object_ops = ['freq'] - _field_ops = ['days', 'seconds', 'microseconds', 'nanoseconds'] - _datetimelike_ops = _field_ops + _object_ops + _bool_ops - _datetimelike_methods = ["to_pytimedelta", "total_seconds", - "round", "floor", "ceil"] - _engine_type = libindex.TimedeltaEngine _comparables = ['name', 'freq'] @@ -143,6 +134,14 @@ def _join_i8_wrapper(joinf, **kwargs): _freq = None + _box_func = TimedeltaArray._box_func + _bool_ops = TimedeltaArray._bool_ops + _object_ops = TimedeltaArray._object_ops + _field_ops = TimedeltaArray._field_ops + _datetimelike_ops = TimedeltaArray._datetimelike_ops + _datetimelike_methods = 
TimedeltaArray._datetimelike_methods + _other_ops = TimedeltaArray._other_ops + # ------------------------------------------------------------------- # Constructors @@ -163,10 +162,9 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, "endpoints is deprecated. Use " "`pandas.timedelta_range` instead.", FutureWarning, stacklevel=2) - result = cls._generate_range(start, end, periods, freq, - closed=closed) - result.name = name - return result + tdarr = TimedeltaArray._generate_range(start, end, periods, freq, + closed=closed) + return cls(tdarr, name=name) if is_scalar(data): raise TypeError('{cls}() must be called with a ' @@ -766,7 +764,6 @@ def timedelta_range(start=None, end=None, periods=None, freq=None, freq = 'D' freq, freq_infer = dtl.maybe_infer_freq(freq) - result = TimedeltaIndex._generate_range(start, end, periods, freq, - closed=closed) - result.name = name - return result + tdarr = TimedeltaArray._generate_range(start, end, periods, freq, + closed=closed) + return TimedeltaIndex(tdarr, name=name) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 0adeb7997a888a..58344c0ec9ec73 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1592,8 +1592,8 @@ def _right_outer_join(x, y, max_groups): def _factorize_keys(lk, rk, sort=True): # Some pre-processing for non-ndarray lk / rk if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): - lk = lk.values - rk = rk.values + lk = lk._data + rk = rk._data elif (is_categorical_dtype(lk) and is_categorical_dtype(rk) and diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 4fca5216e24f3c..45d2615a3d055b 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -171,6 +171,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, - ndarray of Timestamps if box=False """ from pandas import DatetimeIndex + from pandas.core.arrays import DatetimeArrayMixin as 
DatetimeArray from pandas.core.arrays.datetimes import ( maybe_convert_dtype, objects_to_datetime64ns) @@ -179,14 +180,14 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, # these are shortcutable if is_datetime64tz_dtype(arg): - if not isinstance(arg, DatetimeIndex): + if not isinstance(arg, (DatetimeArray, DatetimeIndex)): return DatetimeIndex(arg, tz=tz, name=name) if tz == 'utc': arg = arg.tz_convert(None).tz_localize(tz) return arg elif is_datetime64_ns_dtype(arg): - if box and not isinstance(arg, DatetimeIndex): + if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index aef30c1bb77447..97ac3fce070880 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -8,6 +8,17 @@ import pandas.util.testing as tm +class TestTimedeltaArrayConstructor(object): + def test_copy(self): + data = np.array([1, 2, 3], dtype='m8[ns]') + arr = TimedeltaArray(data, copy=False) + assert arr._data is data + + arr = TimedeltaArray(data, copy=True) + assert arr._data is not data + assert arr._data.base is not data + + class TestTimedeltaArray(object): def test_from_sequence_dtype(self): msg = r"Only timedelta64\[ns\] dtype is valid" diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 88c322ff7c9ff4..bca99d27bda56a 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -7,8 +7,7 @@ import pytest import pytz -from pandas._libs.tslib import OutOfBoundsDatetime -from pandas._libs.tslibs import conversion +from pandas._libs.tslibs import OutOfBoundsDatetime, conversion import pandas as pd from pandas import ( @@ -21,7 +20,8 @@ class TestDatetimeIndex(object): - @pytest.mark.parametrize('dt_cls', [DatetimeIndex, 
DatetimeArray]) + @pytest.mark.parametrize('dt_cls', [DatetimeIndex, + DatetimeArray._from_sequence]) def test_freq_validation_with_nat(self, dt_cls): # GH#11587 make sure we get a useful error message when generate_range # raises diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index c24c1025ea63cd..13f9648d462166 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -24,6 +24,7 @@ from pandas import ( DataFrame, DatetimeIndex, Index, NaT, Series, Timestamp, compat, date_range, isna, to_datetime) +from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray from pandas.core.tools import datetimes as tools from pandas.util import testing as tm from pandas.util.testing import assert_series_equal @@ -246,6 +247,18 @@ def test_to_datetime_parse_timezone_keeps_name(self): class TestToDatetime(object): + @pytest.mark.parametrize('tz', [None, 'US/Central']) + def test_to_datetime_dtarr(self, tz): + # DatetimeArray + dti = date_range('1965-04-03', periods=19, freq='2W', tz=tz) + arr = DatetimeArray(dti) + + result = to_datetime(arr) + assert result is arr + + result = to_datetime(arr, box=True) + assert result is arr + def test_to_datetime_pydatetime(self): actual = pd.to_datetime(datetime(2008, 1, 15)) assert actual == datetime(2008, 1, 15)