From 41728a96f71829e956de40ac83c4340a325ed9a5 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 22 Mar 2017 14:24:32 +0100 Subject: [PATCH] FIX: boolean fields should still return array --- doc/source/whatsnew/v0.20.0.txt | 4 ++- pandas/tests/indexes/datetimes/test_misc.py | 33 +++++++++++++++------ pandas/tests/scalar/test_timestamp.py | 13 +++++++- pandas/tseries/index.py | 10 ++++--- 4 files changed, 45 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 402ccb83042cb..3421fe4811cdf 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -478,6 +478,8 @@ Accessing datetime fields of Index now return Index The several datetime-related attributes (see :ref:`here <timeseries.components>` for an overview) of DatetimeIndex, PeriodIndex and TimedeltaIndex previously returned numpy arrays, now they will return a new Index object (:issue:`15022`). +Only in the case of a boolean field is a boolean array still returned, to support +boolean indexing. Previous behaviour: @@ -498,7 +500,7 @@ New Behavior: This has the advantage that specific Index methods are still available on the result. On the other hand, this might have backward incompatibilities: e.g. compared to numpy arrays, Index objects are not mutable (values cannot be set -by indexing). To get the original result, you can convert to a nunpy array +by indexing). To get the original result, you can convert to a numpy array explicitly using ``np.asarray(idx.hour)``. .. 
_whatsnew_0200.api_breaking.s3: diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index bbb231bd58ad4..1a065ac475752 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -172,7 +172,7 @@ def test_normalize(self): class TestDatetime64(tm.TestCase): def test_datetimeindex_accessors(self): -<<<<<<< f2831e2a2074e27e5cd3cfc0728d989742ee4680 + dti_naive = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), periods=365) # GH 13303 @@ -258,16 +258,31 @@ def test_datetimeindex_accessors(self): dti.name = 'name' - for accessor in ['year', 'month', 'day', 'hour', 'minute', 'second', - 'microsecond', 'nanosecond', 'dayofweek', 'dayofyear', - 'weekofyear', 'quarter', - 'is_month_start', 'is_month_end', + # non boolean accessors -> return Index + for accessor in ['year', 'month', 'day', 'hour', 'minute', + 'second', 'microsecond', 'nanosecond', + 'dayofweek', 'dayofyear', 'weekofyear', + 'quarter', 'weekday_name']: + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, Index) + assert res.name == 'name' + + # boolean accessors -> return array + for accessor in ['is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', - 'is_year_start', 'is_year_end', 'weekday_name']: + 'is_year_start', 'is_year_end']: res = getattr(dti, accessor) - self.assertEqual(len(res), 365) - self.assertIsInstance(res, Index) - self.assertEqual(res.name, 'name') + assert len(res) == 365 + assert isinstance(res, np.ndarray) + + # test boolean indexing + res = dti[dti.is_quarter_start] + exp = dti[[0, 90, 181, 273]] + tm.assert_index_equal(res, exp) + res = dti[dti.is_leap_year] + exp = DatetimeIndex([], freq='D', tz=dti.tz, name='name') + tm.assert_index_equal(res, exp) dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1), periods=4) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 
48e0b96a95209..bbf33c4db5ad7 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -597,15 +597,26 @@ def test_nat_fields(self): def test_nat_vector_field_access(self): idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) + # non boolean fields fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 'is_leap_year'] + 'days_in_month'] for field in fields: result = getattr(idx, field) expected = [getattr(x, field) for x in idx] self.assert_index_equal(result, pd.Index(expected)) + # boolean fields + fields = ['is_leap_year'] + # other boolean fields like 'is_month_start' and 'is_month_end' + # not yet supported by NaT + + for field in fields: + result = getattr(idx, field) + expected = [getattr(x, field) for x in idx] + self.assert_numpy_array_equal(result, np.array(expected)) + s = pd.Series(idx) for field in fields: diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index c3608d847e867..11d2d29597fc0 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -64,6 +64,7 @@ def f(self): if self.tz is not utc: values = self._local_timestamps() + # boolean accessors -> return array if field in ['is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end']: @@ -73,14 +74,15 @@ def f(self): result = libts.get_start_end_field(values, field, self.freqstr, month_kw) - result = self._maybe_mask_results(result, convert='float64') + return self._maybe_mask_results(result, convert='float64') + elif field in ['is_leap_year']: + # no need to mask NaT + return libts.get_date_field(values, field) + # non-boolean accessors -> return Index elif field in ['weekday_name']: result = libts.get_date_name_field(values, field) result = self._maybe_mask_results(result) - elif field in ['is_leap_year']: - # no need to mask NaT - result = libts.get_date_field(values, field) else: result = 
libts.get_date_field(values, field) result = self._maybe_mask_results(result, convert='float64')