Skip to content

Commit

Permalink
API: return Index instead of array from DatetimeIndex field accessors…
Browse files Browse the repository at this point in the history
… (GH15022)

closes pandas-dev#15022

Author: Joris Van den Bossche <jorisvandenbossche@gmail.com>

Closes pandas-dev#15589 from jorisvandenbossche/api-dt-fields-index and squashes the following commits:

ffacd38 [Joris Van den Bossche] doc fixes
41728a9 [Joris Van den Bossche] FIX: boolean fields should still return array
6317b6b [Joris Van den Bossche] Add whatsnew
96ed069 [Joris Van den Bossche] Preserve name for PeriodIndex field accessors
cdf6cae [Joris Van den Bossche] Preserve name for DatetimeIndex field accessors
f2831e2 [Joris Van den Bossche] Update timedelta accessors
52f9008 [Joris Van den Bossche] Fix tests
41008c7 [Joris Van den Bossche] API: return Index instead of array from datetime field accessors (GH15022)
  • Loading branch information
jorisvandenbossche authored and mattip committed Mar 30, 2017
1 parent a171bb9 commit dfc8211
Show file tree
Hide file tree
Showing 15 changed files with 156 additions and 78 deletions.
33 changes: 32 additions & 1 deletion doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,38 @@ New Behavior:

s.map(lambda x: x.hour)


.. _whatsnew_0200.api_breaking.index_dt_field:

Accessing datetime fields of Index now returns Index
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The datetime-related attributes (see :ref:`here <timeseries.components>`
for an overview) of ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex`` previously
returned numpy arrays. They will now return a new ``Index`` object, except
in the case of a boolean field, where the result will still be a boolean ndarray. (:issue:`15022`)

Previous Behavior:

.. code-block:: ipython

In [1]: idx = pd.date_range("2015-01-01", periods=5, freq='10H')

In [2]: idx.hour
Out[2]: array([ 0, 10, 20, 6, 16], dtype=int32)

New Behavior:

.. ipython:: python

idx = pd.date_range("2015-01-01", periods=5, freq='10H')
idx.hour

This has the advantage that specific ``Index`` methods are still available on the
result. On the other hand, this may break existing code: unlike numpy arrays,
``Index`` objects are immutable, so code that modifies the result in place will
now raise. To get the original ndarray, you can always convert explicitly using
``np.asarray(idx.hour)``.

.. _whatsnew_0200.api_breaking.s3:

S3 File Handling
Expand Down Expand Up @@ -936,4 +968,3 @@ Bug Fixes
- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`)
- Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`)
- Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`)

33 changes: 31 additions & 2 deletions pandas/tests/indexes/datetimes/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ def test_normalize(self):
class TestDatetime64(tm.TestCase):

def test_datetimeindex_accessors(self):

dti_naive = DatetimeIndex(freq='D', start=datetime(1998, 1, 1),
periods=365)
# GH 13303
Expand Down Expand Up @@ -255,6 +256,34 @@ def test_datetimeindex_accessors(self):
self.assertEqual(len(dti.is_year_end), 365)
self.assertEqual(len(dti.weekday_name), 365)

dti.name = 'name'

# non boolean accessors -> return Index
for accessor in ['year', 'month', 'day', 'hour', 'minute',
'second', 'microsecond', 'nanosecond',
'dayofweek', 'dayofyear', 'weekofyear',
'quarter', 'weekday_name']:
res = getattr(dti, accessor)
assert len(res) == 365
assert isinstance(res, Index)
assert res.name == 'name'

# boolean accessors -> return array
for accessor in ['is_month_start', 'is_month_end',
'is_quarter_start', 'is_quarter_end',
'is_year_start', 'is_year_end']:
res = getattr(dti, accessor)
assert len(res) == 365
assert isinstance(res, np.ndarray)

# test boolean indexing
res = dti[dti.is_quarter_start]
exp = dti[[0, 90, 181, 273]]
tm.assert_index_equal(res, exp)
res = dti[dti.is_leap_year]
exp = DatetimeIndex([], freq='D', tz=dti.tz, name='name')
tm.assert_index_equal(res, exp)

dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1),
periods=4)

Expand Down Expand Up @@ -313,5 +342,5 @@ def test_datetimeindex_accessors(self):
def test_nanosecond_field(self):
dti = DatetimeIndex(np.arange(10))

self.assert_numpy_array_equal(dti.nanosecond,
np.arange(10, dtype=np.int32))
self.assert_index_equal(dti.nanosecond,
pd.Index(np.arange(10, dtype=np.int64)))
4 changes: 2 additions & 2 deletions pandas/tests/indexes/period/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ def test_constructor_arrays_negative_year(self):

pindex = PeriodIndex(year=years, quarter=quarters)

self.assert_numpy_array_equal(pindex.year, years)
self.assert_numpy_array_equal(pindex.quarter, quarters)
self.assert_index_equal(pindex.year, pd.Index(years))
self.assert_index_equal(pindex.quarter, pd.Index(quarters))

def test_constructor_invalid_quarters(self):
self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004),
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/indexes/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,12 +658,12 @@ def test_negative_ordinals(self):

def test_pindex_fieldaccessor_nat(self):
idx = PeriodIndex(['2011-01', '2011-02', 'NaT',
'2012-03', '2012-04'], freq='D')
'2012-03', '2012-04'], freq='D', name='name')

exp = np.array([2011, 2011, -1, 2012, 2012], dtype=np.int64)
self.assert_numpy_array_equal(idx.year, exp)
exp = np.array([1, 2, -1, 3, 4], dtype=np.int64)
self.assert_numpy_array_equal(idx.month, exp)
exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name='name')
self.assert_index_equal(idx.year, exp)
exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name='name')
self.assert_index_equal(idx.month, exp)

def test_pindex_qaccess(self):
pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q')
Expand Down
24 changes: 14 additions & 10 deletions pandas/tests/indexes/timedeltas/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ def test_total_seconds(self):
freq='s')
expt = [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9,
1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456. / 1e9]
tm.assert_almost_equal(rng.total_seconds(), np.array(expt))
tm.assert_almost_equal(rng.total_seconds(), Index(expt))

# test Series
s = Series(rng)
Expand Down Expand Up @@ -486,16 +486,16 @@ def test_append_numpy_bug_1681(self):
def test_fields(self):
rng = timedelta_range('1 days, 10:11:12.100123456', periods=2,
freq='s')
self.assert_numpy_array_equal(rng.days, np.array(
[1, 1], dtype='int64'))
self.assert_numpy_array_equal(
self.assert_index_equal(rng.days, Index([1, 1], dtype='int64'))
self.assert_index_equal(
rng.seconds,
np.array([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13],
dtype='int64'))
self.assert_numpy_array_equal(rng.microseconds, np.array(
[100 * 1000 + 123, 100 * 1000 + 123], dtype='int64'))
self.assert_numpy_array_equal(rng.nanoseconds, np.array(
[456, 456], dtype='int64'))
Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13],
dtype='int64'))
self.assert_index_equal(
rng.microseconds,
Index([100 * 1000 + 123, 100 * 1000 + 123], dtype='int64'))
self.assert_index_equal(rng.nanoseconds,
Index([456, 456], dtype='int64'))

self.assertRaises(AttributeError, lambda: rng.hours)
self.assertRaises(AttributeError, lambda: rng.minutes)
Expand All @@ -509,6 +509,10 @@ def test_fields(self):
tm.assert_series_equal(s.dt.seconds, Series(
[10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1]))

# preserve name (GH15589)
rng.name = 'name'
assert rng.days.name == 'name'

def test_freq_conversion(self):

# doc example
Expand Down
13 changes: 12 additions & 1 deletion pandas/tests/scalar/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,9 +597,20 @@ def test_nat_fields(self):
def test_nat_vector_field_access(self):
idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000'])

# non boolean fields
fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute',
'second', 'microsecond', 'nanosecond', 'week', 'dayofyear',
'days_in_month', 'is_leap_year']
'days_in_month']

for field in fields:
result = getattr(idx, field)
expected = [getattr(x, field) for x in idx]
self.assert_index_equal(result, pd.Index(expected))

# boolean fields
fields = ['is_leap_year']
# other boolean fields like 'is_month_start' and 'is_month_end'
# not yet supported by NaT

for field in fields:
result = getattr(idx, field)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/tools/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -1367,7 +1367,7 @@ def test_daily(self):
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
annual = pivot_annual(ts, 'D')

doy = ts.index.dayofyear
doy = np.asarray(ts.index.dayofyear)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/tools/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ def test_datetimeindex(self):
# make sure that the ordering on datetimeindex is consistent
x = date_range('2000-01-01', periods=2)
result1, result2 = [Index(y).day for y in cartesian_product([x, x])]
expected1 = np.array([1, 1, 2, 2], dtype=np.int32)
expected2 = np.array([1, 2, 1, 2], dtype=np.int32)
tm.assert_numpy_array_equal(result1, expected1)
tm.assert_numpy_array_equal(result2, expected2)
expected1 = Index([1, 1, 2, 2])
expected2 = Index([1, 2, 1, 2])
tm.assert_index_equal(result1, expected1)
tm.assert_index_equal(result2, expected2)

def test_empty(self):
# product of empty factors
Expand Down
Loading

0 comments on commit dfc8211

Please sign in to comment.