Skip to content

Commit

Permalink
Extend DatetimeAccessor properties and support .dt accessor for Tim…
Browse files Browse the repository at this point in the history
…edelta (#3612)

* Support `.dt` accessor for Timedelta

* Rename accessors

* Use `is_np_timedelta_like` for consistency

* Use `pd.timedelta_range`

* Move shared method to Properties

* Parametrize field access test

* move `strftime()` to `DatetimeAccessor`

* Update the documentation

* Update `whats-new.rst`

* Add PR reference

* Parametrize tests

* Extend DatetimeAccessor properties

* Cleanup

* Fix docstring
  • Loading branch information
andersy005 authored and dcherian committed Dec 18, 2019
1 parent 471a5d6 commit 3cbc459
Show file tree
Hide file tree
Showing 6 changed files with 418 additions and 139 deletions.
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,7 @@ Accessors
:toctree: generated/

core.accessor_dt.DatetimeAccessor
core.accessor_dt.TimedeltaAccessor
core.accessor_str.StringAccessor

Custom Indexes
Expand Down
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ New Features
- Added the ``count`` reduction method to both :py:class:`~core.rolling.DatasetCoarsen`
and :py:class:`~core.rolling.DataArrayCoarsen` objects. (:pull:`3500`)
By `Deepak Cherian <https://github.com/dcherian>`_
- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties
and support the ``.dt`` accessor for timedelta data
via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`)
By `Anderson Banihirwe <https://github.com/andersy005>`_.

Bug fixes
~~~~~~~~~
Expand Down
283 changes: 210 additions & 73 deletions xarray/core/accessor_dt.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import numpy as np
import pandas as pd

from .common import _contains_datetime_like_objects, is_np_datetime_like
from .common import (
_contains_datetime_like_objects,
is_np_datetime_like,
is_np_timedelta_like,
)
from .pycompat import dask_array_type


Expand Down Expand Up @@ -145,37 +149,8 @@ def _strftime(values, date_format):
return access_method(values, date_format)


class DatetimeAccessor:
"""Access datetime fields for DataArrays with datetime-like dtypes.
Similar to pandas, fields can be accessed through the `.dt` attribute
for applicable DataArrays:
>>> ds = xarray.Dataset({'time': pd.date_range(start='2000/01/01',
... freq='D', periods=100)})
>>> ds.time.dt
<xarray.core.accessors.DatetimeAccessor at 0x10c369f60>
>>> ds.time.dt.dayofyear[:5]
<xarray.DataArray 'dayofyear' (time: 5)>
array([1, 2, 3, 4, 5], dtype=int32)
Coordinates:
* time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ...
All of the pandas fields are accessible here. Note that these fields are
not calendar-aware; if your datetimes are encoded with a non-Gregorian
calendar (e.g. a 360-day calendar) using cftime, then some fields like
`dayofyear` may not be accurate.
"""

class Properties:
def __init__(self, obj):
if not _contains_datetime_like_objects(obj):
raise TypeError(
"'dt' accessor only available for "
"DataArray with datetime64 timedelta64 dtype or "
"for arrays containing cftime datetime "
"objects."
)
self._obj = obj

def _tslib_field_accessor( # type: ignore
Expand All @@ -194,48 +169,6 @@ def f(self, dtype=dtype):
f.__doc__ = docstring
return property(f)

year = _tslib_field_accessor("year", "The year of the datetime", np.int64)
month = _tslib_field_accessor(
"month", "The month as January=1, December=12", np.int64
)
day = _tslib_field_accessor("day", "The days of the datetime", np.int64)
hour = _tslib_field_accessor("hour", "The hours of the datetime", np.int64)
minute = _tslib_field_accessor("minute", "The minutes of the datetime", np.int64)
second = _tslib_field_accessor("second", "The seconds of the datetime", np.int64)
microsecond = _tslib_field_accessor(
"microsecond", "The microseconds of the datetime", np.int64
)
nanosecond = _tslib_field_accessor(
"nanosecond", "The nanoseconds of the datetime", np.int64
)
weekofyear = _tslib_field_accessor(
"weekofyear", "The week ordinal of the year", np.int64
)
week = weekofyear
dayofweek = _tslib_field_accessor(
"dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64
)
weekday = dayofweek

weekday_name = _tslib_field_accessor(
"weekday_name", "The name of day in a week (ex: Friday)", object
)

dayofyear = _tslib_field_accessor(
"dayofyear", "The ordinal day of the year", np.int64
)
quarter = _tslib_field_accessor("quarter", "The quarter of the date")
days_in_month = _tslib_field_accessor(
"days_in_month", "The number of days in the month", np.int64
)
daysinmonth = days_in_month

season = _tslib_field_accessor("season", "Season of the year (ex: DJF)", object)

time = _tslib_field_accessor(
"time", "Timestamps corresponding to datetimes", object
)

def _tslib_round_accessor(self, name, freq):
obj_type = type(self._obj)
result = _round_field(self._obj.data, name, freq)
Expand Down Expand Up @@ -290,6 +223,50 @@ def round(self, freq):
"""
return self._tslib_round_accessor("round", freq)


class DatetimeAccessor(Properties):
"""Access datetime fields for DataArrays with datetime-like dtypes.
Fields can be accessed through the `.dt` attribute
for applicable DataArrays.
Notes
------
Note that these fields are not calendar-aware; if your datetimes are encoded
with a non-Gregorian calendar (e.g. a 360-day calendar) using cftime,
then some fields like `dayofyear` may not be accurate.
Examples
---------
>>> import xarray as xr
>>> import pandas as pd
>>> dates = pd.date_range(start='2000/01/01', freq='D', periods=10)
>>> ts = xr.DataArray(dates, dims=('time'))
>>> ts
<xarray.DataArray (time: 10)>
array(['2000-01-01T00:00:00.000000000', '2000-01-02T00:00:00.000000000',
'2000-01-03T00:00:00.000000000', '2000-01-04T00:00:00.000000000',
'2000-01-05T00:00:00.000000000', '2000-01-06T00:00:00.000000000',
'2000-01-07T00:00:00.000000000', '2000-01-08T00:00:00.000000000',
'2000-01-09T00:00:00.000000000', '2000-01-10T00:00:00.000000000'],
dtype='datetime64[ns]')
Coordinates:
* time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10
>>> ts.dt
<xarray.core.accessor_dt.DatetimeAccessor object at 0x118b54d68>
>>> ts.dt.dayofyear
<xarray.DataArray 'dayofyear' (time: 10)>
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
Coordinates:
* time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10
>>> ts.dt.quarter
<xarray.DataArray 'quarter' (time: 10)>
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
Coordinates:
* time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10
"""

def strftime(self, date_format):
'''
Return an array of formatted strings specified by date_format, which
Expand Down Expand Up @@ -323,3 +300,163 @@ def strftime(self, date_format):
return obj_type(
result, name="strftime", coords=self._obj.coords, dims=self._obj.dims
)

year = Properties._tslib_field_accessor(
"year", "The year of the datetime", np.int64
)
month = Properties._tslib_field_accessor(
"month", "The month as January=1, December=12", np.int64
)
day = Properties._tslib_field_accessor("day", "The days of the datetime", np.int64)
hour = Properties._tslib_field_accessor(
"hour", "The hours of the datetime", np.int64
)
minute = Properties._tslib_field_accessor(
"minute", "The minutes of the datetime", np.int64
)
second = Properties._tslib_field_accessor(
"second", "The seconds of the datetime", np.int64
)
microsecond = Properties._tslib_field_accessor(
"microsecond", "The microseconds of the datetime", np.int64
)
nanosecond = Properties._tslib_field_accessor(
"nanosecond", "The nanoseconds of the datetime", np.int64
)
weekofyear = Properties._tslib_field_accessor(
"weekofyear", "The week ordinal of the year", np.int64
)
week = weekofyear
dayofweek = Properties._tslib_field_accessor(
"dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64
)
weekday = dayofweek

weekday_name = Properties._tslib_field_accessor(
"weekday_name", "The name of day in a week", object
)

dayofyear = Properties._tslib_field_accessor(
"dayofyear", "The ordinal day of the year", np.int64
)
quarter = Properties._tslib_field_accessor("quarter", "The quarter of the date")
days_in_month = Properties._tslib_field_accessor(
"days_in_month", "The number of days in the month", np.int64
)
daysinmonth = days_in_month

season = Properties._tslib_field_accessor("season", "Season of the year", object)

time = Properties._tslib_field_accessor(
"time", "Timestamps corresponding to datetimes", object
)

is_month_start = Properties._tslib_field_accessor(
"is_month_start",
"Indicates whether the date is the first day of the month.",
bool,
)
is_month_end = Properties._tslib_field_accessor(
"is_month_end", "Indicates whether the date is the last day of the month.", bool
)
is_quarter_start = Properties._tslib_field_accessor(
"is_quarter_start",
"Indicator for whether the date is the first day of a quarter.",
bool,
)
is_quarter_end = Properties._tslib_field_accessor(
"is_quarter_end",
"Indicator for whether the date is the last day of a quarter.",
bool,
)
is_year_start = Properties._tslib_field_accessor(
"is_year_start", "Indicate whether the date is the first day of a year.", bool
)
is_year_end = Properties._tslib_field_accessor(
"is_year_end", "Indicate whether the date is the last day of the year.", bool
)
is_leap_year = Properties._tslib_field_accessor(
"is_leap_year", "Boolean indicator if the date belongs to a leap year.", bool
)


class TimedeltaAccessor(Properties):
    """Expose timedelta components of a DataArray through ``.dt``.

    For DataArrays with a timedelta-like dtype, the ``days``, ``seconds``,
    ``microseconds`` and ``nanoseconds`` fields are available as properties
    on the ``.dt`` attribute.

    Examples
    --------
    >>> import pandas as pd
    >>> import xarray as xr
    >>> dates = pd.timedelta_range(start="1 day", freq="6H", periods=20)
    >>> ts = xr.DataArray(dates, dims=('time'))
    >>> ts
    <xarray.DataArray (time: 20)>
    array([ 86400000000000, 108000000000000, 129600000000000, 151200000000000,
           172800000000000, 194400000000000, 216000000000000, 237600000000000,
           259200000000000, 280800000000000, 302400000000000, 324000000000000,
           345600000000000, 367200000000000, 388800000000000, 410400000000000,
           432000000000000, 453600000000000, 475200000000000, 496800000000000],
          dtype='timedelta64[ns]')
    Coordinates:
      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
    >>> ts.dt
    <xarray.core.accessor_dt.TimedeltaAccessor object at 0x109a27d68>
    >>> ts.dt.days
    <xarray.DataArray 'days' (time: 20)>
    array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5])
    Coordinates:
      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
    >>> ts.dt.microseconds
    <xarray.DataArray 'microseconds' (time: 20)>
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    Coordinates:
      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
    >>> ts.dt.seconds
    <xarray.DataArray 'seconds' (time: 20)>
    array([    0, 21600, 43200, 64800,     0, 21600, 43200, 64800,     0,
           21600, 43200, 64800,     0, 21600, 43200, 64800,     0, 21600,
           43200, 64800])
    Coordinates:
      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
    """

    # Each field is a read-only property built by the shared factory on
    # ``Properties``; the first argument is the field name, the second the
    # property's docstring, the third the dtype requested for the result.
    days = Properties._tslib_field_accessor(
        "days",
        "Number of days for each element.",
        np.int64,
    )
    seconds = Properties._tslib_field_accessor(
        "seconds",
        "Number of seconds (>= 0 and less than 1 day) for each element.",
        np.int64,
    )
    microseconds = Properties._tslib_field_accessor(
        "microseconds",
        "Number of microseconds (>= 0 and less than 1 second) for each element.",
        np.int64,
    )
    nanoseconds = Properties._tslib_field_accessor(
        "nanoseconds",
        "Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.",
        np.int64,
    )


class CombinedDatetimelikeAccessor(DatetimeAccessor, TimedeltaAccessor):
    """Dispatcher that selects the appropriate ``.dt`` accessor for an object.

    Constructing this class never yields a ``CombinedDatetimelikeAccessor``
    instance: ``__new__`` validates ``obj`` and returns either a
    ``TimedeltaAccessor`` or a ``DatetimeAccessor`` wrapping it.
    """

    def __new__(cls, obj):
        """Validate ``obj`` and return the accessor matching its dtype.

        Parameters
        ----------
        obj
            The object the accessor wraps; its ``dtype`` attribute is
            inspected to choose the accessor class.

        Returns
        -------
        TimedeltaAccessor or DatetimeAccessor
            ``TimedeltaAccessor(obj)`` when ``obj.dtype`` is timedelta-like,
            otherwise ``DatetimeAccessor(obj)``.

        Raises
        ------
        TypeError
            If ``obj`` does not contain datetime-like objects.
        """
        # CombinedDatetimelikeAccessor isn't really instantiated. Instead
        # we need to choose which parent (datetime or timedelta) is
        # appropriate. Since we're checking the dtypes anyway, we'll just
        # do all the validation here.
        if not _contains_datetime_like_objects(obj):
            raise TypeError(
                "'.dt' accessor only available for "
                "DataArray with datetime64 or timedelta64 dtype or "
                "for arrays containing cftime datetime "
                "objects."
            )

        if is_np_timedelta_like(obj.dtype):
            return TimedeltaAccessor(obj)
        return DatetimeAccessor(obj)
6 changes: 6 additions & 0 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1447,6 +1447,12 @@ def is_np_datetime_like(dtype: DTypeLike) -> bool:
return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64)


def is_np_timedelta_like(dtype: DTypeLike) -> bool:
    """Return True if ``dtype`` is a subtype of ``np.timedelta64``.

    Unlike ``is_np_datetime_like``, ``np.datetime64`` dtypes are not accepted.
    """
    is_timedelta = np.issubdtype(dtype, np.timedelta64)
    return is_timedelta


def _contains_cftime_datetimes(array) -> bool:
"""Check if an array contains cftime.datetime objects
"""
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
rolling,
utils,
)
from .accessor_dt import DatetimeAccessor
from .accessor_dt import CombinedDatetimelikeAccessor
from .accessor_str import StringAccessor
from .alignment import (
_broadcast_helper,
Expand Down Expand Up @@ -258,7 +258,7 @@ class DataArray(AbstractArray, DataWithCoords):
_coarsen_cls = rolling.DataArrayCoarsen
_resample_cls = resample.DataArrayResample

dt = property(DatetimeAccessor)
dt = property(CombinedDatetimelikeAccessor)

def __init__(
self,
Expand Down
Loading

0 comments on commit 3cbc459

Please sign in to comment.