Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend DatetimeAccessor properties and support .dt accessor for Timedelta #3612

Merged
merged 17 commits into from
Dec 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,7 @@ Accessors
:toctree: generated/

core.accessor_dt.DatetimeAccessor
core.accessor_dt.TimedeltaAccessor
core.accessor_str.StringAccessor

Custom Indexes
Expand Down
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ New Features
- Added the ``count`` reduction method to both :py:class:`~core.rolling.DatasetCoarsen`
and :py:class:`~core.rolling.DataArrayCoarsen` objects. (:pull:`3500`)
By `Deepak Cherian <https://github.com/dcherian>`_
- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties
and support the ``.dt`` accessor for timedelta data
via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`)
By `Anderson Banihirwe <https://github.com/andersy005>`_.

Bug fixes
~~~~~~~~~
Expand Down
283 changes: 210 additions & 73 deletions xarray/core/accessor_dt.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import numpy as np
import pandas as pd

from .common import _contains_datetime_like_objects, is_np_datetime_like
from .common import (
_contains_datetime_like_objects,
is_np_datetime_like,
is_np_timedelta_like,
)
from .pycompat import dask_array_type


Expand Down Expand Up @@ -145,37 +149,8 @@ def _strftime(values, date_format):
return access_method(values, date_format)


class DatetimeAccessor:
"""Access datetime fields for DataArrays with datetime-like dtypes.

Similar to pandas, fields can be accessed through the `.dt` attribute
for applicable DataArrays:

>>> ds = xarray.Dataset({'time': pd.date_range(start='2000/01/01',
... freq='D', periods=100)})
>>> ds.time.dt
<xarray.core.accessors.DatetimeAccessor at 0x10c369f60>
>>> ds.time.dt.dayofyear[:5]
<xarray.DataArray 'dayofyear' (time: 5)>
array([1, 2, 3, 4, 5], dtype=int32)
Coordinates:
* time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ...

All of the pandas fields are accessible here. Note that these fields are
not calendar-aware; if your datetimes are encoded with a non-Gregorian
calendar (e.g. a 360-day calendar) using cftime, then some fields like
`dayofyear` may not be accurate.

"""

class Properties:
def __init__(self, obj):
if not _contains_datetime_like_objects(obj):
raise TypeError(
"'dt' accessor only available for "
"DataArray with datetime64 timedelta64 dtype or "
"for arrays containing cftime datetime "
"objects."
)
self._obj = obj

def _tslib_field_accessor( # type: ignore
Expand All @@ -194,48 +169,6 @@ def f(self, dtype=dtype):
f.__doc__ = docstring
return property(f)

year = _tslib_field_accessor("year", "The year of the datetime", np.int64)
month = _tslib_field_accessor(
"month", "The month as January=1, December=12", np.int64
)
day = _tslib_field_accessor("day", "The days of the datetime", np.int64)
hour = _tslib_field_accessor("hour", "The hours of the datetime", np.int64)
minute = _tslib_field_accessor("minute", "The minutes of the datetime", np.int64)
second = _tslib_field_accessor("second", "The seconds of the datetime", np.int64)
microsecond = _tslib_field_accessor(
"microsecond", "The microseconds of the datetime", np.int64
)
nanosecond = _tslib_field_accessor(
"nanosecond", "The nanoseconds of the datetime", np.int64
)
weekofyear = _tslib_field_accessor(
"weekofyear", "The week ordinal of the year", np.int64
)
week = weekofyear
dayofweek = _tslib_field_accessor(
"dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64
)
weekday = dayofweek

weekday_name = _tslib_field_accessor(
"weekday_name", "The name of day in a week (ex: Friday)", object
)

dayofyear = _tslib_field_accessor(
"dayofyear", "The ordinal day of the year", np.int64
)
quarter = _tslib_field_accessor("quarter", "The quarter of the date")
days_in_month = _tslib_field_accessor(
"days_in_month", "The number of days in the month", np.int64
)
daysinmonth = days_in_month

season = _tslib_field_accessor("season", "Season of the year (ex: DJF)", object)

time = _tslib_field_accessor(
"time", "Timestamps corresponding to datetimes", object
)

def _tslib_round_accessor(self, name, freq):
obj_type = type(self._obj)
result = _round_field(self._obj.data, name, freq)
Expand Down Expand Up @@ -290,6 +223,50 @@ def round(self, freq):
"""
return self._tslib_round_accessor("round", freq)


class DatetimeAccessor(Properties):
"""Access datetime fields for DataArrays with datetime-like dtypes.

Fields can be accessed through the `.dt` attribute
for applicable DataArrays.

Notes
------
Note that these fields are not calendar-aware; if your datetimes are encoded
with a non-Gregorian calendar (e.g. a 360-day calendar) using cftime,
then some fields like `dayofyear` may not be accurate.

Examples
---------
>>> import xarray as xr
>>> import pandas as pd
>>> dates = pd.date_range(start='2000/01/01', freq='D', periods=10)
>>> ts = xr.DataArray(dates, dims=('time'))
>>> ts
<xarray.DataArray (time: 10)>
array(['2000-01-01T00:00:00.000000000', '2000-01-02T00:00:00.000000000',
'2000-01-03T00:00:00.000000000', '2000-01-04T00:00:00.000000000',
'2000-01-05T00:00:00.000000000', '2000-01-06T00:00:00.000000000',
'2000-01-07T00:00:00.000000000', '2000-01-08T00:00:00.000000000',
'2000-01-09T00:00:00.000000000', '2000-01-10T00:00:00.000000000'],
dtype='datetime64[ns]')
Coordinates:
* time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10
>>> ts.dt
<xarray.core.accessor_dt.DatetimeAccessor object at 0x118b54d68>
>>> ts.dt.dayofyear
<xarray.DataArray 'dayofyear' (time: 10)>
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
Coordinates:
* time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10
>>> ts.dt.quarter
<xarray.DataArray 'quarter' (time: 10)>
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
Coordinates:
* time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10

"""

def strftime(self, date_format):
'''
Return an array of formatted strings specified by date_format, which
Expand Down Expand Up @@ -323,3 +300,163 @@ def strftime(self, date_format):
return obj_type(
result, name="strftime", coords=self._obj.coords, dims=self._obj.dims
)

year = Properties._tslib_field_accessor(
"year", "The year of the datetime", np.int64
)
month = Properties._tslib_field_accessor(
"month", "The month as January=1, December=12", np.int64
)
day = Properties._tslib_field_accessor("day", "The days of the datetime", np.int64)
hour = Properties._tslib_field_accessor(
"hour", "The hours of the datetime", np.int64
)
minute = Properties._tslib_field_accessor(
"minute", "The minutes of the datetime", np.int64
)
second = Properties._tslib_field_accessor(
"second", "The seconds of the datetime", np.int64
)
microsecond = Properties._tslib_field_accessor(
"microsecond", "The microseconds of the datetime", np.int64
)
nanosecond = Properties._tslib_field_accessor(
"nanosecond", "The nanoseconds of the datetime", np.int64
)
weekofyear = Properties._tslib_field_accessor(
"weekofyear", "The week ordinal of the year", np.int64
)
week = weekofyear
dayofweek = Properties._tslib_field_accessor(
"dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64
)
weekday = dayofweek

weekday_name = Properties._tslib_field_accessor(
"weekday_name", "The name of day in a week", object
)

dayofyear = Properties._tslib_field_accessor(
"dayofyear", "The ordinal day of the year", np.int64
)
quarter = Properties._tslib_field_accessor("quarter", "The quarter of the date")
days_in_month = Properties._tslib_field_accessor(
"days_in_month", "The number of days in the month", np.int64
)
daysinmonth = days_in_month

season = Properties._tslib_field_accessor("season", "Season of the year", object)

time = Properties._tslib_field_accessor(
"time", "Timestamps corresponding to datetimes", object
)

is_month_start = Properties._tslib_field_accessor(
"is_month_start",
"Indicates whether the date is the first day of the month.",
bool,
)
is_month_end = Properties._tslib_field_accessor(
"is_month_end", "Indicates whether the date is the last day of the month.", bool
)
is_quarter_start = Properties._tslib_field_accessor(
"is_quarter_start",
"Indicator for whether the date is the first day of a quarter.",
bool,
)
is_quarter_end = Properties._tslib_field_accessor(
"is_quarter_end",
"Indicator for whether the date is the last day of a quarter.",
bool,
)
is_year_start = Properties._tslib_field_accessor(
"is_year_start", "Indicate whether the date is the first day of a year.", bool
)
is_year_end = Properties._tslib_field_accessor(
"is_year_end", "Indicate whether the date is the last day of the year.", bool
)
is_leap_year = Properties._tslib_field_accessor(
"is_leap_year", "Boolean indicator if the date belongs to a leap year.", bool
)


class TimedeltaAccessor(Properties):
    """Expose timedelta components of DataArrays with timedelta-like dtypes.

    The accessor is reached through the `.dt` attribute of an applicable
    DataArray and provides the ``days``, ``seconds``, ``microseconds`` and
    ``nanoseconds`` fields declared in the class body.

    Examples
    --------
    >>> import pandas as pd
    >>> import xarray as xr
    >>> dates = pd.timedelta_range(start="1 day", freq="6H", periods=20)
    >>> ts = xr.DataArray(dates, dims=('time'))
    >>> ts
    <xarray.DataArray (time: 20)>
    array([ 86400000000000, 108000000000000, 129600000000000, 151200000000000,
           172800000000000, 194400000000000, 216000000000000, 237600000000000,
           259200000000000, 280800000000000, 302400000000000, 324000000000000,
           345600000000000, 367200000000000, 388800000000000, 410400000000000,
           432000000000000, 453600000000000, 475200000000000, 496800000000000],
          dtype='timedelta64[ns]')
    Coordinates:
      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
    >>> ts.dt
    <xarray.core.accessor_dt.TimedeltaAccessor object at 0x109a27d68>
    >>> ts.dt.days
    <xarray.DataArray 'days' (time: 20)>
    array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5])
    Coordinates:
      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
    >>> ts.dt.microseconds
    <xarray.DataArray 'microseconds' (time: 20)>
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    Coordinates:
      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
    >>> ts.dt.seconds
    <xarray.DataArray 'seconds' (time: 20)>
    array([    0, 21600, 43200, 64800,     0, 21600, 43200, 64800,     0,
           21600, 43200, 64800,     0, 21600, 43200, 64800,     0, 21600,
           43200, 64800])
    Coordinates:
      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
    """

    # Class-body shorthand for the shared property factory. It is deleted at
    # the end of the body so it never becomes an attribute of the class.
    _field = Properties._tslib_field_accessor

    days = _field("days", "Number of days for each element.", np.int64)
    seconds = _field(
        "seconds",
        "Number of seconds (>= 0 and less than 1 day) for each element.",
        np.int64,
    )
    microseconds = _field(
        "microseconds",
        "Number of microseconds (>= 0 and less than 1 second) for each element.",
        np.int64,
    )
    nanoseconds = _field(
        "nanoseconds",
        "Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.",
        np.int64,
    )

    del _field


class CombinedDatetimelikeAccessor(DatetimeAccessor, TimedeltaAccessor):
    """Entry point for `.dt` that dispatches to the matching accessor.

    Constructing this class never yields an instance of it: ``__new__``
    validates ``obj`` and returns either a ``TimedeltaAccessor`` or a
    ``DatetimeAccessor`` built from it.
    """

    def __new__(cls, obj):
        # CombinedDatetimelikeAccessor isn't really instantiated. The job
        # here is to pick the appropriate parent (datetime or timedelta);
        # because the dtype has to be inspected for that anyway, all input
        # validation is done in this one place.
        if not _contains_datetime_like_objects(obj):
            raise TypeError(
                "'.dt' accessor only available for "
                "DataArray with datetime64 timedelta64 dtype or "
                "for arrays containing cftime datetime "
                "objects."
            )

        # timedelta64 data gets the timedelta accessor; everything else that
        # passed the check above is handled by the datetime accessor.
        accessor_cls = (
            TimedeltaAccessor if is_np_timedelta_like(obj.dtype) else DatetimeAccessor
        )
        return accessor_cls(obj)
6 changes: 6 additions & 0 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1447,6 +1447,12 @@ def is_np_datetime_like(dtype: DTypeLike) -> bool:
return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64)


def is_np_timedelta_like(dtype: DTypeLike) -> bool:
    """Return whether ``dtype`` is a sub-dtype of ``np.timedelta64``."""
    is_timedelta = np.issubdtype(dtype, np.timedelta64)
    return is_timedelta


def _contains_cftime_datetimes(array) -> bool:
"""Check if an array contains cftime.datetime objects
"""
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
rolling,
utils,
)
from .accessor_dt import DatetimeAccessor
from .accessor_dt import CombinedDatetimelikeAccessor
from .accessor_str import StringAccessor
from .alignment import (
_broadcast_helper,
Expand Down Expand Up @@ -258,7 +258,7 @@ class DataArray(AbstractArray, DataWithCoords):
_coarsen_cls = rolling.DataArrayCoarsen
_resample_cls = resample.DataArrayResample

dt = property(DatetimeAccessor)
dt = property(CombinedDatetimelikeAccessor)

def __init__(
self,
Expand Down
Loading