Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add days_in_year and decimal_year to dt accessor #9105

Merged
merged 20 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -529,9 +529,11 @@ Datetimelike properties
DataArray.dt.quarter
DataArray.dt.days_in_month
DataArray.dt.daysinmonth
DataArray.dt.days_in_year
DataArray.dt.season
DataArray.dt.time
DataArray.dt.date
DataArray.dt.decimal_year
DataArray.dt.calendar
DataArray.dt.is_month_start
DataArray.dt.is_month_end
Expand Down
9 changes: 9 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,15 @@ v2024.07.1 (unreleased)
New Features
~~~~~~~~~~~~

- Add :py:property:`~core.accessor_dt.DatetimeAccessor.days_in_year` and :py:property:`~core.accessor_dt.DatetimeAccessor.decimal_year` to the Datetime accessor on DataArrays. (:pull:`9105`).
By `Pascal Bourgault <https://github.com/aulemahal>`_.

Performance
~~~~~~~~~~~

- Small optimization to the netCDF4 and h5netcdf backends (:issue:`9058`, :pull:`9067`).
By `Deepak Cherian <https://github.com/dcherian>`_.
aulemahal marked this conversation as resolved.
Show resolved Hide resolved


Breaking changes
~~~~~~~~~~~~~~~~
Expand Down
117 changes: 66 additions & 51 deletions xarray/coding/calendar_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@
_should_cftime_be_used,
convert_times,
)
from xarray.core.common import _contains_datetime_like_objects, is_np_datetime_like
from xarray.core.common import (
_contains_datetime_like_objects,
full_like,
is_np_datetime_like,
)
from xarray.core.computation import apply_ufunc

try:
import cftime
Expand All @@ -25,16 +30,6 @@
]


def _days_in_year(year, calendar, use_cftime=True):
"""Return the number of days in the input year according to the input calendar."""
date_type = get_date_type(calendar, use_cftime=use_cftime)
if year == -1 and calendar in _CALENDARS_WITHOUT_YEAR_ZERO:
difference = date_type(year + 2, 1, 1) - date_type(year, 1, 1)
else:
difference = date_type(year + 1, 1, 1) - date_type(year, 1, 1)
return difference.days


def convert_calendar(
obj,
calendar,
Expand Down Expand Up @@ -191,11 +186,7 @@ def convert_calendar(
# Special case for conversion involving 360_day calendar
if align_on == "year":
# Instead of translating dates directly, this tries to keep the position within a year similar.
new_doy = time.groupby(f"{dim}.year").map(
_interpolate_day_of_year,
target_calendar=calendar,
use_cftime=use_cftime,
)
new_doy = _interpolate_day_of_year(time, target_calendar=calendar)
elif align_on == "random":
# The 5 days to remove are randomly chosen, one for each of the five 72-days periods of the year.
new_doy = time.groupby(f"{dim}.year").map(
Expand Down Expand Up @@ -242,16 +233,25 @@ def convert_calendar(
return out


def _interpolate_day_of_year(time, target_calendar, use_cftime):
"""Returns the nearest day in the target calendar of the corresponding
"decimal year" in the source calendar.
"""
year = int(time.dt.year[0])
source_calendar = time.dt.calendar
def _is_leap_year(years, calendar):
func = np.vectorize(cftime.is_leap_year)
return func(years, calendar=calendar)


def _days_in_year(years, calendar):
"""The number of days in the year according to given calendar."""
if calendar == "360_day":
return full_like(years, 360)
return _is_leap_year(years, calendar).astype(int) + 365


def _interpolate_day_of_year(times, target_calendar):
"""Returns the nearest day in the target calendar of the corresponding "decimal year" in the source calendar."""
source_calendar = times.dt.calendar
return np.round(
_days_in_year(year, target_calendar, use_cftime)
* time.dt.dayofyear
/ _days_in_year(year, source_calendar, use_cftime)
_days_in_year(times.dt.year, target_calendar)
* times.dt.dayofyear
/ _days_in_year(times.dt.year, source_calendar)
).astype(int)


Expand All @@ -260,18 +260,18 @@ def _random_day_of_year(time, target_calendar, use_cftime):

Removes Feb 29th and five other days chosen randomly within five sections of 72 days.
"""
year = int(time.dt.year[0])
year = time.dt.year[0]
source_calendar = time.dt.calendar
new_doy = np.arange(360) + 1
rm_idx = np.random.default_rng().integers(0, 72, 5) + 72 * np.arange(5)
if source_calendar == "360_day":
for idx in rm_idx:
new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1
if _days_in_year(year, target_calendar, use_cftime) == 366:
if _days_in_year(year, target_calendar) == 366:
new_doy[new_doy >= 60] = new_doy[new_doy >= 60] + 1
elif target_calendar == "360_day":
new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1)
if _days_in_year(year, source_calendar, use_cftime) == 366:
if _days_in_year(year, source_calendar) == 366:
new_doy = np.insert(new_doy, 60, -1)
return new_doy[time.dt.dayofyear - 1]

Expand Down Expand Up @@ -304,32 +304,49 @@ def _convert_to_new_calendar_with_new_day_of_year(
return np.nan


def _datetime_to_decimal_year(times, dim="time", calendar=None):
"""Convert a datetime DataArray to decimal years according to its calendar or the given one.
def _yearstart_cftime(year, date_class):
return date_class(year, 1, 1)


def _yearstart_np(year, dtype):
return np.datetime64(int(year) - 1970, "Y").astype(dtype)


def _yearstart(times):
if times.dtype == "O":
return apply_ufunc(
_yearstart_cftime,
times.dt.year,
kwargs={"date_class": get_date_type(times.dt.calendar, True)},
vectorize=True,
dask="parallelized",
)
return apply_ufunc(
_yearstart_np,
times.dt.year,
kwargs={"dtype": times.dtype},
vectorize=True,
dask="parallelized",
)


def _decimal_year(times):
"""Convert a datetime DataArray to decimal years according to its calendar.

The decimal year of a timestamp is its year plus its sub-year component
converted to the fraction of its year.
Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar,
2000.16301 in a "noleap" or 2000.16806 in a "360_day".
"""
from xarray.core.dataarray import DataArray

calendar = calendar or times.dt.calendar

if is_np_datetime_like(times.dtype):
times = times.copy(data=convert_times(times.values, get_date_type("standard")))

def _make_index(time):
year = int(time.dt.year[0])
doys = cftime.date2num(time, f"days since {year:04d}-01-01", calendar=calendar)
return DataArray(
year + doys / _days_in_year(year, calendar),
dims=(dim,),
coords=time.coords,
name=dim,
years = times.dt.year
deltas = times - _yearstart(times)
# astype on the data to avoid warning about timedelta64[D] being automatically converted to ns in Variable constructor.
days_in_years = deltas.copy(
data=times.dt.days_in_year.data.astype("timedelta64[D]").astype(
"timedelta64[ns]"
)
aulemahal marked this conversation as resolved.
Show resolved Hide resolved

return times.groupby(f"{dim}.year").map(_make_index)
)
return years + deltas / days_in_years


def interp_calendar(source, target, dim="time"):
Expand Down Expand Up @@ -372,9 +389,7 @@ def interp_calendar(source, target, dim="time"):
f"Both 'source.{dim}' and 'target' must contain datetime objects."
)

source_calendar = source[dim].dt.calendar
target_calendar = target.dt.calendar

if (
source[dim].time.dt.year == 0
).any() and target_calendar in _CALENDARS_WITHOUT_YEAR_ZERO:
Expand All @@ -383,8 +398,8 @@ def interp_calendar(source, target, dim="time"):
)

out = source.copy()
out[dim] = _datetime_to_decimal_year(source[dim], dim=dim, calendar=source_calendar)
target_idx = _datetime_to_decimal_year(target, dim=dim, calendar=target_calendar)
out[dim] = _decimal_year(source[dim])
target_idx = _decimal_year(target)
out = out.interp(**{dim: target_idx})
out[dim] = target
return out
5 changes: 5 additions & 0 deletions xarray/coding/cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,11 @@ def round(self, freq):
"""
return self._round_via_method(freq, _round_to_nearest_half_even)

@property
def is_leap_year(self):
func = np.vectorize(cftime.is_leap_year)
return func(self.year, calendar=self.calendar)


def _parse_iso8601_without_reso(date_type, datetime_str):
date, _ = _parse_iso8601_with_reso(date_type, datetime_str)
Expand Down
19 changes: 19 additions & 0 deletions xarray/core/accessor_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
import numpy as np
import pandas as pd

from xarray.coding.calendar_ops import _decimal_year
from xarray.coding.times import infer_calendar_name
from xarray.core import duck_array_ops
from xarray.core.common import (
_contains_datetime_like_objects,
full_like,
is_np_datetime_like,
is_np_timedelta_like,
)
Expand Down Expand Up @@ -533,6 +535,23 @@ def calendar(self) -> CFCalendar:
"""
return infer_calendar_name(self._obj.data)

@property
def days_in_year(self) -> T_DataArray:
"""The number of days in the year."""
if self.calendar == "360_day":
result = full_like(self.year, 360)
else:
result = self.is_leap_year.astype(int) + 365
newvar = self._obj.variable.copy(data=result, deep=False)
aulemahal marked this conversation as resolved.
Show resolved Hide resolved
return self._obj._replace(newvar, name="days_in_year")

@property
def decimal_year(self) -> T_DataArray:
"""Convert the dates as a fractional year."""
aulemahal marked this conversation as resolved.
Show resolved Hide resolved
result = _decimal_year(self._obj)
newvar = self._obj.variable.copy(data=result, deep=False)
return self._obj._replace(newvar, name="decimal_year")


class TimedeltaAccessor(TimeAccessor[T_DataArray]):
"""Access Timedelta fields for DataArrays with Timedelta-like dtypes.
Expand Down
15 changes: 15 additions & 0 deletions xarray/tests/test_accessor_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,21 @@ def test_strftime(self) -> None:
"2000-01-01 01:00:00" == self.data.time.dt.strftime("%Y-%m-%d %H:%M:%S")[1]
)

@requires_cftime
@pytest.mark.parametrize("calendar,expected", [('standard', 366), ('noleap', 365), ('360_day', 360), ('all_leap', 366)])
def test_days_in_year(self, calendar, expected) -> None:
assert (
self.data.convert_calendar(calendar, align_on='year').time.dt.days_in_year == expected
).all()

@requires_cftime
def test_decimal_year(self) -> None:
h_per_yr = 366 * 24
np.testing.assert_array_equal(
self.data.time.dt.decimal_year[0:3],
[2000, 2000 + 1 / h_per_yr, 2000 + 2 / h_per_yr]
)

def test_not_datetime_type(self) -> None:
nontime_data = self.data.copy()
int_data = np.arange(len(self.data.time)).astype("int8")
Expand Down
Loading