From 06b2c834ee64276defa0bcc9a90c2f21f0293f0c Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Sun, 13 Nov 2022 09:25:55 -0500 Subject: [PATCH 01/18] Initial work toward enabling origin and offset arguments in resample --- xarray/core/common.py | 36 +++- xarray/core/dataarray.py | 21 ++- xarray/core/dataset.py | 21 ++- xarray/core/resample_cftime.py | 197 +++++++++++++++++----- xarray/core/types.py | 4 + xarray/tests/test_cftimeindex_resample.py | 51 +++++- xarray/tests/test_groupby.py | 27 +++ 7 files changed, 303 insertions(+), 54 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index 13fd91d8e99..75927cad160 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -44,7 +44,13 @@ from .indexes import Index from .resample import Resample from .rolling_exp import RollingExp - from .types import DTypeLikeSave, ScalarOrArray, SideOptions, T_DataWithCoords + from .types import ( + DatetimeLike, + DTypeLikeSave, + ScalarOrArray, + SideOptions, + T_DataWithCoords, + ) from .variable import Variable DTypeMaybeMapping = Union[DTypeLikeSave, Mapping[Any, DTypeLikeSave]] @@ -817,7 +823,9 @@ def _resample( skipna: bool | None, closed: SideOptions | None, label: SideOptions | None, - base: int, + base: int | None, + offset: pd.Timedelta | datetime.timedelta | str | None, + origin: str | DatetimeLike, keep_attrs: bool | None, loffset: datetime.timedelta | str | None, restore_coord_dims: bool | None, @@ -845,6 +853,18 @@ def _resample( For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for "24H" frequency, base could range from 0 through 23. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. 
+ + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : pd.Timedelta, datetime.timedelta, or str, default is None + An offset timedelta added to the origin. loffset : timedelta or str, optional Offset used to adjust the resampled time labels. Some pandas date offset strings are supported. @@ -960,10 +980,18 @@ def _resample( if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex): from .resample_cftime import CFTimeGrouper - grouper = CFTimeGrouper(freq, closed, label, base, loffset) + grouper = CFTimeGrouper( + freq, closed, label, base, loffset, origin, offset + ) else: grouper = pd.Grouper( - freq=freq, closed=closed, label=label, base=base, loffset=loffset + freq=freq, + closed=closed, + label=label, + base=base, + offset=offset, + origin=origin, + loffset=loffset, ) group = DataArray( dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 15d1777b270..6de33c0e09c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -78,6 +78,7 @@ from .rolling import DataArrayCoarsen, DataArrayRolling from .types import ( CoarsenBoundaryOptions, + DatetimeLike, DatetimeUnitOptions, Dims, ErrorOptions, @@ -6531,7 +6532,9 @@ def resample( skipna: bool | None = None, closed: SideOptions | None = None, label: SideOptions | None = None, - base: int = 0, + base: int | None = None, + offset: pd.Timedelta | datetime.timedelta | str | None = None, + origin: str | DatetimeLike = "start_day", keep_attrs: bool | None = None, loffset: datetime.timedelta | str | None = None, restore_coord_dims: bool | None = None, @@ -6555,10 +6558,22 @@ def resample( Side of each 
interval to treat as closed. label : {"left", "right"}, optional Side of each interval to use for labeling. - base : int, default = 0 + base : int, optional For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for "24H" frequency, base could range from 0 through 23. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : pd.Timedelta, datetime.timedelta, or str, default is None + An offset timedelta added to the origin. loffset : timedelta or str, optional Offset used to adjust the resampled time labels. Some pandas date offset strings are supported. 
@@ -6640,6 +6655,8 @@ def resample( closed=closed, label=label, base=base, + offset=offset, + origin=origin, keep_attrs=keep_attrs, loffset=loffset, restore_coord_dims=restore_coord_dims, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1055e33e111..2ec15ec2d3e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -107,6 +107,7 @@ CoarsenBoundaryOptions, CombineAttrsOptions, CompatOptions, + DatetimeLike, DatetimeUnitOptions, Dims, ErrorOptions, @@ -9114,7 +9115,9 @@ def resample( skipna: bool | None = None, closed: SideOptions | None = None, label: SideOptions | None = None, - base: int = 0, + base: int | None = None, + offset: pd.Timedelta | datetime.timedelta | str | None = None, + origin: str | DatetimeLike = "start_day", keep_attrs: bool | None = None, loffset: datetime.timedelta | str | None = None, restore_coord_dims: bool | None = None, @@ -9138,10 +9141,22 @@ def resample( Side of each interval to treat as closed. label : {"left", "right"}, optional Side of each interval to use for labeling. - base : int, default = 0 + base : int, optional For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for "24H" frequency, base could range from 0 through 23. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : pd.Timedelta, datetime.timedelta, or str, default is None + An offset timedelta added to the origin. 
loffset : timedelta or str, optional Offset used to adjust the resampled time labels. Some pandas date offset strings are supported. @@ -9176,6 +9191,8 @@ def resample( closed=closed, label=label, base=base, + offset=offset, + origin=origin, keep_attrs=keep_attrs, loffset=loffset, restore_coord_dims=restore_coord_dims, diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 11eceda77ee..a9916a76348 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -55,17 +55,40 @@ from ..coding.cftimeindex import CFTimeIndex +try: + import cftime +except ImportError: + cftime = None + + class CFTimeGrouper: """This is a simple container for the grouping parameters that implements a single method, the only one required for resampling in xarray. It cannot be used in a call to groupby like a pandas.Grouper object can.""" - def __init__(self, freq, closed=None, label=None, base=0, loffset=None): + def __init__( + self, + freq, + closed=None, + label=None, + base=None, + loffset=None, + origin="start_day", + offset=None, + ): + if base is not None and offset is not None: + raise ValueError("base and offset cannot be provided at the same time") + self.freq = to_offset(freq) self.closed = closed self.label = label - self.base = base self.loffset = loffset + self.origin = origin + + if base is not None and isinstance(self.freq, CFTIME_TICKS): + self.offset = type(self.freq)(n=base % self.freq.n) + else: + self.offset = offset if isinstance(self.freq, (MonthEnd, QuarterEnd, YearEnd)): if self.closed is None: @@ -73,10 +96,31 @@ def __init__(self, freq, closed=None, label=None, base=0, loffset=None): if self.label is None: self.label = "right" else: - if self.closed is None: - self.closed = "left" - if self.label is None: - self.label = "left" + # The backward resample sets ``closed`` to ``'right'`` by default + # since the last value should be considered as the edge point for + # the last bin. 
When origin in "end" or "end_day", the value for a + # specific ``cftime.datetime`` index stands for the resample result + # from the current ``cftime.datetime`` minus ``freq`` to the current + # ``cftime.datetime`` with a right close. + if self.origin in ["end", "end_day"]: + if self.closed is None: + self.closed = "right" + if self.label is None: + self.label = "right" + else: + if self.closed is None: + self.closed = "left" + if self.label is None: + self.label = "left" + + if self.offset is not None: + try: + self.offset = _convert_offset_to_timedelta(self.offset) + except (ValueError, AttributeError): + raise ValueError( + f"offset must be a datetime.timedelta object or an offset string " + f"that can be converted to a timedelta. Got {offset} instead." + ) def first_items(self, index): """Meant to reproduce the results of the following @@ -89,7 +133,7 @@ def first_items(self, index): """ datetime_bins, labels = _get_time_bins( - index, self.freq, self.closed, self.label, self.base + index, self.freq, self.closed, self.label, self.origin, self.offset ) if self.loffset is not None: if isinstance(self.loffset, datetime.timedelta): @@ -111,7 +155,7 @@ def first_items(self, index): return first_items.where(non_duplicate) -def _get_time_bins(index, freq, closed, label, base): +def _get_time_bins(index, freq, closed, label, origin, offset): """Obtain the bins and their respective labels for resampling operations. Parameters @@ -130,10 +174,18 @@ def _get_time_bins(index, freq, closed, label, base): Which bin edge label to label bucket with. The default is 'left' for all frequency offsets except for 'M' and 'A', which have a default of 'right'. - base : int, optional - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. 
+ origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : datetime.timedelta, default is None + An offset timedelta added to the origin. Returns ------- @@ -154,7 +206,7 @@ def _get_time_bins(index, freq, closed, label, base): return datetime_bins, labels first, last = _get_range_edges( - index.min(), index.max(), freq, closed=closed, base=base + index.min(), index.max(), freq, closed=closed, origin=origin, offset=offset ) datetime_bins = labels = cftime_range( freq=freq, start=first, end=last, name=index.name @@ -172,7 +224,7 @@ def _get_time_bins(index, freq, closed, label, base): return datetime_bins, labels -def _adjust_bin_edges(datetime_bins, offset, closed, index, labels): +def _adjust_bin_edges(datetime_bins, freq, closed, index, labels): """This is required for determining the bin edges resampling with daily frequencies greater than one day, month end, and year end frequencies. @@ -207,8 +259,8 @@ def _adjust_bin_edges(datetime_bins, offset, closed, index, labels): This is also required for daily frequencies longer than one day and year-end frequencies. 
""" - is_super_daily = isinstance(offset, (MonthEnd, QuarterEnd, YearEnd)) or ( - isinstance(offset, Day) and offset.n > 1 + is_super_daily = isinstance(freq, (MonthEnd, QuarterEnd, YearEnd)) or ( + isinstance(freq, Day) and freq.n > 1 ) if is_super_daily: if closed == "right": @@ -220,7 +272,7 @@ def _adjust_bin_edges(datetime_bins, offset, closed, index, labels): return datetime_bins, labels -def _get_range_edges(first, last, offset, closed="left", base=0): +def _get_range_edges(first, last, freq, closed="left", origin="start_day", offset=None): """Get the correct starting and ending datetimes for the resampled CFTimeIndex range. @@ -232,16 +284,24 @@ def _get_range_edges(first, last, offset, closed="left", base=0): last : cftime.datetime Uncorrected ending datetime object for resampled CFTimeIndex range. Usually the max of the original CFTimeIndex. - offset : xarray.coding.cftime_offsets.BaseCFTimeOffset + freq : xarray.coding.cftime_offsets.BaseCFTimeOffset The offset object representing target conversion a.k.a. resampling frequency. Contains information on offset type (e.g. Day or 'D') and offset magnitude (e.g., n = 3). closed : 'left' or 'right', optional Which side of bin interval is closed. Defaults to 'left'. - base : int, optional - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. 
+ + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : datetime.timedelta, default is None + An offset timedelta added to the origin. Returns ------- @@ -250,21 +310,23 @@ def _get_range_edges(first, last, offset, closed="left", base=0): last : cftime.datetime Corrected ending datetime object for resampled CFTimeIndex range. """ - if isinstance(offset, CFTIME_TICKS): + if isinstance(freq, CFTIME_TICKS): first, last = _adjust_dates_anchored( - first, last, offset, closed=closed, base=base + first, last, freq, closed=closed, origin=origin, offset=offset ) return first, last else: first = normalize_date(first) last = normalize_date(last) - first = offset.rollback(first) if closed == "left" else first - offset - last = last + offset + first = freq.rollback(first) if closed == "left" else first - freq + last = last + freq return first, last -def _adjust_dates_anchored(first, last, offset, closed="right", base=0): +def _adjust_dates_anchored( + first, last, freq, closed="right", origin="start_day", offset=None +): """First and last offsets should be calculated from the start day to fix an error cause by resampling across multiple days when a one day period is not a multiple of the frequency. @@ -276,16 +338,24 @@ def _adjust_dates_anchored(first, last, offset, closed="right", base=0): A datetime object representing the start of a CFTimeIndex range. last : cftime.datetime A datetime object representing the end of a CFTimeIndex range. - offset : xarray.coding.cftime_offsets.BaseCFTimeOffset + freq : xarray.coding.cftime_offsets.BaseCFTimeOffset The offset object representing target conversion a.k.a. resampling frequency. Contains information on offset type (e.g. 
Day or 'D') and offset magnitude (e.g., n = 3). closed : 'left' or 'right', optional Which side of bin interval is closed. Defaults to 'right'. - base : int, optional - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : datetime.timedelta, default is None + An offset timedelta added to the origin. Returns ------- @@ -296,29 +366,56 @@ def _adjust_dates_anchored(first, last, offset, closed="right", base=0): A datetime object representing the end of a date range that has been adjusted to fix resampling errors. """ + if cftime is None: + raise ModuleNotFoundError("No module named 'cftime'") + + if origin == "start_day": + origin_date = normalize_date(first) + elif origin == "start": + origin_date = first + elif origin == "epoch": + origin_date = type(first)(1970, 1, 1) + elif origin in ["end", "end_day"]: + origin_last = last if origin == "end" else _ceil_via_cftimeindex(last, "D") + sub_freq_times = (origin_last - first) // freq.as_timedelta() + if closed == "left": + sub_freq_times += 1 + first = origin_last - sub_freq_times * freq + origin_date = first + elif isinstance(origin, cftime.datetime): + origin_date = origin + else: + raise ValueError( + f"origin must be one of {{'epoch', 'start_day', 'start', 'end', 'end_day'}} " + f"or a cftime.datetime object. Got {origin}." 
+ ) + + if offset is not None: + origin_date = origin_date + offset + + foffset = (first - origin_date) % freq.as_timedelta() + loffset = (last - origin_date) % freq.as_timedelta() - base = base % offset.n - start_day = normalize_date(first) - base_td = type(offset)(n=base).as_timedelta() - start_day += base_td - foffset = exact_cftime_datetime_difference(start_day, first) % offset.as_timedelta() - loffset = exact_cftime_datetime_difference(start_day, last) % offset.as_timedelta() if closed == "right": if foffset.total_seconds() > 0: fresult = first - foffset else: - fresult = first - offset.as_timedelta() + fresult = first - freq.as_timedelta() if loffset.total_seconds() > 0: - lresult = last + (offset.as_timedelta() - loffset) + lresult = last + (freq.as_timedelta() - loffset) else: lresult = last else: - fresult = first - foffset if foffset.total_seconds() > 0 else first + if foffset.total_seconds() > 0: + fresult = first - foffset + else: + fresult = first + if loffset.total_seconds() > 0: - lresult = last + (offset.as_timedelta() - loffset) + lresult = last + (freq.as_timedelta() - loffset) else: - lresult = last + offset.as_timedelta() + lresult = last + freq return fresult, lresult @@ -360,3 +457,17 @@ def exact_cftime_datetime_difference(a, b): seconds = int(round(seconds.total_seconds())) microseconds = b.microsecond - a.microsecond return datetime.timedelta(seconds=seconds, microseconds=microseconds) + + +def _convert_offset_to_timedelta(offset): + if isinstance(offset, datetime.timedelta): + return offset + elif isinstance(offset, str): + return to_offset(offset).as_timedelta() + else: + raise ValueError + + +def _ceil_via_cftimeindex(date, freq): + index = CFTimeIndex([date]) + return index.ceil(freq).item() diff --git a/xarray/core/types.py b/xarray/core/types.py index 2b65f4d23e6..5e88c96ecda 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -1,5 +1,6 @@ from __future__ import annotations +import datetime from typing import ( 
TYPE_CHECKING, Any, @@ -15,6 +16,7 @@ ) import numpy as np +import pandas as pd from packaging.version import Version if TYPE_CHECKING: @@ -126,10 +128,12 @@ def dtype(self) -> np.dtype: InterpolantOptions = Literal["barycentric", "krog", "pchip", "spline", "akima"] InterpOptions = Union[Interp1dOptions, InterpolantOptions] +DatetimeLike = Union[pd.Timestamp, datetime.datetime, np.datetime64] DatetimeUnitOptions = Literal[ "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", None ] + QueryEngineOptions = Literal["python", "numexpr", None] QueryParserOptions = Literal["pandas", "python"] diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 35447a39f3c..4a29c616626 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -9,7 +9,7 @@ import xarray as xr from xarray.core.resample_cftime import CFTimeGrouper -pytest.importorskip("cftime") +cftime = pytest.importorskip("cftime") # Create a list of pairs of similar-length initial and resample frequencies @@ -59,14 +59,26 @@ def da(index): @pytest.mark.parametrize("freqs", FREQS, ids=lambda x: "{}->{}".format(*x)) @pytest.mark.parametrize("closed", [None, "left", "right"]) @pytest.mark.parametrize("label", [None, "left", "right"]) -@pytest.mark.parametrize("base", [24, 31]) -def test_resample(freqs, closed, label, base) -> None: +@pytest.mark.parametrize( + ("base", "offset"), [(24, None), (31, None), (None, "5S")], ids=lambda x: f"{x}" +) +@pytest.mark.parametrize( + "origin", ["start_day", "start", "end", "end_day", "epoch", (2000, 1, 1, 3)] +) +def test_resample(freqs, closed, label, base, offset, origin) -> None: initial_freq, resample_freq = freqs start = "2000-01-01T12:07:01" index_kwargs = dict(start=start, periods=5, freq=initial_freq) datetime_index = pd.date_range(**index_kwargs) cftime_index = xr.cftime_range(**index_kwargs) + if isinstance(origin, tuple): + origin_pandas = 
pd.Timestamp(datetime.datetime(*origin)) + origin_cftime = cftime.DatetimeGregorian(*origin) + else: + origin_pandas = origin + origin_cftime = origin + loffset = "12H" try: da_datetime = ( @@ -77,6 +89,8 @@ def test_resample(freqs, closed, label, base) -> None: label=label, base=base, loffset=loffset, + origin=origin_pandas, + offset=offset, ) .mean() ) @@ -88,6 +102,8 @@ def test_resample(freqs, closed, label, base) -> None: label=label, base=base, loffset=loffset, + origin=origin_cftime, + offset=offset, ).mean() else: da_cftime = ( @@ -98,6 +114,8 @@ def test_resample(freqs, closed, label, base) -> None: label=label, base=base, loffset=loffset, + origin=origin_cftime, + offset=offset, ) .mean() ) @@ -153,3 +171,30 @@ def test_calendars(calendar) -> None: # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass da_cftime["time"] = da_cftime.xindexes["time"].to_pandas_index().to_datetimeindex() xr.testing.assert_identical(da_cftime, da_datetime) + + +def test_base_and_offset_error(): + cftime_index = xr.cftime_range("2000", periods=5) + da_cftime = da(cftime_index) + with pytest.raises(ValueError, match="base and offset cannot"): + da_cftime.resample(time="2D", base=3, offset="5S") + + +@pytest.mark.parametrize("offset", ["foo", "5MS", 10]) +def test_invalid_offset_error(offset): + cftime_index = xr.cftime_range("2000", periods=5) + da_cftime = da(cftime_index) + with pytest.raises(ValueError, match="offset must be"): + da_cftime.resample(time="2D", offset=offset) + + +def test_timedelta_offset(): + timedelta = datetime.timedelta(seconds=5) + string = "5S" + + cftime_index = xr.cftime_range("2000", periods=5) + da_cftime = da(cftime_index) + + timedelta_result = da_cftime.resample(time="2D", offset=timedelta).mean() + string_result = da_cftime.resample(time="2D", offset=string).mean() + xr.testing.assert_identical(timedelta_result, string_result) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 
d647c82a76b..e22decc58f2 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1810,6 +1810,33 @@ def test_upsample_interpolate_dask(self, chunked_time): # done here due to floating point arithmetic assert_allclose(expected, actual, rtol=1e-16) + def test_resample_base(self): + times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + base = 11 + actual = array.resample(time="24H", base=base).mean() + expected = DataArray(array.to_series().resample("24H", base=base).mean()) + assert_identical(expected, actual) + + def test_resample_offset(self): + times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + offset = pd.Timedelta("11H") + actual = array.resample(time="24H", offset=offset).mean() + expected = DataArray(array.to_series().resample("24H", offset=offset).mean()) + assert_identical(expected, actual) + + def test_resample_origin(self): + times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + origin = "start" + actual = array.resample(time="24H", origin=origin).mean() + expected = DataArray(array.to_series().resample("24H", origin=origin).mean()) + assert_identical(expected, actual) + class TestDatasetResample: def test_resample_and_first(self): From 0339084343a1c3901916f11fd98e09ee0878dbbc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 13 Nov 2022 15:24:11 +0000 Subject: [PATCH 02/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/resample_cftime.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index a9916a76348..e58abc2cebc 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -54,7 +54,6 @@ ) 
from ..coding.cftimeindex import CFTimeIndex - try: import cftime except ImportError: From ffeb7a8c8c45cdf8c5d3656212d48ec304a64ccf Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Sun, 13 Nov 2022 11:31:50 -0500 Subject: [PATCH 03/18] Fix _convert_offset_to_timedelta --- xarray/core/resample_cftime.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index a9916a76348..f17d73096ac 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -54,7 +54,6 @@ ) from ..coding.cftimeindex import CFTimeIndex - try: import cftime except ImportError: @@ -462,7 +461,7 @@ def exact_cftime_datetime_difference(a, b): def _convert_offset_to_timedelta(offset): if isinstance(offset, datetime.timedelta): return offset - elif isinstance(offset, str): + elif isinstance(offset, (str, *CFTIME_TICKS)): return to_offset(offset).as_timedelta() else: raise ValueError From 9590458011ddf5fbc9d11fc1b128cb3e6c54424e Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Wed, 23 Nov 2022 09:42:13 -0500 Subject: [PATCH 04/18] Reduce number of tests --- xarray/tests/test_cftimeindex_resample.py | 153 ++++++++++++++-------- 1 file changed, 97 insertions(+), 56 deletions(-) diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 4a29c616626..cf1270482fb 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -50,28 +50,17 @@ ] -def da(index): - return xr.DataArray( - np.arange(100.0, 100.0 + index.size), coords=[index], dims=["time"] - ) - - -@pytest.mark.parametrize("freqs", FREQS, ids=lambda x: "{}->{}".format(*x)) -@pytest.mark.parametrize("closed", [None, "left", "right"]) -@pytest.mark.parametrize("label", [None, "left", "right"]) -@pytest.mark.parametrize( - ("base", "offset"), [(24, None), (31, None), (None, "5S")], ids=lambda x: f"{x}" -) -@pytest.mark.parametrize( - "origin", ["start_day", 
"start", "end", "end_day", "epoch", (2000, 1, 1, 3)] -) -def test_resample(freqs, closed, label, base, offset, origin) -> None: - initial_freq, resample_freq = freqs - start = "2000-01-01T12:07:01" - index_kwargs = dict(start=start, periods=5, freq=initial_freq) - datetime_index = pd.date_range(**index_kwargs) - cftime_index = xr.cftime_range(**index_kwargs) - +def compare_against_pandas( + da_datetimeindex, + da_cftimeindex, + freq, + closed=None, + label=None, + base=None, + offset=None, + origin=None, + loffset=None, +): if isinstance(origin, tuple): origin_pandas = pd.Timestamp(datetime.datetime(*origin)) origin_cftime = cftime.DatetimeGregorian(*origin) @@ -79,25 +68,20 @@ def test_resample(freqs, closed, label, base, offset, origin) -> None: origin_pandas = origin origin_cftime = origin - loffset = "12H" try: - da_datetime = ( - da(datetime_index) - .resample( - time=resample_freq, - closed=closed, - label=label, - base=base, - loffset=loffset, - origin=origin_pandas, - offset=offset, - ) - .mean() - ) + result_datetimeindex = da_datetimeindex.resample( + time=freq, + closed=closed, + label=label, + base=base, + loffset=loffset, + offset=offset, + origin=origin_pandas, + ).mean() except ValueError: with pytest.raises(ValueError): - da(cftime_index).resample( - time=resample_freq, + da_cftimeindex.resample( + time=freq, closed=closed, label=label, base=base, @@ -106,24 +90,56 @@ def test_resample(freqs, closed, label, base, offset, origin) -> None: offset=offset, ).mean() else: - da_cftime = ( - da(cftime_index) - .resample( - time=resample_freq, - closed=closed, - label=label, - base=base, - loffset=loffset, - origin=origin_cftime, - offset=offset, - ) - .mean() - ) - # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass - da_cftime["time"] = ( - da_cftime.xindexes["time"].to_pandas_index().to_datetimeindex() - ) - xr.testing.assert_identical(da_cftime, da_datetime) + result_cftimeindex = da_cftimeindex.resample( + time=freq, 
+ closed=closed, + label=label, + base=base, + loffset=loffset, + origin=origin_cftime, + offset=offset, + ).mean() + # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass + result_cftimeindex["time"] = ( + result_cftimeindex.xindexes["time"].to_pandas_index().to_datetimeindex() + ) + xr.testing.assert_identical(result_cftimeindex, result_datetimeindex) + + +def da(index): + return xr.DataArray( + np.arange(100.0, 100.0 + index.size), coords=[index], dims=["time"] + ) + + +@pytest.mark.parametrize("freqs", FREQS, ids=lambda x: "{}->{}".format(*x)) +@pytest.mark.parametrize("closed", [None, "left", "right"]) +@pytest.mark.parametrize("label", [None, "left", "right"]) +@pytest.mark.parametrize( + ("base", "offset"), [(24, None), (31, None), (None, "5S")], ids=lambda x: f"{x}" +) +def test_resample(freqs, closed, label, base, offset, origin) -> None: + initial_freq, resample_freq = freqs + start = "2000-01-01T12:07:01" + loffset = "12H" + origin = "start" + index_kwargs = dict(start=start, periods=5, freq=initial_freq) + datetime_index = pd.date_range(**index_kwargs) + cftime_index = xr.cftime_range(**index_kwargs) + da_datetimeindex = da(datetime_index) + da_cftimeindex = da(cftime_index) + + compare_against_pandas( + da_datetimeindex, + da_cftimeindex, + resample_freq, + closed=closed, + label=label, + base=base, + offset=offset, + origin=origin, + loffset=loffset, + ) @pytest.mark.parametrize( @@ -173,6 +189,31 @@ def test_calendars(calendar) -> None: xr.testing.assert_identical(da_cftime, da_datetime) +@pytest.mark.parametrize("closed", ["left", "right"]) +@pytest.mark.parametrize( + "origin", + ["start_day", "start", "end", "end_day", "epoch", (1970, 1, 1, 3, 2)], + ids=lambda x: f"{x}", +) +def test_origin(closed, origin): + initial_freq, resample_freq = ("3H", "9H") + start = "1969-12-31T12:07:01" + index_kwargs = dict(start=start, periods=12, freq=initial_freq) + datetime_index = pd.date_range(**index_kwargs) + cftime_index = 
xr.cftime_range(**index_kwargs) + da_datetimeindex = da(datetime_index) + da_cftimeindex = da(cftime_index) + + compare_against_pandas( + da_datetimeindex, + da_cftimeindex, + resample_freq, + closed=closed, + origin=origin, + ) + assert 1 == 0 + + def test_base_and_offset_error(): cftime_index = xr.cftime_range("2000", periods=5) da_cftime = da(cftime_index) From 6db613e06cf3b8062c18ccdc9464e63f8f4056cc Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Wed, 23 Nov 2022 09:50:46 -0500 Subject: [PATCH 05/18] Address initial review comments --- xarray/core/common.py | 8 +++++++- xarray/core/resample_cftime.py | 4 ++-- xarray/tests/test_cftimeindex_resample.py | 10 +++++----- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index 75927cad160..45a2094751a 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -981,7 +981,13 @@ def _resample( from .resample_cftime import CFTimeGrouper grouper = CFTimeGrouper( - freq, closed, label, base, loffset, origin, offset + freq=freq, + closed=closed, + label=label, + base=base, + loffset=loffset, + origin=origin, + offset=offset, ) else: grouper = pd.Grouper( diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index f17d73096ac..82c81da6cfc 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -115,11 +115,11 @@ def __init__( if self.offset is not None: try: self.offset = _convert_offset_to_timedelta(self.offset) - except (ValueError, AttributeError): + except (ValueError, AttributeError) as error: raise ValueError( f"offset must be a datetime.timedelta object or an offset string " f"that can be converted to a timedelta. Got {offset} instead." 
- ) + ) from error def first_items(self, index): """Meant to reproduce the results of the following diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index cf1270482fb..d7ee4e9c712 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -60,7 +60,7 @@ def compare_against_pandas( offset=None, origin=None, loffset=None, -): +) -> None: if isinstance(origin, tuple): origin_pandas = pd.Timestamp(datetime.datetime(*origin)) origin_cftime = cftime.DatetimeGregorian(*origin) @@ -106,7 +106,7 @@ def compare_against_pandas( xr.testing.assert_identical(result_cftimeindex, result_datetimeindex) -def da(index): +def da(index) -> xr.DataArray: return xr.DataArray( np.arange(100.0, 100.0 + index.size), coords=[index], dims=["time"] ) @@ -195,7 +195,7 @@ def test_calendars(calendar) -> None: ["start_day", "start", "end", "end_day", "epoch", (1970, 1, 1, 3, 2)], ids=lambda x: f"{x}", ) -def test_origin(closed, origin): +def test_origin(closed, origin) -> None: initial_freq, resample_freq = ("3H", "9H") start = "1969-12-31T12:07:01" index_kwargs = dict(start=start, periods=12, freq=initial_freq) @@ -222,14 +222,14 @@ def test_base_and_offset_error(): @pytest.mark.parametrize("offset", ["foo", "5MS", 10]) -def test_invalid_offset_error(offset): +def test_invalid_offset_error(offset) -> None: cftime_index = xr.cftime_range("2000", periods=5) da_cftime = da(cftime_index) with pytest.raises(ValueError, match="offset must be"): da_cftime.resample(time="2D", offset=offset) -def test_timedelta_offset(): +def test_timedelta_offset() -> None: timedelta = datetime.timedelta(seconds=5) string = "5S" From 85eb312eed98b3c2b8e08be329af28eb35469fb1 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Wed, 23 Nov 2022 09:56:19 -0500 Subject: [PATCH 06/18] Add more typing information --- xarray/core/resample_cftime.py | 5 ++++- xarray/tests/test_groupby.py | 6 +++--- 2 files changed, 7 insertions(+), 4 
deletions(-) diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 82c81da6cfc..80f435b25db 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -44,6 +44,7 @@ from ..coding.cftime_offsets import ( CFTIME_TICKS, + BaseCFTimeOffset, Day, MonthEnd, QuarterEnd, @@ -458,7 +459,9 @@ def exact_cftime_datetime_difference(a, b): return datetime.timedelta(seconds=seconds, microseconds=microseconds) -def _convert_offset_to_timedelta(offset): +def _convert_offset_to_timedelta( + offset: datetime.timedelta | str | BaseCFTimeOffset, +) -> datetime.timedelta: if isinstance(offset, datetime.timedelta): return offset elif isinstance(offset, (str, *CFTIME_TICKS)): diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index e22decc58f2..063dc22e633 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1810,7 +1810,7 @@ def test_upsample_interpolate_dask(self, chunked_time): # done here due to floating point arithmetic assert_allclose(expected, actual, rtol=1e-16) - def test_resample_base(self): + def test_resample_base(self) -> None: times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) array = DataArray(np.arange(10), [("time", times)]) @@ -1819,7 +1819,7 @@ def test_resample_base(self): expected = DataArray(array.to_series().resample("24H", base=base).mean()) assert_identical(expected, actual) - def test_resample_offset(self): + def test_resample_offset(self) -> None: times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) array = DataArray(np.arange(10), [("time", times)]) @@ -1828,7 +1828,7 @@ def test_resample_offset(self): expected = DataArray(array.to_series().resample("24H", offset=offset).mean()) assert_identical(expected, actual) - def test_resample_origin(self): + def test_resample_origin(self) -> None: times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) array = DataArray(np.arange(10), [("time", times)]) From 
04b163300ea78d7b9e1fc80e106b0a27d8f26890 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Wed, 23 Nov 2022 10:02:53 -0500 Subject: [PATCH 07/18] Make cftime import lazy --- xarray/core/resample_cftime.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 80f435b25db..4645792dc8f 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -41,6 +41,7 @@ import numpy as np import pandas as pd +from core.utils import module_available from ..coding.cftime_offsets import ( CFTIME_TICKS, @@ -55,11 +56,6 @@ ) from ..coding.cftimeindex import CFTimeIndex -try: - import cftime -except ImportError: - cftime = None - class CFTimeGrouper: """This is a simple container for the grouping parameters that implements a @@ -366,7 +362,9 @@ def _adjust_dates_anchored( A datetime object representing the end of a date range that has been adjusted to fix resampling errors. """ - if cftime is None: + if module_available("cftime"): + import cftime + else: raise ModuleNotFoundError("No module named 'cftime'") if origin == "start_day": From 23b3fb6a93732ca5cb6b5ae3423943540f9f4eb7 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Wed, 23 Nov 2022 10:05:56 -0500 Subject: [PATCH 08/18] Fix module_available import and test --- xarray/core/resample_cftime.py | 2 +- xarray/tests/test_cftimeindex_resample.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 4645792dc8f..43bd716ce05 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -41,7 +41,6 @@ import numpy as np import pandas as pd -from core.utils import module_available from ..coding.cftime_offsets import ( CFTIME_TICKS, @@ -55,6 +54,7 @@ to_offset, ) from ..coding.cftimeindex import CFTimeIndex +from .utils import module_available class CFTimeGrouper: diff --git 
a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index d7ee4e9c712..25fe2738c0e 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -211,7 +211,6 @@ def test_origin(closed, origin) -> None: closed=closed, origin=origin, ) - assert 1 == 0 def test_base_and_offset_error(): From 0ac422faa3c0ff0864224fef4e10809ac3bf5b3c Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Wed, 23 Nov 2022 10:42:18 -0500 Subject: [PATCH 09/18] Remove old origin argument --- xarray/tests/test_cftimeindex_resample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 25fe2738c0e..e780421e09e 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -118,7 +118,7 @@ def da(index) -> xr.DataArray: @pytest.mark.parametrize( ("base", "offset"), [(24, None), (31, None), (None, "5S")], ids=lambda x: f"{x}" ) -def test_resample(freqs, closed, label, base, offset, origin) -> None: +def test_resample(freqs, closed, label, base, offset) -> None: initial_freq, resample_freq = freqs start = "2000-01-01T12:07:01" loffset = "12H" From 4db89cfdbb2edc6712ab469eaf8df44ac528a3d0 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Fri, 25 Nov 2022 16:06:20 -0500 Subject: [PATCH 10/18] Add type annotations for resample_cftime.py --- xarray/core/resample_cftime.py | 60 +++++++++++++++++++++++----------- xarray/core/types.py | 11 ++++++- 2 files changed, 51 insertions(+), 20 deletions(-) diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 43bd716ce05..a1891946a11 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -54,7 +54,7 @@ to_offset, ) from ..coding.cftimeindex import CFTimeIndex -from .utils import module_available +from .types import CFTimeDatetime, SideOptions class CFTimeGrouper: @@ -64,13 +64,13 
@@ class CFTimeGrouper: def __init__( self, - freq, - closed=None, - label=None, - base=None, - loffset=None, - origin="start_day", - offset=None, + freq: str | BaseCFTimeOffset, + closed: SideOptions = None, + label: SideOptions = None, + base: int | None = None, + loffset: str | datetime.timedelta | BaseCFTimeOffset | None = None, + origin: str | CFTimeDatetime = "start_day", + offset: str | datetime.timedelta | None = None, ): if base is not None and offset is not None: raise ValueError("base and offset cannot be provided at the same time") @@ -118,7 +118,7 @@ def __init__( f"that can be converted to a timedelta. Got {offset} instead." ) from error - def first_items(self, index): + def first_items(self, index: CFTimeIndex): """Meant to reproduce the results of the following grouper = pandas.Grouper(...) @@ -151,7 +151,14 @@ def first_items(self, index): return first_items.where(non_duplicate) -def _get_time_bins(index, freq, closed, label, origin, offset): +def _get_time_bins( + index: CFTimeIndex, + freq: BaseCFTimeOffset, + closed: SideOptions, + label: SideOptions, + origin: str | CFTimeDatetime, + offset: str | datetime.timedelta, +): """Obtain the bins and their respective labels for resampling operations. Parameters @@ -220,7 +227,13 @@ def _get_time_bins(index, freq, closed, label, origin, offset): return datetime_bins, labels -def _adjust_bin_edges(datetime_bins, freq, closed, index, labels): +def _adjust_bin_edges( + datetime_bins: np.array, + freq: BaseCFTimeOffset, + closed: SideOptions, + index: CFTimeIndex, + labels: np.array, +): """This is required for determining the bin edges resampling with daily frequencies greater than one day, month end, and year end frequencies. 
@@ -268,7 +281,14 @@ def _adjust_bin_edges(datetime_bins, freq, closed, index, labels): return datetime_bins, labels -def _get_range_edges(first, last, freq, closed="left", origin="start_day", offset=None): +def _get_range_edges( + first: CFTimeDatetime, + last: CFTimeDatetime, + freq, + closed: SideOptions = "left", + origin: str | CFTimeDatetime = "start_day", + offset: str | datetime.timedelta | None = None, +): """Get the correct starting and ending datetimes for the resampled CFTimeIndex range. @@ -321,7 +341,12 @@ def _get_range_edges(first, last, freq, closed="left", origin="start_day", offse def _adjust_dates_anchored( - first, last, freq, closed="right", origin="start_day", offset=None + first: CFTimeDatetime, + last: CFTimeDatetime, + freq: BaseCFTimeOffset, + closed: SideOptions = "right", + origin: str | CFTimeDatetime = "start_day", + offset: str | datetime.timedelta | None = None, ): """First and last offsets should be calculated from the start day to fix an error cause by resampling across multiple days when a one day period is @@ -362,10 +387,7 @@ def _adjust_dates_anchored( A datetime object representing the end of a date range that has been adjusted to fix resampling errors. 
""" - if module_available("cftime"): - import cftime - else: - raise ModuleNotFoundError("No module named 'cftime'") + import cftime if origin == "start_day": origin_date = normalize_date(first) @@ -417,7 +439,7 @@ def _adjust_dates_anchored( return fresult, lresult -def exact_cftime_datetime_difference(a, b): +def exact_cftime_datetime_difference(a: CFTimeDatetime, b: CFTimeDatetime): """Exact computation of b - a Assumes: @@ -468,6 +490,6 @@ def _convert_offset_to_timedelta( raise ValueError -def _ceil_via_cftimeindex(date, freq): +def _ceil_via_cftimeindex(date: CFTimeDatetime, freq: BaseCFTimeOffset): index = CFTimeIndex([date]) return index.ceil(freq).item() diff --git a/xarray/core/types.py b/xarray/core/types.py index 5e88c96ecda..e344229c4d1 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -19,6 +19,8 @@ import pandas as pd from packaging.version import Version +from .utils import module_available + if TYPE_CHECKING: from numpy.typing import ArrayLike @@ -87,6 +89,13 @@ def dtype(self) -> np.dtype: Self: Any = None DTypeLikeSave: Any = None +if module_available("cftime"): + import cftime + + CFTimeDatetime = cftime.datetime +else: + CFTimeDatetime = Any + T_Backend = TypeVar("T_Backend", bound="BackendEntrypoint") T_Dataset = TypeVar("T_Dataset", bound="Dataset") @@ -128,7 +137,7 @@ def dtype(self) -> np.dtype: InterpolantOptions = Literal["barycentric", "krog", "pchip", "spline", "akima"] InterpOptions = Union[Interp1dOptions, InterpolantOptions] -DatetimeLike = Union[pd.Timestamp, datetime.datetime, np.datetime64] +DatetimeLike = Union[pd.Timestamp, datetime.datetime, np.datetime64, CFTimeDatetime] DatetimeUnitOptions = Literal[ "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", None ] From 92c949eecb73f7ed9af4bc80aab091f7c91943b2 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Fri, 25 Nov 2022 16:07:40 -0500 Subject: [PATCH 11/18] Add None as a possibility for closed and label --- xarray/core/resample_cftime.py 
| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index a1891946a11..c6073dc817b 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -65,8 +65,8 @@ class CFTimeGrouper: def __init__( self, freq: str | BaseCFTimeOffset, - closed: SideOptions = None, - label: SideOptions = None, + closed: SideOptions | None = None, + label: SideOptions | None = None, base: int | None = None, loffset: str | datetime.timedelta | BaseCFTimeOffset | None = None, origin: str | CFTimeDatetime = "start_day", From fb724ce6649c99058ec9b095ac161e7c5b2430b7 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Fri, 25 Nov 2022 16:15:56 -0500 Subject: [PATCH 12/18] Add what's new entry --- doc/whats-new.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 97936dff700..b957bde47c0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,7 +22,9 @@ v2022.11.1 (unreleased) New Features ~~~~~~~~~~~~ - +- Enable using `offset` and `origin` arguments in :py:meth:`DataArray.resample` + and :py:meth:`Dataset.resample` (:issue:`7266`, :pull:`6538`). By `Spencer + Clark `_. 
Breaking changes ~~~~~~~~~~~~~~~~ From e2fb20f9553474f1ee4bb4ac2edab2226a7f3a40 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Fri, 25 Nov 2022 16:17:25 -0500 Subject: [PATCH 13/18] Add missing type annotation --- xarray/core/resample_cftime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index c6073dc817b..a377356b8e4 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -284,7 +284,7 @@ def _adjust_bin_edges( def _get_range_edges( first: CFTimeDatetime, last: CFTimeDatetime, - freq, + freq: BaseCFTimeOffset, closed: SideOptions = "left", origin: str | CFTimeDatetime = "start_day", offset: str | datetime.timedelta | None = None, From cd655dfac5a053962e897b65bab8ba3d1355ce72 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Fri, 25 Nov 2022 16:19:41 -0500 Subject: [PATCH 14/18] Delete added line --- xarray/core/types.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/types.py b/xarray/core/types.py index e344229c4d1..424d510d677 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -142,7 +142,6 @@ def dtype(self) -> np.dtype: "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", None ] - QueryEngineOptions = Literal["python", "numexpr", None] QueryParserOptions = Literal["pandas", "python"] From 572799e868752ebe5d2323015a43ce8bf04bf511 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Fri, 25 Nov 2022 18:49:30 -0500 Subject: [PATCH 15/18] Fix typing errors --- xarray/coding/cftime_offsets.py | 3 ++ xarray/core/resample_cftime.py | 78 ++++++++++++++++++++------------- xarray/core/types.py | 19 ++++---- 3 files changed, 60 insertions(+), 40 deletions(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index a029f39c7b8..70d05a845be 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -207,6 +207,9 @@ def __mul__(self, other): return 
new_self * other return type(self)(n=other * self.n) + def as_timedelta(self): + raise NotImplementedError + def _get_day_of_month(other, day_option): """Find the day in `other`'s month that satisfies a BaseCFTimeOffset's diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index a377356b8e4..da21fdd17cf 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -38,23 +38,27 @@ from __future__ import annotations import datetime +import typing import numpy as np import pandas as pd from ..coding.cftime_offsets import ( - CFTIME_TICKS, BaseCFTimeOffset, Day, MonthEnd, QuarterEnd, + Tick, YearEnd, cftime_range, normalize_date, to_offset, ) from ..coding.cftimeindex import CFTimeIndex -from .types import CFTimeDatetime, SideOptions +from .types import SideOptions + +if typing.TYPE_CHECKING: + from .types import CFTimeDatetime class CFTimeGrouper: @@ -72,25 +76,26 @@ def __init__( origin: str | CFTimeDatetime = "start_day", offset: str | datetime.timedelta | None = None, ): + self.offset: datetime.timedelta | None + self.closed: SideOptions + self.label: SideOptions + if base is not None and offset is not None: raise ValueError("base and offset cannot be provided at the same time") self.freq = to_offset(freq) - self.closed = closed - self.label = label self.loffset = loffset self.origin = origin - if base is not None and isinstance(self.freq, CFTIME_TICKS): - self.offset = type(self.freq)(n=base % self.freq.n) - else: - self.offset = offset - if isinstance(self.freq, (MonthEnd, QuarterEnd, YearEnd)): - if self.closed is None: + if closed is None: self.closed = "right" - if self.label is None: + else: + self.closed = closed + if label is None: self.label = "right" + else: + self.label = label else: # The backward resample sets ``closed`` to ``'right'`` by default # since the last value should be considered as the edge point for @@ -99,24 +104,37 @@ def __init__( # from the current ``cftime.datetime`` minus ``freq`` to the 
current # ``cftime.datetime`` with a right close. if self.origin in ["end", "end_day"]: - if self.closed is None: + if closed is None: self.closed = "right" - if self.label is None: + else: + self.closed = closed + if label is None: self.label = "right" + else: + self.label = label else: - if self.closed is None: + if closed is None: self.closed = "left" - if self.label is None: + else: + self.closed = closed + if label is None: self.label = "left" + else: + self.label = label + + if base is not None and isinstance(self.freq, Tick): + offset = type(self.freq)(n=base % self.freq.n).as_timedelta() - if self.offset is not None: + if offset is not None: try: - self.offset = _convert_offset_to_timedelta(self.offset) + self.offset = _convert_offset_to_timedelta(offset) except (ValueError, AttributeError) as error: raise ValueError( f"offset must be a datetime.timedelta object or an offset string " f"that can be converted to a timedelta. Got {offset} instead." ) from error + else: + self.offset = None def first_items(self, index: CFTimeIndex): """Meant to reproduce the results of the following @@ -157,7 +175,7 @@ def _get_time_bins( closed: SideOptions, label: SideOptions, origin: str | CFTimeDatetime, - offset: str | datetime.timedelta, + offset: datetime.timedelta | None, ): """Obtain the bins and their respective labels for resampling operations. @@ -169,11 +187,11 @@ def _get_time_bins( The offset object representing target conversion a.k.a. resampling frequency (e.g., 'MS', '2D', 'H', or '3T' with coding.cftime_offsets.to_offset() applied to it). - closed : 'left' or 'right', optional + closed : 'left' or 'right' Which side of bin interval is closed. The default is 'left' for all frequency offsets except for 'M' and 'A', which have a default of 'right'. - label : 'left' or 'right', optional + label : 'left' or 'right' Which bin edge label to label bucket with. The default is 'left' for all frequency offsets except for 'M' and 'A', which have a default of 'right'. 
@@ -228,11 +246,11 @@ def _get_time_bins( def _adjust_bin_edges( - datetime_bins: np.array, + datetime_bins: np.ndarray, freq: BaseCFTimeOffset, closed: SideOptions, index: CFTimeIndex, - labels: np.array, + labels: np.ndarray, ): """This is required for determining the bin edges resampling with daily frequencies greater than one day, month end, and year end @@ -287,7 +305,7 @@ def _get_range_edges( freq: BaseCFTimeOffset, closed: SideOptions = "left", origin: str | CFTimeDatetime = "start_day", - offset: str | datetime.timedelta | None = None, + offset: datetime.timedelta | None = None, ): """Get the correct starting and ending datetimes for the resampled CFTimeIndex range. @@ -304,7 +322,7 @@ def _get_range_edges( The offset object representing target conversion a.k.a. resampling frequency. Contains information on offset type (e.g. Day or 'D') and offset magnitude (e.g., n = 3). - closed : 'left' or 'right', optional + closed : 'left' or 'right' Which side of bin interval is closed. Defaults to 'left'. origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day' The datetime on which to adjust the grouping. The timezone of origin @@ -326,7 +344,7 @@ def _get_range_edges( last : cftime.datetime Corrected ending datetime object for resampled CFTimeIndex range. 
""" - if isinstance(freq, CFTIME_TICKS): + if isinstance(freq, Tick): first, last = _adjust_dates_anchored( first, last, freq, closed=closed, origin=origin, offset=offset ) @@ -343,10 +361,10 @@ def _get_range_edges( def _adjust_dates_anchored( first: CFTimeDatetime, last: CFTimeDatetime, - freq: BaseCFTimeOffset, + freq: Tick, closed: SideOptions = "right", origin: str | CFTimeDatetime = "start_day", - offset: str | datetime.timedelta | None = None, + offset: datetime.timedelta | None = None, ): """First and last offsets should be calculated from the start day to fix an error cause by resampling across multiple days when a one day period is @@ -363,7 +381,7 @@ def _adjust_dates_anchored( The offset object representing target conversion a.k.a. resampling frequency. Contains information on offset type (e.g. Day or 'D') and offset magnitude (e.g., n = 3). - closed : 'left' or 'right', optional + closed : 'left' or 'right' Which side of bin interval is closed. Defaults to 'right'. origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day' The datetime on which to adjust the grouping. 
The timezone of origin @@ -484,12 +502,12 @@ def _convert_offset_to_timedelta( ) -> datetime.timedelta: if isinstance(offset, datetime.timedelta): return offset - elif isinstance(offset, (str, *CFTIME_TICKS)): + elif isinstance(offset, (str, Tick)): return to_offset(offset).as_timedelta() else: raise ValueError -def _ceil_via_cftimeindex(date: CFTimeDatetime, freq: BaseCFTimeOffset): +def _ceil_via_cftimeindex(date: CFTimeDatetime, freq: str | BaseCFTimeOffset): index = CFTimeIndex([date]) return index.ceil(freq).item() diff --git a/xarray/core/types.py b/xarray/core/types.py index 424d510d677..b18c8d7ba04 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -19,8 +19,6 @@ import pandas as pd from packaging.version import Version -from .utils import module_available - if TYPE_CHECKING: from numpy.typing import ArrayLike @@ -84,17 +82,19 @@ def dtype(self) -> np.dtype: # anything with a dtype attribute _SupportsDType, ] - + try: + from cftime import datetime as CFTimeDatetime + except ImportError: + CFTimeDatetime = Any + DatetimeLike = Union[pd.Timestamp, datetime.datetime, np.datetime64, CFTimeDatetime] else: Self: Any = None DTypeLikeSave: Any = None -if module_available("cftime"): - import cftime - - CFTimeDatetime = cftime.datetime -else: - CFTimeDatetime = Any +# if module_available("cftime"): +# import cftime.datetime as CFTimeDatetime +# else: +# CFTimeDatetime = Any T_Backend = TypeVar("T_Backend", bound="BackendEntrypoint") @@ -137,7 +137,6 @@ def dtype(self) -> np.dtype: InterpolantOptions = Literal["barycentric", "krog", "pchip", "spline", "akima"] InterpOptions = Union[Interp1dOptions, InterpolantOptions] -DatetimeLike = Union[pd.Timestamp, datetime.datetime, np.datetime64, CFTimeDatetime] DatetimeUnitOptions = Literal[ "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", None ] From 3e74c4ad3d67d1a6e573029f99e4bf0b35137b06 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Fri, 25 Nov 2022 18:52:28 -0500 Subject: 
[PATCH 16/18] Add comment and test for as_timedelta stub --- xarray/coding/cftime_offsets.py | 1 + xarray/tests/test_cftime_offsets.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 70d05a845be..04b2d773e2e 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -208,6 +208,7 @@ def __mul__(self, other): return type(self)(n=other * self.n) def as_timedelta(self): + """All Tick subclasses must implement an as_timedelta method.""" raise NotImplementedError diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 075393e84e7..d28f4594559 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1385,3 +1385,9 @@ def test_date_range_like_errors(): match="'source' must be a 1D array of datetime objects for inferring its range.", ): date_range_like(da, "noleap") + + +def test_as_timedelta_not_implemented_error(): + tick = Tick() + with pytest.raises(NotImplementedError): + tick.as_timedelta() From cdc59c30adb08ba78bf64b6cd843e415bdcd3b28 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Fri, 25 Nov 2022 18:54:30 -0500 Subject: [PATCH 17/18] Remove old code --- xarray/core/types.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/xarray/core/types.py b/xarray/core/types.py index b18c8d7ba04..be3e34bf521 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -91,11 +91,6 @@ def dtype(self) -> np.dtype: Self: Any = None DTypeLikeSave: Any = None -# if module_available("cftime"): -# import cftime.datetime as CFTimeDatetime -# else: -# CFTimeDatetime = Any - T_Backend = TypeVar("T_Backend", bound="BackendEntrypoint") T_Dataset = TypeVar("T_Dataset", bound="Dataset") From 4dbf69482e3f647e7e64f8c17a816ce3970c279e Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 28 Nov 2022 16:17:10 -0700 Subject: [PATCH 18/18] [test-upstream]