Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

xr.infer_freq #4033

Merged
merged 20 commits into from
May 30, 2020
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Top-level functions
combine_nested
where
set_options
infer_freq
full_like
zeros_like
ones_like
Expand Down
9 changes: 9 additions & 0 deletions doc/weather-climate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,15 @@ instance, we can create the same dates and DataArray we created above using:
dates = xr.cftime_range(start="0001", periods=24, freq="MS", calendar="noleap")
da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo")

Mirroring the pandas function of the same name, :py:func:`~xarray.infer_freq` can be used to
aulemahal marked this conversation as resolved.
Show resolved Hide resolved
infer the sampling frequency of a :py:class:`~xarray.CFTimeIndex` or a 1-D
:py:class:`~xarray.DataArray` containing cftime objects. It also works transparently with
aulemahal marked this conversation as resolved.
Show resolved Hide resolved
``np.datetime64[ns]`` and ``np.timedelta64[ns]`` data.

.. ipython:: python

xr.infer_freq(dates)

With :py:meth:`~xarray.CFTimeIndex.strftime` we can also easily generate formatted strings from
the datetime values of a :py:class:`~xarray.CFTimeIndex` directly or through the
:py:meth:`~xarray.DataArray.dt` accessor for a :py:class:`~xarray.DataArray`
Expand Down
3 changes: 2 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ Enhancements

New Features
~~~~~~~~~~~~

- Added :py:func:`xarray.infer_freq`, which extends frequency inference to CFTime indexes and data.
aulemahal marked this conversation as resolved.
Show resolved Hide resolved
By `Pascal Bourgault <https://github.com/aulemahal>`_.
- ``chunks='auto'`` is now supported in the ``chunks`` argument of
:py:meth:`Dataset.chunk`. (:issue:`4055`)
By `Andrew Williams <https://github.com/AndrewWilliams3142>`_
Expand Down
2 changes: 2 additions & 0 deletions xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .backends.zarr import open_zarr
from .coding.cftime_offsets import cftime_range
from .coding.cftimeindex import CFTimeIndex
from .coding.frequencies import infer_freq
from .conventions import SerializationWarning, decode_cf
from .core.alignment import align, broadcast
from .core.combine import auto_combine, combine_by_coords, combine_nested
Expand Down Expand Up @@ -57,6 +58,7 @@
"cov",
"corr",
"full_like",
"infer_freq",
"load_dataarray",
"load_dataset",
"map_blocks",
Expand Down
3 changes: 2 additions & 1 deletion xarray/coding/cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,8 @@ def asi8(self):
[
_total_microseconds(exact_cftime_datetime_difference(epoch, date))
for date in self.values
]
],
dtype=np.int64,
)

def _round_via_method(self, freq, method):
Expand Down
272 changes: 272 additions & 0 deletions xarray/coding/frequencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
"""FrequencyInferer analog for cftime.datetime objects"""
# The infer_freq function and the _CFTimeFrequencyInferer
# class defined here were copied and adapted for
# use with cftime.datetime objects based on the source code in
# pandas.tseries.frequencies._FrequencyInferer

# For reference, here is a copy of the pandas copyright notice:

# (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team
# All rights reserved.

# Copyright (c) 2008-2011 AQR Capital Management, LLC
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:

# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.

# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.

# * Neither the name of the copyright holder nor the names of any
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
aulemahal marked this conversation as resolved.
Show resolved Hide resolved
import pandas as pd

from ..core.common import _contains_datetime_like_objects
from .cftime_offsets import _MONTH_ABBREVIATIONS
from .cftimeindex import CFTimeIndex

_ONE_MICRO = 1
_ONE_MILLI = _ONE_MICRO * 1000
_ONE_SECOND = _ONE_MILLI * 1000
_ONE_MINUTE = 60 * _ONE_SECOND
_ONE_HOUR = 60 * _ONE_MINUTE
_ONE_DAY = 24 * _ONE_HOUR


def infer_freq(index):
    """
    Infer the most likely frequency given the input index.

    Parameters
    ----------
    index : CFTimeIndex, DataArray, pd.DatetimeIndex, pd.TimedeltaIndex, pd.Series
        A CFTimeIndex is handled by the cftime-aware inferer defined in this
        module; any other index is handed to `pandas.infer_freq` unchanged.
        For a DataArray or a Series, the *values* are used (NOT THE INDEX):
        they are wrapped in a DatetimeIndex, a TimedeltaIndex or a
        CFTimeIndex depending on their dtype.

    Returns
    -------
    str or None
        None if no discernible frequency.

    Raises
    ------
    ValueError
        If a DataArray or Series is not 1D or does not contain datetime-like
        objects, or if a CFTimeIndex has fewer than three values.
    TypeError
        Propagated from `pandas.infer_freq` when it is handed an index that
        is not datetime-like.
    """
    from xarray.core.dataarray import DataArray

    if isinstance(index, (DataArray, pd.Series)):
        if index.ndim != 1:
            raise ValueError("'index' must be 1D")
        if not _contains_datetime_like_objects(DataArray(index)):
            raise ValueError("'index' must contain datetime-like objects")

        # Choose the index flavour matching the dtype of the values; anything
        # that is neither datetime64[ns] nor timedelta64[ns] is treated as
        # cftime data.
        dtype = np.asarray(index).dtype
        if dtype == "datetime64[ns]":
            index_type = pd.DatetimeIndex
        elif dtype == "timedelta64[ns]":
            index_type = pd.TimedeltaIndex
        else:
            index_type = CFTimeIndex
        index = index_type(index.values)

    if not isinstance(index, CFTimeIndex):
        return pd.infer_freq(index)

    return _CFTimeFrequencyInferer(index).get_freq()


class _CFTimeFrequencyInferer: # (pd.tseries.frequencies._FrequencyInferer):
def __init__(self, index):
self.index = index
self.values = index.asi8

if len(index) < 3:
raise ValueError("Need at least 3 dates to infer frequency")

self.is_monotonic = (
self.index.is_monotonic_decreasing or self.index.is_monotonic_increasing
)

self._deltas = None
self._year_deltas = None
self._month_deltas = None

def get_freq(self):
"""Find the appropriate frequency string to describe the inferred frequency of self.index

Adapted from `pandas.tsseries.frequencies._FrequencyInferer.get_freq` for CFTimeIndexes.

Returns
-------
str or None
"""
if not self.is_monotonic or not self.index.is_unique:
return None

delta = self.deltas[0] # Smallest delta
if _is_multiple(delta, _ONE_DAY):
return self._infer_daily_rule()
# There is no possible intraday frequency with a non-unique delta
# Different from pandas: we don't need to manage DST and business offsets in cftime
elif not len(self.deltas) == 1:
spencerkclark marked this conversation as resolved.
Show resolved Hide resolved
return None

if _is_multiple(delta, _ONE_HOUR):
return _maybe_add_count("H", delta / _ONE_HOUR)
elif _is_multiple(delta, _ONE_MINUTE):
return _maybe_add_count("T", delta / _ONE_MINUTE)
elif _is_multiple(delta, _ONE_SECOND):
return _maybe_add_count("S", delta / _ONE_SECOND)
elif _is_multiple(delta, _ONE_MILLI):
return _maybe_add_count("L", delta / _ONE_MILLI)
else:
return _maybe_add_count("U", delta / _ONE_MICRO)

def _infer_daily_rule(self):
annual_rule = self._get_annual_rule()
if annual_rule:
nyears = self.year_deltas[0]
month = _MONTH_ABBREVIATIONS[self.index[0].month]
alias = f"{annual_rule}-{month}"
return _maybe_add_count(alias, nyears)

quartely_rule = self._get_quartely_rule()
if quartely_rule:
nquarters = self.month_deltas[0] / 3
mod_dict = {0: 12, 2: 11, 1: 10}
month = _MONTH_ABBREVIATIONS[mod_dict[self.index[0].month % 3]]
alias = f"{quartely_rule}-{month}"
return _maybe_add_count(alias, nquarters)

monthly_rule = self._get_monthly_rule()
if monthly_rule:
return _maybe_add_count(monthly_rule, self.month_deltas[0])

if len(self.deltas) == 1:
# Daily as there is no "Weekly" offsets with CFTime
days = self.deltas[0] / _ONE_DAY
return _maybe_add_count("D", days)

# CFTime has no business freq and no "week of month" (WOM)
return None

def _get_annual_rule(self):
if len(self.year_deltas) > 1:
return None

if len(np.unique(self.index.month)) > 1:
return None

return {"cs": "AS", "ce": "A"}.get(month_anchor_check(self.index))

def _get_quartely_rule(self):
if len(self.month_deltas) > 1:
return None

if not self.month_deltas[0] % 3 == 0:
return None

return {"cs": "QS", "ce": "Q"}.get(month_anchor_check(self.index))

def _get_monthly_rule(self):
if len(self.month_deltas) > 1:
return None

return {"cs": "MS", "ce": "M"}.get(month_anchor_check(self.index))

@property
def deltas(self):
"""Sorted unique timedeltas as microseconds."""
if self._deltas is None:
self._deltas = _unique_deltas(self.values)
return self._deltas

@property
def year_deltas(self):
"""Sorted unique year deltas."""
if self._year_deltas is None:
self._year_deltas = _unique_deltas(self.index.year)
return self._year_deltas

@property
def month_deltas(self):
"""Sorted unique month deltas."""
if self._month_deltas is None:
self._month_deltas = _unique_deltas(self.index.year * 12 + self.index.month)
return self._month_deltas


def _unique_deltas(arr):
"""Sorted unique deltas of numpy array"""
return np.sort(np.unique(np.diff(arr)))


def _is_multiple(us, mult: int):
"""Whether us is a multiple of mult"""
return us % mult == 0


def _maybe_add_count(base: str, count: float):
"""If count is greater than 1, add it to the base offset string"""
if count != 1:
assert count == int(count)
count = int(count)
return f"{count}{base}"
else:
return base


def month_anchor_check(dates):
    """Return the monthly offset string.

    Return "cs" if all dates are the first days of the month,
    "ce" if all dates are the last day of the month,
    None otherwise.

    "ce" takes precedence when both conditions hold, which includes an
    empty ``dates`` (no date contradicts either anchor).

    Replicated pandas._libs.tslibs.resolution.month_position_check
    but without business offset handling.

    Parameters
    ----------
    dates : iterable
        Objects exposing ``day`` and ``daysinmonth`` attributes.
    """
    calendar_start = True
    calendar_end = True

    for date in dates:
        # Short-circuiting keeps a flag False once it has been falsified and
        # skips the attribute comparison from then on.
        calendar_start = calendar_start and date.day == 1
        calendar_end = calendar_end and date.day == date.daysinmonth

        # Neither anchor can hold any more: no need to look further.
        if not (calendar_start or calendar_end):
            return None

    if calendar_end:
        return "ce"
    if calendar_start:
        return "cs"
    return None
57 changes: 57 additions & 0 deletions xarray/tests/test_cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -1046,3 +1046,60 @@ def test_asi8_distant_date():
result = index.asi8
expected = np.array([1000000 * 86400 * 400 * 8000 + 12345 * 1000000 + 123456])
np.testing.assert_array_equal(result, expected)

aulemahal marked this conversation as resolved.
Show resolved Hide resolved

@requires_cftime_1_1_0
def test_infer_freq_valid_types():
    """Every supported index flavour is accepted bare and wrapped in a DataArray."""
    daily_indexes = [
        xr.cftime_range("2000-01-01", periods=3, freq="D"),
        pd.date_range("2000-01-01", periods=3, freq="D"),
        pd.timedelta_range(start="1D", periods=3, freq="D"),
    ]
    for daily in daily_indexes:
        assert xr.infer_freq(daily) == "D"
        assert xr.infer_freq(xr.DataArray(daily)) == "D"


@requires_cftime_1_1_0
def test_infer_freq_invalid_inputs():
    """Unsupported inputs are rejected with a descriptive ValueError."""
    # DataArray whose values are not datetime-like
    not_dates = xr.DataArray([0, 1, 2])
    with pytest.raises(ValueError, match="must contain datetime-like objects"):
        xr.infer_freq(not_dates)

    monthly = xr.cftime_range("1990-02-03", periods=3, freq="MS")

    # DataArray with more than one dimension
    two_dimensional = xr.DataArray([monthly, monthly])
    with pytest.raises(ValueError, match="must be 1D"):
        xr.infer_freq(two_dimensional)

    # CFTimeIndex with fewer than three dates
    too_short = monthly[:2]
    with pytest.raises(ValueError, match="Need at least 3 dates to infer frequency"):
        xr.infer_freq(too_short)


@requires_cftime_1_1_0
@pytest.mark.parametrize(
    "freq",
    [
        "300AS-JAN",
        "A-DEC",
        "AS-JUL",
        "2AS-FEB",
        "Q-NOV",
        "3QS-DEC",
        "MS",
        "4M",
        "7D",
        "D",
        "30H",
        "5T",
        "40S",
    ],
)
@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
def test_infer_freq(freq, calendar):
    """A range built from ``freq`` must be inferred back as the same string."""
    index = xr.cftime_range("2000-01-01", periods=3, freq=freq, calendar=calendar)
    assert xr.infer_freq(index) == freq