Correctly interpolate seasons in Grouper #2019

Open · wants to merge 10 commits into base: main
1 change: 1 addition & 0 deletions CHANGELOG.rst
@@ -22,6 +22,7 @@ Bug fixes
* Fixed an issue with ``nimbus`` that was causing URL path components to be improperly joined. (:pull:`1997`).
* `base_kws_vars` in `MBCn` is now copied inside the `adjust` function so that in-place changes do not change the dict globally. (:pull:`1999`).
* Fixed a bug in the logic of ``xclim.testing.utils.load_registry`` that impacted the ability to load a `registry.txt` from a non-default repository. (:pull:`2001`).
* Fixed a bug in ``xclim.sdba.Grouper.get_index`` that didn't correctly interpolate seasonal values (:issue:`2014`, :pull:`2019`).

Internal changes
^^^^^^^^^^^^^^^^
2 changes: 1 addition & 1 deletion docs/installation.rst
@@ -160,4 +160,4 @@ To create a conda environment including `xclim`'s dependencies and several optio

conda env create -n my_xclim_env python=3.10 --file=environment.yml
conda activate my_xclim_env
(my_xclim_env) python -m pip install -e --no-deps .
(my_xclim_env) python -m pip install --no-deps -e .
44 changes: 31 additions & 13 deletions src/xclim/sdba/base.py
@@ -295,21 +295,39 @@ def get_index(
return da[self.dim].rename("group")

ind = da.indexes[self.dim]
if self.prop == "week":
i = da[self.dim].copy(data=ind.isocalendar().week).astype(int)
elif self.prop == "season":
i = da[self.dim].copy(data=ind.month % 12 // 3)
else:
i = getattr(ind, self.prop)

if not np.issubdtype(i.dtype, np.integer):
raise ValueError(
f"Index {self.name} is not of type int (rather {i.dtype}), "
f"but {self.__class__.__name__} requires integer indexes."
)
if interp and self.dim == "time":
if interp and self.dim == "time" and self.prop == "month":
i = ind.month - 0.5 + ind.day / ind.days_in_month

elif interp and self.dim == "time" and self.prop == "season":
calendar = ind.calendar if hasattr(ind, "calendar") else "standard"
length_year = (
360
if calendar == "360_day"
else 365 + (0 if calendar == "noleap" else ind.is_leap_year)
)
# This is assuming that seasons have the same length. The factor 1/6 comes from the fact that
# the first season is shifted by 1 month, but the middle of the season is shifted in the other direction
# by half a month, so -(1/12 - 1/24) * 4 = -1/6.
i = ind.dayofyear / length_year * 4 - 1 / 6
else:
raise ValueError(
f"Interpolation is not supported for {self.dim}.{self.prop}."
)
else:
if self.prop == "week":
i = da[self.dim].copy(data=ind.isocalendar().week).astype(int)
elif self.prop == "season":
i = da[self.dim].copy(data=ind.month % 12 // 3)
else:
i = getattr(ind, self.prop)

if interp and self.dim == "time" and self.prop == "month":
i = ind.month - 0.5 + ind.day / ind.days_in_month
if not np.issubdtype(i.dtype, np.integer):
raise ValueError(
f"Index {self.name} is not of type int (rather {i.dtype}), "
f"but {self.__class__.__name__} requires integer indexes."
)

xi = xr.DataArray(
i,
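The season branch above can be checked in isolation. The following is a minimal sketch (not part of the PR) that reproduces the new interpolation with plain pandas for the standard calendar; `interp_season_index` is a hypothetical helper name used only for illustration:

```python
import numpy as np
import pandas as pd


def interp_season_index(times: pd.DatetimeIndex) -> np.ndarray:
    """Fractional season index (DJF=0, MAM=1, JJA=2, SON=3), assuming equal-length seasons."""
    length_year = np.where(times.is_leap_year, 366, 365)
    # Map day-of-year onto [0, 4), then shift by -1/6 so that the middle of each
    # season lands on its integer label (mid-January maps to ~0 for DJF, etc.).
    return times.dayofyear / length_year * 4 - 1 / 6


times = pd.date_range("2000-01-01", periods=366, freq="D")
print(interp_season_index(times)[90])  # index 90 is 2000-03-31 -> ~0.8279
```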
12 changes: 7 additions & 5 deletions src/xclim/testing/helpers.py
@@ -189,7 +189,7 @@ def test_timeseries(
units: str | None = None,
freq: str = "D",
as_dataset: bool = False,
cftime: bool = False,
calendar: str | None = None,
) -> xr.DataArray | xr.Dataset:
"""
Create a generic timeseries object based on pre-defined dictionaries of existing variables.
@@ -208,16 +208,18 @@
The frequency of the time dimension. Default is daily/"D".
as_dataset : bool
Whether to return a Dataset or a DataArray. Default is False.
cftime : bool
Whether to use cftime or not. Default is False.
calendar : str or None
The calendar to use. If a calendar is provided, a cftime index is used.

Returns
-------
xr.DataArray or xr.Dataset
A DataArray or Dataset with time, lon and lat dimensions.
"""
if cftime:
coords = xr.cftime_range(start, periods=len(values), freq=freq)
if calendar:
coords = xr.cftime_range(
start, periods=len(values), freq=freq, calendar=calendar
)
else:
coords = pd.date_range(start, periods=len(values), freq=freq)

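A rough usage sketch of the reworked helper (values and keywords chosen for illustration, mirroring how the tests below call it): the boolean `cftime` flag is replaced by a calendar name, and a cftime coordinate is only built when one is given.

```python
import numpy as np
from xclim.testing.helpers import test_timeseries

# Previously: test_timeseries(..., cftime=True)
tas_noleap = test_timeseries(
    np.ones(365), variable="tas", start="2000-01-01", calendar="noleap"
)
# No calendar given: a regular pandas date range is used, as before.
tas_default = test_timeseries(np.ones(365), variable="tas", start="2000-01-01")
```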
32 changes: 16 additions & 16 deletions tests/test_bootstrapping.py
@@ -23,30 +23,30 @@ class Test_bootstrap:
@pytest.mark.slow
@pytest.mark.parametrize("use_dask", [True, False])
@pytest.mark.parametrize(
"var,p,index,freq, cftime",
"var,p,index,freq, calendar",
(
["tas", 98, tg90p, "MS", False],
["tasmin", 98, tn90p, "YS-JUL", False],
["tasmax", 98, tx90p, "QS-APR", False],
["tasmax", 98, tx90p, "QS-APR", True],
["tasmin", 2, tn10p, "MS", False],
["tasmax", 2, tx10p, "YS", False],
["tasmax", 2, tx10p, "YS", True],
["tas", 2, tg10p, "MS", False],
["tasmax", 98, warm_spell_duration_index, "MS", False],
["tasmin", 2, cold_spell_duration_index, "MS", False],
["pr", 99, days_over_precip_thresh, "MS", False],
["pr", 98, fraction_over_precip_thresh, "MS", False],
["pr", 98, fraction_over_precip_thresh, "MS", True],
["tas", 98, tg90p, "MS", None],
["tasmin", 98, tn90p, "YS-JUL", None],
["tasmax", 98, tx90p, "QS-APR", None],
["tasmax", 98, tx90p, "QS-APR", "standard"],
["tasmin", 2, tn10p, "MS", None],
["tasmax", 2, tx10p, "YS", None],
["tasmax", 2, tx10p, "YS", "standard"],
["tas", 2, tg10p, "MS", None],
["tasmax", 98, warm_spell_duration_index, "MS", None],
["tasmin", 2, cold_spell_duration_index, "MS", None],
["pr", 99, days_over_precip_thresh, "MS", None],
["pr", 98, fraction_over_precip_thresh, "MS", None],
["pr", 98, fraction_over_precip_thresh, "MS", "standard"],
),
)
def test_bootstrap(self, var, p, index, freq, cftime, use_dask, random):
def test_bootstrap(self, var, p, index, freq, calendar, use_dask, random):
# -- GIVEN
arr = self.ar1(
alpha=0.8, n=int(4 * 365.25), random=random, positive_values=(var == "pr")
)
climate_var = _test_timeseries(
arr, start="2000-01-01", variable=var, cftime=cftime
arr, start="2000-01-01", variable=var, calendar=calendar
)
if use_dask:
climate_var = climate_var.chunk(dict(time=50))
8 changes: 4 additions & 4 deletions tests/test_calendar.py
@@ -482,13 +482,13 @@ def test_convert_doy():
)


@pytest.mark.parametrize("cftime", [True, False])
@pytest.mark.parametrize("calendar", ["standard", None])
@pytest.mark.parametrize(
"w,s,m,f,ss",
[(30, 10, None, "YS", 0), (3, 1, None, "QS-DEC", 60), (6, None, None, "MS", 0)],
)
def test_stack_periods(tas_series, cftime, w, s, m, f, ss):
da = tas_series(np.arange(365 * 50), start="2000-01-01", cftime=cftime)
def test_stack_periods(tas_series, calendar, w, s, m, f, ss):
da = tas_series(np.arange(365 * 50), start="2000-01-01", calendar=calendar)

da_stck = stack_periods(
da, window=w, stride=s, min_length=m, freq=f, align_days=False
@@ -502,7 +502,7 @@ def test_stack_periods(tas_series, cftime, w, s, m, f, ss):


def test_stack_periods_special(tas_series):
da = tas_series(np.arange(365 * 48 + 12), cftime=True, start="2004-01-01")
da = tas_series(np.arange(365 * 48 + 12), calendar="standard", start="2004-01-01")

with pytest.raises(ValueError, match="unaligned day-of-year"):
stack_periods(da)
8 changes: 4 additions & 4 deletions tests/test_helpers.py
@@ -179,10 +179,10 @@ def test_resample_map_passthrough(tas_series):
assert not uses_dask(out)


@pytest.mark.parametrize("cftime", [False, True])
def test_make_hourly_temperature(tasmax_series, tasmin_series, cftime):
tasmax = tasmax_series(np.array([20]), units="degC", cftime=cftime)
tasmin = tasmin_series(np.array([0]), units="degC", cftime=cftime).expand_dims(
@pytest.mark.parametrize("calendar", [None, "standard"])
def test_make_hourly_temperature(tasmax_series, tasmin_series, calendar):
tasmax = tasmax_series(np.array([20]), units="degC", calendar=calendar)
tasmin = tasmin_series(np.array([0]), units="degC", calendar=calendar).expand_dims(
lat=[0]
)

18 changes: 14 additions & 4 deletions tests/test_sdba/test_base.py
@@ -50,11 +50,21 @@ def test_grouper_group(tas_series, group, window, nvals):


@pytest.mark.parametrize(
"group,interp,val90",
[("time", False, True), ("time.month", False, 3), ("time.month", True, 3.5)],
"group,interp,val90,calendar",
[
("time", False, True, None),
("time.month", False, 3, None),
("time.month", True, 3.5, None),
("time.season", False, 1, None),
("time.season", True, 0.8278688524590164, None),
("time.month", True, 3.533333333333333, "360_day"),
("time.month", True, 3.533333333333333, "noleap"),
("time.season", True, 0.8444444444444444, "360_day"),
("time.season", True, 0.8305936073059361, "noleap"),
],
)
def test_grouper_get_index(tas_series, group, interp, val90):
tas = tas_series(np.ones(366), start="2000-01-01")
def test_grouper_get_index(tas_series, group, interp, val90, calendar):
tas = tas_series(np.ones(366), start="2000-01-01", calendar=calendar)
grouper = Grouper(group)
indx = grouper.get_index(tas, interp=interp)
# 90 is March 31st
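For reference, the new expected values can be reproduced by hand (a back-of-the-envelope check, not part of the test file). Index 90 of a daily series starting 2000-01-01 is day-of-year 91 in every calendar; with the equal-length-season assumption used in `get_index`:

```python
doy = 91  # index 90 of a daily series starting 2000-01-01 is day-of-year 91

# Seasonal interpolation: doy / year_length * 4 - 1/6
print(doy / 366 * 4 - 1 / 6)  # standard calendar (2000 is a leap year): ~0.82787
print(doy / 360 * 4 - 1 / 6)  # 360_day calendar:                        ~0.84444
print(doy / 365 * 4 - 1 / 6)  # noleap calendar:                         ~0.83059

# Monthly interpolation: month - 0.5 + day / days_in_month
# Day-of-year 91 is April 1 in both the 360_day and noleap calendars, and April has 30 days in both.
print(4 - 0.5 + 1 / 30)  # ~3.53333
```

These values match the `val90` expectations parametrized above.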