Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Another groupby.reduce bugfix. #3403

Merged
merged 11 commits into from
Oct 25, 2019
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ Bug fixes
- Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4.
By `Anderson Banihirwe <https://github.com/andersy005>`_.

- Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and
:py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions.
(:issue:`3402`). By `Deepak Cherian <https://github.com/dcherian/>`_.


Documentation
~~~~~~~~~~~~~
Expand Down
27 changes: 16 additions & 11 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,26 @@
from .utils import (
either_dict_or_kwargs,
hashable,
is_scalar,
maybe_wrap_array,
peek_at,
safe_cast_to_index,
)
from .variable import IndexVariable, Variable, as_variable


def check_reduce_dims(reduce_dims, dimensions):
    """Validate that every requested reduction dimension exists.

    Parameters
    ----------
    reduce_dims : Ellipsis, scalar (hashable), or iterable of hashable
        Dimension name(s) requested for reduction. ``...`` means "all
        dimensions" and is always accepted.
    dimensions : iterable of hashable
        The dimensions actually present on the object being reduced.

    Raises
    ------
    ValueError
        If any name in ``reduce_dims`` is not found in ``dimensions``.
    """
    if reduce_dims is not ...:
        # A single dimension name may be passed bare; normalize to a list
        # so the membership check below can iterate uniformly.
        if is_scalar(reduce_dims):
            reduce_dims = [reduce_dims]
        # Generator (not list comprehension) lets any() short-circuit on
        # the first unknown dimension.
        if any(dim not in dimensions for dim in reduce_dims):
            raise ValueError(
                "cannot reduce over dimensions %r. expected either '...' to reduce over all dimensions or one or more of %r."
                % (reduce_dims, dimensions)
            )


def unique_value_groups(ar, sort=True):
"""Group an array by its unique values.

Expand Down Expand Up @@ -794,15 +807,11 @@ def reduce(
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

if dim is not ... and dim not in self.dims:
raise ValueError(
"cannot reduce over dimension %r. expected either '...' to reduce over all dimensions or one or more of %r."
% (dim, self.dims)
)

def reduce_array(ar):
return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs)

check_reduce_dims(dim, self.dims)

return self.apply(reduce_array, shortcut=shortcut)


Expand Down Expand Up @@ -895,11 +904,7 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs):
def reduce_dataset(ds):
return ds.reduce(func, dim, keep_attrs, **kwargs)

if dim is not ... and dim not in self.dims:
raise ValueError(
"cannot reduce over dimension %r. expected either '...' to reduce over all dimensions or one or more of %r."
% (dim, self.dims)
)
check_reduce_dims(dim, self.dims)

return self.apply(reduce_dataset)

Expand Down
9 changes: 0 additions & 9 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2560,15 +2560,6 @@ def change_metadata(x):
expected = change_metadata(expected)
assert_equal(expected, actual)

def test_groupby_reduce_dimension_error(self):
array = self.make_groupby_example_array()
grouped = array.groupby("y")
with raises_regex(ValueError, "cannot reduce over dimension 'y'"):
grouped.mean()

grouped = array.groupby("y", squeeze=False)
assert_identical(array, grouped.mean())

def test_groupby_math(self):
array = self.make_groupby_example_array()
for squeeze in [True, False]:
Expand Down
56 changes: 41 additions & 15 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,23 @@
import xarray as xr
from xarray.core.groupby import _consolidate_slices

from . import assert_identical, raises_regex
from . import assert_allclose, assert_identical, raises_regex


@pytest.fixture
def dataset():
    """Small 3-D example Dataset shared by the groupby tests.

    Contains a random float variable ``foo`` over (x, y, z) and a string
    variable ``boo`` over (z, y).
    """
    values = np.random.randn(3, 4, 2)
    coords = {"x": ["a", "b", "c"], "y": [1, 2, 3, 4], "z": [1, 2]}
    ds = xr.Dataset({"foo": (("x", "y", "z"), values)}, coords)
    ds["boo"] = (("z", "y"), [["f", "g", "h", "j"]] * 2)
    return ds


@pytest.fixture
def array(dataset):
    """The ``foo`` DataArray extracted from the ``dataset`` fixture."""
    foo = dataset["foo"]
    return foo


def test_consolidate_slices():
Expand All @@ -21,25 +37,17 @@ def test_consolidate_slices():
_consolidate_slices([slice(3), 4])


def test_groupby_dims_property():
ds = xr.Dataset(
{"foo": (("x", "y", "z"), np.random.randn(3, 4, 2))},
{"x": ["a", "bcd", "c"], "y": [1, 2, 3, 4], "z": [1, 2]},
)
def test_groupby_dims_property(dataset):
assert dataset.groupby("x").dims == dataset.isel(x=1).dims
assert dataset.groupby("y").dims == dataset.isel(y=1).dims

assert ds.groupby("x").dims == ds.isel(x=1).dims
assert ds.groupby("y").dims == ds.isel(y=1).dims

stacked = ds.stack({"xy": ("x", "y")})
stacked = dataset.stack({"xy": ("x", "y")})
assert stacked.groupby("xy").dims == stacked.isel(xy=0).dims


def test_multi_index_groupby_apply():
def test_multi_index_groupby_apply(dataset):
# regression test for GH873
ds = xr.Dataset(
{"foo": (("x", "y"), np.random.randn(3, 4))},
{"x": ["a", "b", "c"], "y": [1, 2, 3, 4]},
)
ds = dataset.isel(z=1, drop=True)[["foo"]]
doubled = 2 * ds
group_doubled = (
ds.stack(space=["x", "y"])
Expand Down Expand Up @@ -276,6 +284,24 @@ def test_groupby_grouping_errors():
dataset.to_array().groupby(dataset.foo * np.nan)


def test_groupby_reduce_dimension_error(array):
    """Reducing over unknown dimensions raises; valid reductions succeed."""
    grouped = array.groupby("y")
    # No args, a single bogus name, and a tuple containing a bogus name
    # must all fail with the same error class and message prefix.
    for bad_args in [(), ("huh",), (("x", "y", "asd"),)]:
        with raises_regex(ValueError, "cannot reduce over dimensions"):
            grouped.mean(*bad_args)

    # With squeeze=False the grouped dimension is retained, so the
    # no-argument reduction is well defined and is a no-op here.
    grouped = array.groupby("y", squeeze=False)
    assert_identical(array, grouped.mean())

    assert_identical(array.mean("x"), grouped.reduce(np.mean, "x"))
    assert_allclose(array.mean(["x", "z"]), grouped.reduce(np.mean, ["x", "z"]))


def test_groupby_bins_timeseries():
ds = xr.Dataset()
ds["time"] = xr.DataArray(
Expand Down