Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make coarsen reductions consistent with reductions on other classes #3500

Merged
merged 14 commits into from
Dec 4, 2019
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ New Features
invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`)
By `Deepak Cherian <https://github.com/dcherian>`_ and
`Guido Imperiale <https://github.com/crusaderky>`_.
- Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen`
and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`)
By `Deepak Cherian <https://github.com/dcherian>`_.
dcherian marked this conversation as resolved.
Show resolved Hide resolved
- Add the documented-but-missing :py:meth:`DatasetGroupBy.quantile`.
(:issue:`3525`, :pull:`3527`). By `Justus Magin <https://github.com/keewis>`_.

Expand Down
2 changes: 1 addition & 1 deletion xarray/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def _maybe_null_out(result, axis, mask, min_count=1):
"""
if hasattr(axis, "__len__"): # if tuple or list
raise ValueError(
"min_count is not available for reduction " "with more than one dimensions."
"min_count is not available for reduction with more than one dimensions."
)

if axis is not None and getattr(result, "ndim", False):
Expand Down
10 changes: 0 additions & 10 deletions xarray/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,13 +347,3 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True):

inject_reduce_methods(cls)
inject_cum_methods(cls)


def inject_coarsen_methods(cls):
# standard numpy reduce methods
methods = [(name, getattr(duck_array_ops, name)) for name in NAN_REDUCE_METHODS]
for name, f in methods:
func = cls._reduce_method(f)
func.__name__ = name
func.__doc__ = _COARSEN_REDUCE_DOCSTRING_TEMPLATE.format(name=func.__name__)
setattr(cls, name, func)
51 changes: 37 additions & 14 deletions xarray/core/rolling.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import functools
import warnings
from typing import Callable
from typing import Any, Callable, Dict

import numpy as np

from . import dtypes, duck_array_ops, utils
from .dask_array_ops import dask_rolling_wrapper
from .ops import inject_coarsen_methods
from .ops import inject_reduce_methods
from .pycompat import dask_array_type

try:
Expand Down Expand Up @@ -542,6 +542,11 @@ def __init__(self, obj, windows, boundary, side, coord_func):
self.side = side
self.boundary = boundary

absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
if absent_dims:
raise ValueError(
f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}."
)
if not utils.is_dict_like(coord_func):
coord_func = {d: coord_func for d in self.obj.dims}
for c in self.obj.coords:
Expand All @@ -565,18 +570,23 @@ def __repr__(self):
class DataArrayCoarsen(Coarsen):
__slots__ = ()

_reduce_extra_args_docstring = """"""

@classmethod
def _reduce_method(cls, func):
def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
"""
Return a wrapped function for injecting numpy methods.
see ops.inject_coarsen_methods
Return a wrapped function for injecting reduction methods.
see ops.inject_reduce_methods
"""
kwargs: Dict[str, Any] = {}
if include_skipna:
kwargs["skipna"] = None

def wrapped_func(self, **kwargs):
from .dataarray import DataArray

reduced = self.obj.variable.coarsen(
self.windows, func, self.boundary, self.side
self.windows, func, self.boundary, self.side, **kwargs
)
coords = {}
for c, v in self.obj.coords.items():
Expand All @@ -585,7 +595,11 @@ def wrapped_func(self, **kwargs):
else:
if any(d in self.windows for d in v.dims):
coords[c] = v.variable.coarsen(
self.windows, self.coord_func[c], self.boundary, self.side
self.windows,
self.coord_func[c],
self.boundary,
self.side,
**kwargs,
)
else:
coords[c] = v
Expand All @@ -597,27 +611,36 @@ def wrapped_func(self, **kwargs):
class DatasetCoarsen(Coarsen):
__slots__ = ()

_reduce_extra_args_docstring = """"""

@classmethod
def _reduce_method(cls, func):
def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
"""
Return a wrapped function for injecting numpy methods.
see ops.inject_coarsen_methods
Return a wrapped function for injecting reduction methods.
see ops.inject_reduce_methods
"""
kwargs: Dict[str, Any] = {}
if include_skipna:
kwargs["skipna"] = None

def wrapped_func(self, **kwargs):
from .dataset import Dataset

reduced = {}
for key, da in self.obj.data_vars.items():
reduced[key] = da.variable.coarsen(
self.windows, func, self.boundary, self.side
self.windows, func, self.boundary, self.side, **kwargs
)

coords = {}
for c, v in self.obj.coords.items():
if any(d in self.windows for d in v.dims):
coords[c] = v.variable.coarsen(
self.windows, self.coord_func[c], self.boundary, self.side
self.windows,
self.coord_func[c],
self.boundary,
self.side,
**kwargs,
)
else:
coords[c] = v.variable
Expand All @@ -626,5 +649,5 @@ def wrapped_func(self, **kwargs):
return wrapped_func


inject_coarsen_methods(DataArrayCoarsen)
inject_coarsen_methods(DatasetCoarsen)
inject_reduce_methods(DataArrayCoarsen)
inject_reduce_methods(DatasetCoarsen)
8 changes: 4 additions & 4 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1829,9 +1829,9 @@ def rolling_window(
),
)

def coarsen(self, windows, func, boundary="exact", side="left"):
def coarsen(self, windows, func, boundary="exact", side="left", **kwargs):
"""
Apply
Apply reduction function.
"""
windows = {k: v for k, v in windows.items() if k in self.dims}
if not windows:
Expand All @@ -1843,11 +1843,11 @@ def coarsen(self, windows, func, boundary="exact", side="left"):
func = getattr(duck_array_ops, name, None)
if func is None:
raise NameError(f"{name} is not a valid method.")
return type(self)(self.dims, func(reshaped, axis=axes), self._attrs)
return self._replace(data=func(reshaped, axis=axes, **kwargs))

def _coarsen_reshape(self, windows, boundary, side):
"""
Construct a reshaped-array for corsen
Construct a reshaped-array for coarsen
"""
if not utils.is_dict_like(boundary):
boundary = {d: boundary for d in windows.keys()}
Expand Down
14 changes: 9 additions & 5 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5474,6 +5474,11 @@ def ds(request):
)


def test_coarsen_absent_dims_error(ds):
with raises_regex(ValueError, "not found in Dataset."):
ds.coarsen(foo=2)


@pytest.mark.parametrize("dask", [True, False])
@pytest.mark.parametrize(("boundary", "side"), [("trim", "left"), ("pad", "right")])
def test_coarsen(ds, dask, boundary, side):
Expand All @@ -5482,12 +5487,11 @@ def test_coarsen(ds, dask, boundary, side):

actual = ds.coarsen(time=2, x=3, boundary=boundary, side=side).max()
assert_equal(
actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary=boundary, side=side).max()
actual["z1"], ds["z1"].coarsen(x=3, boundary=boundary, side=side).max()
)
# coordinate should be mean by default
assert_equal(
actual["time"],
ds["time"].coarsen(time=2, x=3, boundary=boundary, side=side).mean(),
actual["time"], ds["time"].coarsen(time=2, boundary=boundary, side=side).mean()
)


Expand All @@ -5498,8 +5502,8 @@ def test_coarsen_coords(ds, dask):

# check if coord_func works
actual = ds.coarsen(time=2, x=3, boundary="trim", coord_func={"time": "max"}).max()
assert_equal(actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary="trim").max())
assert_equal(actual["time"], ds["time"].coarsen(time=2, x=3, boundary="trim").max())
assert_equal(actual["z1"], ds["z1"].coarsen(x=3, boundary="trim").max())
assert_equal(actual["time"], ds["time"].coarsen(time=2, boundary="trim").max())

# raise if exact
with pytest.raises(ValueError):
Expand Down
20 changes: 20 additions & 0 deletions xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1823,6 +1823,26 @@ def test_coarsen_2d(self):
expected[1, 1] *= 12 / 11
assert_allclose(actual, expected)

v = self.cls(("x", "y"), np.arange(4 * 4, dtype=np.float32).reshape(4, 4))
actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact")
expected = self.cls(("x", "y"), 4 * np.ones((2, 2)))
assert_equal(actual, expected)

v[0, 0] = np.nan
v[-1, -1] = np.nan
expected[0, 0] = 3
expected[-1, -1] = 3
actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact")
assert_equal(actual, expected)

actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=False)
expected = self.cls(("x", "y"), [[np.nan, 18], [42, np.nan]])
assert_equal(actual, expected)

actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=True)
expected = self.cls(("x", "y"), [[10, 18], [42, 35]])
assert_equal(actual, expected)


@requires_dask
class TestVariableWithDask(VariableSubclassobjects):
Expand Down