Skip to content
forked from pydata/xarray

Commit

Permalink
Squashed coarsen
Browse files Browse the repository at this point in the history
commit 35263a3f2137765299a1dd767fb412bc016feaa1
Author: dcherian <deepak@cherian.net>
Date:   Sat Nov 9 18:28:08 2019 -0700

    indexes wip

commit d7f677c
Author: dcherian <deepak@cherian.net>
Date:   Sat Nov 9 11:45:51 2019 -0700

    fix tests.

commit b93a19f
Author: dcherian <deepak@cherian.net>
Date:   Fri Nov 8 14:55:17 2019 -0700

    Add whats-new

commit 01729eb
Author: dcherian <deepak@cherian.net>
Date:   Fri Nov 8 14:49:09 2019 -0700

    Test that dims passed to coarsen are present in dataset.

commit c1977c8
Author: dcherian <deepak@cherian.net>
Date:   Fri Nov 8 14:37:56 2019 -0700

    test for count

commit dbf5d00
Author: dcherian <deepak@cherian.net>
Date:   Fri Nov 8 13:38:44 2019 -0700

    Coarsen now has the same reduction methods as groupby & rolling.

    This brings in support for coarsen.count as well as passing skipna down to the
    other reduction functions.
  • Loading branch information
dcherian committed Nov 13, 2019
1 parent 809aa73 commit ecc928c
Showing 10 changed files with 105 additions and 44 deletions.
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
@@ -76,6 +76,9 @@ New Features
invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`)
By `Deepak Cherian <https://github.com/dcherian>`_ and
`Guido Imperiale <https://github.com/crusaderky>`_.
- Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen`
and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`)
By `Deepak Cherian <https://github.com/dcherian>`_.

Bug fixes
~~~~~~~~~
31 changes: 24 additions & 7 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
@@ -377,7 +377,10 @@ def __init__(

# TODO(shoyer): document this argument, once it becomes part of the
# public interface.
self._indexes = indexes
if indexes is None or not indexes:
self._indexes = default_indexes(self._coords, self.dims)
else:
self._indexes = indexes

self._file_obj = None

@@ -445,6 +448,8 @@ def _from_temp_dataset(
return self._replace(variable, coords, name, indexes=indexes)

def _to_dataset_split(self, dim: Hashable) -> Dataset:
""" splits dataarray along dimension 'dim' """

def subset(dim, label):
array = self.loc[{dim: label}]
if dim in array.coords:
@@ -453,11 +458,17 @@ def subset(dim, label):
return array

variables = {label: subset(dim, label) for label in self.get_index(dim)}

coords = self.coords.to_dataset()
if dim in coords:
del coords[dim]
return Dataset(variables, coords, self.attrs)
if self._indexes is not None:
indexes = self._indexes.copy()
else:
indexes = {}
if indexes is not None and dim in indexes:
del indexes[dim]
coord_names = set(self._coords) - set([dim])
dataset = Dataset._from_vars_and_coord_names(
variables, coord_names, indexes=indexes, attrs=self.attrs
)
return dataset

def _to_dataset_whole(
self, name: Hashable = None, shallow_copy: bool = True
@@ -477,12 +488,18 @@ def _to_dataset_whole(
# use private APIs for speed: this is called by _to_temp_dataset(),
# which is used in the guts of a lot of operations (e.g., reindex)
variables = self._coords.copy()
if self._indexes is not None:
indexes = self._indexes.copy()
else:
indexes = {}
variables[name] = self.variable
if shallow_copy:
for k in variables:
variables[k] = variables[k].copy(deep=False)
coord_names = set(self._coords)
dataset = Dataset._from_vars_and_coord_names(variables, coord_names)
dataset = Dataset._from_vars_and_coord_names(
variables, coord_names, indexes=indexes
)
return dataset

def to_dataset(self, dim: Hashable = None, *, name: Hashable = None) -> Dataset:
13 changes: 10 additions & 3 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
@@ -866,8 +866,12 @@ def _construct_direct(
return obj

@classmethod
def _from_vars_and_coord_names(cls, variables, coord_names, attrs=None):
return cls._construct_direct(variables, coord_names, attrs=attrs)
def _from_vars_and_coord_names(
cls, variables, coord_names, indexes=None, attrs=None
):
return cls._construct_direct(
variables, coord_names, indexes=indexes, attrs=attrs
)

def _replace(
self,
@@ -4312,10 +4316,13 @@ def to_array(self, dim="variable", name=None):

coords = dict(self.coords)
coords[dim] = list(self.data_vars)
indexes = self._indexes

dims = (dim,) + broadcast_vars[0].dims

return DataArray(data, coords, dims, attrs=self.attrs, name=name)
return DataArray(
data, coords, dims, attrs=self.attrs, name=name, indexes=indexes
)

def _to_dataframe(self, ordered_dims):
columns = [k for k in self.variables if k not in self.dims]
7 changes: 6 additions & 1 deletion xarray/core/indexes.py
Original file line number Diff line number Diff line change
@@ -9,7 +9,7 @@


class Indexes(collections.abc.Mapping):
"""Immutable proxy for Dataset or DataArray indexes."""
"""Mutable proxy for Dataset or DataArray indexes."""

__slots__ = ("_indexes",)

@@ -41,6 +41,11 @@ def __delitem__(self, key):
def __repr__(self):
return formatting.indexes_repr(self)

def __copy__(self):
import copy

return copy.deepcopy(self)


def default_indexes(
coords: Mapping[Any, Variable], dims: Iterable
2 changes: 1 addition & 1 deletion xarray/core/nanops.py
Original file line number Diff line number Diff line change
@@ -25,7 +25,7 @@ def _maybe_null_out(result, axis, mask, min_count=1):
"""
if hasattr(axis, "__len__"): # if tuple or list
raise ValueError(
"min_count is not available for reduction " "with more than one dimensions."
"min_count is not available for reduction with more than one dimensions."
)

if axis is not None and getattr(result, "ndim", False):
10 changes: 0 additions & 10 deletions xarray/core/ops.py
Original file line number Diff line number Diff line change
@@ -347,13 +347,3 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True):

inject_reduce_methods(cls)
inject_cum_methods(cls)


def inject_coarsen_methods(cls):
# standard numpy reduce methods
methods = [(name, getattr(duck_array_ops, name)) for name in NAN_REDUCE_METHODS]
for name, f in methods:
func = cls._reduce_method(f)
func.__name__ = name
func.__doc__ = _COARSEN_REDUCE_DOCSTRING_TEMPLATE.format(name=func.__name__)
setattr(cls, name, func)
51 changes: 37 additions & 14 deletions xarray/core/rolling.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import functools
import warnings
from typing import Callable
from typing import Any, Callable, Dict

import numpy as np

from . import dtypes, duck_array_ops, utils
from .dask_array_ops import dask_rolling_wrapper
from .ops import inject_coarsen_methods
from .ops import inject_reduce_methods
from .pycompat import dask_array_type

try:
@@ -542,6 +542,11 @@ def __init__(self, obj, windows, boundary, side, coord_func):
self.side = side
self.boundary = boundary

absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
if absent_dims:
raise ValueError(
f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}."
)
if not utils.is_dict_like(coord_func):
coord_func = {d: coord_func for d in self.obj.dims}
for c in self.obj.coords:
@@ -565,18 +570,23 @@ def __repr__(self):
class DataArrayCoarsen(Coarsen):
__slots__ = ()

_reduce_extra_args_docstring = """"""

@classmethod
def _reduce_method(cls, func):
def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
"""
Return a wrapped function for injecting numpy methods.
see ops.inject_coarsen_methods
Return a wrapped function for injecting reduction methods.
see ops.inject_reduce_methods
"""
kwargs: Dict[str, Any] = {}
if include_skipna:
kwargs["skipna"] = None

def wrapped_func(self, **kwargs):
from .dataarray import DataArray

reduced = self.obj.variable.coarsen(
self.windows, func, self.boundary, self.side
self.windows, func, self.boundary, self.side, **kwargs
)
coords = {}
for c, v in self.obj.coords.items():
@@ -585,7 +595,11 @@ def wrapped_func(self, **kwargs):
else:
if any(d in self.windows for d in v.dims):
coords[c] = v.variable.coarsen(
self.windows, self.coord_func[c], self.boundary, self.side
self.windows,
self.coord_func[c],
self.boundary,
self.side,
**kwargs,
)
else:
coords[c] = v
@@ -597,27 +611,36 @@ def wrapped_func(self, **kwargs):
class DatasetCoarsen(Coarsen):
__slots__ = ()

_reduce_extra_args_docstring = """"""

@classmethod
def _reduce_method(cls, func):
def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
"""
Return a wrapped function for injecting numpy methods.
see ops.inject_coarsen_methods
Return a wrapped function for injecting reduction methods.
see ops.inject_reduce_methods
"""
kwargs: Dict[str, Any] = {}
if include_skipna:
kwargs["skipna"] = None

def wrapped_func(self, **kwargs):
from .dataset import Dataset

reduced = {}
for key, da in self.obj.data_vars.items():
reduced[key] = da.variable.coarsen(
self.windows, func, self.boundary, self.side
self.windows, func, self.boundary, self.side, **kwargs
)

coords = {}
for c, v in self.obj.coords.items():
if any(d in self.windows for d in v.dims):
coords[c] = v.variable.coarsen(
self.windows, self.coord_func[c], self.boundary, self.side
self.windows,
self.coord_func[c],
self.boundary,
self.side,
**kwargs,
)
else:
coords[c] = v.variable
@@ -626,5 +649,5 @@ def wrapped_func(self, **kwargs):
return wrapped_func


inject_coarsen_methods(DataArrayCoarsen)
inject_coarsen_methods(DatasetCoarsen)
inject_reduce_methods(DataArrayCoarsen)
inject_reduce_methods(DatasetCoarsen)
6 changes: 3 additions & 3 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
@@ -1814,7 +1814,7 @@ def rolling_window(
),
)

def coarsen(self, windows, func, boundary="exact", side="left"):
def coarsen(self, windows, func, boundary="exact", side="left", **kwargs):
"""
Apply the reduction function ``func`` over each coarsening window.
"""
@@ -1828,11 +1828,11 @@ def coarsen(self, windows, func, boundary="exact", side="left"):
func = getattr(duck_array_ops, name, None)
if func is None:
raise NameError(f"{name} is not a valid method.")
return type(self)(self.dims, func(reshaped, axis=axes), self._attrs)
return type(self)(self.dims, func(reshaped, axis=axes, **kwargs), self._attrs)

def _coarsen_reshape(self, windows, boundary, side):
"""
Construct a reshaped-array for corsen
Construct a reshaped-array for coarsen
"""
if not utils.is_dict_like(boundary):
boundary = {d: boundary for d in windows.keys()}
14 changes: 9 additions & 5 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
@@ -5408,6 +5408,11 @@ def ds(request):
)


def test_coarsen_absent_dims_error(ds):
with raises_regex(ValueError, "not found in Dataset."):
ds.coarsen(foo=2)


@pytest.mark.parametrize("dask", [True, False])
@pytest.mark.parametrize(("boundary", "side"), [("trim", "left"), ("pad", "right")])
def test_coarsen(ds, dask, boundary, side):
@@ -5416,12 +5421,11 @@ def test_coarsen(ds, dask, boundary, side):

actual = ds.coarsen(time=2, x=3, boundary=boundary, side=side).max()
assert_equal(
actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary=boundary, side=side).max()
actual["z1"], ds["z1"].coarsen(x=3, boundary=boundary, side=side).max()
)
# coordinate should be mean by default
assert_equal(
actual["time"],
ds["time"].coarsen(time=2, x=3, boundary=boundary, side=side).mean(),
actual["time"], ds["time"].coarsen(time=2, boundary=boundary, side=side).mean()
)


@@ -5432,8 +5436,8 @@ def test_coarsen_coords(ds, dask):

# check if coord_func works
actual = ds.coarsen(time=2, x=3, boundary="trim", coord_func={"time": "max"}).max()
assert_equal(actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary="trim").max())
assert_equal(actual["time"], ds["time"].coarsen(time=2, x=3, boundary="trim").max())
assert_equal(actual["z1"], ds["z1"].coarsen(x=3, boundary="trim").max())
assert_equal(actual["time"], ds["time"].coarsen(time=2, boundary="trim").max())

# raise if exact
with pytest.raises(ValueError):
12 changes: 12 additions & 0 deletions xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
@@ -1814,6 +1814,18 @@ def test_coarsen_2d(self):
expected[1, 1] *= 12 / 11
assert_allclose(actual, expected)

v = self.cls(("x", "y"), np.arange(4 * 4, dtype=np.float32).reshape(4, 4))
actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact")
expected = self.cls(("x", "y"), 4 * np.ones((2, 2)))
assert_equal(actual, expected)

v[0, 0] = np.nan
v[-1, -1] = np.nan
expected[0, 0] = 3
expected[-1, -1] = 3
actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact")
assert_equal(actual, expected)


@requires_dask
class TestVariableWithDask(VariableSubclassobjects):

0 comments on commit ecc928c

Please sign in to comment.