diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000000..30c1e18f33c --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,2 @@ +github: numfocus +custom: http://numfocus.org/donate-to-xarray diff --git a/doc/computation.rst b/doc/computation.rst index 663c546be20..240a1e5704b 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -95,6 +95,9 @@ for filling missing values via 1D interpolation. Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification of which values to use as the index in the interpolation. +xarray also provides the ``max_gap`` keyword argument to limit the interpolation to +data gaps of length ``max_gap`` or smaller. See :py:meth:`~xarray.DataArray.interpolate_na` +for more. Aggregation =========== diff --git a/doc/conf.py b/doc/conf.py index 7c1557a1e66..0e04f8ccde8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -340,9 +340,10 @@ # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), - "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), - "iris": ("http://scitools.org.uk/iris/docs/latest/", None), - "numpy": ("https://docs.scipy.org/doc/numpy/", None), - "numba": ("https://numba.pydata.org/numba-doc/latest/", None), - "matplotlib": ("https://matplotlib.org/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), + "iris": ("https://scitools.org.uk/iris/docs/latest", None), + "numpy": ("https://docs.scipy.org/doc/numpy", None), + "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), + "numba": ("https://numba.pydata.org/numba-doc/latest", None), + "matplotlib": ("https://matplotlib.org", None), } diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 96f0ba9a4a6..6bf495713fe 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,6 +38,13 @@ Breaking changes New Features ~~~~~~~~~~~~ + +- Added the ``fill_value`` option to :py:meth:`~xarray.DataArray.unstack` and + :py:meth:`~xarray.Dataset.unstack` (:issue:`3518`). + By `Keisuke Fujii `_. +- Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and + :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data + gap that will be filled by interpolation. By `Deepak Cherian `_. - :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` have been added for dropping labels. :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for dropping variables (including coordinates). The existing ``drop`` methods remain as a backward compatible @@ -73,12 +80,22 @@ New Features for xarray objects. Note that xarray objects with a dask.array backend already used deterministic hashing in previous releases; this change implements it when whole xarray objects are embedded in a dask graph, e.g. when :meth:`DataArray.map` is - invoked. (:issue:`3378`, :pull:`3446`) + invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`) By `Deepak Cherian `_ and `Guido Imperiale `_. +- Add the documented-but-missing :py:meth:`xarray.core.groupby.DatasetGroupBy.quantile`. + (:issue:`3525`, :pull:`3527`). By `Justus Magin `_. Bug fixes ~~~~~~~~~ +- Ensure an index of type ``CFTimeIndex`` is not converted to a ``DatetimeIndex`` when + calling :py:meth:`Dataset.rename` (also :py:meth:`Dataset.rename_dims` + and :py:meth:`xr.Dataset.rename_vars`). By `Mathias Hauser `_ + (:issue:`3522`). 
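A minimal sketch of the new ``max_gap`` behaviour described above, assuming the synthetic array from the ``interpolate_na`` docstring example::

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        [np.nan, np.nan, np.nan, 1.0, np.nan, np.nan, 4.0, np.nan, np.nan],
        dims="x",
        coords={"x": np.arange(9)},
    )
    # the interior gap spans x=3 to x=6, i.e. length 3, so it is filled
    # when max_gap=3 but left untouched when max_gap=2
    da.interpolate_na(dim="x", max_gap=3)  # fills x=4, 5 with 2.0, 3.0
    da.interpolate_na(dim="x", max_gap=2)  # interior gap stays NaN

Boundary NaNs are never filled by linear interpolation, with or without ``max_gap``.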
+- Fix a bug in `set_index` when an existing dimension becomes a level variable of a MultiIndex. (:pull:`3520`) + By `Keisuke Fujii `_. +- Harmonize `_FillValue` and `missing_value` during encoding and decoding steps. (:pull:`3502`) + By `Anderson Banihirwe `_. - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). @@ -88,9 +105,14 @@ Bug fixes By `Deepak Cherian `_. - Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. By `Anderson Banihirwe `_. +- Rolling reduction operations no longer compute dask arrays by default. (:issue:`3161`). + In addition, the ``allow_lazy`` kwarg to ``reduce`` is deprecated. + By `Deepak Cherian `_. - Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions. (:issue:`3402`). By `Deepak Cherian `_ +- Allow appending datetime and bool data variables to zarr stores. + (:issue:`3480`). By `Akihiro Matsukawa `_. Documentation ~~~~~~~~~~~~~ @@ -111,7 +133,8 @@ Internal Changes ~~~~~~~~~~~~~~~~ - Added integration tests against `pint `_. - (:pull:`3238`, :pull:`3447`, :pull:`3508`) by `Justus Magin `_. + (:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`) + by `Justus Magin `_. .. note:: @@ -130,6 +153,9 @@ Internal Changes - Enable type checking on default sentinel values (:pull:`3472`) By `Maximilian Roos `_ +- Add :py:meth:`Variable._replace` for simpler replacing of a subset of attributes (:pull:`3472`) + By `Maximilian Roos `_ + .. _whats-new.0.14.0: v0.14.0 (14 Oct 2019) @@ -217,6 +243,9 @@ Bug fixes By `Deepak Cherian `_. - Fix error in concatenating unlabeled dimensions (:pull:`3362`). By `Deepak Cherian `_. +- Warn if the ``dim`` kwarg is passed to rolling operations. This is redundant since a dimension is + specified when the :py:class:`DatasetRolling` or :py:class:`DataArrayRolling` object is created. + (:pull:`3362`). By `Deepak Cherian `_.
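The rolling deprecation in the last bullet above, as a minimal before/after sketch with synthetic data::

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(10.0), dims="time")
    # deprecated: the reduction already knows the rolling dimension, so
    # da.rolling(time=3).mean(dim="time") now emits a DeprecationWarning
    da.rolling(time=3).mean()  # preferred spelling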
Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index d23594fc675..945b3937c43 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1234,6 +1234,8 @@ def _validate_datatypes_for_zarr_append(dataset): def check_dtype(var): if ( not np.issubdtype(var.dtype, np.number) + and not np.issubdtype(var.dtype, np.datetime64) + and not np.issubdtype(var.dtype, np.bool) and not coding.strings.is_unicode_dtype(var.dtype) and not var.dtype == object ): @@ -1241,8 +1243,9 @@ def check_dtype(var): raise ValueError( "Invalid dtype for data variable: {} " "dtype must be a subtype of number, " - "a fixed sized string, a fixed size " - "unicode string or an object".format(var) + "datetime, bool, a fixed sized string, " + "a fixed size unicode string or an " + "object".format(var) ) for k in dataset.data_vars.values(): diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 5f9c8932b6b..2b5f87ab0cd 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -8,7 +8,6 @@ from ..core import dtypes, duck_array_ops, indexing from ..core.pycompat import dask_array_type -from ..core.utils import equivalent from ..core.variable import Variable @@ -152,18 +151,25 @@ def encode(self, variable, name=None): fv = encoding.get("_FillValue") mv = encoding.get("missing_value") - if fv is not None and mv is not None and not equivalent(fv, mv): + if ( + fv is not None + and mv is not None + and not duck_array_ops.allclose_or_equiv(fv, mv) + ): raise ValueError( - "Variable {!r} has multiple fill values {}. " - "Cannot encode data. ".format(name, [fv, mv]) + f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data." ) if fv is not None: + # Ensure _FillValue is cast to same dtype as data's + encoding["_FillValue"] = data.dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) if mv is not None: + # Ensure missing_value is cast to same dtype as data's + encoding["missing_value"] = data.dtype.type(mv) fill_value = pop_to(encoding, attrs, "missing_value", name=name) if not pd.isnull(fill_value) and fv is None: data = duck_array_ops.fillna(data, fill_value) diff --git a/xarray/core/common.py b/xarray/core/common.py index d372115ea57..2afe4b4c3a7 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -43,14 +43,12 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): - return self.reduce( - func, dim, axis, skipna=skipna, allow_lazy=True, **kwargs - ) + return self.reduce(func, dim, axis, skipna=skipna, **kwargs) else: def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore - return self.reduce(func, dim, axis, allow_lazy=True, **kwargs) + return self.reduce(func, dim, axis, **kwargs) return wrapped_func @@ -83,20 +81,13 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool def wrapped_func(self, dim=None, skipna=None, **kwargs): return self.reduce( - func, - dim, - skipna=skipna, - numeric_only=numeric_only, - allow_lazy=True, - **kwargs, + func, dim, skipna=skipna, numeric_only=numeric_only, **kwargs ) else: def wrapped_func(self, dim=None, **kwargs): # type: ignore - return self.reduce( - func, dim, numeric_only=numeric_only, allow_lazy=True, **kwargs - ) + return self.reduce(func, dim, numeric_only=numeric_only, **kwargs) return 
wrapped_func diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f0b5afdf4d5..700ef2a8d16 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -48,7 +48,7 @@ assert_coordinate_consistent, remap_label_indexers, ) -from .dataset import Dataset, merge_indexes, split_indexes +from .dataset import Dataset, split_indexes from .formatting import format_item from .indexes import Indexes, copy_indexes, default_indexes from .merge import PANDAS_TYPES, _extract_indexes_from_coords @@ -249,14 +249,14 @@ class DataArray(AbstractArray, DataWithCoords): Dictionary for holding arbitrary metadata. """ - _accessors: Optional[Dict[str, Any]] # noqa + _cache: Dict[str, Any] _coords: Dict[Any, Variable] _indexes: Optional[Dict[Hashable, pd.Index]] _name: Optional[Hashable] _variable: Variable __slots__ = ( - "_accessors", + "_cache", "_coords", "_file_obj", "_indexes", @@ -376,7 +376,6 @@ def __init__( assert isinstance(coords, dict) self._coords = coords self._name = name - self._accessors = None # TODO(shoyer): document this argument, once it becomes part of the # public interface. @@ -772,7 +771,9 @@ def reset_coords( return dataset def __dask_tokenize__(self): - return (type(self), self._variable, self._coords, self._name) + from dask.base import normalize_token + + return normalize_token((type(self), self._variable, self._coords, self._name)) def __dask_graph__(self): return self._to_temp_dataset().__dask_graph__() @@ -1617,10 +1618,10 @@ def set_index( -------- DataArray.reset_index """ - _check_inplace(inplace) - indexes = either_dict_or_kwargs(indexes, indexes_kwargs, "set_index") - coords, _ = merge_indexes(indexes, self._coords, set(), append=append) - return self._replace(coords=coords) + ds = self._to_temp_dataset().set_index( + indexes, append=append, inplace=inplace, **indexes_kwargs + ) + return self._from_temp_dataset(ds) def reset_index( self, @@ -1743,7 +1744,9 @@ def stack( return self._from_temp_dataset(ds) def unstack( - self, dim: Union[Hashable, Sequence[Hashable], None] = None + self, + dim: Union[Hashable, Sequence[Hashable], None] = None, + fill_value: Any = dtypes.NA, ) -> "DataArray": """ Unstack existing dimensions corresponding to MultiIndexes into @@ -1756,6 +1759,7 @@ def unstack( dim : hashable or sequence of hashable, optional Dimension(s) over which to unstack. By default unstacks all MultiIndexes. + fill_value: value to be filled. By default, np.nan Returns ------- @@ -1787,7 +1791,7 @@ def unstack( -------- DataArray.stack """ - ds = self._to_temp_dataset().unstack(dim) + ds = self._to_temp_dataset().unstack(dim, fill_value) return self._from_temp_dataset(ds) def to_unstacked_dataset(self, dim, level=0): @@ -2034,44 +2038,69 @@ def fillna(self, value: Any) -> "DataArray": def interpolate_na( self, - dim=None, + dim: Hashable = None, method: str = "linear", limit: int = None, use_coordinate: Union[bool, str] = True, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs: Any, ) -> "DataArray": - """Interpolate values according to different methods. + """Fill in NaNs by interpolating according to different methods. Parameters ---------- dim : str Specifies the dimension along which to interpolate. - method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial', 'barycentric', 'krog', 'pchip', - 'spline', 'akima'}, optional + method : str, optional String indicating which method to use for interpolation: - 'linear': linear interpolation (Default). 
Additional keyword - arguments are passed to ``numpy.interp`` - - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial': are passed to ``scipy.interpolate.interp1d``. If - method=='polynomial', the ``order`` keyword argument must also be + arguments are passed to :py:func:`numpy.interp` + - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': + are passed to :py:func:`scipy.interpolate.interp1d`. If + ``method='polynomial'``, the ``order`` keyword argument must also be provided. - - 'barycentric', 'krog', 'pchip', 'spline', and `akima`: use their - respective``scipy.interpolate`` classes. - use_coordinate : boolean or str, default True + - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their + respective :py:class:`scipy.interpolate` classes. + use_coordinate : bool, str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if - eqaully-spaced along `dim`. If True, the IndexVariable `dim` is - used. If use_coordinate is a string, it specifies the name of a + equally-spaced along ``dim``. If True, the IndexVariable `dim` is + used. If ``use_coordinate`` is a string, it specifies the name of a coordinate variable to use as the index. limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0 - or None for no limit. + or None for no limit. This filling is done regardless of the size of + the gap in the data. To only interpolate over gaps less than a given length, + see ``max_gap``. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + Maximum size of gap, a continuous sequence of NaNs, that will be filled. + Use None for no limit. When interpolating along a datetime64 dimension + and ``use_coordinate=True``, ``max_gap`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled + dimensions has not been implemented yet. Gap length is defined as the difference + between coordinate values at the first data point after a gap and the last value + before a gap. For gaps at the beginning (end), gap length is defined as the difference + between coordinate values at the first (last) valid data point and the first (last) NaN. + For example, consider:: + + + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + kwargs : dict, optional + parameters passed verbatim to the underlying interpolation function Returns ------- - DataArray + interpolated: DataArray + Filled in DataArray. See also -------- @@ -2086,6 +2115,7 @@ def interpolate_na( method=method, limit=limit, use_coordinate=use_coordinate, + max_gap=max_gap, **kwargs, ) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f46fe9604ff..e5f9fac9036 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -210,6 +210,7 @@ def merge_indexes( """ vars_to_replace: Dict[Hashable, Variable] = {} vars_to_remove: List[Hashable] = [] + dims_to_replace: Dict[Hashable, Hashable] = {} error_msg = "{} is not the name of an existing variable."
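# dims_to_replace (declared above) addresses GH3512: when an existing
# dimension becomes a level of a newly created MultiIndex, e.g. after
# xr.concat([da, db], dim="x").set_index(xy=["x", "y"]), any variable still
# dimensioned on the level name "x" must have that dimension renamed to the
# stacked dimension "xy"; the mapping is filled in and applied further down.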
for dim, var_names in indexes.items(): @@ -250,7 +251,7 @@ def merge_indexes( if not len(names) and len(var_names) == 1: idx = pd.Index(variables[var_names[0]].values) - else: + else: # MultiIndex for n in var_names: try: var = variables[n] @@ -262,15 +263,22 @@ def merge_indexes( levels.append(cat.categories) idx = pd.MultiIndex(levels, codes, names=names) + for n in names: + dims_to_replace[n] = dim vars_to_replace[dim] = IndexVariable(dim, idx) vars_to_remove.extend(var_names) new_variables = {k: v for k, v in variables.items() if k not in vars_to_remove} new_variables.update(vars_to_replace) + + # update dimensions if necessary GH: 3512 + for k, v in new_variables.items(): + if any(d in dims_to_replace for d in v.dims): + new_dims = [dims_to_replace.get(d, d) for d in v.dims] + new_variables[k] = v._replace(dims=new_dims) new_coord_names = coord_names | set(vars_to_replace) new_coord_names -= set(vars_to_remove) - return new_variables, new_coord_names @@ -417,8 +425,8 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): coordinates used for label based indexing. """ - _accessors: Optional[Dict[str, Any]] _attrs: Optional[Dict[Hashable, Any]] + _cache: Dict[str, Any] _coord_names: Set[Hashable] _dims: Dict[Hashable, int] _encoding: Optional[Dict[Hashable, Any]] @@ -426,8 +434,8 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): _variables: Dict[Hashable, Variable] __slots__ = ( - "_accessors", "_attrs", + "_cache", "_coord_names", "_dims", "_encoding", @@ -533,7 +541,6 @@ def __init__( data_vars, coords, compat=compat ) - self._accessors = None self._attrs = dict(attrs) if attrs is not None else None self._file_obj = None self._encoding = None @@ -658,7 +665,11 @@ def load(self, **kwargs) -> "Dataset": return self def __dask_tokenize__(self): - return (type(self), self._variables, self._coord_names, self._attrs) + from dask.base import normalize_token + + return normalize_token( + (type(self), self._variables, self._coord_names, self._attrs) + ) def __dask_graph__(self): graphs = {k: v.__dask_graph__() for k, v in self.variables.items()} @@ -864,7 +875,6 @@ def _construct_direct( obj._attrs = attrs obj._file_obj = file_obj obj._encoding = encoding - obj._accessors = None return obj @classmethod @@ -2665,7 +2675,7 @@ def _rename_indexes(self, name_dict, dims_set): verify_integrity=False, ) else: - index = pd.Index(v, name=new_name) + index = v.rename(new_name) indexes[new_name] = index return indexes @@ -3333,7 +3343,7 @@ def ensure_stackable(val): return data_array - def _unstack_once(self, dim: Hashable) -> "Dataset": + def _unstack_once(self, dim: Hashable, fill_value) -> "Dataset": index = self.get_index(dim) index = index.remove_unused_levels() full_idx = pd.MultiIndex.from_product(index.levels, names=index.names) @@ -3342,7 +3352,7 @@ def _unstack_once(self, dim: Hashable) -> "Dataset": if index.equals(full_idx): obj = self else: - obj = self.reindex({dim: full_idx}, copy=False) + obj = self.reindex({dim: full_idx}, copy=False, fill_value=fill_value) new_dim_names = index.names new_dim_sizes = [lev.size for lev in index.levels] @@ -3368,7 +3378,11 @@ def _unstack_once(self, dim: Hashable) -> "Dataset": variables, coord_names=coord_names, indexes=indexes ) - def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": + def unstack( + self, + dim: Union[Hashable, Iterable[Hashable]] = None, + fill_value: Any = dtypes.NA, + ) -> "Dataset": """ Unstack existing dimensions corresponding to MultiIndexes into multiple new dimensions. 
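A brief usage sketch for the new ``fill_value`` argument of ``unstack``, assuming synthetic data that mirrors ``test_unstack_fill_value`` later in this diff::

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"var": ("x", np.arange(6))},
        coords={"x": [0, 1, 2] * 2, "y": ("x", ["a"] * 3 + ["b"] * 3)},
    )
    # drop rows so the (x, y) MultiIndex is incomplete, then unstack;
    # the resulting holes are filled with -1 instead of the default NaN
    incomplete = ds.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"])
    incomplete.unstack("index", fill_value=-1)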
@@ -3380,6 +3394,7 @@ def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": dim : Hashable or iterable of Hashable, optional Dimension(s) over which to unstack. By default unstacks all MultiIndexes. + fill_value: value to be filled. By default, np.nan Returns ------- @@ -3417,7 +3432,7 @@ def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": result = self.copy(deep=False) for dim in dims: - result = result._unstack_once(dim) + result = result._unstack_once(dim, fill_value) return result def update(self, other: "CoercibleMapping", inplace: bool = None) -> "Dataset": @@ -3906,42 +3921,65 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, Hashable] = True, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs: Any, ) -> "Dataset": - """Interpolate values according to different methods. + """Fill in NaNs by interpolating according to different methods. Parameters ---------- - dim : Hashable + dim : str Specifies the dimension along which to interpolate. - method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial', 'barycentric', 'krog', 'pchip', - 'spline'}, optional + method : str, optional String indicating which method to use for interpolation: - 'linear': linear interpolation (Default). Additional keyword - arguments are passed to ``numpy.interp`` - - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial': are passed to ``scipy.interpolate.interp1d``. If - method=='polynomial', the ``order`` keyword argument must also be + arguments are passed to :py:func:`numpy.interp` + - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': + are passed to :py:func:`scipy.interpolate.interp1d`. If + ``method='polynomial'``, the ``order`` keyword argument must also be provided. - - 'barycentric', 'krog', 'pchip', 'spline': use their respective - ``scipy.interpolate`` classes. - use_coordinate : boolean or str, default True + - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their + respective :py:class:`scipy.interpolate` classes. + use_coordinate : bool, str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if - eqaully-spaced along `dim`. If True, the IndexVariable `dim` is - used. If use_coordinate is a string, it specifies the name of a + equally-spaced along ``dim``. If True, the IndexVariable `dim` is + used. If ``use_coordinate`` is a string, it specifies the name of a coordinate variable to use as the index. limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0 - or None for no limit. - kwargs : any - parameters passed verbatim to the underlying interplation function + or None for no limit. This filling is done regardless of the size of + the gap in the data. To only interpolate over gaps less than a given length, + see ``max_gap``. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + Maximum size of gap, a continuous sequence of NaNs, that will be filled. + Use None for no limit. When interpolating along a datetime64 dimension + and ``use_coordinate=True``, ``max_gap`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + Otherwise, ``max_gap`` must be an int or a float.
Use of ``max_gap`` with unlabeled + dimensions has not been implemented yet. Gap length is defined as the difference + between coordinate values at the first data point after a gap and the last value + before a gap. For gaps at the beginning (end), gap length is defined as the difference + between coordinate values at the first (last) valid data point and the first (last) NaN. + For example, consider:: + + + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + kwargs : dict, optional + parameters passed verbatim to the underlying interpolation function Returns ------- - Dataset + interpolated: Dataset + Filled in Dataset. See also -------- @@ -3957,6 +3995,7 @@ def interpolate_na( method=method, limit=limit, use_coordinate=use_coordinate, + max_gap=max_gap, **kwargs, ) return new @@ -4037,7 +4076,7 @@ def reduce( keep_attrs: bool = None, keepdims: bool = False, numeric_only: bool = False, - allow_lazy: bool = False, + allow_lazy: bool = None, **kwargs: Any, ) -> "Dataset": """Reduce this dataset by applying `func` along some dimension(s). diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index f473eaa497d..79abbccea39 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -20,10 +20,15 @@ def __get__(self, obj, cls): # we're accessing the attribute of the class, i.e., Dataset.geo return self._accessor + # Use the same dict as @pandas.util.cache_readonly. + # It must be explicitly declared in obj.__slots__. try: - return obj._accessors[self._name] - except TypeError: - obj._accessors = {} + cache = obj._cache + except AttributeError: + cache = obj._cache = {} + + try: + return cache[self._name] except KeyError: pass @@ -35,7 +40,7 @@ def __get__(self, obj, cls): # something else (GH933): raise RuntimeError("error initializing %r accessor." % self._name) - obj._accessors[self._name] = accessor_obj + cache[self._name] = accessor_obj return accessor_obj diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 37ab29dabd2..5606b5d754d 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -558,6 +558,59 @@ def fillna(self, value): out = ops.fillna(self, value) return out + def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + """Compute the qth quantile over each array in the groups and + concatenate them together into a new array. + + Parameters + ---------- + q : float in range of [0,1] (or sequence of floats) + Quantile to compute, which must be between 0 and 1 + inclusive. + dim : `...`, str or sequence of str, optional + Dimension(s) over which to apply quantile. + Defaults to the grouped dimension. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` is + the fractional part of the index surrounded by ``i`` and + ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. + + Returns + ------- + quantiles : Variable + If `q` is a single quantile, then the result is a + scalar. If multiple percentiles are given, first axis of + the result corresponds to the quantile. In either case a + quantile dimension is added to the return array. 
The other + dimensions are the dimensions that remain after the + reduction of the array. + + See Also + -------- + numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, + DataArray.quantile + """ + if dim is None: + dim = self._group_dim + + out = self.map( + self._obj.__class__.quantile, + shortcut=False, + q=q, + dim=dim, + interpolation=interpolation, + keep_attrs=keep_attrs, + ) + + return out + def where(self, cond, other=dtypes.NA): """Return elements from `self` or `other` depending on `cond`. @@ -586,9 +639,7 @@ def _first_or_last(self, op, skipna, keep_attrs): return self._obj if keep_attrs is None: keep_attrs = _get_keep_attrs(default=True) - return self.reduce( - op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs, allow_lazy=True - ) + return self.reduce(op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs) def first(self, skipna=None, keep_attrs=None): """Return the first element of each group along the group dimension @@ -741,60 +792,6 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False): combined = self._maybe_unstack(combined) return combined - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): - """Compute the qth quantile over each array in the groups and - concatenate them together into a new array. - - Parameters - ---------- - q : float in range of [0,1] (or sequence of floats) - Quantile to compute, which must be between 0 and 1 - inclusive. - dim : `...`, str or sequence of str, optional - Dimension(s) over which to apply quantile. - Defaults to the grouped dimension. - interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. - - Returns - ------- - quantiles : Variable - If `q` is a single quantile, then the result - is a scalar. If multiple percentiles are given, first axis of - the result corresponds to the quantile and a quantile dimension - is added to the return array. The other dimensions are the - dimensions that remain after the reduction of the array. - - See Also - -------- - numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, - DataArray.quantile - """ - if dim is None: - dim = self._group_dim - - out = self.map( - self._obj.__class__.quantile, - shortcut=False, - q=q, - dim=dim, - interpolation=interpolation, - keep_attrs=keep_attrs, - ) - - if np.asarray(q, dtype=np.float64).ndim == 0: - out = out.drop_vars("quantile") - return out - def reduce( self, func, dim=None, axis=None, keep_attrs=None, shortcut=True, **kwargs ): diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 77dde66484e..117fcaf8f81 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -1,18 +1,46 @@ import warnings from functools import partial -from typing import Any, Callable, Dict, Sequence +from numbers import Number +from typing import Any, Callable, Dict, Hashable, Sequence, Union import numpy as np import pandas as pd from . 
import utils -from .common import _contains_datetime_like_objects +from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc from .duck_array_ops import dask_array_type from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables +def _get_nan_block_lengths(obj, dim: Hashable, index: Variable): + """ + Return an object where each NaN element in 'obj' is replaced by the + length of the gap the element is in. + """ + + # make variable so that we get broadcasting for free + index = Variable([dim], index) + + # algorithm from https://github.com/pydata/xarray/pull/3302#discussion_r324707072 + arange = ones_like(obj) * index + valid = obj.notnull() + valid_arange = arange.where(valid) + cumulative_nans = valid_arange.ffill(dim=dim).fillna(index[0]) + + nan_block_lengths = ( + cumulative_nans.diff(dim=dim, label="upper") + .reindex({dim: obj[dim]}) + .where(valid) + .bfill(dim=dim) + .where(~valid, 0) + .fillna(index[-1] - valid_arange.max()) + ) + + return nan_block_lengths + + class BaseInterpolator: """Generic interpolator class for normalizing interpolation methods """ @@ -178,7 +206,7 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): return ds -def get_clean_interp_index(arr, dim, use_coordinate=True): +def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] = True): """get index to use for x values in interpolation. If use_coordinate is True, the coordinate that shares the name of the @@ -195,23 +223,33 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): index = arr.coords[use_coordinate] if index.ndim != 1: raise ValueError( - "Coordinates used for interpolation must be 1D, " - "%s is %dD." % (use_coordinate, index.ndim) + f"Coordinates used for interpolation must be 1D, " + f"{use_coordinate} is {index.ndim}D." ) + index = index.to_index() + + # TODO: index.name is None for multiindexes + # set name for nice error messages below + if isinstance(index, pd.MultiIndex): + index.name = dim + + if not index.is_monotonic: + raise ValueError(f"Index {index.name!r} must be monotonically increasing") + + if not index.is_unique: + raise ValueError(f"Index {index.name!r} has duplicate values") # raise if index cannot be cast to a float (e.g. MultiIndex) try: index = index.values.astype(np.float64) except (TypeError, ValueError): # pandas raises a TypeError - # xarray/nuppy raise a ValueError + # xarray/numpy raise a ValueError raise TypeError( - "Index must be castable to float64 to support" - "interpolation, got: %s" % type(index) + f"Index {index.name!r} must be castable to float64 to support " + f"interpolation, got {type(index).__name__}." ) - # check index sorting now so we can skip it later - if not (np.diff(index) > 0).all(): - raise ValueError("Index must be monotonicly increasing") + else: axis = arr.get_axis_num(dim) index = np.arange(arr.shape[axis], dtype=np.float64) @@ -220,7 +258,13 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): def interp_na( - self, dim=None, use_coordinate=True, method="linear", limit=None, **kwargs + self, + dim: Hashable = None, + use_coordinate: Union[bool, str] = True, + method: str = "linear", + limit: int = None, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, + **kwargs, ): """Interpolate values according to different methods. 
""" @@ -230,6 +274,40 @@ def interp_na( if limit is not None: valids = _get_valid_fill_mask(self, dim, limit) + if max_gap is not None: + max_type = type(max_gap).__name__ + if not is_scalar(max_gap): + raise ValueError("max_gap must be a scalar.") + + if ( + dim in self.indexes + and isinstance(self.indexes[dim], pd.DatetimeIndex) + and use_coordinate + ): + if not isinstance(max_gap, (np.timedelta64, pd.Timedelta, str)): + raise TypeError( + f"Underlying index is DatetimeIndex. Expected max_gap of type str, pandas.Timedelta or numpy.timedelta64 but received {max_type}" + ) + + if isinstance(max_gap, str): + try: + max_gap = pd.to_timedelta(max_gap) + except ValueError: + raise ValueError( + f"Could not convert {max_gap!r} to timedelta64 using pandas.to_timedelta" + ) + + if isinstance(max_gap, pd.Timedelta): + max_gap = np.timedelta64(max_gap.value, "ns") + + max_gap = np.timedelta64(max_gap, "ns").astype(np.float64) + + if not use_coordinate: + if not isinstance(max_gap, (Number, np.number)): + raise TypeError( + f"Expected integer or floating point max_gap since use_coordinate=False. Received {max_type}." + ) + # method index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate) interp_class, kwargs = _get_interpolator(method, **kwargs) @@ -253,6 +331,14 @@ def interp_na( if limit is not None: arr = arr.where(valids) + if max_gap is not None: + if dim not in self.coords: + raise NotImplementedError( + "max_gap not implemented for unlabeled coordinates yet." + ) + nan_block_lengths = _get_nan_block_lengths(self, dim, index) + arr = arr.where(nan_block_lengths <= max_gap) + return arr diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index f4e571a8efe..a1864332f4d 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -1,4 +1,5 @@ import functools +import warnings from typing import Callable import numpy as np @@ -351,6 +352,14 @@ def _bottleneck_reduce(self, func, **kwargs): def _numpy_or_bottleneck_reduce( self, array_agg_func, bottleneck_move_func, **kwargs ): + if "dim" in kwargs: + warnings.warn( + f"Reductions will be applied along the rolling dimension '{self.dim}'. 
Passing the 'dim' kwarg to reduction operations has no effect and will raise an error in xarray 0.16.0.", + DeprecationWarning, + stacklevel=3, + ) + del kwargs["dim"] + if bottleneck_move_func is not None and not isinstance( self.obj.data, dask_array_type ): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 916df75b3e0..e630dc4b457 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1,5 +1,7 @@ +import copy import functools import itertools +import warnings from collections import defaultdict from datetime import timedelta from distutils.version import LooseVersion @@ -23,10 +25,11 @@ from .pycompat import dask_array_type, integer_types from .utils import ( OrderedSet, + _default, decode_numpy_dict_values, either_dict_or_kwargs, - infix_dims, ensure_us_time_resolution, + infix_dims, ) try: @@ -393,7 +396,9 @@ def compute(self, **kwargs): def __dask_tokenize__(self): # Use v.data, instead of v._data, in order to cope with the wrappers # around NetCDF and the like - return type(self), self._dims, self.data, self._attrs + from dask.base import normalize_token + + return normalize_token((type(self), self._dims, self.data, self._attrs)) def __dask_graph__(self): if isinstance(self._data, dask_array_type): @@ -884,7 +889,20 @@ def copy(self, deep=True, data=None): # note: # dims is already an immutable tuple # attributes and encoding will be copied when the new Array is created - return type(self)(self.dims, data, self._attrs, self._encoding, fastpath=True) + return self._replace(data=data) + + def _replace( + self, dims=_default, data=_default, attrs=_default, encoding=_default + ) -> "Variable": + if dims is _default: + dims = copy.copy(self._dims) + if data is _default: + data = copy.copy(self.data) + if attrs is _default: + attrs = copy.copy(self._attrs) + if encoding is _default: + encoding = copy.copy(self._encoding) + return type(self)(dims, data, attrs, encoding, fastpath=True) def __copy__(self): return self.copy(deep=False) @@ -1425,7 +1443,7 @@ def reduce( axis=None, keep_attrs=None, keepdims=False, - allow_lazy=False, + allow_lazy=None, **kwargs, ): """Reduce this array by applying `func` along some dimension(s). @@ -1466,7 +1484,17 @@ def reduce( if dim is not None: axis = self.get_axis_num(dim) + + if allow_lazy is not None: + warnings.warn( + "allow_lazy is deprecated and will be removed in version 0.16.0. 
It is now True by default.", + DeprecationWarning, + ) + else: + allow_lazy = True + input_data = self.data if allow_lazy else self.values + if axis is not None: data = func(input_data, axis=axis, **kwargs) else: @@ -1973,8 +2001,10 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): self._data = PandasIndexAdapter(self._data) def __dask_tokenize__(self): + from dask.base import normalize_token + # Don't waste time converting pd.Index to np.ndarray - return (type(self), self._dims, self._data.array, self._attrs) + return normalize_token((type(self), self._dims, self._data.array, self._attrs)) def load(self): # data is already loaded into memory for IndexVariable diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index 6cd584daa96..3e0474e7b60 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -20,6 +20,23 @@ def test_CFMaskCoder_decode(): assert_identical(expected, encoded) +def test_CFMaskCoder_encode_missing_fill_values_conflict(): + original = xr.Variable( + ("x",), + [0.0, -1.0, 1.0], + encoding={"_FillValue": np.float32(1e20), "missing_value": np.float64(1e20)}, + ) + coder = variables.CFMaskCoder() + encoded = coder.encode(original) + + assert encoded.dtype == encoded.attrs["missing_value"].dtype + assert encoded.dtype == encoded.attrs["_FillValue"].dtype + + with pytest.warns(variables.SerializationWarning): + roundtripped = coder.decode(coder.encode(original)) + assert_identical(roundtripped, original) + + def test_CFMaskCoder_missing_value(): expected = xr.DataArray( np.array([[26915, 27755, -9999, 27705], [25595, -9999, 28315, -9999]]), diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index fa8ae9991d7..4c1f317342f 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -12,6 +12,7 @@ import xarray as xr import xarray.ufuncs as xu from xarray import DataArray, Dataset, Variable +from xarray.core import duck_array_ops from xarray.testing import assert_chunks_equal from xarray.tests import mock @@ -217,6 +218,8 @@ def test_reduce(self): self.assertLazyAndAllClose((u < 1).all("x"), (v < 1).all("x")) with raises_regex(NotImplementedError, "dask"): v.median() + with raise_if_dask_computes(): + v.reduce(duck_array_ops.mean) def test_missing_values(self): values = np.array([0, 1, np.nan, 3]) @@ -488,7 +491,17 @@ def test_groupby(self): v = self.lazy_array expected = u.groupby("x").mean(...) - actual = v.groupby("x").mean(...) + with raise_if_dask_computes(): + actual = v.groupby("x").mean(...) 
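# raise_if_dask_computes, a helper used throughout this test module, raises
# if anything inside the context triggers a dask compute, so the assertion
# that follows also pins down the new lazy-by-default groupby behaviour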
+ self.assertLazyAndAllClose(expected, actual) + + def test_rolling(self): + u = self.eager_array + v = self.lazy_array + + expected = u.rolling(x=2).mean() + with raise_if_dask_computes(): + actual = v.rolling(x=2).mean() self.assertLazyAndAllClose(expected, actual) def test_groupby_first(self): @@ -500,7 +513,8 @@ def test_groupby_first(self): with raises_regex(NotImplementedError, "dask"): v.groupby("ab").first() expected = u.groupby("ab").first() - actual = v.groupby("ab").first(skipna=False) + with raise_if_dask_computes(): + actual = v.groupby("ab").first(skipna=False) self.assertLazyAndAllClose(expected, actual) def test_reindex(self): @@ -1283,6 +1297,32 @@ def test_token_identical(obj, transform): ) +def test_recursive_token(): + """Test that tokenization is invoked recursively, and doesn't just rely on the + output of str() + """ + a = np.ones(10000) + b = np.ones(10000) + b[5000] = 2 + assert str(a) == str(b) + assert dask.base.tokenize(a) != dask.base.tokenize(b) + + # Test DataArray and Variable + da_a = DataArray(a) + da_b = DataArray(b) + assert dask.base.tokenize(da_a) != dask.base.tokenize(da_b) + + # Test Dataset + ds_a = da_a.to_dataset(name="x") + ds_b = da_b.to_dataset(name="x") + assert dask.base.tokenize(ds_a) != dask.base.tokenize(ds_b) + + # Test IndexVariable + da_a = DataArray(a, dims=["x"], coords={"x": a}) + da_b = DataArray(a, dims=["x"], coords={"x": b}) + assert dask.base.tokenize(da_a) != dask.base.tokenize(da_b) + + @requires_scipy_or_netCDF4 def test_normalize_token_with_backend(map_ds): with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as tmp_file: diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b0cb9d672d8..35d556c9ef4 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1183,6 +1183,16 @@ def test_selection_multiindex_remove_unused(self): expected = expected.set_index(xy=["x", "y"]).unstack() assert_identical(expected, actual) + def test_selection_multiindex_from_level(self): + # GH: 3512 + da = DataArray([0, 1], dims=["x"], coords={"x": [0, 1], "y": "a"}) + db = DataArray([2, 3], dims=["x"], coords={"x": [0, 1], "y": "b"}) + data = xr.concat([da, db], dim="x").set_index(xy=["x", "y"]) + assert data.dims == ("xy",) + actual = data.sel(y="a") + expected = data.isel(xy=[0, 1]).unstack("xy").squeeze("y").drop("y") + assert_equal(actual, expected) + def test_virtual_default_coords(self): array = DataArray(np.zeros((5,)), dims="x") expected = DataArray(range(5), dims="x", name="x") @@ -4190,6 +4200,9 @@ def test_rolling_wrapped_bottleneck(da, name, center, min_periods): ) assert_array_equal(actual.values, expected) + with pytest.warns(DeprecationWarning, match="Reductions will be applied"): + getattr(rolling_obj, name)(dim="time") + # Test center rolling_obj = da.rolling(time=7, center=center) actual = getattr(rolling_obj, name)()["time"] @@ -4205,6 +4218,9 @@ def test_rolling_wrapped_dask(da_dask, name, center, min_periods, window): # dask version rolling_obj = da_dask.rolling(time=window, min_periods=min_periods, center=center) actual = getattr(rolling_obj, name)().load() + if name != "count": + with pytest.warns(DeprecationWarning, match="Reductions will be applied"): + getattr(rolling_obj, name)(dim="time") # numpy version rolling_obj = da_dask.load().rolling( time=window, min_periods=min_periods, center=center diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d001c43da94..be40ce7c6e8 100644 --- a/xarray/tests/test_dataset.py +++ 
b/xarray/tests/test_dataset.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd import pytest +from pandas.core.indexes.datetimes import DatetimeIndex import xarray as xr from xarray import ( @@ -22,6 +23,7 @@ open_dataset, set_options, ) +from xarray.coding.cftimeindex import CFTimeIndex from xarray.core import dtypes, indexing, utils from xarray.core.common import duck_array_ops, full_like from xarray.core.npcompat import IS_NEP18_ACTIVE @@ -90,6 +92,14 @@ def create_append_test_data(seed=None): string_var = np.array(["ae", "bc", "df"], dtype=object) string_var_to_append = np.array(["asdf", "asdfg"], dtype=object) unicode_var = ["áó", "áó", "áó"] + datetime_var = np.array( + ["2019-01-01", "2019-01-02", "2019-01-03"], dtype="datetime64[s]" + ) + datetime_var_to_append = np.array( + ["2019-01-04", "2019-01-05"], dtype="datetime64[s]" + ) + bool_var = np.array([True, False, True], dtype=np.bool) + bool_var_to_append = np.array([False, True], dtype=np.bool) ds = xr.Dataset( data_vars={ @@ -102,6 +112,8 @@ def create_append_test_data(seed=None): "unicode_var": xr.DataArray( unicode_var, coords=[time1], dims=["time"] ).astype(np.unicode_), + "datetime_var": xr.DataArray(datetime_var, coords=[time1], dims=["time"]), + "bool_var": xr.DataArray(bool_var, coords=[time1], dims=["time"]), } ) @@ -118,6 +130,10 @@ def create_append_test_data(seed=None): "unicode_var": xr.DataArray( unicode_var[:nt2], coords=[time2], dims=["time"] ).astype(np.unicode_), + "datetime_var": xr.DataArray( + datetime_var_to_append, coords=[time2], dims=["time"] + ), + "bool_var": xr.DataArray(bool_var_to_append, coords=[time2], dims=["time"]), } ) @@ -2444,6 +2460,53 @@ def test_rename_vars(self): with pytest.raises(ValueError): original.rename_vars(names_dict_bad) + @requires_cftime + def test_rename_does_not_change_CFTimeIndex_type(self): + # make sure CFTimeIndex is not converted to DatetimeIndex #3522 + + time = xr.cftime_range(start="2000", periods=6, freq="2MS", calendar="noleap") + orig = Dataset(coords={"time": time}) + + renamed = orig.rename(time="time_new") + assert "time_new" in renamed.indexes + assert isinstance(renamed.indexes["time_new"], CFTimeIndex) + assert renamed.indexes["time_new"].name == "time_new" + + # check original has not changed + assert "time" in orig.indexes + assert isinstance(orig.indexes["time"], CFTimeIndex) + assert orig.indexes["time"].name == "time" + + # note: rename_dims(time="time_new") drops "ds.indexes" + renamed = orig.rename_dims() + assert isinstance(renamed.indexes["time"], CFTimeIndex) + + renamed = orig.rename_vars() + assert isinstance(renamed.indexes["time"], CFTimeIndex) + + def test_rename_does_not_change_DatetimeIndex_type(self): + # make sure DatetimeIndex is conserved on rename + + time = pd.date_range(start="2000", periods=6, freq="2MS") + orig = Dataset(coords={"time": time}) + + renamed = orig.rename(time="time_new") + assert "time_new" in renamed.indexes + assert isinstance(renamed.indexes["time_new"], DatetimeIndex) + assert renamed.indexes["time_new"].name == "time_new" + + # check original has not changed + assert "time" in orig.indexes + assert isinstance(orig.indexes["time"], DatetimeIndex) + assert orig.indexes["time"].name == "time" + + # note: rename_dims(time="time_new") drops "ds.indexes" + renamed = orig.rename_dims() + assert isinstance(renamed.indexes["time"], DatetimeIndex) + + renamed = orig.rename_vars() + assert isinstance(renamed.indexes["time"], DatetimeIndex) + def test_swap_dims(self): original = Dataset({"x": [1, 2, 3], "y": ("x",
list("abc")), "z": 42}) expected = Dataset({"z": 42}, {"x": ("y", [1, 2, 3]), "y": list("abc")}) @@ -2731,6 +2794,23 @@ def test_unstack_errors(self): with raises_regex(ValueError, "do not have a MultiIndex"): ds.unstack("x") + def test_unstack_fill_value(self): + ds = xr.Dataset( + {"var": (("x",), np.arange(6))}, + coords={"x": [0, 1, 2] * 2, "y": (("x",), ["a"] * 3 + ["b"] * 3)}, + ) + # make ds incomplete + ds = ds.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"]) + # test fill_value + actual = ds.unstack("index", fill_value=-1) + expected = ds.unstack("index").fillna(-1).astype(np.int) + assert actual["var"].dtype == np.int + assert_equal(actual, expected) + + actual = ds["var"].unstack("index", fill_value=-1) + expected = ds["var"].unstack("index").fillna(-1).astype(np.int) + assert actual.equals(expected) + def test_stack_unstack_fast(self): ds = Dataset( { diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 581affa3471..97bd31ae050 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -137,42 +137,58 @@ def test_da_groupby_empty(): def test_da_groupby_quantile(): - array = xr.DataArray([1, 2, 3, 4, 5, 6], [("x", [1, 1, 1, 2, 2, 2])]) + array = xr.DataArray( + data=[1, 2, 3, 4, 5, 6], coords={"x": [1, 1, 1, 2, 2, 2]}, dims="x" + ) # Scalar quantile - expected = xr.DataArray([2, 5], [("x", [1, 2])]) + expected = xr.DataArray( + data=[2, 5], coords={"x": [1, 2], "quantile": 0.5}, dims="x" + ) actual = array.groupby("x").quantile(0.5) assert_identical(expected, actual) # Vector quantile - expected = xr.DataArray([[1, 3], [4, 6]], [("x", [1, 2]), ("quantile", [0, 1])]) + expected = xr.DataArray( + data=[[1, 3], [4, 6]], + coords={"x": [1, 2], "quantile": [0, 1]}, + dims=("x", "quantile"), + ) actual = array.groupby("x").quantile([0, 1]) assert_identical(expected, actual) # Multiple dimensions array = xr.DataArray( - [[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], - [("x", [1, 1, 1, 2, 2]), ("y", [0, 0, 1])], + data=[[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], + coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, + dims=("x", "y"), ) actual_x = array.groupby("x").quantile(0, dim=...) - expected_x = xr.DataArray([1, 4], [("x", [1, 2])]) + expected_x = xr.DataArray( + data=[1, 4], coords={"x": [1, 2], "quantile": 0}, dims="x" + ) assert_identical(expected_x, actual_x) actual_y = array.groupby("y").quantile(0, dim=...) - expected_y = xr.DataArray([1, 22], [("y", [0, 1])]) + expected_y = xr.DataArray( + data=[1, 22], coords={"y": [0, 1], "quantile": 0}, dims="y" + ) assert_identical(expected_y, actual_y) actual_xx = array.groupby("x").quantile(0) expected_xx = xr.DataArray( - [[1, 11, 22], [4, 15, 24]], [("x", [1, 2]), ("y", [0, 0, 1])] + data=[[1, 11, 22], [4, 15, 24]], + coords={"x": [1, 2], "y": [0, 0, 1], "quantile": 0}, + dims=("x", "y"), ) assert_identical(expected_xx, actual_xx) actual_yy = array.groupby("y").quantile(0) expected_yy = xr.DataArray( - [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], - [("x", [1, 1, 1, 2, 2]), ("y", [0, 1])], + data=[[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1], "quantile": 0}, + dims=("x", "y"), ) assert_identical(expected_yy, actual_yy) @@ -180,14 +196,14 @@ def test_da_groupby_quantile(): x = [0, 1] foo = xr.DataArray( np.reshape(np.arange(365 * 2), (365, 2)), - coords=dict(time=times, x=x), + coords={"time": times, "x": x}, dims=("time", "x"), ) g = foo.groupby(foo.time.dt.month) actual = g.quantile(0, dim=...) 
expected = xr.DataArray( - [ + data=[ 0.0, 62.0, 120.0, @@ -201,12 +217,111 @@ def test_da_groupby_quantile(): 610.0, 670.0, ], - [("month", np.arange(1, 13))], + coords={"month": np.arange(1, 13), "quantile": 0}, + dims="month", ) assert_identical(expected, actual) actual = g.quantile(0, dim="time")[:2] - expected = xr.DataArray([[0.0, 1], [62.0, 63]], [("month", [1, 2]), ("x", [0, 1])]) + expected = xr.DataArray( + data=[[0.0, 1], [62.0, 63]], + coords={"month": [1, 2], "x": [0, 1], "quantile": 0}, + dims=("month", "x"), + ) + assert_identical(expected, actual) + + +def test_ds_groupby_quantile(): + ds = xr.Dataset( + data_vars={"a": ("x", [1, 2, 3, 4, 5, 6])}, coords={"x": [1, 1, 1, 2, 2, 2]} + ) + + # Scalar quantile + expected = xr.Dataset( + data_vars={"a": ("x", [2, 5])}, coords={"quantile": 0.5, "x": [1, 2]} + ) + actual = ds.groupby("x").quantile(0.5) + assert_identical(expected, actual) + + # Vector quantile + expected = xr.Dataset( + data_vars={"a": (("x", "quantile"), [[1, 3], [4, 6]])}, + coords={"x": [1, 2], "quantile": [0, 1]}, + ) + actual = ds.groupby("x").quantile([0, 1]) + assert_identical(expected, actual) + + # Multiple dimensions + ds = xr.Dataset( + data_vars={ + "a": ( + ("x", "y"), + [[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], + ) + }, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, + ) + + actual_x = ds.groupby("x").quantile(0, dim=...) + expected_x = xr.Dataset({"a": ("x", [1, 4])}, coords={"x": [1, 2], "quantile": 0}) + assert_identical(expected_x, actual_x) + + actual_y = ds.groupby("y").quantile(0, dim=...) + expected_y = xr.Dataset({"a": ("y", [1, 22])}, coords={"y": [0, 1], "quantile": 0}) + assert_identical(expected_y, actual_y) + + actual_xx = ds.groupby("x").quantile(0) + expected_xx = xr.Dataset( + {"a": (("x", "y"), [[1, 11, 22], [4, 15, 24]])}, + coords={"x": [1, 2], "y": [0, 0, 1], "quantile": 0}, + ) + assert_identical(expected_xx, actual_xx) + + actual_yy = ds.groupby("y").quantile(0) + expected_yy = xr.Dataset( + {"a": (("x", "y"), [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]])}, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1], "quantile": 0}, + ).transpose() + assert_identical(expected_yy, actual_yy) + + times = pd.date_range("2000-01-01", periods=365) + x = [0, 1] + foo = xr.Dataset( + {"a": (("time", "x"), np.reshape(np.arange(365 * 2), (365, 2)))}, + coords=dict(time=times, x=x), + ) + g = foo.groupby(foo.time.dt.month) + + actual = g.quantile(0, dim=...) 
+ expected = xr.Dataset( + { + "a": ( + "month", + [ + 0.0, + 62.0, + 120.0, + 182.0, + 242.0, + 304.0, + 364.0, + 426.0, + 488.0, + 548.0, + 610.0, + 670.0, + ], + ) + }, + coords={"month": np.arange(1, 13), "quantile": 0}, + ) + assert_identical(expected, actual) + + actual = g.quantile(0, dim="time").isel(month=slice(None, 2)) + expected = xr.Dataset( + data_vars={"a": (("month", "x"), [[0.0, 1], [62.0, 63]])}, + coords={"month": [1, 2], "x": [0, 1], "quantile": 0}, + ) assert_identical(expected, actual) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index cfce5d6f645..0b410383a34 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -5,7 +5,13 @@ import pytest import xarray as xr -from xarray.core.missing import NumpyInterpolator, ScipyInterpolator, SplineInterpolator +from xarray.core.missing import ( + NumpyInterpolator, + ScipyInterpolator, + SplineInterpolator, + get_clean_interp_index, + _get_nan_block_lengths, +) from xarray.core.pycompat import dask_array_type from xarray.tests import ( assert_array_equal, @@ -153,7 +159,7 @@ def test_interpolate_pd_compat_polynomial(): def test_interpolate_unsorted_index_raises(): vals = np.array([1, 2, 3], dtype=np.float64) expected = xr.DataArray(vals, dims="x", coords={"x": [2, 1, 3]}) - with raises_regex(ValueError, "Index must be monotonicly increasing"): + with raises_regex(ValueError, "Index 'x' must be monotonically increasing"): expected.interpolate_na(dim="x", method="index") @@ -169,12 +175,19 @@ def test_interpolate_invalid_interpolator_raises(): da.interpolate_na(dim="x", method="foo") +def test_interpolate_duplicate_values_raises(): + data = np.random.randn(2, 3) + da = xr.DataArray(data, coords=[("x", ["a", "a"]), ("y", [0, 1, 2])]) + with raises_regex(ValueError, "Index 'x' has duplicate values"): + da.interpolate_na(dim="x", method="foo") + + def test_interpolate_multiindex_raises(): data = np.random.randn(2, 3) data[1, 1] = np.nan da = xr.DataArray(data, coords=[("x", ["a", "b"]), ("y", [0, 1, 2])]) das = da.stack(z=("x", "y")) - with raises_regex(TypeError, "Index must be castable to float64"): + with raises_regex(TypeError, "Index 'z' must be castable to float64"): das.interpolate_na(dim="z") @@ -439,3 +452,114 @@ def test_ffill_dataset(ds): @requires_bottleneck def test_bfill_dataset(ds): ds.ffill(dim="time") + + +@requires_bottleneck +@pytest.mark.parametrize( + "y, lengths", + [ + [np.arange(9), [[3, 3, 3, 0, 3, 3, 0, 2, 2]]], + [np.arange(9) * 3, [[9, 9, 9, 0, 9, 9, 0, 6, 6]]], + [[0, 2, 5, 6, 7, 8, 10, 12, 14], [[6, 6, 6, 0, 4, 4, 0, 4, 4]]], + ], +) +def test_interpolate_na_nan_block_lengths(y, lengths): + arr = [[np.nan, np.nan, np.nan, 1, np.nan, np.nan, 4, np.nan, np.nan]] + da = xr.DataArray(arr * 2, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = _get_nan_block_lengths(da, dim="y", index=index) + expected = da.copy(data=lengths * 2) + assert_equal(actual, expected) + + +@pytest.fixture +def da_time(): + return xr.DataArray( + [np.nan, 1, 2, np.nan, np.nan, 5, np.nan, np.nan, np.nan, np.nan, 10], + dims=["t"], + ) + + +def test_interpolate_na_max_gap_errors(da_time): + with raises_regex( + NotImplementedError, "max_gap not implemented for unlabeled coordinates" + ): + da_time.interpolate_na("t", max_gap=1) + + with raises_regex(ValueError, "max_gap must be a scalar."): + da_time.interpolate_na("t", max_gap=(1,)) + + da_time["t"] = pd.date_range("2001-01-01", freq="H", periods=11) + with 
raises_regex(TypeError, "Underlying index is"): + da_time.interpolate_na("t", max_gap=1) + + with raises_regex(TypeError, "Expected integer or floating point"): + da_time.interpolate_na("t", max_gap="1H", use_coordinate=False) + + with raises_regex(ValueError, "Could not convert 'huh' to timedelta64"): + da_time.interpolate_na("t", max_gap="huh") + + +@requires_bottleneck +@pytest.mark.parametrize( + "time_range_func", + [pd.date_range, pytest.param(xr.cftime_range, marks=pytest.mark.xfail)], +) +@pytest.mark.parametrize("transform", [lambda x: x, lambda x: x.to_dataset(name="a")]) +@pytest.mark.parametrize( + "max_gap", ["3H", np.timedelta64(3, "h"), pd.to_timedelta("3H")] +) +def test_interpolate_na_max_gap_time_specifier( + da_time, max_gap, transform, time_range_func +): + da_time["t"] = time_range_func("2001-01-01", freq="H", periods=11) + expected = transform( + da_time.copy(data=[np.nan, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan, 10]) + ) + actual = transform(da_time).interpolate_na("t", max_gap=max_gap) + assert_equal(actual, expected) + + +@requires_bottleneck +@pytest.mark.parametrize( + "coords", + [ + pytest.param(None, marks=pytest.mark.xfail()), + {"x": np.arange(4), "y": np.arange(11)}, + ], +) +def test_interpolate_na_2d(coords): + da = xr.DataArray( + [ + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + ], + dims=["x", "y"], + coords=coords, + ) + + actual = da.interpolate_na("y", max_gap=2) + expected_y = da.copy( + data=[ + [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan, np.nan, 11], + ] + ) + assert_equal(actual, expected_y) + + actual = da.interpolate_na("x", max_gap=3) + expected_x = xr.DataArray( + [ + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + ], + dims=["x", "y"], + coords=coords, + ) + assert_equal(actual, expected_x) diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index a31da162487..a02fef2faeb 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -856,6 +856,10 @@ def test_dask_token(): import dask s = sparse.COO.from_numpy(np.array([0, 0, 1, 2])) + + # https://github.com/pydata/sparse/issues/300 + s.__dask_tokenize__ = lambda: dask.base.normalize_token(s.__dict__) + a = DataArray(s) t1 = dask.base.tokenize(a) t2 = dask.base.tokenize(a) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index fd9e9b039ac..0be6f8af464 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -222,7 +222,9 @@ def convert_units(obj, to): if name != obj.name } - new_obj = xr.DataArray(name=name, data=data, coords=coords, attrs=obj.attrs) + new_obj = xr.DataArray( + name=name, data=data, coords=coords, attrs=obj.attrs, dims=obj.dims + ) elif isinstance(obj, unit_registry.Quantity): units = to.get(None) new_obj = obj.to(units) if units is not None else obj @@ -307,19 +309,689 @@ def __repr__(self): class function: - def __init__(self, name): - self.name = name - self.func = getattr(np, name) + def __init__(self, name_or_function, *args, **kwargs): + if 
callable(name_or_function): + self.name = name_or_function.__name__ + self.func = name_or_function + else: + self.name = name_or_function + self.func = getattr(np, name_or_function) + if self.func is None: + raise AttributeError( + f"module 'numpy' has no attribute named '{self.name}'" + ) + + self.args = args + self.kwargs = kwargs def __call__(self, *args, **kwargs): - return self.func(*args, **kwargs) + all_args = list(self.args) + list(args) + all_kwargs = {**self.kwargs, **kwargs} + + return self.func(*all_args, **all_kwargs) def __repr__(self): return f"function_{self.name}" +def test_apply_ufunc_dataarray(dtype): + func = function( + xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} + ) + + array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.m + x = np.arange(20) * unit_registry.s + data_array = xr.DataArray(data=array, dims="x", coords={"x": x}) + + expected = attach_units(func(strip_units(data_array)), extract_units(data_array)) + result = func(data_array) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail( + reason="pint does not implement `np.result_type` and align strips units" +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan))) +def test_align_dataarray(fill_value, variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit + array2 = np.linspace(0, 8, 2 * 5).reshape(2, 5).astype(dtype) * data_unit + x = np.arange(2) * original_unit + x_a1 = np.array([10, 5]) * original_unit + x_a2 = np.array([10, 5]) * coord_unit + + y1 = np.arange(5) * original_unit + y2 = np.arange(2, 7) * dim_unit + + data_array1 = xr.DataArray( + data=array1, coords={"x": x, "x_a": ("x", x_a1), "y": y1}, dims=("x", "y") + ) + data_array2 = xr.DataArray( + data=array2, coords={"x": x, "x_a": ("x", x_a2), "y": y2}, dims=("x", "y") + ) + + fill_value = fill_value * data_unit + func = function(xr.align, join="outer", fill_value=fill_value) + if error is not None: + with pytest.raises(error): + func(data_array1, data_array2) + + return + + stripped_kwargs = { + key: strip_units( + convert_units(value, {None: original_unit}) + if isinstance(value, unit_registry.Quantity) + else value + ) + for key, value in func.kwargs.items() + } + units = extract_units(data_array1) + # FIXME: should the expected_b have the same units as data_array1 + # or data_array2? 
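On the FIXME above: the helpers in this module settle the question by convention, converting the second operand to the first operand's units before stripping, so both expected results end up carrying ``data_array1``'s units. The convert/strip/attach round-trip in miniature, using pint directly rather than the module's helpers (``ureg`` here is a stand-in for the ``unit_registry`` fixture used elsewhere in this file):

    import pint

    ureg = pint.UnitRegistry()

    q = (1000 * ureg.mm).to(ureg.m)  # convert: <Quantity(1.0, 'meter')>
    bare = q.magnitude               # strip: 1.0
    restored = bare * ureg.m         # attach: <Quantity(1.0, 'meter')>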
+ expected_a, expected_b = tuple( + attach_units(elem, units) + for elem in func( + strip_units(data_array1), + strip_units(convert_units(data_array2, units)), + **stripped_kwargs, + ) + ) + result_a, result_b = func(data_array1, data_array2) + + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) + + +@pytest.mark.xfail( + reason="pint does not implement `np.result_type` and align strips units" +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan))) +def test_align_dataset(fill_value, unit, variant, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit + array2 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit + + x = np.arange(2) * original_unit + x_a1 = np.array([10, 5]) * original_unit + x_a2 = np.array([10, 5]) * coord_unit + + y1 = np.arange(5) * original_unit + y2 = np.arange(2, 7) * dim_unit + + ds1 = xr.Dataset( + data_vars={"a": (("x", "y"), array1)}, + coords={"x": x, "x_a": ("x", x_a1), "y": y1}, + ) + ds2 = xr.Dataset( + data_vars={"a": (("x", "y"), array2)}, + coords={"x": x, "x_a": ("x", x_a2), "y": y2}, + ) + + fill_value = fill_value * data_unit + func = function(xr.align, join="outer", fill_value=fill_value) + if error is not None: + with pytest.raises(error): + func(ds1, ds2) + + return + + stripped_kwargs = { + key: strip_units( + convert_units(value, {None: original_unit}) + if isinstance(value, unit_registry.Quantity) + else value + ) + for key, value in func.kwargs.items() + } + units = extract_units(ds1) + # FIXME: should the expected_b have the same units as ds1 or ds2? 
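For reference, the base behavior under test here, stripped of units: with ``join="outer"``, ``xr.align`` unions the indexes of its arguments and writes ``fill_value`` into the positions an argument does not cover. A minimal unitless sketch:

    import xarray as xr

    a = xr.DataArray([1.0, 2.0], coords={"x": [0, 1]}, dims="x")
    b = xr.DataArray([3.0, 4.0], coords={"x": [1, 2]}, dims="x")

    # the union index is [0, 1, 2]; uncovered slots take fill_value
    aligned_a, aligned_b = xr.align(a, b, join="outer", fill_value=-1.0)
    # aligned_a.values -> [1., 2., -1.]; aligned_b.values -> [-1., 3., 4.]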
+ expected_a, expected_b = tuple( + attach_units(elem, units) + for elem in func( + strip_units(ds1), strip_units(convert_units(ds2, units)), **stripped_kwargs + ) + ) + result_a, result_b = func(ds1, ds2) + + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) + + +def test_broadcast_dataarray(dtype): + array1 = np.linspace(0, 10, 2) * unit_registry.Pa + array2 = np.linspace(0, 10, 3) * unit_registry.Pa + + a = xr.DataArray(data=array1, dims="x") + b = xr.DataArray(data=array2, dims="y") + + expected_a, expected_b = tuple( + attach_units(elem, extract_units(a)) + for elem in xr.broadcast(strip_units(a), strip_units(b)) + ) + result_a, result_b = xr.broadcast(a, b) + + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) + + +def test_broadcast_dataset(dtype): + array1 = np.linspace(0, 10, 2) * unit_registry.Pa + array2 = np.linspace(0, 10, 3) * unit_registry.Pa + + ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("y", array2)}) + + (expected,) = tuple( + attach_units(elem, extract_units(ds)) for elem in xr.broadcast(strip_units(ds)) + ) + (result,) = xr.broadcast(ds) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`combine_by_coords` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_combine_by_coords(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + x = np.arange(1, 4) * 10 * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + other_array1 = np.ones_like(array1) * data_unit + other_array2 = np.ones_like(array2) * data_unit + other_x = np.arange(1, 4) * 10 * dim_unit + other_y = np.arange(2, 4) * dim_unit + other_z = np.arange(3, 6) * coord_unit + + ds = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + other = xr.Dataset( + data_vars={"a": (("y", "x"), other_array1), "b": (("y", "x"), other_array2)}, + coords={"x": other_x, "y": other_y, "z": ("x", other_z)}, + ) + + if error is not None: + with pytest.raises(error): + xr.combine_by_coords([ds, other]) + + return + + units = extract_units(ds) + expected = attach_units( + xr.combine_by_coords( + [strip_units(ds), strip_units(convert_units(other, units))] + ), + units, + ) + result = xr.combine_by_coords([ds, other]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + 
pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_combine_nested(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + + x = np.arange(1, 4) * 10 * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + ds1 = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + ds2 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.ones_like(array1) * data_unit), + "b": (("y", "x"), np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(3) * dim_unit, + "y": np.arange(2, 4) * dim_unit, + "z": ("x", np.arange(-3, 0) * coord_unit), + }, + ) + ds3 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit), + "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit), + }, + coords={ + "x": np.arange(3, 6) * dim_unit, + "y": np.arange(4, 6) * dim_unit, + "z": ("x", np.arange(3, 6) * coord_unit), + }, + ) + ds4 = xr.Dataset( + data_vars={ + "a": (("y", "x"), -1 * np.ones_like(array1) * data_unit), + "b": (("y", "x"), -1 * np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(6, 9) * dim_unit, + "y": np.arange(6, 8) * dim_unit, + "z": ("x", np.arange(6, 9) * coord_unit), + }, + ) + + func = function(xr.combine_nested, concat_dim=["x", "y"]) + if error is not None: + with pytest.raises(error): + func([[ds1, ds2], [ds3, ds4]]) + + return + + units = extract_units(ds1) + convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) + expected = attach_units( + func( + [ + [strip_units(ds1), convert_and_strip(ds2)], + [convert_and_strip(ds3), convert_and_strip(ds4)], + ] + ), + units, + ) + result = func([[ds1, ds2], [ds3, ds4]]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`concat` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + ), +) +def test_concat_dataarray(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = {"data": (unit, original_unit), "dims": (original_unit, unit)} + data_unit, dims_unit = variants.get(variant) + + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit + x1 = np.arange(5, 15) * original_unit + x2 = np.arange(5) * dims_unit + + arr1 = xr.DataArray(data=array1, coords={"x": x1}, dims="x") + arr2 = 
xr.DataArray(data=array2, coords={"x": x2}, dims="x") + + if error is not None: + with pytest.raises(error): + xr.concat([arr1, arr2], dim="x") + + return + + expected = attach_units( + xr.concat([strip_units(arr1), strip_units(arr2)], dim="x"), extract_units(arr1) + ) + result = xr.concat([arr1, arr2], dim="x") + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`concat` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + ), +) +def test_concat_dataset(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = {"data": (unit, original_unit), "dims": (original_unit, unit)} + data_unit, dims_unit = variants.get(variant) + + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit + x1 = np.arange(5, 15) * original_unit + x2 = np.arange(5) * dims_unit + + ds1 = xr.Dataset(data_vars={"a": ("x", array1)}, coords={"x": x1}) + ds2 = xr.Dataset(data_vars={"a": ("x", array2)}, coords={"x": x2}) + + if error is not None: + with pytest.raises(error): + xr.concat([ds1, ds2], dim="x") + + return + + expected = attach_units( + xr.concat([strip_units(ds1), strip_units(ds2)], dim="x"), extract_units(ds1) + ) + result = xr.concat([ds1, ds2], dim="x") + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_merge_dataarray(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * original_unit + array2 = np.linspace(1, 2, 2 * 4).reshape(2, 4).astype(dtype) * data_unit + array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit + + x = np.arange(2) * original_unit + y = np.arange(3) * original_unit + z = np.arange(4) * original_unit + u = np.linspace(10, 20, 2) * original_unit + v = np.linspace(10, 20, 3) * original_unit + w = np.linspace(10, 20, 4) * original_unit + + arr1 = xr.DataArray( + name="a", + data=array1, + coords={"x": x, "y": y, "u": ("x", u), "v": ("y", v)}, + dims=("x", "y"), + ) + arr2 = xr.DataArray( + name="b", + data=array2, + coords={ + "x": np.arange(2, 4) * dim_unit, + "z": z, + "u": ("x", np.linspace(20, 30, 2) * coord_unit), + "w": ("z", w), + }, + dims=("x", 
"z"), + ) + arr3 = xr.DataArray( + name="c", + data=array3, + coords={ + "y": np.arange(3, 6) * dim_unit, + "z": np.arange(4, 8) * dim_unit, + "v": ("y", np.linspace(10, 20, 3) * coord_unit), + "w": ("z", np.linspace(10, 20, 4) * coord_unit), + }, + dims=("y", "z"), + ) + + func = function(xr.merge) + if error is not None: + with pytest.raises(error): + func([arr1, arr2, arr3]) + + return + + units = {name: original_unit for name in list("abcuvwxyz")} + convert_and_strip = lambda arr: strip_units(convert_units(arr, units)) + expected = attach_units( + func([strip_units(arr1), convert_and_strip(arr2), convert_and_strip(arr3)]), + units, + ) + result = func([arr1, arr2, arr3]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_merge_dataset(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + + x = np.arange(11, 14) * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + ds1 = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + ds2 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.ones_like(array1) * data_unit), + "b": (("y", "x"), np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(3) * dim_unit, + "y": np.arange(2, 4) * dim_unit, + "z": ("x", np.arange(-3, 0) * coord_unit), + }, + ) + ds3 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit), + "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit), + }, + coords={ + "x": np.arange(3, 6) * dim_unit, + "y": np.arange(4, 6) * dim_unit, + "z": ("x", np.arange(3, 6) * coord_unit), + }, + ) + + func = function(xr.merge) + if error is not None: + with pytest.raises(error): + func([ds1, ds2, ds3]) + + return + + units = extract_units(ds1) + convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) + expected = attach_units( + func([strip_units(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]), units + ) + result = func([ds1, ds2, ds3]) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) -def test_replication(func, dtype): +def test_replication_dataarray(func, dtype): array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s data_array = xr.DataArray(data=array, dims="x") @@ -330,8 +1002,33 @@ def test_replication(func, dtype): assert_equal_with_units(expected, result) +@pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) +def test_replication_dataset(func, dtype): + array1 = np.linspace(0, 10, 20).astype(dtype) * 
unit_registry.s + array2 = np.linspace(5, 10, 10).astype(dtype) * unit_registry.Pa + x = np.arange(20).astype(dtype) * unit_registry.m + y = np.arange(10).astype(dtype) * unit_registry.m + z = y.to(unit_registry.mm) + + ds = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("y", array2)}, + coords={"x": x, "y": y, "z": ("y", z)}, + ) + + numpy_func = getattr(np, func.__name__) + expected = ds.copy( + data={name: numpy_func(array.data) for name, array in ds.data_vars.items()} + ) + result = func(ds) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail( - reason="np.full_like on Variable strips the unit and pint does not allow mixed args" + reason=( + "pint is undecided on how `full_like` should work, so incorrect errors " + "may be expected: hgrecco/pint#882" + ) ) @pytest.mark.parametrize( "unit,error", @@ -344,8 +1041,9 @@ def test_replication(func, dtype): pytest.param(unit_registry.ms, None, id="compatible_unit"), pytest.param(unit_registry.s, None, id="identical_unit"), ), + ids=repr, ) -def test_replication_full_like(unit, error, dtype): +def test_replication_full_like_dataarray(unit, error, dtype): array = np.linspace(0, 5, 10) * unit_registry.s data_array = xr.DataArray(data=array, dims="x") @@ -360,6 +1058,163 @@ def test_replication_full_like(unit, error, dtype): assert_equal_with_units(expected, result) +@pytest.mark.xfail( + reason=( + "pint is undecided on how `full_like` should work, so incorrect errors " + "may be expected: hgrecco/pint#882" + ) +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.ms, None, id="compatible_unit"), + pytest.param(unit_registry.s, None, id="identical_unit"), + ), + ids=repr, +) +def test_replication_full_like_dataset(unit, error, dtype): + array1 = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s + array2 = np.linspace(5, 10, 10).astype(dtype) * unit_registry.Pa + x = np.arange(20).astype(dtype) * unit_registry.m + y = np.arange(10).astype(dtype) * unit_registry.m + z = y.to(unit_registry.mm) + + ds = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("y", array2)}, + coords={"x": x, "y": y, "z": ("y", z)}, + ) + + fill_value = -1 * unit + if error is not None: + with pytest.raises(error): + xr.full_like(ds, fill_value=fill_value) + + return + + expected = ds.copy( + data={ + name: np.full_like(array, fill_value=fill_value) + for name, array in ds.data_vars.items() + } + ) + result = xr.full_like(ds, fill_value=fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`where` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize("fill_value", (np.nan, 10.2)) +def test_where_dataarray(fill_value, unit, error, dtype): + array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + + x = xr.DataArray(data=array, dims="x") + cond = x < 5 * unit_registry.m + # FIXME: this should work without wrapping in array() + fill_value = np.array(fill_value) * unit + + 
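Setting the units aside for a moment, the base semantics being exercised: ``xr.where(cond, x, y)`` keeps ``x`` wherever the condition holds and takes ``y`` elsewhere. A unitless sketch:

    import xarray as xr

    x = xr.DataArray([0.0, 1.0, 2.0], dims="t")

    # cond is [True, True, False], so the last slot takes the fill
    xr.where(x < 2, x, -1.0)  # values -> [0., 1., -1.]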
if error is not None: + with pytest.raises(error): + xr.where(cond, x, fill_value) + + return + + fill_value_ = ( + fill_value.to(unit_registry.m) + if isinstance(fill_value, unit_registry.Quantity) + and fill_value.check(unit_registry.m) + else fill_value + ) + expected = attach_units( + xr.where(cond, strip_units(x), strip_units(fill_value_)), extract_units(x) + ) + result = xr.where(cond, x, fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`where` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize("fill_value", (np.nan, 10.2)) +def test_where_dataset(fill_value, unit, error, dtype): + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 10).astype(dtype) * unit_registry.m + x = np.arange(10) * unit_registry.s + + ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}) + cond = ds.x < 5 * unit_registry.s + # FIXME: this should work without wrapping in array() + fill_value = np.array(fill_value) * unit + + if error is not None: + with pytest.raises(error): + xr.where(cond, ds, fill_value) + + return + + fill_value_ = ( + fill_value.to(unit_registry.m) + if isinstance(fill_value, unit_registry.Quantity) + and fill_value.check(unit_registry.m) + else fill_value + ) + expected = attach_units( + xr.where(cond, strip_units(ds), strip_units(fill_value_)), extract_units(ds) + ) + result = xr.where(cond, ds, fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="pint does not implement `np.einsum`") +def test_dot_dataarray(dtype): + array1 = ( + np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) + * unit_registry.m + / unit_registry.s + ) + array2 = ( + np.linspace(10, 20, 10 * 20).reshape(10, 20).astype(dtype) * unit_registry.s + ) + + arr1 = xr.DataArray(data=array1, dims=("x", "y")) + arr2 = xr.DataArray(data=array2, dims=("y", "z")) + + expected = array1.dot(array2) + result = xr.dot(arr1, arr2) + + assert_equal_with_units(expected, result) + + class TestDataArray: @pytest.mark.filterwarnings("error:::pint[.*]") @pytest.mark.parametrize( @@ -1114,7 +1969,7 @@ def test_broadcast_equals(self, unit, dtype): dim={"z": np.linspace(10, 20, 12) * unit_registry.s}, axis=1, ), - method("drop", labels="x"), + method("drop_sel", labels="x"), method("reset_coords", names="x2"), method("copy"), pytest.param( @@ -3190,7 +4045,7 @@ def test_reindex_like(self, unit, error, dtype): marks=pytest.mark.xfail(reason="strips units"), ), pytest.param( - method("apply", np.fabs), + method("map", np.fabs), marks=pytest.mark.xfail(reason="fabs strips units"), ), ), @@ -3365,7 +4220,7 @@ def test_grouped_operations(self, func, dtype): method("rename_dims", x="offset_x"), method("swap_dims", {"x": "x2"}), method("expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1), - method("drop", labels="x"), + method("drop_sel", labels="x"), method("drop_dims", "z"), method("set_coords", names="c"), method("reset_coords", names="x2"), diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 528027ed149..d92a68729b5 100644 --- 
a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -542,6 +542,15 @@ def test_copy_index_with_data_errors(self): with raises_regex(ValueError, "must match shape of object"): orig.copy(data=new_data) + def test_replace(self): + var = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"}) + result = var._replace() + assert_identical(result, var) + + new_data = np.arange(4).reshape(2, 2) + result = var._replace(data=new_data) + assert_array_equal(result.data, new_data) + def test_real_and_imag(self): v = self.cls("x", np.arange(3) - 1j * np.arange(3), {"foo": "bar"}) expected_re = self.cls("x", np.arange(3), {"foo": "bar"}) @@ -1477,6 +1486,10 @@ def test_reduce(self): with raises_regex(ValueError, "cannot supply both"): v.mean(dim="x", axis=0) + with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): + v.mean(dim="x", allow_lazy=True) + with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): + v.mean(dim="x", allow_lazy=False) def test_quantile(self): v = Variable(["x", "y"], self.d)
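For orientation, a sketch of what the new ``test_replace`` asserts; ``Variable._replace`` is internal API (note the leading underscore) that swaps out the given attributes and reuses the rest:

    import numpy as np
    from xarray.core.variable import Variable

    var = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"})

    # with no arguments the result is an identical copy
    assert var._replace().identical(var)

    # replacing only the data keeps dims and attrs unchanged
    new = var._replace(data=np.arange(4).reshape(2, 2))
    assert new.dims == ("x", "y") and new.attrs == {"foo": "bar"}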