From 5533a6754226d367f73e84e4cd94d50265173db7 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Wed, 19 May 2021 14:15:13 -0700 Subject: [PATCH 01/12] Revert "Use _unstack_once for valid dask and sparse versions (#5315)" This reverts commit 9165c266f52830384c399f0607a4123e65bb7803. --- xarray/core/dataset.py | 46 ++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a42d045705c..f59b9b6bea5 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -78,7 +78,7 @@ ) from .missing import get_clean_interp_index from .options import OPTIONS, _get_keep_attrs -from .pycompat import dask_version, is_duck_dask_array +from .pycompat import is_duck_dask_array, sparse_array_type from .utils import ( Default, Frozen, @@ -4028,24 +4028,36 @@ def unstack( result = self.copy(deep=False) for dim in dims: - if not sparse and all( - # Dask arrays recently supports assignment by index, - # https://github.com/dask/dask/pull/7393 - dask_version >= "2021.04.0" and is_duck_dask_array(v.data) - # Numpy arrays handles the fast path: - or isinstance(v.data, np.ndarray) - for v in self.variables.values() + + if ( + # Dask arrays don't support assignment by index, which the fast unstack + # function requires. + # https://github.com/pydata/xarray/pull/4746#issuecomment-753282125 + any(is_duck_dask_array(v.data) for v in self.variables.values()) + # Sparse doesn't currently support (though we could special-case + # it) + # https://github.com/pydata/sparse/issues/422 + or any( + isinstance(v.data, sparse_array_type) + for v in self.variables.values() + ) + or sparse + # Until https://github.com/pydata/xarray/pull/4751 is resolved, + # we check explicitly whether it's a numpy array. Once that is + # resolved, explicitly exclude pint arrays. + # # pint doesn't implement `np.full_like` in a way that's + # # currently compatible. + # # https://github.com/pydata/xarray/pull/4746#issuecomment-753425173 + # # or any( + # # isinstance(v.data, pint_array_type) for v in self.variables.values() + # # ) + or any( + not isinstance(v.data, np.ndarray) for v in self.variables.values() + ) ): - # Fast unstacking path: - result = result._unstack_once(dim, fill_value) - else: - # Slower unstacking path, examples of array types that - # currently has to use this path: - # * sparse doesn't support item assigment, - # https://github.com/pydata/sparse/issues/114 - # * pint has some circular import issues, - # https://github.com/pydata/xarray/pull/4751 result = result._unstack_full_reindex(dim, fill_value, sparse) + else: + result = result._unstack_once(dim, fill_value) return result def update(self, other: "CoercibleMapping") -> "Dataset": From 1b4412eeb7011f53932779e1d7c3534163aedd63 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Wed, 19 May 2021 14:18:49 -0700 Subject: [PATCH 02/12] 0.18.2 release notes --- doc/whats-new.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5c0a4d11f53..4b60efe8bb8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,6 +14,13 @@ What's New np.random.seed(123456) +.. _whats-new.0.18.2: + +v0.18.2 (19 May 2021) +--------------------- + +This release reverts a regression in xarray's unstacking of dask-backed arrays. + .. _whats-new.0.18.1: v0.18.1 (18 May 2021) From 0e52f9f3082fe9fc32e6eb08ef35f4e3583a1ad0 Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 23 Jun 2021 19:14:12 +0200 Subject: [PATCH 03/12] fix RTD [skip-ci] (#5518) --- doc/whats-new.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4b60efe8bb8..d28b821d958 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -643,7 +643,7 @@ Documentation By `Pieter Gijsbers `_. - Fix grammar and typos in the :doc:`contributing` guide (:pull:`4545`). By `Sahid Velji `_. -- Fix grammar and typos in the :doc:`io` guide (:pull:`4553`). +- Fix grammar and typos in the :doc:`user-guide/io` guide (:pull:`4553`). By `Sahid Velji `_. - Update link to NumPy docstring standard in the :doc:`contributing` guide (:pull:`4558`). By `Sahid Velji `_. @@ -2952,7 +2952,7 @@ Documentation - Added apply_ufunc example to :ref:`/examples/weather-data.ipynb#Toy-weather-data` (:issue:`1844`). By `Liam Brannigan `_. - New entry `Why don’t aggregations return Python scalars?` in the - :doc:`faq` (:issue:`1726`). + :doc:`getting-started-guide/faq` (:issue:`1726`). By `0x0L `_. Enhancements From c5ee050f5faf09bb047528cca07ab57051282894 Mon Sep 17 00:00:00 2001 From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com> Date: Wed, 21 Jul 2021 17:42:47 -0400 Subject: [PATCH 04/12] Add to_numpy() and as_numpy() methods (#5568) * added to_numpy() and as_numpy() methods * remove special-casing of cupy arrays in .values in favour of using .to_numpy() * lint * Fix mypy (I think?) * added Dataset.as_numpy() * improved docstrings * add what's new * add to API docs * linting * fix failures by only importing pint when needed * refactor pycompat into class * compute instead of load * added tests * fixed sparse test * tests and fixes for ds.as_numpy() * fix sparse tests * fix linting * tests for Variable * test IndexVariable too * use numpy.asarray to avoid a copy * also convert coords * Force tests again after #5600 * Apply suggestions from code review * Update xarray/core/variable.py * fix import * formatting * remove type check Co-authored-by: Stephan Hoyer * remove attempt to call to_numpy Co-authored-by: Maximilian Roos Co-authored-by: Deepak Cherian Co-authored-by: Stephan Hoyer --- doc/api.rst | 3 ++ doc/whats-new.rst | 2 + xarray/core/dataarray.py | 52 ++++++++++++++++++-- xarray/core/dataset.py | 12 +++++ xarray/core/pycompat.py | 76 ++++++++++++++++++------------ xarray/core/variable.py | 27 ++++++++++- xarray/tests/__init__.py | 1 + xarray/tests/test_dataarray.py | 86 ++++++++++++++++++++++++++++++++++ xarray/tests/test_dataset.py | 73 +++++++++++++++++++++++++++++ xarray/tests/test_variable.py | 67 ++++++++++++++++++++++++++ 10 files changed, 363 insertions(+), 36 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index bb3a99bfbb0..0e34b13f9a5 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -686,6 +686,7 @@ Dataset methods open_zarr Dataset.to_netcdf Dataset.to_pandas + Dataset.as_numpy Dataset.to_zarr save_mfdataset Dataset.to_array @@ -716,6 +717,8 @@ DataArray methods DataArray.to_pandas DataArray.to_series DataArray.to_dataframe + DataArray.to_numpy + DataArray.as_numpy DataArray.to_index DataArray.to_masked_array DataArray.to_cdms2 diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8045e5d486f..38f152faf5c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -56,6 +56,8 @@ New Features - Allow removal of the coordinate attribute ``coordinates`` on variables by setting ``.attrs['coordinates']= None`` (:issue:`5510`). By `Elle Smith `_. +- Added :py:meth:`DataArray.to_numpy`, :py:meth:`DataArray.as_numpy`, and :py:meth:`Dataset.as_numpy`. (:pull:`5568`). + By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b4d553c235a..d567da629ca 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -426,12 +426,12 @@ def __init__( self._close = None def _replace( - self, + self: T_DataArray, variable: Variable = None, coords=None, name: Union[Hashable, None, Default] = _default, indexes=None, - ) -> "DataArray": + ) -> T_DataArray: if variable is None: variable = self.variable if coords is None: @@ -623,7 +623,16 @@ def __len__(self) -> int: @property def data(self) -> Any: - """The array's data as a dask or numpy array""" + """ + The DataArray's data as an array. The underlying array type + (e.g. dask, sparse, pint) is preserved. + + See Also + -------- + DataArray.to_numpy + DataArray.as_numpy + DataArray.values + """ return self.variable.data @data.setter @@ -632,13 +641,46 @@ def data(self, value: Any) -> None: @property def values(self) -> np.ndarray: - """The array's data as a numpy.ndarray""" + """ + The array's data as a numpy.ndarray. + + If the array's data is not a numpy.ndarray this will attempt to convert + it naively using np.array(), which will raise an error if the array + type does not support coercion like this (e.g. cupy). + """ return self.variable.values @values.setter def values(self, value: Any) -> None: self.variable.values = value + def to_numpy(self) -> np.ndarray: + """ + Coerces wrapped data to numpy and returns a numpy.ndarray. + + See also + -------- + DataArray.as_numpy : Same but returns the surrounding DataArray instead. + Dataset.as_numpy + DataArray.values + DataArray.data + """ + return self.variable.to_numpy() + + def as_numpy(self: T_DataArray) -> T_DataArray: + """ + Coerces wrapped data and coordinates into numpy arrays, returning a DataArray. + + See also + -------- + DataArray.to_numpy : Same but returns only the data as a numpy.ndarray object. + Dataset.as_numpy : Converts all variables in a Dataset. + DataArray.values + DataArray.data + """ + coords = {k: v.as_numpy() for k, v in self._coords.items()} + return self._replace(self.variable.as_numpy(), coords, indexes=self._indexes) + @property def _in_memory(self) -> bool: return self.variable._in_memory @@ -931,7 +973,7 @@ def persist(self, **kwargs) -> "DataArray": ds = self._to_temp_dataset().persist(**kwargs) return self._from_temp_dataset(ds) - def copy(self, deep: bool = True, data: Any = None) -> "DataArray": + def copy(self: T_DataArray, deep: bool = True, data: Any = None) -> T_DataArray: """Returns a copy of this array. If `deep=True`, a deep copy is made of the data array. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f3378ad9eda..650b3c80adf 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1323,6 +1323,18 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": return self._replace(variables, attrs=attrs) + def as_numpy(self: "Dataset") -> "Dataset": + """ + Coerces wrapped data and coordinates into numpy arrays, returning a Dataset. + + See also + -------- + DataArray.as_numpy + DataArray.to_numpy : Returns only the data as a numpy.ndarray object. + """ + numpy_variables = {k: v.as_numpy() for k, v in self.variables.items()} + return self._replace(variables=numpy_variables) + @property def _level_coords(self) -> Dict[str, Hashable]: """Return a mapping of all MultiIndex levels and their corresponding diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index 9f47da6c8cc..d1649235006 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -1,4 +1,5 @@ from distutils.version import LooseVersion +from importlib import import_module import numpy as np @@ -6,42 +7,57 @@ integer_types = (int, np.integer) -try: - import dask - import dask.array - from dask.base import is_dask_collection - dask_version = LooseVersion(dask.__version__) +class DuckArrayModule: + """ + Solely for internal isinstance and version checks. - # solely for isinstance checks - dask_array_type = (dask.array.Array,) + Motivated by having to only import pint when required (as pint currently imports xarray) + https://github.com/pydata/xarray/pull/5561#discussion_r664815718 + """ - def is_duck_dask_array(x): - return is_duck_array(x) and is_dask_collection(x) + def __init__(self, mod): + try: + duck_array_module = import_module(mod) + duck_array_version = LooseVersion(duck_array_module.__version__) + + if mod == "dask": + duck_array_type = (import_module("dask.array").Array,) + elif mod == "pint": + duck_array_type = (duck_array_module.Quantity,) + elif mod == "cupy": + duck_array_type = (duck_array_module.ndarray,) + elif mod == "sparse": + duck_array_type = (duck_array_module.SparseArray,) + else: + raise NotImplementedError + + except ImportError: # pragma: no cover + duck_array_module = None + duck_array_version = LooseVersion("0.0.0") + duck_array_type = () + self.module = duck_array_module + self.version = duck_array_version + self.type = duck_array_type + self.available = duck_array_module is not None -except ImportError: # pragma: no cover - dask_version = LooseVersion("0.0.0") - dask_array_type = () - is_duck_dask_array = lambda _: False - is_dask_collection = lambda _: False -try: - # solely for isinstance checks - import sparse +def is_duck_dask_array(x): + if DuckArrayModule("dask").available: + from dask.base import is_dask_collection + + return is_duck_array(x) and is_dask_collection(x) + else: + return False + - sparse_version = LooseVersion(sparse.__version__) - sparse_array_type = (sparse.SparseArray,) -except ImportError: # pragma: no cover - sparse_version = LooseVersion("0.0.0") - sparse_array_type = () +dsk = DuckArrayModule("dask") +dask_version = dsk.version +dask_array_type = dsk.type -try: - # solely for isinstance checks - import cupy +sp = DuckArrayModule("sparse") +sparse_array_type = sp.type +sparse_version = sp.version - cupy_version = LooseVersion(cupy.__version__) - cupy_array_type = (cupy.ndarray,) -except ImportError: # pragma: no cover - cupy_version = LooseVersion("0.0.0") - cupy_array_type = () +cupy_array_type = DuckArrayModule("cupy").type diff --git a/xarray/core/variable.py b/xarray/core/variable.py index f57c4495a8a..3d9b5079da2 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -29,10 +29,12 @@ from .indexing import BasicIndexer, OuterIndexer, VectorizedIndexer, as_indexable from .options import _get_keep_attrs from .pycompat import ( + DuckArrayModule, cupy_array_type, dask_array_type, integer_types, is_duck_dask_array, + sparse_array_type, ) from .utils import ( NdimSizeLenMixin, @@ -259,7 +261,7 @@ def _as_array_or_item(data): TODO: remove this (replace with np.asarray) once these issues are fixed """ - data = data.get() if isinstance(data, cupy_array_type) else np.asarray(data) + data = np.asarray(data) if data.ndim == 0: if data.dtype.kind == "M": data = np.datetime64(data, "ns") @@ -1069,6 +1071,29 @@ def chunk(self, chunks={}, name=None, lock=False): return self._replace(data=data) + def to_numpy(self) -> np.ndarray: + """Coerces wrapped data to numpy and returns a numpy.ndarray""" + # TODO an entrypoint so array libraries can choose coercion method? + data = self.data + # TODO first attempt to call .to_numpy() once some libraries implement it + if isinstance(data, dask_array_type): + data = data.compute() + if isinstance(data, cupy_array_type): + data = data.get() + # pint has to be imported dynamically as pint imports xarray + pint_array_type = DuckArrayModule("pint").type + if isinstance(data, pint_array_type): + data = data.magnitude + if isinstance(data, sparse_array_type): + data = data.todense() + data = np.asarray(data) + + return data + + def as_numpy(self: VariableType) -> VariableType: + """Coerces wrapped data into a numpy array, returning a Variable.""" + return self._replace(data=self.to_numpy()) + def _as_sparse(self, sparse_format=_default, fill_value=dtypes.NA): """ use sparse-array as backend. diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 9029dc1c621..d757fb451cc 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -83,6 +83,7 @@ def LooseVersion(vstring): has_numbagg, requires_numbagg = _importorskip("numbagg") has_seaborn, requires_seaborn = _importorskip("seaborn") has_sparse, requires_sparse = _importorskip("sparse") +has_cupy, requires_cupy = _importorskip("cupy") has_cartopy, requires_cartopy = _importorskip("cartopy") # Need Pint 0.15 for __dask_tokenize__ tests for Quantity wrapped Dask Arrays has_pint_0_15, requires_pint_0_15 = _importorskip("pint", minversion="0.15") diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b9f04085935..79370adb016 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -36,10 +36,12 @@ has_dask, raise_if_dask_computes, requires_bottleneck, + requires_cupy, requires_dask, requires_iris, requires_numbagg, requires_numexpr, + requires_pint_0_15, requires_scipy, requires_sparse, source_ndarray, @@ -7375,3 +7377,87 @@ def test_drop_duplicates(keep): expected = xr.DataArray(data, dims="time", coords={"time": time}, name="test") result = ds.drop_duplicates("time", keep=keep) assert_equal(expected, result) + + +class TestNumpyCoercion: + # TODO once flexible indexes refactor complete also test coercion of dimension coords + def test_from_numpy(self): + da = xr.DataArray([1, 2, 3], dims="x", coords={"lat": ("x", [4, 5, 6])}) + + assert_identical(da.as_numpy(), da) + np.testing.assert_equal(da.to_numpy(), np.array([1, 2, 3])) + np.testing.assert_equal(da["lat"].to_numpy(), np.array([4, 5, 6])) + + @requires_dask + def test_from_dask(self): + da = xr.DataArray([1, 2, 3], dims="x", coords={"lat": ("x", [4, 5, 6])}) + da_chunked = da.chunk(1) + + assert_identical(da_chunked.as_numpy(), da.compute()) + np.testing.assert_equal(da.to_numpy(), np.array([1, 2, 3])) + np.testing.assert_equal(da["lat"].to_numpy(), np.array([4, 5, 6])) + + @requires_pint_0_15 + def test_from_pint(self): + from pint import Quantity + + arr = np.array([1, 2, 3]) + da = xr.DataArray( + Quantity(arr, units="Pa"), + dims="x", + coords={"lat": ("x", Quantity(arr + 3, units="m"))}, + ) + + expected = xr.DataArray(arr, dims="x", coords={"lat": ("x", arr + 3)}) + assert_identical(da.as_numpy(), expected) + np.testing.assert_equal(da.to_numpy(), arr) + np.testing.assert_equal(da["lat"].to_numpy(), arr + 3) + + @requires_sparse + def test_from_sparse(self): + import sparse + + arr = np.diagflat([1, 2, 3]) + sparr = sparse.COO.from_numpy(arr) + da = xr.DataArray( + sparr, dims=["x", "y"], coords={"elev": (("x", "y"), sparr + 3)} + ) + + expected = xr.DataArray( + arr, dims=["x", "y"], coords={"elev": (("x", "y"), arr + 3)} + ) + assert_identical(da.as_numpy(), expected) + np.testing.assert_equal(da.to_numpy(), arr) + + @requires_cupy + def test_from_cupy(self): + import cupy as cp + + arr = np.array([1, 2, 3]) + da = xr.DataArray( + cp.array(arr), dims="x", coords={"lat": ("x", cp.array(arr + 3))} + ) + + expected = xr.DataArray(arr, dims="x", coords={"lat": ("x", arr + 3)}) + assert_identical(da.as_numpy(), expected) + np.testing.assert_equal(da.to_numpy(), arr) + + @requires_dask + @requires_pint_0_15 + def test_from_pint_wrapping_dask(self): + import dask + from pint import Quantity + + arr = np.array([1, 2, 3]) + d = dask.array.from_array(arr) + da = xr.DataArray( + Quantity(d, units="Pa"), + dims="x", + coords={"lat": ("x", Quantity(d, units="m") * 2)}, + ) + + result = da.as_numpy() + result.name = None # remove dask-assigned name + expected = xr.DataArray(arr, dims="x", coords={"lat": ("x", arr * 2)}) + assert_identical(result, expected) + np.testing.assert_equal(da.to_numpy(), arr) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 317602c889f..f6fb4aa0cde 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -44,9 +44,11 @@ has_dask, requires_bottleneck, requires_cftime, + requires_cupy, requires_dask, requires_numbagg, requires_numexpr, + requires_pint_0_15, requires_scipy, requires_sparse, source_ndarray, @@ -6760,3 +6762,74 @@ def test_clip(ds): result = ds.clip(min=ds.mean("y"), max=ds.mean("y")) assert result.dims == ds.dims + + +class TestNumpyCoercion: + def test_from_numpy(self): + ds = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", [4, 5, 6])}) + + assert_identical(ds.as_numpy(), ds) + + @requires_dask + def test_from_dask(self): + ds = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", [4, 5, 6])}) + ds_chunked = ds.chunk(1) + + assert_identical(ds_chunked.as_numpy(), ds.compute()) + + @requires_pint_0_15 + def test_from_pint(self): + from pint import Quantity + + arr = np.array([1, 2, 3]) + ds = xr.Dataset( + {"a": ("x", Quantity(arr, units="Pa"))}, + coords={"lat": ("x", Quantity(arr + 3, units="m"))}, + ) + + expected = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", arr + 3)}) + assert_identical(ds.as_numpy(), expected) + + @requires_sparse + def test_from_sparse(self): + import sparse + + arr = np.diagflat([1, 2, 3]) + sparr = sparse.COO.from_numpy(arr) + ds = xr.Dataset( + {"a": (["x", "y"], sparr)}, coords={"elev": (("x", "y"), sparr + 3)} + ) + + expected = xr.Dataset( + {"a": (["x", "y"], arr)}, coords={"elev": (("x", "y"), arr + 3)} + ) + assert_identical(ds.as_numpy(), expected) + + @requires_cupy + def test_from_cupy(self): + import cupy as cp + + arr = np.array([1, 2, 3]) + ds = xr.Dataset( + {"a": ("x", cp.array(arr))}, coords={"lat": ("x", cp.array(arr + 3))} + ) + + expected = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", arr + 3)}) + assert_identical(ds.as_numpy(), expected) + + @requires_dask + @requires_pint_0_15 + def test_from_pint_wrapping_dask(self): + import dask + from pint import Quantity + + arr = np.array([1, 2, 3]) + d = dask.array.from_array(arr) + ds = xr.Dataset( + {"a": ("x", Quantity(d, units="Pa"))}, + coords={"lat": ("x", Quantity(d, units="m") * 2)}, + ) + + result = ds.as_numpy() + expected = xr.Dataset({"a": ("x", arr)}, coords={"lat": ("x", arr * 2)}) + assert_identical(result, expected) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 0247892931a..d999bc1eb8b 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -33,7 +33,9 @@ assert_equal, assert_identical, raise_if_dask_computes, + requires_cupy, requires_dask, + requires_pint_0_15, requires_sparse, source_ndarray, ) @@ -2554,3 +2556,68 @@ def test_clip(var): var.mean("z").data[:, :, np.newaxis], ), ) + + +@pytest.mark.parametrize("Var", [Variable, IndexVariable]) +class TestNumpyCoercion: + def test_from_numpy(self, Var): + v = Var("x", [1, 2, 3]) + + assert_identical(v.as_numpy(), v) + np.testing.assert_equal(v.to_numpy(), np.array([1, 2, 3])) + + @requires_dask + def test_from_dask(self, Var): + v = Var("x", [1, 2, 3]) + v_chunked = v.chunk(1) + + assert_identical(v_chunked.as_numpy(), v.compute()) + np.testing.assert_equal(v.to_numpy(), np.array([1, 2, 3])) + + @requires_pint_0_15 + def test_from_pint(self, Var): + from pint import Quantity + + arr = np.array([1, 2, 3]) + v = Var("x", Quantity(arr, units="m")) + + assert_identical(v.as_numpy(), Var("x", arr)) + np.testing.assert_equal(v.to_numpy(), arr) + + @requires_sparse + def test_from_sparse(self, Var): + if Var is IndexVariable: + pytest.skip("Can't have 2D IndexVariables") + + import sparse + + arr = np.diagflat([1, 2, 3]) + sparr = sparse.COO(coords=[[0, 1, 2], [0, 1, 2]], data=[1, 2, 3]) + v = Variable(["x", "y"], sparr) + + assert_identical(v.as_numpy(), Variable(["x", "y"], arr)) + np.testing.assert_equal(v.to_numpy(), arr) + + @requires_cupy + def test_from_cupy(self, Var): + import cupy as cp + + arr = np.array([1, 2, 3]) + v = Var("x", cp.array(arr)) + + assert_identical(v.as_numpy(), Var("x", arr)) + np.testing.assert_equal(v.to_numpy(), arr) + + @requires_dask + @requires_pint_0_15 + def test_from_pint_wrapping_dask(self, Var): + import dask + from pint import Quantity + + arr = np.array([1, 2, 3]) + d = dask.array.from_array(np.array([1, 2, 3])) + v = Var("x", Quantity(d, units="m")) + + result = v.as_numpy() + assert_identical(result, Var("x", arr)) + np.testing.assert_equal(v.to_numpy(), arr) From 92cb751a52eec9e8b5fa521e0f9f83162eff7e3b Mon Sep 17 00:00:00 2001 From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com> Date: Wed, 21 Jul 2021 18:38:34 -0400 Subject: [PATCH 05/12] Plots get labels from pint arrays (#5561) * test labels come from pint units * values demotes pint arrays before returning * plot labels look for pint units first * pre-commit * added to_numpy() and as_numpy() methods * remove special-casing of cupy arrays in .values in favour of using .to_numpy() * .values -> .to_numpy() * lint * Fix mypy (I think?) * added Dataset.as_numpy() * improved docstrings * add what's new * add to API docs * linting * fix failures by only importing pint when needed * refactor pycompat into class * pycompat import changes applied to plotting code * what's new * compute instead of load * added tests * fixed sparse test * tests and fixes for ds.as_numpy() * fix sparse tests * fix linting * tests for Variable * test IndexVariable too * use numpy.asarray to avoid a copy * also convert coords * Force tests again after #5600 Co-authored-by: Maximilian Roos --- doc/whats-new.rst | 2 ++ xarray/core/dataarray.py | 2 +- xarray/core/variable.py | 1 + xarray/plot/plot.py | 10 +++++----- xarray/plot/utils.py | 21 ++++++++++++++------ xarray/tests/test_units.py | 40 +++++++++++++++++++++++++++++++++++++- 6 files changed, 63 insertions(+), 13 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 38f152faf5c..8d8598b6830 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -58,6 +58,8 @@ New Features By `Elle Smith `_. - Added :py:meth:`DataArray.to_numpy`, :py:meth:`DataArray.as_numpy`, and :py:meth:`Dataset.as_numpy`. (:pull:`5568`). By `Tom Nicholas `_. +- Units in plot labels are now automatically inferred from wrapped :py:meth:`pint.Quantity` arrays. (:pull:`5561`). + By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index d567da629ca..46c488b83a9 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2784,7 +2784,7 @@ def to_masked_array(self, copy: bool = True) -> np.ma.MaskedArray: result : MaskedArray Masked where invalid values (nan or inf) occur. """ - values = self.values # only compute lazy arrays once + values = self.to_numpy() # only compute lazy arrays once isnull = pd.isnull(values) return np.ma.MaskedArray(data=values, mask=isnull, copy=copy) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 3d9b5079da2..f228ef43e32 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1075,6 +1075,7 @@ def to_numpy(self) -> np.ndarray: """Coerces wrapped data to numpy and returns a numpy.ndarray""" # TODO an entrypoint so array libraries can choose coercion method? data = self.data + # TODO first attempt to call .to_numpy() once some libraries implement it if isinstance(data, dask_array_type): data = data.compute() diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 2ab85e60725..e20b6568e79 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -430,7 +430,7 @@ def line( # Remove pd.Intervals if contained in xplt.values and/or yplt.values. xplt_val, yplt_val, x_suffix, y_suffix, kwargs = _resolve_intervals_1dplot( - xplt.values, yplt.values, kwargs + xplt.to_numpy(), yplt.to_numpy(), kwargs ) xlabel = label_from_attrs(xplt, extra=x_suffix) ylabel = label_from_attrs(yplt, extra=y_suffix) @@ -449,7 +449,7 @@ def line( ax.set_title(darray._title_for_slice()) if darray.ndim == 2 and add_legend: - ax.legend(handles=primitive, labels=list(hueplt.values), title=hue_label) + ax.legend(handles=primitive, labels=list(hueplt.to_numpy()), title=hue_label) # Rotate dates on xlabels # Do this without calling autofmt_xdate so that x-axes ticks @@ -551,7 +551,7 @@ def hist( """ ax = get_axis(figsize, size, aspect, ax) - no_nan = np.ravel(darray.values) + no_nan = np.ravel(darray.to_numpy()) no_nan = no_nan[pd.notnull(no_nan)] primitive = ax.hist(no_nan, **kwargs) @@ -1153,8 +1153,8 @@ def newplotfunc( dims = (yval.dims[0], xval.dims[0]) # better to pass the ndarrays directly to plotting functions - xval = xval.values - yval = yval.values + xval = xval.to_numpy() + yval = yval.to_numpy() # May need to transpose for correct x, y labels # xlab may be the name of a coord, we have to check for dim names diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 2bc806af14b..f2f296096a5 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -9,6 +9,7 @@ import pandas as pd from ..core.options import OPTIONS +from ..core.pycompat import DuckArrayModule from ..core.utils import is_scalar try: @@ -474,12 +475,20 @@ def label_from_attrs(da, extra=""): else: name = "" - if da.attrs.get("units"): - units = " [{}]".format(da.attrs["units"]) - elif da.attrs.get("unit"): - units = " [{}]".format(da.attrs["unit"]) + def _get_units_from_attrs(da): + if da.attrs.get("units"): + units = " [{}]".format(da.attrs["units"]) + elif da.attrs.get("unit"): + units = " [{}]".format(da.attrs["unit"]) + else: + units = "" + return units + + pint_array_type = DuckArrayModule("pint").type + if isinstance(da.data, pint_array_type): + units = " [{}]".format(str(da.data.units)) else: - units = "" + units = _get_units_from_attrs(da) return "\n".join(textwrap.wrap(name + extra + units, 30)) @@ -896,7 +905,7 @@ def _get_nice_quiver_magnitude(u, v): import matplotlib as mpl ticker = mpl.ticker.MaxNLocator(3) - mean = np.mean(np.hypot(u.values, v.values)) + mean = np.mean(np.hypot(u.to_numpy(), v.to_numpy())) magnitude = ticker.tick_values(0, mean)[-2] return magnitude diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 17086049cc7..2140047f38e 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -5,10 +5,22 @@ import pandas as pd import pytest +try: + import matplotlib.pyplot as plt +except ImportError: + pass + import xarray as xr from xarray.core import dtypes, duck_array_ops -from . import assert_allclose, assert_duckarray_allclose, assert_equal, assert_identical +from . import ( + assert_allclose, + assert_duckarray_allclose, + assert_equal, + assert_identical, + requires_matplotlib, +) +from .test_plot import PlotTestCase from .test_variable import _PAD_XR_NP_ARGS pint = pytest.importorskip("pint") @@ -5564,3 +5576,29 @@ def test_merge(self, variant, unit, error, dtype): assert_units_equal(expected, actual) assert_equal(expected, actual) + + +@requires_matplotlib +class TestPlots(PlotTestCase): + def test_units_in_line_plot_labels(self): + arr = np.linspace(1, 10, 3) * unit_registry.Pa + # TODO make coord a Quantity once unit-aware indexes supported + x_coord = xr.DataArray( + np.linspace(1, 3, 3), dims="x", attrs={"units": "meters"} + ) + da = xr.DataArray(data=arr, dims="x", coords={"x": x_coord}, name="pressure") + + da.plot.line() + + ax = plt.gca() + assert ax.get_ylabel() == "pressure [pascal]" + assert ax.get_xlabel() == "x [meters]" + + def test_units_in_2d_plot_labels(self): + arr = np.ones((2, 3)) * unit_registry.Pa + da = xr.DataArray(data=arr, dims=["x", "y"], name="pressure") + + fig, (ax, cax) = plt.subplots(1, 2) + ax = da.plot.contourf(ax=ax, cbar_ax=cax, add_colorbar=True) + + assert cax.get_ylabel() == "pressure [pascal]" From deaca14295d23c021a8352e485b199318907f59b Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Thu, 22 Jul 2021 16:02:03 -0700 Subject: [PATCH 06/12] Make typing-extensions optional (#5624) --- .pre-commit-config.yaml | 1 + doc/getting-started-guide/installing.rst | 1 - setup.cfg | 2 +- xarray/core/utils.py | 44 ++++++++++++++++++------ 4 files changed, 35 insertions(+), 13 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 232dbec36fe..3b490dccb75 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,6 +43,7 @@ repos: types-pytz, # Dependencies that are typed numpy, + typing-extensions==3.10.0.0, ] # run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194 # - repo: https://github.com/asottile/pyupgrade diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst index 2411a2c67ba..506236f3b9a 100644 --- a/doc/getting-started-guide/installing.rst +++ b/doc/getting-started-guide/installing.rst @@ -8,7 +8,6 @@ Required dependencies - Python (3.7 or later) - setuptools (40.4 or later) -- typing-extensions (3.10 or later) - `numpy `__ (1.17 or later) - `pandas `__ (1.0 or later) diff --git a/setup.cfg b/setup.cfg index 5a6e0b3435d..c44d207bf0f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -67,6 +67,7 @@ classifiers = Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 Topic :: Scientific/Engineering [options] @@ -78,7 +79,6 @@ install_requires = numpy >= 1.17 pandas >= 1.0 setuptools >= 40.4 # For pkg_resources - typing-extensions >= 3.10 # Backported type hints [options.extras_require] io = diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 72e34932579..a139d2ef10a 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -10,6 +10,7 @@ import warnings from enum import Enum from typing import ( + TYPE_CHECKING, Any, Callable, Collection, @@ -32,12 +33,6 @@ import numpy as np import pandas as pd -if sys.version_info >= (3, 10): - from typing import TypeGuard -else: - from typing_extensions import TypeGuard - - K = TypeVar("K") V = TypeVar("V") T = TypeVar("T") @@ -297,11 +292,7 @@ def either_dict_or_kwargs( return pos_kwargs -def is_scalar(value: Any, include_0d: bool = True) -> TypeGuard[Hashable]: - """Whether to treat a value as a scalar. - - Any non-iterable, string, or 0-D array - """ +def _is_scalar(value, include_0d): from .variable import NON_NUMPY_SUPPORTED_ARRAY_TYPES if include_0d: @@ -316,6 +307,37 @@ def is_scalar(value: Any, include_0d: bool = True) -> TypeGuard[Hashable]: ) +# See GH5624, this is a convoluted way to allow type-checking to use `TypeGuard` without +# requiring typing_extensions as a required dependency to _run_ the code (it is required +# to type-check). +try: + if sys.version_info >= (3, 10): + from typing import TypeGuard + else: + from typing_extensions import TypeGuard +except ImportError: + if TYPE_CHECKING: + raise + else: + + def is_scalar(value: Any, include_0d: bool = True) -> bool: + """Whether to treat a value as a scalar. + + Any non-iterable, string, or 0-D array + """ + return _is_scalar(value, include_0d) + + +else: + + def is_scalar(value: Any, include_0d: bool = True) -> TypeGuard[Hashable]: + """Whether to treat a value as a scalar. + + Any non-iterable, string, or 0-D array + """ + return _is_scalar(value, include_0d) + + def is_valid_numpy_dtype(dtype: Any) -> bool: try: np.dtype(dtype) From c5530d52d1bcbd071f4a22d471b728a4845ea36f Mon Sep 17 00:00:00 2001 From: keewis Date: Fri, 23 Jul 2021 22:12:38 +0200 Subject: [PATCH 07/12] remove deprecations scheduled for 0.19 (#5630) * remove the deprecated dim kwarg for DataArray.integrate * remove the deprecated keep_attrs kwarg to .rolling * remove the keep_attrs kwarg to .coarsen * raise a TypeError when passing DataArray to Variable * remove the deprecated return value of Dataset.update * remove the outdated datasets argument to combine_by_coords * fixed linting for combine_by_coords deprecation * all deprecations in the what's new * remove the documentation pages for the removed attributes [skip-ci] * Undo the deprecation and schedule the removal for 0.21 This reverts commit 85f5d2c25f4c4a0e0ac9aa75a8c31a2cd60f3033. * update whats-new.rst * point to Dataset.merge [skip-ci] * Undo the removal of the update return value and update the scheduled version Co-authored-by: Thomas Nicholas --- doc/api-hidden.rst | 4 --- doc/whats-new.rst | 4 +++ xarray/core/combine.py | 6 ++-- xarray/core/common.py | 7 +--- xarray/core/dataarray.py | 19 ----------- xarray/core/dataset.py | 8 ++++- xarray/core/rolling.py | 57 ++++++--------------------------- xarray/core/variable.py | 11 ++----- xarray/tests/test_coarsen.py | 58 ---------------------------------- xarray/tests/test_dask.py | 2 +- xarray/tests/test_dataarray.py | 27 ---------------- xarray/tests/test_dataset.py | 44 ++------------------------ xarray/tests/test_variable.py | 2 +- 13 files changed, 33 insertions(+), 216 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 076b0eb452a..fc27d9c3fe8 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -54,7 +54,6 @@ core.rolling.DatasetCoarsen.var core.rolling.DatasetCoarsen.boundary core.rolling.DatasetCoarsen.coord_func - core.rolling.DatasetCoarsen.keep_attrs core.rolling.DatasetCoarsen.obj core.rolling.DatasetCoarsen.side core.rolling.DatasetCoarsen.trim_excess @@ -120,7 +119,6 @@ core.rolling.DatasetRolling.var core.rolling.DatasetRolling.center core.rolling.DatasetRolling.dim - core.rolling.DatasetRolling.keep_attrs core.rolling.DatasetRolling.min_periods core.rolling.DatasetRolling.obj core.rolling.DatasetRolling.rollings @@ -199,7 +197,6 @@ core.rolling.DataArrayCoarsen.var core.rolling.DataArrayCoarsen.boundary core.rolling.DataArrayCoarsen.coord_func - core.rolling.DataArrayCoarsen.keep_attrs core.rolling.DataArrayCoarsen.obj core.rolling.DataArrayCoarsen.side core.rolling.DataArrayCoarsen.trim_excess @@ -263,7 +260,6 @@ core.rolling.DataArrayRolling.var core.rolling.DataArrayRolling.center core.rolling.DataArrayRolling.dim - core.rolling.DataArrayRolling.keep_attrs core.rolling.DataArrayRolling.min_periods core.rolling.DataArrayRolling.obj core.rolling.DataArrayRolling.window diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8d8598b6830..6127edcfc87 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -77,6 +77,10 @@ Breaking changes Deprecations ~~~~~~~~~~~~ +- Removed the deprecated ``dim`` kwarg to :py:func:`DataArray.integrate` (:pull:`5630`) +- Removed the deprecated ``keep_attrs`` kwarg to :py:func:`DataArray.rolling` (:pull:`5630`) +- Removed the deprecated ``keep_attrs`` kwarg to :py:func:`DataArray.coarsen` (:pull:`5630`) +- Completed deprecation of passing an ``xarray.DataArray`` to :py:func:`Variable` - will now raise a ``TypeError`` (:pull:`5630`) Bug fixes ~~~~~~~~~ diff --git a/xarray/core/combine.py b/xarray/core/combine.py index de6d16ef5c3..be9c2992832 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -635,7 +635,7 @@ def _combine_single_variable_hypercube( return concatenated -# TODO remove empty list default param after version 0.19, see PR4696 +# TODO remove empty list default param after version 0.21, see PR4696 def combine_by_coords( data_objects=[], compat="no_conflicts", @@ -849,11 +849,11 @@ def combine_by_coords( precipitation (y, x) float64 0.4376 0.8918 0.9637 ... 0.5684 0.01879 0.6176 """ - # TODO remove after version 0.19, see PR4696 + # TODO remove after version 0.21, see PR4696 if datasets is not None: warnings.warn( "The datasets argument has been renamed to `data_objects`." - " In future passing a value for datasets will raise an error." + " From 0.21 on passing a value for datasets will raise an error." ) data_objects = datasets diff --git a/xarray/core/common.py b/xarray/core/common.py index 7b6e9198b43..ab822f576d3 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -821,7 +821,6 @@ def rolling( dim: Mapping[Hashable, int] = None, min_periods: int = None, center: Union[bool, Mapping[Hashable, bool]] = False, - keep_attrs: bool = None, **window_kwargs: int, ): """ @@ -889,9 +888,7 @@ def rolling( """ dim = either_dict_or_kwargs(dim, window_kwargs, "rolling") - return self._rolling_cls( - self, dim, min_periods=min_periods, center=center, keep_attrs=keep_attrs - ) + return self._rolling_cls(self, dim, min_periods=min_periods, center=center) def rolling_exp( self, @@ -940,7 +937,6 @@ def coarsen( boundary: str = "exact", side: Union[str, Mapping[Hashable, str]] = "left", coord_func: str = "mean", - keep_attrs: bool = None, **window_kwargs: int, ): """ @@ -1009,7 +1005,6 @@ def coarsen( boundary=boundary, side=side, coord_func=coord_func, - keep_attrs=keep_attrs, ) def resample( diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 46c488b83a9..cb2c4d30a69 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3582,8 +3582,6 @@ def integrate( self, coord: Union[Hashable, Sequence[Hashable]] = None, datetime_unit: str = None, - *, - dim: Union[Hashable, Sequence[Hashable]] = None, ) -> "DataArray": """Integrate along the given coordinate using the trapezoidal rule. @@ -3595,8 +3593,6 @@ def integrate( ---------- coord : hashable, or sequence of hashable Coordinate(s) used for the integration. - dim : hashable, or sequence of hashable - Coordinate(s) used for the integration. datetime_unit : {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ 'ps', 'fs', 'as'}, optional Specify the unit if a datetime coordinate is used. @@ -3633,21 +3629,6 @@ def integrate( array([5.4, 6.6, 7.8]) Dimensions without coordinates: y """ - if dim is not None and coord is not None: - raise ValueError( - "Cannot pass both 'dim' and 'coord'. Please pass only 'coord' instead." - ) - - if dim is not None and coord is None: - coord = dim - msg = ( - "The `dim` keyword argument to `DataArray.integrate` is " - "being replaced with `coord`, for consistency with " - "`Dataset.integrate`. Please pass `coord` instead." - " `dim` will be removed in version 0.19.0." - ) - warnings.warn(msg, FutureWarning, stacklevel=2) - ds = self._to_temp_dataset().integrate(coord, datetime_unit) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 650b3c80adf..5f5c01ad4c9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4173,6 +4173,7 @@ def update(self, other: "CoercibleMapping") -> "Dataset": """Update this dataset's variables with those from another dataset. Just like :py:meth:`dict.update` this is a in-place operation. + For a non-inplace version, see :py:meth:`Dataset.merge`. Parameters ---------- @@ -4191,7 +4192,7 @@ def update(self, other: "CoercibleMapping") -> "Dataset": Updated dataset. Note that since the update is in-place this is the input dataset. - It is deprecated since version 0.17 and scheduled to be removed in 0.19. + It is deprecated since version 0.17 and scheduled to be removed in 0.21. Raises ------ @@ -4202,6 +4203,7 @@ def update(self, other: "CoercibleMapping") -> "Dataset": See Also -------- Dataset.assign + Dataset.merge """ merge_result = dataset_update_method(self, other) return self._replace(inplace=True, **merge_result._asdict()) @@ -4275,6 +4277,10 @@ def merge( ------ MergeError If any variables conflict (see ``compat``). + + See Also + -------- + Dataset.update """ other = other.to_dataset() if isinstance(other, xr.DataArray) else other merge_result = dataset_merge_method( diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index b87dcda24b0..04052510f5d 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -48,10 +48,10 @@ class Rolling: xarray.DataArray.rolling """ - __slots__ = ("obj", "window", "min_periods", "center", "dim", "keep_attrs") - _attributes = ("window", "min_periods", "center", "dim", "keep_attrs") + __slots__ = ("obj", "window", "min_periods", "center", "dim") + _attributes = ("window", "min_periods", "center", "dim") - def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None): + def __init__(self, obj, windows, min_periods=None, center=False): """ Moving window object. @@ -89,15 +89,6 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None self.min_periods = np.prod(self.window) if min_periods is None else min_periods - if keep_attrs is not None: - warnings.warn( - "Passing ``keep_attrs`` to ``rolling`` is deprecated and will raise an" - " error in xarray 0.18. Please pass ``keep_attrs`` directly to the" - " applied function. Note that keep_attrs is now True per default.", - FutureWarning, - ) - self.keep_attrs = keep_attrs - def __repr__(self): """provide a nice str repr of our rolling object""" @@ -188,15 +179,8 @@ def _mapping_to_list( ) def _get_keep_attrs(self, keep_attrs): - if keep_attrs is None: - # TODO: uncomment the next line and remove the others after the deprecation - # keep_attrs = _get_keep_attrs(default=True) - - if self.keep_attrs is None: - keep_attrs = _get_keep_attrs(default=True) - else: - keep_attrs = self.keep_attrs + keep_attrs = _get_keep_attrs(default=True) return keep_attrs @@ -204,7 +188,7 @@ def _get_keep_attrs(self, keep_attrs): class DataArrayRolling(Rolling): __slots__ = ("window_labels",) - def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None): + def __init__(self, obj, windows, min_periods=None, center=False): """ Moving window object for DataArray. You should use DataArray.rolling() method to construct this object @@ -235,9 +219,7 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None xarray.Dataset.rolling xarray.Dataset.groupby """ - super().__init__( - obj, windows, min_periods=min_periods, center=center, keep_attrs=keep_attrs - ) + super().__init__(obj, windows, min_periods=min_periods, center=center) # TODO legacy attribute self.window_labels = self.obj[self.dim[0]] @@ -561,7 +543,7 @@ def _numpy_or_bottleneck_reduce( class DatasetRolling(Rolling): __slots__ = ("rollings",) - def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None): + def __init__(self, obj, windows, min_periods=None, center=False): """ Moving window object for Dataset. You should use Dataset.rolling() method to construct this object @@ -592,7 +574,7 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None xarray.Dataset.groupby xarray.DataArray.groupby """ - super().__init__(obj, windows, min_periods, center, keep_attrs) + super().__init__(obj, windows, min_periods, center) if any(d not in self.obj.dims for d in self.dim): raise KeyError(self.dim) # Keep each Rolling object as a dictionary @@ -768,11 +750,10 @@ class Coarsen(CoarsenArithmetic): "windows", "side", "trim_excess", - "keep_attrs", ) _attributes = ("windows", "side", "trim_excess") - def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs): + def __init__(self, obj, windows, boundary, side, coord_func): """ Moving window object. @@ -799,17 +780,6 @@ def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs): self.side = side self.boundary = boundary - if keep_attrs is not None: - warnings.warn( - "Passing ``keep_attrs`` to ``coarsen`` is deprecated and will raise an" - " error in xarray 0.19. Please pass ``keep_attrs`` directly to the" - " applied function, i.e. use ``ds.coarsen(...).mean(keep_attrs=False)``" - " instead of ``ds.coarsen(..., keep_attrs=False).mean()``" - " Note that keep_attrs is now True per default.", - FutureWarning, - ) - self.keep_attrs = keep_attrs - absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims] if absent_dims: raise ValueError( @@ -823,15 +793,8 @@ def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs): self.coord_func = coord_func def _get_keep_attrs(self, keep_attrs): - if keep_attrs is None: - # TODO: uncomment the next line and remove the others after the deprecation - # keep_attrs = _get_keep_attrs(default=True) - - if self.keep_attrs is None: - keep_attrs = _get_keep_attrs(default=True) - else: - keep_attrs = self.keep_attrs + keep_attrs = _get_keep_attrs(default=True) return keep_attrs diff --git a/xarray/core/variable.py b/xarray/core/variable.py index f228ef43e32..f69951580c7 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -118,14 +118,9 @@ def as_variable(obj, name=None) -> "Union[Variable, IndexVariable]": obj = obj.copy(deep=False) elif isinstance(obj, tuple): if isinstance(obj[1], DataArray): - # TODO: change into TypeError - warnings.warn( - ( - "Using a DataArray object to construct a variable is" - " ambiguous, please extract the data using the .data property." - " This will raise a TypeError in 0.19.0." - ), - DeprecationWarning, + raise TypeError( + "Using a DataArray object to construct a variable is" + " ambiguous, please extract the data using the .data property." ) try: obj = Variable(*obj) diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py index 503c742252a..278a961166f 100644 --- a/xarray/tests/test_coarsen.py +++ b/xarray/tests/test_coarsen.py @@ -153,39 +153,6 @@ def test_coarsen_keep_attrs(funcname, argument): assert result.da_not_coarsend.name == "da_not_coarsend" -def test_coarsen_keep_attrs_deprecated(): - global_attrs = {"units": "test", "long_name": "testing"} - attrs_da = {"da_attr": "test"} - - data = np.linspace(10, 15, 100) - coords = np.linspace(1, 10, 100) - - ds = Dataset( - data_vars={"da": ("coord", data)}, - coords={"coord": coords}, - attrs=global_attrs, - ) - ds.da.attrs = attrs_da - - # deprecated option - with pytest.warns( - FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" - ): - result = ds.coarsen(dim={"coord": 5}, keep_attrs=False).mean() - - assert result.attrs == {} - assert result.da.attrs == {} - - # the keep_attrs in the reduction function takes precedence - with pytest.warns( - FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" - ): - result = ds.coarsen(dim={"coord": 5}, keep_attrs=True).mean(keep_attrs=False) - - assert result.attrs == {} - assert result.da.attrs == {} - - @pytest.mark.slow @pytest.mark.parametrize("ds", (1, 2), indirect=True) @pytest.mark.parametrize("window", (1, 2, 3, 4)) @@ -267,31 +234,6 @@ def test_coarsen_da_keep_attrs(funcname, argument): assert result.name == "name" -def test_coarsen_da_keep_attrs_deprecated(): - attrs_da = {"da_attr": "test"} - - data = np.linspace(10, 15, 100) - coords = np.linspace(1, 10, 100) - - da = DataArray(data, dims=("coord"), coords={"coord": coords}, attrs=attrs_da) - - # deprecated option - with pytest.warns( - FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" - ): - result = da.coarsen(dim={"coord": 5}, keep_attrs=False).mean() - - assert result.attrs == {} - - # the keep_attrs in the reduction function takes precedence - with pytest.warns( - FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" - ): - result = da.coarsen(dim={"coord": 5}, keep_attrs=True).mean(keep_attrs=False) - - assert result.attrs == {} - - @pytest.mark.parametrize("da", (1, 2), indirect=True) @pytest.mark.parametrize("window", (1, 2, 3, 4)) @pytest.mark.parametrize("name", ("sum", "mean", "std", "max")) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 0a4b8725be6..d5d460056aa 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1593,7 +1593,7 @@ def test_more_transforms_pass_lazy_array_equiv(map_da, map_ds): assert_equal(xr.broadcast(map_ds.cxy, map_ds.cxy)[0], map_ds.cxy) assert_equal(map_ds.map(lambda x: x), map_ds) assert_equal(map_ds.set_coords("a").reset_coords("a"), map_ds) - assert_equal(map_ds.update({"a": map_ds.a}), map_ds) + assert_equal(map_ds.assign({"a": map_ds.a}), map_ds) # fails because of index error # assert_equal( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 79370adb016..012b070f1ee 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -6867,33 +6867,6 @@ def test_rolling_keep_attrs(funcname, argument): assert result.name == "name" -def test_rolling_keep_attrs_deprecated(): - attrs_da = {"da_attr": "test"} - - data = np.linspace(10, 15, 100) - coords = np.linspace(1, 10, 100) - - da = DataArray(data, dims=("coord"), coords={"coord": coords}, attrs=attrs_da) - - # deprecated option - with pytest.warns( - FutureWarning, match="Passing ``keep_attrs`` to ``rolling`` is deprecated" - ): - result = da.rolling(dim={"coord": 5}, keep_attrs=False).construct("window_dim") - - assert result.attrs == {} - - # the keep_attrs in the reduction function takes precedence - with pytest.warns( - FutureWarning, match="Passing ``keep_attrs`` to ``rolling`` is deprecated" - ): - result = da.rolling(dim={"coord": 5}, keep_attrs=True).construct( - "window_dim", keep_attrs=False - ) - - assert result.attrs == {} - - def test_raise_no_warning_for_nan_in_binary_ops(): with pytest.warns(None) as record: xr.DataArray([1, 2, np.NaN]) > 0 diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index f6fb4aa0cde..02d27ade161 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3193,13 +3193,13 @@ def test_update(self): data = create_test_data(seed=0) expected = data.copy() var2 = Variable("dim1", np.arange(8)) - actual = data.update({"var2": var2}) + actual = data + actual.update({"var2": var2}) expected["var2"] = var2 assert_identical(expected, actual) actual = data.copy() - actual_result = actual.update(data) - assert actual_result is actual + actual.update(data) assert_identical(expected, actual) other = Dataset(attrs={"new": "attr"}) @@ -6111,41 +6111,6 @@ def test_rolling_keep_attrs(funcname, argument): assert result.da_not_rolled.name == "da_not_rolled" -def test_rolling_keep_attrs_deprecated(): - global_attrs = {"units": "test", "long_name": "testing"} - attrs_da = {"da_attr": "test"} - - data = np.linspace(10, 15, 100) - coords = np.linspace(1, 10, 100) - - ds = Dataset( - data_vars={"da": ("coord", data)}, - coords={"coord": coords}, - attrs=global_attrs, - ) - ds.da.attrs = attrs_da - - # deprecated option - with pytest.warns( - FutureWarning, match="Passing ``keep_attrs`` to ``rolling`` is deprecated" - ): - result = ds.rolling(dim={"coord": 5}, keep_attrs=False).construct("window_dim") - - assert result.attrs == {} - assert result.da.attrs == {} - - # the keep_attrs in the reduction function takes precedence - with pytest.warns( - FutureWarning, match="Passing ``keep_attrs`` to ``rolling`` is deprecated" - ): - result = ds.rolling(dim={"coord": 5}, keep_attrs=True).construct( - "window_dim", keep_attrs=False - ) - - assert result.attrs == {} - assert result.da.attrs == {} - - def test_rolling_properties(ds): # catching invalid args with pytest.raises(ValueError, match="window must be > 0"): @@ -6591,9 +6556,6 @@ def test_integrate(dask): with pytest.raises(ValueError): da.integrate("x2d") - with pytest.warns(FutureWarning): - da.integrate(dim="x") - @requires_scipy @pytest.mark.parametrize("dask", [True, False]) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index d999bc1eb8b..96072a4e1e0 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1162,7 +1162,7 @@ def test_as_variable(self): td = np.array([timedelta(days=x) for x in range(10)]) assert as_variable(td, "time").dtype.kind == "m" - with pytest.warns(DeprecationWarning): + with pytest.raises(TypeError): as_variable(("x", DataArray([]))) def test_repr(self): From 53790638f3eb0801efd2ac2aa0ae205b8a8f10e0 Mon Sep 17 00:00:00 2001 From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com> Date: Fri, 23 Jul 2021 17:12:52 -0400 Subject: [PATCH 08/12] v0.19.0 release notes (#5632) * Revert "Use _unstack_once for valid dask and sparse versions (#5315)" This reverts commit 9165c266f52830384c399f0607a4123e65bb7803. * 0.18.2 release notes * fix RTD [skip-ci] (#5518) * v0.19.0 release notes * Update doc/whats-new.rst [skip-ci] * remove empty sections Co-authored-by: Maximilian Roos Co-authored-by: keewis --- doc/whats-new.rst | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6127edcfc87..f8bf76acc9e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,10 +14,23 @@ What's New np.random.seed(123456) -.. _whats-new.0.18.3: -v0.18.3 (unreleased) ---------------------- +.. _whats-new.0.19.0: + +v0.19.0 (23 July 2021) +---------------------- + +This release brings improvements to plotting of categorical data, the ability to specify how attributes +are combined in xarray operations, a new high-level :py:func:`unify_chunks` function, as well as various +deprecations, bug fixes, and minor improvements. + + +Many thanks to the 29 contributors to this release!: + +Andrew Williams, Augustus, Aureliana Barghini, Benoit Bovy, crusaderky, Deepak Cherian, ellesmith88, +Elliott Sales de Andrade, Giacomo Caria, github-actions[bot], Illviljan, Joeperdefloep, joooeey, Julia Kent, +Julius Busecke, keewis, Mathias Hauser, Matthias Göbel, Mattia Almansi, Maximilian Roos, Peter Andreas Entschev, +Ray Bell, Sander, Santiago Soler, Sebastian, Spencer Clark, Stephan Hoyer, Thomas Hirtz, Thomas Nicholas. New Features ~~~~~~~~~~~~ @@ -105,10 +118,6 @@ Bug fixes By `Augustus Ijams `_. -Documentation -~~~~~~~~~~~~~ - - Internal Changes ~~~~~~~~~~~~~~~~ - Run CI on the first & last python versions supported only; currently 3.7 & 3.9. @@ -117,7 +126,6 @@ Internal Changes - Publish test results & timings on each PR. (:pull:`5537`) By `Maximilian Roos `_. - - Explicit indexes refactor: add a ``xarray.Index.query()`` method in which one may eventually provide a custom implementation of label-based data selection (not ready yet for public use). Also refactor the internal, @@ -162,13 +170,6 @@ New Features - Raise more informative error when decoding time variables with invalid reference dates. (:issue:`5199`, :pull:`5288`). By `Giacomo Caria `_. -Breaking changes -~~~~~~~~~~~~~~~~ - - -Deprecations -~~~~~~~~~~~~ - Bug fixes ~~~~~~~~~ From 12810c2c264a004648fdb750262f2da4973a7ca5 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 23 Jul 2021 17:50:08 -0400 Subject: [PATCH 09/12] new blank whats-new for v0.19.1 --- doc/whats-new.rst | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f8bf76acc9e..c05fb9cd555 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -15,6 +15,34 @@ What's New np.random.seed(123456) +.. _whats-new.0.19.1: + +v0.19.1 (unreleased) +--------------------- + +New Features +~~~~~~~~~~~~ + + +Breaking changes +~~~~~~~~~~~~~~~~ + + +Deprecations +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + + +Documentation +~~~~~~~~~~~~~ + + +Internal Changes +~~~~~~~~~~~~~~~~ + .. _whats-new.0.19.0: v0.19.0 (23 July 2021) From da99a5664df4f5013c2f6b0e758394bec5e0bc80 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 25 Jul 2021 23:42:26 -0700 Subject: [PATCH 10/12] Bump codecov/codecov-action from 1 to 2.0.2 (#5633) Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 1 to 2.0.2. - [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/master/CHANGELOG.md) - [Commits](https://github.com/codecov/codecov-action/compare/v1...v2.0.2) --- updated-dependencies: - dependency-name: codecov/codecov-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci-additional.yaml | 2 +- .github/workflows/ci.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 2b9a6405f21..ed731b25f76 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -103,7 +103,7 @@ jobs: $PYTEST_EXTRA_FLAGS - name: Upload code coverage to Codecov - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v2.0.2 with: file: ./coverage.xml flags: unittests,${{ matrix.env }} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3918f92574d..22a05eb1fc0 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -100,7 +100,7 @@ jobs: path: pytest.xml - name: Upload code coverage to Codecov - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v2.0.2 with: file: ./coverage.xml flags: unittests From 78cec7c5faf3176fc67ddffb68e85e772d9f276a Mon Sep 17 00:00:00 2001 From: Pushkar Kopparla <76039838+pkopparla@users.noreply.github.com> Date: Fri, 30 Jul 2021 09:09:14 +0200 Subject: [PATCH 11/12] Kwargs to rasterio open (#5609) * Added kwargs option to be passed to rasterio.open * Added description to whats-new.rst * Fixed whitespace * Update doc/whats-new.rst Co-authored-by: Deepak Cherian * Update doc/whats-new.rst Co-authored-by: keewis * Updated whats-new.rst * Merged backend_kwargs with kwargs * Added check for no kwargs * Update doc/whats-new.rst Co-authored-by: keewis * Removed backend_kwargs Co-authored-by: Deepak Cherian Co-authored-by: keewis --- doc/whats-new.rst | 2 ++ xarray/backends/rasterio_.py | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c05fb9cd555..09ffbb557f5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -97,6 +97,8 @@ New Features - Allow removal of the coordinate attribute ``coordinates`` on variables by setting ``.attrs['coordinates']= None`` (:issue:`5510`). By `Elle Smith `_. +- Added ``**kwargs`` argument to :py:meth:`open_rasterio` to access overviews (:issue:`3269`). + By `Pushkar Kopparla `_. - Added :py:meth:`DataArray.to_numpy`, :py:meth:`DataArray.as_numpy`, and :py:meth:`Dataset.as_numpy`. (:pull:`5568`). By `Tom Nicholas `_. - Units in plot labels are now automatically inferred from wrapped :py:meth:`pint.Quantity` arrays. (:pull:`5561`). diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index 49a5a9ec7ae..1891fac8668 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -162,7 +162,14 @@ def default(s): return parsed_meta -def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, lock=None): +def open_rasterio( + filename, + parse_coordinates=None, + chunks=None, + cache=None, + lock=None, + **kwargs, +): """Open a file with rasterio (experimental). This should work with any file that rasterio can open (most often: @@ -272,7 +279,13 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc if lock is None: lock = RASTERIO_LOCK - manager = CachingFileManager(rasterio.open, filename, lock=lock, mode="r") + manager = CachingFileManager( + rasterio.open, + filename, + lock=lock, + mode="r", + kwargs=kwargs, + ) riods = manager.acquire() if vrt_params is not None: riods = WarpedVRT(riods, **vrt_params) From 297756319738938e7431b829ca7390e7f2a79fd2 Mon Sep 17 00:00:00 2001 From: Aaron Spring Date: Fri, 30 Jul 2021 10:20:15 +0200 Subject: [PATCH 12/12] Update api.rst (#5639) --- doc/api.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index 0e34b13f9a5..fb2296d1226 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -24,7 +24,6 @@ Top-level functions combine_by_coords combine_nested where - set_options infer_freq full_like zeros_like