From 17c575565c8e29f4fcd944f489a7c1a54ee3ecd5 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 2 Jul 2021 16:16:30 -0400 Subject: [PATCH 01/28] added to_numpy() and as_numpy() methods --- xarray/core/dataarray.py | 22 ++++++++++++++++++++-- xarray/core/pycompat.py | 10 ++++++++++ xarray/core/variable.py | 25 +++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index cfb7230d1ae..65d70a2e325 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -76,6 +76,7 @@ assert_unique_multiindex_level_names, ) + T_DataArray = TypeVar("T_DataArray", bound="DataArray") T_DSorDA = TypeVar("T_DSorDA", "DataArray", Dataset) if TYPE_CHECKING: @@ -627,7 +628,7 @@ def __len__(self) -> int: @property def data(self) -> Any: - """The array's data as a dask or numpy array""" + """The array's data as a numpy-like array""" return self.variable.data @data.setter @@ -636,13 +637,30 @@ def data(self, value: Any) -> None: @property def values(self) -> np.ndarray: - """The array's data as a numpy.ndarray""" + """ + The array's data as a numpy.ndarray. + + If the array's data is not a numpy.ndarray this will attempt to convert + it naively using np.array(), which will raise an error if the array + type does not support coercion like this. + """ return self.variable.values @values.setter def values(self, value: Any) -> None: self.variable.values = value + def to_numpy(self) -> np.ndarray: + """Coerces wrapped data to numpy and returns a numpy.ndarray""" + return self.variable.to_numpy() + + def as_numpy(self) -> T_DataArray: + """ + Coerces wrapped data into a numpy array, and returns it wrapped inside + a DataArray. + """ + return self.copy(data=self.to_numpy()) + @property def _in_memory(self) -> bool: return self.variable._in_memory diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index 9f47da6c8cc..b016b1e1864 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -45,3 +45,13 @@ def is_duck_dask_array(x): except ImportError: # pragma: no cover cupy_version = LooseVersion("0.0.0") cupy_array_type = () + +try: + # solely for isinstance checks + import pint + + pint_version = LooseVersion(pint.__version__) + pint_array_type = (pint.Quantity,) +except ImportError: # pragma: no cover + pint_version = LooseVersion("0.0.0") + pint_array_type = () diff --git a/xarray/core/variable.py b/xarray/core/variable.py index ace09c6f482..e9cbbcc4e94 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -31,6 +31,8 @@ from .pycompat import ( cupy_array_type, dask_array_type, + pint_array_type, + sparse_array_type, integer_types, is_duck_dask_array, ) @@ -1069,6 +1071,29 @@ def chunk(self, chunks={}, name=None, lock=False): return self._replace(data=data) + def to_numpy(self) -> np.ndarray: + """Coerces wrapped data to numpy and returns a numpy.ndarray""" + # TODO an entrypoint so array libraries can choose coercion method? + data = self.data + try: + return data.to_numpy() + except AttributeError: + if isinstance(data, dask_array_type): + data = self.load().data + if isinstance(data, cupy_array_type): + data = data.get() + if isinstance(data, pint_array_type): + data = data.magnitude + if isinstance(data, sparse_array_type): + data = data.to_dense() + if type(data) != np.ndarray: + data = np.array(data) + return data + + def as_numpy(self) -> VariableType: + """Coerces wrapped data into a numpy array, returning a Variable.""" + return self._replace(data=self.data.to_numpy()) + def _as_sparse(self, sparse_format=_default, fill_value=dtypes.NA): """ use sparse-array as backend. From 48ba107b9f2851fa50f1f2cf42047d5fba47cc95 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 2 Jul 2021 16:21:03 -0400 Subject: [PATCH 02/28] remove special-casing of cupy arrays in .values in favour of using .to_numpy() --- xarray/core/variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index e9cbbcc4e94..07fce29f454 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -261,7 +261,7 @@ def _as_array_or_item(data): TODO: remove this (replace with np.asarray) once these issues are fixed """ - data = data.get() if isinstance(data, cupy_array_type) else np.asarray(data) + data = np.asarray(data) if data.ndim == 0: if data.dtype.kind == "M": data = np.datetime64(data, "ns") From ae6e931e59faf5800734ff9378e3040956fadd07 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 2 Jul 2021 14:54:02 -0700 Subject: [PATCH 03/28] lint --- xarray/core/dataarray.py | 1 - xarray/core/variable.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 65d70a2e325..3c5f7ee654b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -76,7 +76,6 @@ assert_unique_multiindex_level_names, ) - T_DataArray = TypeVar("T_DataArray", bound="DataArray") T_DSorDA = TypeVar("T_DSorDA", "DataArray", Dataset) if TYPE_CHECKING: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 07fce29f454..4c23f8759ec 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -31,10 +31,10 @@ from .pycompat import ( cupy_array_type, dask_array_type, - pint_array_type, - sparse_array_type, integer_types, is_duck_dask_array, + pint_array_type, + sparse_array_type, ) from .utils import ( NdimSizeLenMixin, From dc24d3fec309b2b78eb04f690ca299216887f81e Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 2 Jul 2021 15:42:57 -0700 Subject: [PATCH 04/28] Fix mypy (I think?) --- xarray/core/dataarray.py | 8 ++++---- xarray/core/variable.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3c5f7ee654b..40c88c31c93 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -430,12 +430,12 @@ def __init__( self._close = None def _replace( - self, + self: T_DataArray, variable: Variable = None, coords=None, name: Union[Hashable, None, Default] = _default, indexes=None, - ) -> "DataArray": + ) -> T_DataArray: if variable is None: variable = self.variable if coords is None: @@ -653,7 +653,7 @@ def to_numpy(self) -> np.ndarray: """Coerces wrapped data to numpy and returns a numpy.ndarray""" return self.variable.to_numpy() - def as_numpy(self) -> T_DataArray: + def as_numpy(self: T_DataArray) -> T_DataArray: """ Coerces wrapped data into a numpy array, and returns it wrapped inside a DataArray. @@ -952,7 +952,7 @@ def persist(self, **kwargs) -> "DataArray": ds = self._to_temp_dataset().persist(**kwargs) return self._from_temp_dataset(ds) - def copy(self, deep: bool = True, data: Any = None) -> "DataArray": + def copy(self: T_DataArray, deep: bool = True, data: Any = None) -> T_DataArray: """Returns a copy of this array. If `deep=True`, a deep copy is made of the data array. diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 4c23f8759ec..f8791740801 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1090,7 +1090,7 @@ def to_numpy(self) -> np.ndarray: data = np.array(data) return data - def as_numpy(self) -> VariableType: + def as_numpy(self: VariableType) -> VariableType: """Coerces wrapped data into a numpy array, returning a Variable.""" return self._replace(data=self.data.to_numpy()) From ee346496e0d7e7b2a03615cc055979e74c5ac69d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 3 Jul 2021 19:06:03 -0400 Subject: [PATCH 05/28] added Dataset.as_numpy() --- xarray/core/dataset.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 13da8cfad03..042782f0406 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1198,6 +1198,8 @@ def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> "Dataset": obj = obj.rename(dim_names) return obj + + def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": """Returns a copy of this dataset. @@ -1323,6 +1325,18 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": return self._replace(variables, attrs=attrs) + def as_numpy(self: "Dataset") -> "Dataset": + """ + Coerces wrapped data into numpy arrays, returning a Dataset. + + See also + -------- + DataArray.as_numpy + DataArray.to_numpy : Returns only the numpy.ndarray object. + """ + numpy_variables = {k: v.as_numpy() for k, v in self._variables.items()} + return self.copy(data=numpy_variables) + @property def _level_coords(self) -> Dict[str, Hashable]: """Return a mapping of all MultiIndex levels and their corresponding From 552b3229c376a4fb82107f70d38127bf84984184 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 3 Jul 2021 19:08:16 -0400 Subject: [PATCH 06/28] improved docstrings --- xarray/core/dataarray.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 40c88c31c93..96d67ed6e21 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -650,13 +650,25 @@ def values(self, value: Any) -> None: self.variable.values = value def to_numpy(self) -> np.ndarray: - """Coerces wrapped data to numpy and returns a numpy.ndarray""" + """ + Coerces wrapped data to numpy and returns a numpy.ndarray. + + See also + -------- + DataArray.as_numpy : Same but returns the surrounding DataArray instead. + Dataset.as_numpy + """ return self.variable.to_numpy() def as_numpy(self: T_DataArray) -> T_DataArray: """ Coerces wrapped data into a numpy array, and returns it wrapped inside a DataArray. + + See also + -------- + DataArray.to_numpy : Same but returns only the numpy.ndarray object. + Dataset.as_numpy : Converts all variables in a Dataset. """ return self.copy(data=self.to_numpy()) From 1215e6917bfffc1c980df0f4399b2f5fa696c289 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 3 Jul 2021 19:15:10 -0400 Subject: [PATCH 07/28] add what's new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 21f453b6865..c6334fc754c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -51,6 +51,8 @@ New Features - Allow removal of the coordinate attribute ``coordinates`` on variables by setting ``.attrs['coordinates']= None`` (:issue:`5510`). By `Elle Smith `_. +- Added :py:meth:`DataArray.to_numpy`, :py:meth:`DataArray.as_numpy`, and :py:meth:`Dataset.as_numpy`. (:pull:`5568`). + By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ From af8a1ee091bfcb37146f2406ea40f1e2172940e7 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 3 Jul 2021 19:31:35 -0400 Subject: [PATCH 08/28] add to API docs --- doc/api.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index bb3a99bfbb0..0e34b13f9a5 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -686,6 +686,7 @@ Dataset methods open_zarr Dataset.to_netcdf Dataset.to_pandas + Dataset.as_numpy Dataset.to_zarr save_mfdataset Dataset.to_array @@ -716,6 +717,8 @@ DataArray methods DataArray.to_pandas DataArray.to_series DataArray.to_dataframe + DataArray.to_numpy + DataArray.as_numpy DataArray.to_index DataArray.to_masked_array DataArray.to_cdms2 From e095bf08d53c534298ebbd46ede371519ced3284 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 3 Jul 2021 19:48:17 -0400 Subject: [PATCH 09/28] linting --- xarray/core/dataset.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 042782f0406..9fc15a05120 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1198,8 +1198,6 @@ def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> "Dataset": obj = obj.rename(dim_names) return obj - - def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": """Returns a copy of this dataset. From eb7d84d7b36091d0ee6d645b633f9bdd1e37bb0e Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Jul 2021 11:26:57 -0400 Subject: [PATCH 10/28] fix failures by only importing pint when needed --- xarray/core/pycompat.py | 23 ++++++++++++++--------- xarray/core/variable.py | 3 ++- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index b016b1e1864..26d03b5d858 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -46,12 +46,17 @@ def is_duck_dask_array(x): cupy_version = LooseVersion("0.0.0") cupy_array_type = () -try: - # solely for isinstance checks - import pint - - pint_version = LooseVersion(pint.__version__) - pint_array_type = (pint.Quantity,) -except ImportError: # pragma: no cover - pint_version = LooseVersion("0.0.0") - pint_array_type = () +def _get_pint_array_type(): + # Have to only import pint when required because pint currently imports xarray + # https://github.com/pydata/xarray/pull/5561#discussion_r664815718 + try: + # solely for isinstance checks + import pint + + pint_version = LooseVersion(pint.__version__) + pint_array_type = (pint.Quantity,) + except ImportError: # pragma: no cover + pint_version = LooseVersion("0.0.0") + pint_array_type = () + + return pint_version, pint_array_type diff --git a/xarray/core/variable.py b/xarray/core/variable.py index f8791740801..a8eaf017129 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -33,7 +33,7 @@ dask_array_type, integer_types, is_duck_dask_array, - pint_array_type, + _get_pint_array_type, sparse_array_type, ) from .utils import ( @@ -1082,6 +1082,7 @@ def to_numpy(self) -> np.ndarray: data = self.load().data if isinstance(data, cupy_array_type): data = data.get() + _, pint_array_type = _get_pint_array_type() if isinstance(data, pint_array_type): data = data.magnitude if isinstance(data, sparse_array_type): From 74c05e31472b5814fd04078d4c7fcce679b80eb5 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Jul 2021 15:13:03 -0400 Subject: [PATCH 11/28] refactor pycompat into class --- xarray/core/pycompat.py | 97 +++++++++++++++++++++-------------------- xarray/core/variable.py | 17 ++++---- 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index 26d03b5d858..697b0978ba4 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -1,4 +1,5 @@ from distutils.version import LooseVersion +from importlib import import_module import numpy as np @@ -6,57 +7,57 @@ integer_types = (int, np.integer) -try: - import dask - import dask.array - from dask.base import is_dask_collection - dask_version = LooseVersion(dask.__version__) +class DuckArrayModule: + """ + Solely for internal isinstance and version checks. - # solely for isinstance checks - dask_array_type = (dask.array.Array,) + Motivated by having to only import pint when required (as pint currently imports xarray) + https://github.com/pydata/xarray/pull/5561#discussion_r664815718 + """ + + def __init__(self, mod): + try: + duck_array_module = import_module(mod) + duck_array_version = LooseVersion(duck_array_module.__version__) + + if mod == "dask": + duck_array_type = (import_module("dask.array").Array,) + elif mod == "pint": + duck_array_type = (duck_array_module.Quantity,) + elif mod == "cupy": + duck_array_type = (duck_array_module.ndarray,) + elif mod == "sparse": + duck_array_type = (duck_array_module.SparseArray,) + else: + raise NotImplementedError + + except ImportError: # pragma: no cover + duck_array_module = None + duck_array_version = LooseVersion("0.0.0") + duck_array_type = () + + self.module = duck_array_module + self.version = duck_array_version + self.type = duck_array_type + self.available = self.version > "0.0.0" + + +def is_duck_dask_array(x): + if DuckArrayModule("dask").available: + from dask.base import is_dask_collection - def is_duck_dask_array(x): return is_duck_array(x) and is_dask_collection(x) + else: + return False + + +dsk = DuckArrayModule("dask") +dask_version = dsk.version +dask_array_type = dsk.type +sp = DuckArrayModule("sparse") +sparse_array_type = sp.type +sparse_version = sp.version -except ImportError: # pragma: no cover - dask_version = LooseVersion("0.0.0") - dask_array_type = () - is_duck_dask_array = lambda _: False - is_dask_collection = lambda _: False - -try: - # solely for isinstance checks - import sparse - - sparse_version = LooseVersion(sparse.__version__) - sparse_array_type = (sparse.SparseArray,) -except ImportError: # pragma: no cover - sparse_version = LooseVersion("0.0.0") - sparse_array_type = () - -try: - # solely for isinstance checks - import cupy - - cupy_version = LooseVersion(cupy.__version__) - cupy_array_type = (cupy.ndarray,) -except ImportError: # pragma: no cover - cupy_version = LooseVersion("0.0.0") - cupy_array_type = () - -def _get_pint_array_type(): - # Have to only import pint when required because pint currently imports xarray - # https://github.com/pydata/xarray/pull/5561#discussion_r664815718 - try: - # solely for isinstance checks - import pint - - pint_version = LooseVersion(pint.__version__) - pint_array_type = (pint.Quantity,) - except ImportError: # pragma: no cover - pint_version = LooseVersion("0.0.0") - pint_array_type = () - - return pint_version, pint_array_type +cupy_array_type = DuckArrayModule("cupy").type diff --git a/xarray/core/variable.py b/xarray/core/variable.py index a8eaf017129..5665d39f97b 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -28,14 +28,7 @@ from .indexes import PandasIndex, wrap_pandas_index from .indexing import BasicIndexer, OuterIndexer, VectorizedIndexer, as_indexable from .options import _get_keep_attrs -from .pycompat import ( - cupy_array_type, - dask_array_type, - integer_types, - is_duck_dask_array, - _get_pint_array_type, - sparse_array_type, -) +from .pycompat import DuckArrayModule, integer_types, is_duck_dask_array from .utils import ( NdimSizeLenMixin, OrderedSet, @@ -49,6 +42,11 @@ maybe_coerce_to_str, ) +dask_array_type = DuckArrayModule("dask").type +cupy_array_type = DuckArrayModule("cupy").type +sparse_array_type = DuckArrayModule("sparse").type + + NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( ( indexing.ExplicitlyIndexed, @@ -1082,7 +1080,8 @@ def to_numpy(self) -> np.ndarray: data = self.load().data if isinstance(data, cupy_array_type): data = data.get() - _, pint_array_type = _get_pint_array_type() + # pint has to be imported dynamically as pint imports xarray + pint_array_type = DuckArrayModule("pint").type if isinstance(data, pint_array_type): data = data.magnitude if isinstance(data, sparse_array_type): From 45245d027cdb041c880fa757e12d8ac2082b3940 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 8 Jul 2021 15:23:21 -0400 Subject: [PATCH 12/28] compute instead of load --- xarray/core/variable.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 5665d39f97b..d53d300db95 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1074,10 +1074,10 @@ def to_numpy(self) -> np.ndarray: # TODO an entrypoint so array libraries can choose coercion method? data = self.data try: - return data.to_numpy() + data = data.to_numpy() except AttributeError: if isinstance(data, dask_array_type): - data = self.load().data + data = self.compute().data if isinstance(data, cupy_array_type): data = data.get() # pint has to be imported dynamically as pint imports xarray @@ -1085,10 +1085,11 @@ def to_numpy(self) -> np.ndarray: if isinstance(data, pint_array_type): data = data.magnitude if isinstance(data, sparse_array_type): - data = data.to_dense() - if type(data) != np.ndarray: + data = data.todense() + if type(data) != np.ndarray: # noqa : Don't allow subclasses data = np.array(data) - return data + + return data def as_numpy(self: VariableType) -> VariableType: """Coerces wrapped data into a numpy array, returning a Variable.""" From 27fc4e58cd2a3feaacfa2594425b3c6daead82b9 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 8 Jul 2021 15:23:41 -0400 Subject: [PATCH 13/28] added tests --- xarray/tests/__init__.py | 1 + xarray/tests/test_dataarray.py | 62 ++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 9029dc1c621..d757fb451cc 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -83,6 +83,7 @@ def LooseVersion(vstring): has_numbagg, requires_numbagg = _importorskip("numbagg") has_seaborn, requires_seaborn = _importorskip("seaborn") has_sparse, requires_sparse = _importorskip("sparse") +has_cupy, requires_cupy = _importorskip("cupy") has_cartopy, requires_cartopy = _importorskip("cartopy") # Need Pint 0.15 for __dask_tokenize__ tests for Quantity wrapped Dask Arrays has_pint_0_15, requires_pint_0_15 = _importorskip("pint", minversion="0.15") diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 8a82c8c37f3..2c77adc6e26 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -36,10 +36,12 @@ has_dask, raise_if_dask_computes, requires_bottleneck, + requires_cupy, requires_dask, requires_iris, requires_numbagg, requires_numexpr, + requires_pint_0_15, requires_scipy, requires_sparse, source_ndarray, @@ -7368,3 +7370,63 @@ def test_drop_duplicates(keep): expected = xr.DataArray(data, dims="time", coords={"time": time}, name="test") result = ds.drop_duplicates("time", keep=keep) assert_equal(expected, result) + + +class TestNumpyCoercion: + def test_from_numpy(self): + da = xr.DataArray([1, 2, 3]) + + assert_identical(da.as_numpy(), da) + np.testing.assert_equal(da.to_numpy(), np.array([1, 2, 3])) + + @requires_dask + def test_from_dask(self): + da = xr.DataArray([1, 2, 3]) + da_chunked = da.chunk(1) + + assert_identical(da_chunked.as_numpy(), da.compute()) + np.testing.assert_equal(da.to_numpy(), np.array([1, 2, 3])) + + @requires_pint_0_15 + def test_from_pint(self): + from pint import Quantity + + arr = np.array([1, 2, 3]) + da = xr.DataArray(Quantity(arr, units="m")) + + assert_identical(da.as_numpy(), xr.DataArray(arr)) + np.testing.assert_equal(da.to_numpy(), arr) + + @requires_sparse + def test_from_sparse(self): + arr = np.array([1, 2, 3]) + va = Variable(data=arr, dims="x")._as_sparse() + da = xr.DataArray(va, fastpath=True) + + assert_identical(da.as_numpy(), xr.DataArray(arr)) + np.testing.assert_equal(da.to_numpy(), arr) + + @requires_cupy + def test_from_cupy(self): + import cupy as cp + + arr = np.array([1, 2, 3]) + da = xr.DataArray(cp.array(arr)) + + assert_identical(da.as_numpy(), xr.DataArray(arr)) + np.testing.assert_equal(da.to_numpy(), arr) + + @requires_dask + @requires_pint_0_15 + def test_from_pint_wrapping_dask(self): + import dask + from pint import Quantity + + arr = np.array([1, 2, 3]) + d = dask.array.from_array(np.array([1, 2, 3])) + da = xr.DataArray(Quantity(d, units="m")) + + result = da.as_numpy() + result.name = None # remove dask-assigned name + assert_identical(result, xr.DataArray(arr)) + np.testing.assert_equal(da.to_numpy(), arr) From 3e8cb24f09f2976ac9a4b061ba2c9a2ca5d32def Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 8 Jul 2021 16:20:08 -0400 Subject: [PATCH 14/28] fixed sparse test --- xarray/tests/test_dataarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 2c77adc6e26..b6d46b374e4 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -7400,8 +7400,8 @@ def test_from_pint(self): @requires_sparse def test_from_sparse(self): arr = np.array([1, 2, 3]) - va = Variable(data=arr, dims="x")._as_sparse() - da = xr.DataArray(va, fastpath=True) + va = Variable(data=arr, dims="dim_0")._as_sparse() + da = xr.DataArray(va) assert_identical(da.as_numpy(), xr.DataArray(arr)) np.testing.assert_equal(da.to_numpy(), arr) From f9d6370b1b0dc848e206bede7b306def25981350 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 9 Jul 2021 12:20:01 -0400 Subject: [PATCH 15/28] tests and fixes for ds.as_numpy() --- xarray/core/dataset.py | 2 +- xarray/core/variable.py | 2 +- xarray/tests/test_dataset.py | 56 ++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 9fc15a05120..77c6d72b40c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1332,7 +1332,7 @@ def as_numpy(self: "Dataset") -> "Dataset": DataArray.as_numpy DataArray.to_numpy : Returns only the numpy.ndarray object. """ - numpy_variables = {k: v.as_numpy() for k, v in self._variables.items()} + numpy_variables = {k: v.as_numpy() for k, v in self.data_vars.items()} return self.copy(data=numpy_variables) @property diff --git a/xarray/core/variable.py b/xarray/core/variable.py index d53d300db95..0a9c0b3fd88 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1093,7 +1093,7 @@ def to_numpy(self) -> np.ndarray: def as_numpy(self: VariableType) -> VariableType: """Coerces wrapped data into a numpy array, returning a Variable.""" - return self._replace(data=self.data.to_numpy()) + return self._replace(data=self.to_numpy()) def _as_sparse(self, sparse_format=_default, fill_value=dtypes.NA): """ diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 9a001117c59..c6b5dff09d1 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -44,9 +44,11 @@ has_dask, requires_bottleneck, requires_cftime, + requires_cupy, requires_dask, requires_numbagg, requires_numexpr, + requires_pint_0_15, requires_scipy, requires_sparse, source_ndarray, @@ -6755,3 +6757,57 @@ def test_clip(ds): result = ds.clip(min=ds.mean("y"), max=ds.mean("y")) assert result.dims == ds.dims + + +class TestNumpyCoercion: + def test_from_numpy(self): + ds = xr.Dataset({'a': [1, 2, 3]}) + + assert_identical(ds.as_numpy(), ds) + + @requires_dask + def test_from_dask(self): + ds = xr.Dataset({'a': [1, 2, 3]}) + ds_chunked = ds.chunk(1) + + assert_identical(ds_chunked.as_numpy(), ds.compute()) + + @requires_pint_0_15 + def test_from_pint(self): + from pint import Quantity + + arr = np.array([1, 2, 3]) + ds = xr.Dataset({'a': Quantity(arr, units="m")}) + + assert_identical(ds.as_numpy(), xr.Dataset({'a': arr})) + + @requires_sparse + def test_from_sparse(self): + arr = np.array([1, 2, 3]) + va = Variable(data=arr, dims="dim_0")._as_sparse() + da = xr.DataArray(va, dims='a') + ds = xr.DataArray({'a': va}) + + assert_identical(ds.as_numpy(), xr.Dataset({'a': arr})) + + @requires_cupy + def test_from_cupy(self): + import cupy as cp + + arr = np.array([1, 2, 3]) + ds = xr.Dataset({'a': cp.array(arr)}) + + assert_identical(ds.as_numpy(), xr.Dataset({'a': arr})) + + @requires_dask + @requires_pint_0_15 + def test_from_pint_wrapping_dask(self): + import dask + from pint import Quantity + + arr = np.array([1, 2, 3]) + d = dask.array.from_array(np.array([1, 2, 3])) + ds = xr.Dataset({'a': Quantity(arr, units="m")}) + + result = ds.as_numpy() + assert_identical(result, xr.Dataset({'a': arr})) From 50fdf4cf8801e7968d56fb980ebde7726f302fc6 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 9 Jul 2021 14:12:29 -0400 Subject: [PATCH 16/28] fix sparse tests --- xarray/tests/test_dataarray.py | 8 +++++--- xarray/tests/test_dataset.py | 27 ++++++++++++++------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b6d46b374e4..45c2926cb38 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -7399,9 +7399,11 @@ def test_from_pint(self): @requires_sparse def test_from_sparse(self): - arr = np.array([1, 2, 3]) - va = Variable(data=arr, dims="dim_0")._as_sparse() - da = xr.DataArray(va) + import sparse + + arr = np.diagflat([1, 2, 3]) + sparr = sparse.COO(coords=[[0, 1, 2], [0, 1, 2]], data=[1, 2, 3]) + da = xr.DataArray(sparr) assert_identical(da.as_numpy(), xr.DataArray(arr)) np.testing.assert_equal(da.to_numpy(), arr) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index c6b5dff09d1..a3799cd7597 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6761,13 +6761,13 @@ def test_clip(ds): class TestNumpyCoercion: def test_from_numpy(self): - ds = xr.Dataset({'a': [1, 2, 3]}) + ds = xr.Dataset({"a": ("x", [1, 2, 3])}) assert_identical(ds.as_numpy(), ds) @requires_dask def test_from_dask(self): - ds = xr.Dataset({'a': [1, 2, 3]}) + ds = xr.Dataset({"a": ("x", [1, 2, 3])}) ds_chunked = ds.chunk(1) assert_identical(ds_chunked.as_numpy(), ds.compute()) @@ -6777,27 +6777,28 @@ def test_from_pint(self): from pint import Quantity arr = np.array([1, 2, 3]) - ds = xr.Dataset({'a': Quantity(arr, units="m")}) + ds = xr.Dataset({"a": ("x", Quantity(arr, units="m"))}) - assert_identical(ds.as_numpy(), xr.Dataset({'a': arr})) + assert_identical(ds.as_numpy(), xr.Dataset({"a": ("x", [1, 2, 3])})) @requires_sparse def test_from_sparse(self): - arr = np.array([1, 2, 3]) - va = Variable(data=arr, dims="dim_0")._as_sparse() - da = xr.DataArray(va, dims='a') - ds = xr.DataArray({'a': va}) + import sparse + + arr = np.diagflat([1, 2, 3]) + sparr = sparse.COO(coords=[[0, 1, 2], [0, 1, 2]], data=[1, 2, 3]) + ds = xr.Dataset({"a": (["x", "y"], sparr)}) - assert_identical(ds.as_numpy(), xr.Dataset({'a': arr})) + assert_identical(ds.as_numpy(), xr.Dataset({"a": (["x", "y"], arr)})) @requires_cupy def test_from_cupy(self): import cupy as cp arr = np.array([1, 2, 3]) - ds = xr.Dataset({'a': cp.array(arr)}) + ds = xr.Dataset({"a": ("x", cp.array(arr))}) - assert_identical(ds.as_numpy(), xr.Dataset({'a': arr})) + assert_identical(ds.as_numpy(), xr.Dataset({"a": ("x", [1, 2, 3])})) @requires_dask @requires_pint_0_15 @@ -6807,7 +6808,7 @@ def test_from_pint_wrapping_dask(self): arr = np.array([1, 2, 3]) d = dask.array.from_array(np.array([1, 2, 3])) - ds = xr.Dataset({'a': Quantity(arr, units="m")}) + ds = xr.Dataset({"a": ("x", Quantity(d, units="m"))}) result = ds.as_numpy() - assert_identical(result, xr.Dataset({'a': arr})) + assert_identical(result, xr.Dataset({"a": ("x", [1, 2, 3])})) From 1c94a97f83b73031026e75b94500d8b121639345 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 9 Jul 2021 14:17:23 -0400 Subject: [PATCH 17/28] fix linting --- xarray/tests/test_dataarray.py | 2 +- xarray/tests/test_dataset.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 45c2926cb38..1eacb69c160 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -7425,7 +7425,7 @@ def test_from_pint_wrapping_dask(self): from pint import Quantity arr = np.array([1, 2, 3]) - d = dask.array.from_array(np.array([1, 2, 3])) + d = dask.array.from_array(arr) da = xr.DataArray(Quantity(d, units="m")) result = da.as_numpy() diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index a3799cd7597..baedd318329 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6806,7 +6806,6 @@ def test_from_pint_wrapping_dask(self): import dask from pint import Quantity - arr = np.array([1, 2, 3]) d = dask.array.from_array(np.array([1, 2, 3])) ds = xr.Dataset({"a": ("x", Quantity(d, units="m"))}) From 2d07c0f1081628eb1d2b445a01bee9fdb5816afb Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 9 Jul 2021 14:29:14 -0400 Subject: [PATCH 18/28] tests for Variable --- xarray/tests/test_variable.py | 64 +++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 1e0dff45dd2..ade220d6d20 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -33,7 +33,9 @@ assert_equal, assert_identical, raise_if_dask_computes, + requires_cupy, requires_dask, + requires_pint_0_15, requires_sparse, source_ndarray, ) @@ -2540,3 +2542,65 @@ def test_clip(var): var.mean("z").data[:, :, np.newaxis], ), ) + + +# parameterize over Variable and IndexVariable +class TestNumpyCoercion: + def test_from_numpy(self): + v = Variable("x", [1, 2, 3]) + + assert_identical(v.as_numpy(), v) + np.testing.assert_equal(v.to_numpy(), np.array([1, 2, 3])) + + @requires_dask + def test_from_dask(self): + v = Variable("x", [1, 2, 3]) + v_chunked = v.chunk(1) + + assert_identical(v_chunked.as_numpy(), v.compute()) + np.testing.assert_equal(v.to_numpy(), np.array([1, 2, 3])) + + @requires_pint_0_15 + def test_from_pint(self): + from pint import Quantity + + arr = np.array([1, 2, 3]) + v = Variable("x", Quantity(arr, units="m")) + + assert_identical(v.as_numpy(), Variable("x", arr)) + np.testing.assert_equal(v.to_numpy(), arr) + + @requires_sparse + def test_from_sparse(self): + import sparse + + arr = np.diagflat([1, 2, 3]) + sparr = sparse.COO(coords=[[0, 1, 2], [0, 1, 2]], data=[1, 2, 3]) + v = Variable(["x", "y"], sparr) + + assert_identical(v.as_numpy(), Variable(["x", "y"], arr)) + np.testing.assert_equal(v.to_numpy(), arr) + + @requires_cupy + def test_from_cupy(self): + import cupy as cp + + arr = np.array([1, 2, 3]) + v = Variable("x", cp.array(arr)) + + assert_identical(v.as_numpy(), Variable("x", arr)) + np.testing.assert_equal(v.to_numpy(), arr) + + @requires_dask + @requires_pint_0_15 + def test_from_pint_wrapping_dask(self): + import dask + from pint import Quantity + + arr = np.array([1, 2, 3]) + d = dask.array.from_array(np.array([1, 2, 3])) + v = Variable("x", Quantity(d, units="m")) + + result = v.as_numpy() + assert_identical(result, Variable("x", arr)) + np.testing.assert_equal(v.to_numpy(), arr) From 9673cea75de384f11573d41823f299074b204d21 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 9 Jul 2021 14:43:26 -0400 Subject: [PATCH 19/28] test IndexVariable too --- xarray/tests/test_variable.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index ade220d6d20..ec0b0f950c7 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -2544,34 +2544,37 @@ def test_clip(var): ) -# parameterize over Variable and IndexVariable +@pytest.mark.parametrize("Var", [Variable, IndexVariable]) class TestNumpyCoercion: - def test_from_numpy(self): - v = Variable("x", [1, 2, 3]) + def test_from_numpy(self, Var): + v = Var("x", [1, 2, 3]) assert_identical(v.as_numpy(), v) np.testing.assert_equal(v.to_numpy(), np.array([1, 2, 3])) @requires_dask - def test_from_dask(self): - v = Variable("x", [1, 2, 3]) + def test_from_dask(self, Var): + v = Var("x", [1, 2, 3]) v_chunked = v.chunk(1) assert_identical(v_chunked.as_numpy(), v.compute()) np.testing.assert_equal(v.to_numpy(), np.array([1, 2, 3])) @requires_pint_0_15 - def test_from_pint(self): + def test_from_pint(self, Var): from pint import Quantity arr = np.array([1, 2, 3]) - v = Variable("x", Quantity(arr, units="m")) + v = Var("x", Quantity(arr, units="m")) - assert_identical(v.as_numpy(), Variable("x", arr)) + assert_identical(v.as_numpy(), Var("x", arr)) np.testing.assert_equal(v.to_numpy(), arr) @requires_sparse - def test_from_sparse(self): + def test_from_sparse(self, Var): + if Var is IndexVariable: + pytest.skip("Can't have 2D IndexVariables") + import sparse arr = np.diagflat([1, 2, 3]) @@ -2582,25 +2585,25 @@ def test_from_sparse(self): np.testing.assert_equal(v.to_numpy(), arr) @requires_cupy - def test_from_cupy(self): + def test_from_cupy(self, Var): import cupy as cp arr = np.array([1, 2, 3]) - v = Variable("x", cp.array(arr)) + v = Var("x", cp.array(arr)) - assert_identical(v.as_numpy(), Variable("x", arr)) + assert_identical(v.as_numpy(), Var("x", arr)) np.testing.assert_equal(v.to_numpy(), arr) @requires_dask @requires_pint_0_15 - def test_from_pint_wrapping_dask(self): + def test_from_pint_wrapping_dask(self, Var): import dask from pint import Quantity arr = np.array([1, 2, 3]) d = dask.array.from_array(np.array([1, 2, 3])) - v = Variable("x", Quantity(d, units="m")) + v = Var("x", Quantity(d, units="m")) result = v.as_numpy() - assert_identical(result, Variable("x", arr)) + assert_identical(result, Var("x", arr)) np.testing.assert_equal(v.to_numpy(), arr) From 0d624cc466efe0816171802415831f5c1abd48c4 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 12 Jul 2021 11:38:45 -0400 Subject: [PATCH 20/28] use numpy.asarray to avoid a copy --- xarray/core/variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 0a9c0b3fd88..abafb7e907c 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1087,7 +1087,7 @@ def to_numpy(self) -> np.ndarray: if isinstance(data, sparse_array_type): data = data.todense() if type(data) != np.ndarray: # noqa : Don't allow subclasses - data = np.array(data) + data = np.asarray(data) return data From 2f1ff4629cdaf0a4f4fae8cd8b1a17fb5ce5c616 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 14 Jul 2021 16:35:09 -0400 Subject: [PATCH 21/28] also convert coords --- xarray/core/dataarray.py | 8 +++---- xarray/core/dataset.py | 8 +++---- xarray/tests/test_dataarray.py | 44 +++++++++++++++++++++++++--------- xarray/tests/test_dataset.py | 41 +++++++++++++++++++++---------- 4 files changed, 70 insertions(+), 31 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 96d67ed6e21..e1b5809df1a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -662,15 +662,15 @@ def to_numpy(self) -> np.ndarray: def as_numpy(self: T_DataArray) -> T_DataArray: """ - Coerces wrapped data into a numpy array, and returns it wrapped inside - a DataArray. + Coerces wrapped data and coordinates into numpy arrays, returning a DataArray. See also -------- - DataArray.to_numpy : Same but returns only the numpy.ndarray object. + DataArray.to_numpy : Same but returns only the data as a numpy.ndarray object. Dataset.as_numpy : Converts all variables in a Dataset. """ - return self.copy(data=self.to_numpy()) + coords = {k: v.as_numpy() for k, v in self._coords.items()} + return self._replace(self.variable.as_numpy(), coords, indexes=self._indexes) @property def _in_memory(self) -> bool: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 77c6d72b40c..26b97f6e657 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1325,15 +1325,15 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": def as_numpy(self: "Dataset") -> "Dataset": """ - Coerces wrapped data into numpy arrays, returning a Dataset. + Coerces wrapped data and coordinates into numpy arrays, returning a Dataset. See also -------- DataArray.as_numpy - DataArray.to_numpy : Returns only the numpy.ndarray object. + DataArray.to_numpy : Returns only the data as a numpy.ndarray object. """ - numpy_variables = {k: v.as_numpy() for k, v in self.data_vars.items()} - return self.copy(data=numpy_variables) + numpy_variables = {k: v.as_numpy() for k, v in self.variables.items()} + return self._replace(variables=numpy_variables) @property def _level_coords(self) -> Dict[str, Hashable]: diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 1eacb69c160..26b14710823 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -7373,39 +7373,53 @@ def test_drop_duplicates(keep): class TestNumpyCoercion: + # TODO once flexible indexes refactor complete also test coercion of dimension coords def test_from_numpy(self): - da = xr.DataArray([1, 2, 3]) + da = xr.DataArray([1, 2, 3], dims="x", coords={"lat": ("x", [4, 5, 6])}) assert_identical(da.as_numpy(), da) np.testing.assert_equal(da.to_numpy(), np.array([1, 2, 3])) + np.testing.assert_equal(da["lat"].to_numpy(), np.array([4, 5, 6])) @requires_dask def test_from_dask(self): - da = xr.DataArray([1, 2, 3]) + da = xr.DataArray([1, 2, 3], dims="x", coords={"lat": ("x", [4, 5, 6])}) da_chunked = da.chunk(1) assert_identical(da_chunked.as_numpy(), da.compute()) np.testing.assert_equal(da.to_numpy(), np.array([1, 2, 3])) + np.testing.assert_equal(da["lat"].to_numpy(), np.array([4, 5, 6])) @requires_pint_0_15 def test_from_pint(self): from pint import Quantity arr = np.array([1, 2, 3]) - da = xr.DataArray(Quantity(arr, units="m")) + da = xr.DataArray( + Quantity(arr, units="Pa"), + dims="x", + coords={"lat": ("x", Quantity(arr + 3, units="m"))}, + ) - assert_identical(da.as_numpy(), xr.DataArray(arr)) + expected = xr.DataArray(arr, dims="x", coords={"lat": ("x", arr + 3)}) + assert_identical(da.as_numpy(), expected) np.testing.assert_equal(da.to_numpy(), arr) + np.testing.assert_equal(da["lat"].to_numpy(), arr + 3) @requires_sparse def test_from_sparse(self): import sparse arr = np.diagflat([1, 2, 3]) - sparr = sparse.COO(coords=[[0, 1, 2], [0, 1, 2]], data=[1, 2, 3]) - da = xr.DataArray(sparr) + sparr = sparse.COO.from_numpy(arr) + da = xr.DataArray( + sparr, dims=["x", "y"], coords={"elev": (("x", "y"), sparr + 3)} + ) - assert_identical(da.as_numpy(), xr.DataArray(arr)) + expected = xr.DataArray( + arr, dims=["x", "y"], coords={"elev": (("x", "y"), arr + 3)} + ) + assert_identical(da.as_numpy(), expected) np.testing.assert_equal(da.to_numpy(), arr) @requires_cupy @@ -7413,9 +7427,12 @@ def test_from_cupy(self): import cupy as cp arr = np.array([1, 2, 3]) - da = xr.DataArray(cp.array(arr)) + da = xr.DataArray( + cp.array(arr), dims="x", coords={"lat": ("x", cp.array(arr + 3))} + ) - assert_identical(da.as_numpy(), xr.DataArray(arr)) + expected = xr.DataArray(arr, dims="x", coords={"lat": ("x", arr + 3)}) + assert_identical(da.as_numpy(), expected) np.testing.assert_equal(da.to_numpy(), arr) @requires_dask @@ -7426,9 +7443,14 @@ def test_from_pint_wrapping_dask(self): arr = np.array([1, 2, 3]) d = dask.array.from_array(arr) - da = xr.DataArray(Quantity(d, units="m")) + da = xr.DataArray( + Quantity(d, units="Pa"), + dims="x", + coords={"lat": ("x", Quantity(d, units="m") * 2)}, + ) result = da.as_numpy() result.name = None # remove dask-assigned name - assert_identical(result, xr.DataArray(arr)) + expected = xr.DataArray(arr, dims="x", coords={"lat": ("x", arr * 2)}) + assert_identical(result, expected) np.testing.assert_equal(da.to_numpy(), arr) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index baedd318329..25f74086c7a 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6761,13 +6761,13 @@ def test_clip(ds): class TestNumpyCoercion: def test_from_numpy(self): - ds = xr.Dataset({"a": ("x", [1, 2, 3])}) + ds = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", [4, 5, 6])}) assert_identical(ds.as_numpy(), ds) @requires_dask def test_from_dask(self): - ds = xr.Dataset({"a": ("x", [1, 2, 3])}) + ds = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", [4, 5, 6])}) ds_chunked = ds.chunk(1) assert_identical(ds_chunked.as_numpy(), ds.compute()) @@ -6777,28 +6777,40 @@ def test_from_pint(self): from pint import Quantity arr = np.array([1, 2, 3]) - ds = xr.Dataset({"a": ("x", Quantity(arr, units="m"))}) + ds = xr.Dataset( + {"a": ("x", Quantity(arr, units="Pa"))}, + coords={"lat": ("x", Quantity(arr + 3, units="m"))}, + ) - assert_identical(ds.as_numpy(), xr.Dataset({"a": ("x", [1, 2, 3])})) + expected = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", arr + 3)}) + assert_identical(ds.as_numpy(), expected) @requires_sparse def test_from_sparse(self): import sparse arr = np.diagflat([1, 2, 3]) - sparr = sparse.COO(coords=[[0, 1, 2], [0, 1, 2]], data=[1, 2, 3]) - ds = xr.Dataset({"a": (["x", "y"], sparr)}) + sparr = sparse.COO.from_numpy(arr) + ds = xr.Dataset( + {"a": (["x", "y"], sparr)}, coords={"elev": (("x", "y"), sparr + 3)} + ) - assert_identical(ds.as_numpy(), xr.Dataset({"a": (["x", "y"], arr)})) + expected = xr.Dataset( + {"a": (["x", "y"], arr)}, coords={"elev": (("x", "y"), arr + 3)} + ) + assert_identical(ds.as_numpy(), expected) @requires_cupy def test_from_cupy(self): import cupy as cp arr = np.array([1, 2, 3]) - ds = xr.Dataset({"a": ("x", cp.array(arr))}) + ds = xr.Dataset( + {"a": ("x", cp.array(arr))}, coords={"lat": ("x", cp.array(arr + 3))} + ) - assert_identical(ds.as_numpy(), xr.Dataset({"a": ("x", [1, 2, 3])})) + expected = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", arr + 3)}) + assert_identical(ds.as_numpy(), expected) @requires_dask @requires_pint_0_15 @@ -6806,8 +6818,13 @@ def test_from_pint_wrapping_dask(self): import dask from pint import Quantity - d = dask.array.from_array(np.array([1, 2, 3])) - ds = xr.Dataset({"a": ("x", Quantity(d, units="m"))}) + arr = np.array([1, 2, 3]) + d = dask.array.from_array(arr) + ds = xr.Dataset( + {"a": ("x", Quantity(d, units="Pa"))}, + coords={"lat": ("x", Quantity(d, units="m") * 2)}, + ) result = ds.as_numpy() - assert_identical(result, xr.Dataset({"a": ("x", [1, 2, 3])})) + expected = xr.Dataset({"a": ("x", arr)}, coords={"lat": ("x", arr * 2)}) + assert_identical(result, expected) From 6d33b35e6df483a88503b1ccdb74ffb9ed88f35d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 15 Jul 2021 21:04:31 -0400 Subject: [PATCH 22/28] Force tests again after #5600 From b90b7e3458d39fb030a1e4b97f432f452675f08a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 16 Jul 2021 10:56:52 -0600 Subject: [PATCH 23/28] Apply suggestions from code review --- xarray/core/dataarray.py | 17 +++++++++++++++-- xarray/core/pycompat.py | 2 +- xarray/core/variable.py | 13 ++++++++----- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 63aaadc3db6..1fa511db758 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -623,7 +623,16 @@ def __len__(self) -> int: @property def data(self) -> Any: - """The array's data as a numpy-like array""" + """ + The DataArray's data as an array. The underlying array type + (e.g. dask, sparse, pint) is preserved. + + See Also + -------- + DataArray.to_numpy + DataArray.as_numpy + DataArray.values + """ return self.variable.data @data.setter @@ -637,7 +646,7 @@ def values(self) -> np.ndarray: If the array's data is not a numpy.ndarray this will attempt to convert it naively using np.array(), which will raise an error if the array - type does not support coercion like this. + type does not support coercion like this (e.g. cupy). """ return self.variable.values @@ -653,6 +662,8 @@ def to_numpy(self) -> np.ndarray: -------- DataArray.as_numpy : Same but returns the surrounding DataArray instead. Dataset.as_numpy + DataArray.values + DataArray.data """ return self.variable.to_numpy() @@ -664,6 +675,8 @@ def as_numpy(self: T_DataArray) -> T_DataArray: -------- DataArray.to_numpy : Same but returns only the data as a numpy.ndarray object. Dataset.as_numpy : Converts all variables in a Dataset. + DataArray.values + DataArray.data """ coords = {k: v.as_numpy() for k, v in self._coords.items()} return self._replace(self.variable.as_numpy(), coords, indexes=self._indexes) diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index 697b0978ba4..d1649235006 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -40,7 +40,7 @@ def __init__(self, mod): self.module = duck_array_module self.version = duck_array_version self.type = duck_array_type - self.available = self.version > "0.0.0" + self.available = duck_array_module is not None def is_duck_dask_array(x): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index abafb7e907c..4160a35c51f 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -28,7 +28,13 @@ from .indexes import PandasIndex, wrap_pandas_index from .indexing import BasicIndexer, OuterIndexer, VectorizedIndexer, as_indexable from .options import _get_keep_attrs -from .pycompat import DuckArrayModule, integer_types, is_duck_dask_array +from .pycompat import ( + cupy_array_type, + dask_array_type, + integer_types, + is_duck_dask_array, + sparse_array_type, +) from .utils import ( NdimSizeLenMixin, OrderedSet, @@ -42,9 +48,6 @@ maybe_coerce_to_str, ) -dask_array_type = DuckArrayModule("dask").type -cupy_array_type = DuckArrayModule("cupy").type -sparse_array_type = DuckArrayModule("sparse").type NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( @@ -1077,7 +1080,7 @@ def to_numpy(self) -> np.ndarray: data = data.to_numpy() except AttributeError: if isinstance(data, dask_array_type): - data = self.compute().data + data = data.compute() if isinstance(data, cupy_array_type): data = data.get() # pint has to be imported dynamically as pint imports xarray From f39b3014abc056435e2969ba77d85652f4aced00 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 16 Jul 2021 10:57:35 -0600 Subject: [PATCH 24/28] Update xarray/core/variable.py --- xarray/core/variable.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 4160a35c51f..9bb6dd37987 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -48,8 +48,6 @@ maybe_coerce_to_str, ) - - NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( ( indexing.ExplicitlyIndexed, From 8b346d3cdb8028ef9a65d825b04d992d92def27e Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 21 Jul 2021 12:00:02 -0400 Subject: [PATCH 25/28] fix import --- xarray/core/variable.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9bb6dd37987..f4439942964 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -29,6 +29,7 @@ from .indexing import BasicIndexer, OuterIndexer, VectorizedIndexer, as_indexable from .options import _get_keep_attrs from .pycompat import ( + DuckArrayModule, cupy_array_type, dask_array_type, integer_types, From 576ab7b22f28bb17074dc58bf90b90b21eb9322e Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 21 Jul 2021 12:04:48 -0400 Subject: [PATCH 26/28] formatting --- xarray/core/dataarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 1fa511db758..d567da629ca 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -624,9 +624,9 @@ def __len__(self) -> int: @property def data(self) -> Any: """ - The DataArray's data as an array. The underlying array type + The DataArray's data as an array. The underlying array type (e.g. dask, sparse, pint) is preserved. - + See Also -------- DataArray.to_numpy From 976f89a6e4d1426fc102b10b5f5f4156b7cd50f2 Mon Sep 17 00:00:00 2001 From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com> Date: Wed, 21 Jul 2021 17:06:05 -0400 Subject: [PATCH 27/28] remove type check Co-authored-by: Stephan Hoyer --- xarray/core/variable.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 933decd8305..fe2fd3371e8 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1088,8 +1088,7 @@ def to_numpy(self) -> np.ndarray: data = data.magnitude if isinstance(data, sparse_array_type): data = data.todense() - if type(data) != np.ndarray: # noqa : Don't allow subclasses - data = np.asarray(data) + data = np.asarray(data) return data From 7bc5d6ff47f88f5685bd0a7124ce0ef54d87d7f8 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 21 Jul 2021 17:10:51 -0400 Subject: [PATCH 28/28] remove attempt to call to_numpy --- xarray/core/variable.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index fe2fd3371e8..3d9b5079da2 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1075,20 +1075,18 @@ def to_numpy(self) -> np.ndarray: """Coerces wrapped data to numpy and returns a numpy.ndarray""" # TODO an entrypoint so array libraries can choose coercion method? data = self.data - try: - data = data.to_numpy() - except AttributeError: - if isinstance(data, dask_array_type): - data = data.compute() - if isinstance(data, cupy_array_type): - data = data.get() - # pint has to be imported dynamically as pint imports xarray - pint_array_type = DuckArrayModule("pint").type - if isinstance(data, pint_array_type): - data = data.magnitude - if isinstance(data, sparse_array_type): - data = data.todense() - data = np.asarray(data) + # TODO first attempt to call .to_numpy() once some libraries implement it + if isinstance(data, dask_array_type): + data = data.compute() + if isinstance(data, cupy_array_type): + data = data.get() + # pint has to be imported dynamically as pint imports xarray + pint_array_type = DuckArrayModule("pint").type + if isinstance(data, pint_array_type): + data = data.magnitude + if isinstance(data, sparse_array_type): + data = data.todense() + data = np.asarray(data) return data