Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid calling np.asarray on lazy indexing classes #6874

Merged
merged 59 commits into from
Mar 31, 2023
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
45cd500
Add get_array to lazy indexing array types.
dcherian Aug 2, 2022
9c0350c
Rename to short_array_repr; use Variable.data
dcherian Aug 2, 2022
74afa53
Fix Variable.load
dcherian Aug 2, 2022
9de7427
Make get_array recursive.
dcherian Aug 2, 2022
cc0a653
Some cleanups
dcherian Aug 2, 2022
59c7ead
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 3, 2022
2aa0830
Add get_array to PandasIndexingAdaptor
dcherian Aug 3, 2022
1306758
Finish short_array_repr refactoring
dcherian Aug 3, 2022
cf67972
Rename to get_duck_array
dcherian Aug 3, 2022
0209900
Try without hasattr check
dcherian Aug 3, 2022
536648a
Return bare array from LazilyIndexedArray.get_duck_array
dcherian Aug 3, 2022
f2514c7
Add get_duck_array to AbstractArray
dcherian Aug 3, 2022
3c597d4
Fix zerodim test
dcherian Aug 3, 2022
201eeba
Fix LazilyVectorizedIndexedArray
dcherian Aug 5, 2022
cd02a8a
Inherit __array__ from ExplicitlyIndexed
dcherian Aug 5, 2022
7ef55e0
Fix InaccessibleArray in tests
dcherian Aug 5, 2022
4e77fec
Fix BackendArray
dcherian Aug 5, 2022
d14c61f
Merge branch 'main' into kvikio
dcherian Aug 6, 2022
19af950
Merge branch 'main' into kvikio
dcherian Aug 9, 2022
22db817
reprs Use .data on AbstractArray
dcherian Aug 10, 2022
2bbcc16
Force netCDF and h5netCDF to return arrays
dcherian Aug 10, 2022
ca2a10a
Add whats-new
dcherian Aug 10, 2022
9256dd0
Merge branch 'main' into kvikio
dcherian Aug 12, 2022
906c3b3
Add comments; review feedback
dcherian Aug 16, 2022
598c201
Merge branch 'main' into kvikio
dcherian Nov 27, 2022
941c643
Fix merge.
dcherian Nov 27, 2022
d1127fe
Remove another np.asarray
dcherian Aug 16, 2022
c0c78a1
Avoid np.asarray on __getitem__.
dcherian Jan 17, 2023
9b727e6
[WIP] ExplicitlyIndexedBackendArray
dcherian Oct 11, 2022
46d98ec
Handle Indexing Adapter classes explicitly.
dcherian Jan 17, 2023
b19a24b
Revert "Handle Indexing Adapter classes explicitly."
dcherian Jan 18, 2023
84f560f
Revert "[WIP] ExplicitlyIndexedBackendArray"
dcherian Jan 18, 2023
426519f
Merge branch 'main' into kvikio
dcherian Jan 18, 2023
c4b81bf
Fix pydap now that NumpyIndexingAdapter does not automatically cast t…
dcherian Jan 19, 2023
d11a3cf
Update xarray/backends/pydap_.py
dcherian Jan 19, 2023
c223617
Add test store.
dcherian Jan 19, 2023
51552d4
Merge branch 'main' into kvikio
dcherian Jan 19, 2023
5f1cf53
[skip-ci] Update whats-new
dcherian Jan 19, 2023
937d572
Fix test
dcherian Jan 19, 2023
cc7d0b5
fix mypy?
dcherian Jan 19, 2023
1576261
Fix Zarr test
dcherian Jan 19, 2023
7d8459e
test the repr too
dcherian Jan 19, 2023
9815b75
Guard np.asarray for scalars.
dcherian Jan 20, 2023
39e7529
Revert casting to arrays in backend
dcherian Jan 24, 2023
f304bcb
Wrap numpy scalars in Explicitly*Indexed*.get_duck_array
dcherian Jan 24, 2023
6cb1677
Merge branch 'main' into kvikio
dcherian Jan 26, 2023
2c7da96
Merge branch 'main' into kvikio
dcherian Feb 13, 2023
26d224c
Apply suggestions from code review
dcherian Feb 16, 2023
65da209
Update xarray/tests/__init__.py
dcherian Feb 16, 2023
0bc1175
Update xarray/core/indexing.py
dcherian Mar 3, 2023
8c2d74c
Apply suggestions from code review
dcherian Mar 3, 2023
20c8c81
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 3, 2023
77f7059
Bring back the ugly check
dcherian Mar 4, 2023
517f195
Merge branch 'main' into kvikio
dcherian Mar 26, 2023
5c23bd2
Update whats-new
dcherian Mar 26, 2023
887e1c5
Fix pre-commit
dcherian Mar 26, 2023
2557d02
silence mypy error
dcherian Mar 26, 2023
b313258
minimize diff
dcherian Mar 26, 2023
cbd030e
Merge branch 'main' into kvikio
dcherian Mar 29, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions xarray/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500
class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed):
__slots__ = ()

def __array__(self, dtype=None):
def get_duck_array(self, dtype=None):
dcherian marked this conversation as resolved.
Show resolved Hide resolved
key = indexing.BasicIndexer((slice(None),) * self.ndim)
return np.asarray(self[key], dtype=dtype)
return self[key]


class AbstractDataStore:
Expand Down
12 changes: 9 additions & 3 deletions xarray/coding/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import pandas as pd

from ..core import dtypes, duck_array_ops, indexing
from ..core.pycompat import is_duck_dask_array
from ..core.pycompat import is_duck_array, is_duck_dask_array
from ..core.variable import Variable


Expand Down Expand Up @@ -69,7 +69,10 @@ def __getitem__(self, key):
return type(self)(self.array[key], self.func, self.dtype)

def __array__(self, dtype=None):
return self.func(self.array)
return self.get_duck_array()

def get_duck_array(self):
return self.func(self.array.get_duck_array())

def __repr__(self):
return "{}({!r}, func={!r}, dtype={!r})".format(
Expand Down Expand Up @@ -216,7 +219,10 @@ def decode(self, variable, name=None):


def _scale_offset_decoding(data, scale_factor, add_offset, dtype):
data = np.array(data, dtype=dtype, copy=True)
if not is_duck_array(data):
data = np.array(data, dtype=dtype, copy=True)
else:
data = data.astype(dtype=dtype, copy=True)
dcherian marked this conversation as resolved.
Show resolved Hide resolved
if scale_factor is not None:
data *= scale_factor
if add_offset is not None:
Expand Down
3 changes: 3 additions & 0 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ def __complex__(self: Any) -> complex:
def __array__(self: Any, dtype: DTypeLike = None) -> np.ndarray:
return np.asarray(self.values, dtype=dtype)

def get_duck_array(self):
return self.data
dcherian marked this conversation as resolved.
Show resolved Hide resolved

def __repr__(self) -> str:
return formatting.array_repr(self)

Expand Down
17 changes: 11 additions & 6 deletions xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from pandas.errors import OutOfBoundsDatetime

from .duck_array_ops import array_equiv
from .indexing import MemoryCachedArray
from .indexing import ExplicitlyIndexed, MemoryCachedArray
from .options import OPTIONS, _get_boolean_with_default
from .pycompat import dask_array_type, sparse_array_type
from .utils import is_duck_array
Expand Down Expand Up @@ -516,8 +516,13 @@ def limit_lines(string: str, *, limit: int):
return string


def short_numpy_repr(array):
array = np.asarray(array)
def short_array_repr(array):
from .common import AbstractArray

if isinstance(array, (ExplicitlyIndexed, AbstractArray)):
array = array.get_duck_array()
if not is_duck_array(array):
array = np.asarray(array)

# default to lower precision so a full (abbreviated) line can fit on
# one line with the default display_width
Expand All @@ -541,11 +546,11 @@ def short_data_repr(array):
"""Format "data" for DataArray and Variable."""
internal_data = getattr(array, "variable", array)._data
if isinstance(array, np.ndarray):
return short_numpy_repr(array)
return short_array_repr(array)
elif is_duck_array(internal_data):
return limit_lines(repr(array.data), limit=40)
elif array._in_memory or array.size < 1e5:
return short_numpy_repr(array)
return short_array_repr(array)
else:
# internal xarray array type
return f"[{array.size} values with dtype={array.dtype}]"
Expand Down Expand Up @@ -765,7 +770,7 @@ def diff_array_repr(a, b, compat):
equiv = array_equiv

if not equiv(a.data, b.data):
temp = [wrap_indent(short_numpy_repr(obj), start=" ") for obj in (a, b)]
temp = [wrap_indent(short_array_repr(obj), start=" ") for obj in (a, b)]
diff_data_repr = [
ab_side + "\n" + ab_data_repr
for ab_side, ab_data_repr in zip(("L", "R"), temp)
Expand Down
51 changes: 39 additions & 12 deletions xarray/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@
from .npcompat import DTypeLike
from .nputils import NumpyVIndexAdapter
from .options import OPTIONS
from .pycompat import dask_version, integer_types, is_duck_dask_array, sparse_array_type
from .pycompat import (
dask_version,
integer_types,
is_duck_array,
is_duck_dask_array,
sparse_array_type,
)
from .types import T_Xarray
from .utils import (
NDArrayMixin,
Expand Down Expand Up @@ -437,13 +443,16 @@ class ExplicitlyIndexed:

__slots__ = ()

def __array__(self, dtype=None):
dcherian marked this conversation as resolved.
Show resolved Hide resolved
return np.asarray(self.get_duck_array(), dtype=dtype)


class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed):
__slots__ = ()

def __array__(self, dtype=None):
def get_duck_array(self):
key = BasicIndexer((slice(None),) * self.ndim)
return np.asarray(self[key], dtype=dtype)
return self[key]


class ImplicitToExplicitIndexingAdapter(NDArrayMixin):
Expand All @@ -456,7 +465,10 @@ def __init__(self, array, indexer_cls=BasicIndexer):
self.indexer_cls = indexer_cls

def __array__(self, dtype=None):
dcherian marked this conversation as resolved.
Show resolved Hide resolved
return np.asarray(self.array, dtype=dtype)
return np.asarray(self.get_duck_array(), dtype=dtype)

def get_duck_array(self):
return self.array.get_duck_array()

def __getitem__(self, key):
key = expanded_indexer(key, self.ndim)
Expand Down Expand Up @@ -519,9 +531,15 @@ def shape(self) -> tuple[int, ...]:
shape.append(k.size)
return tuple(shape)

def __array__(self, dtype=None):
def get_duck_array(self):
array = as_indexable(self.array)
return np.asarray(array[self.key], dtype=None)
array = array[self.key]
if isinstance(array, ExplicitlyIndexed):
array = array.get_duck_array()
dcherian marked this conversation as resolved.
Show resolved Hide resolved
if not is_duck_array(array):
# This hack is necessary for 0D netCDF4 variables
array = np.asarray(array)
dcherian marked this conversation as resolved.
Show resolved Hide resolved
return array

def transpose(self, order):
return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order)
Expand Down Expand Up @@ -572,8 +590,11 @@ def __init__(self, array, key):
def shape(self) -> tuple[int, ...]:
return np.broadcast(*self.key.tuple).shape

def __array__(self, dtype=None):
return np.asarray(self.array[self.key], dtype=None)
def get_duck_array(self):
array = self.array[self.key]
if isinstance(array, ExplicitlyIndexed):
array = array.get_duck_array()
return array

def _updated_key(self, new_key):
return _combine_indexers(self.key, self.shape, new_key)
Expand Down Expand Up @@ -619,8 +640,8 @@ def _ensure_copied(self):
self.array = as_indexable(np.array(self.array))
self._copied = True

def __array__(self, dtype=None):
return np.asarray(self.array, dtype=dtype)
def get_duck_array(self):
return self.array.get_duck_array()

def __getitem__(self, key):
return type(self)(_wrap_numpy_scalars(self.array[key]))
Expand Down Expand Up @@ -653,6 +674,9 @@ def __array__(self, dtype=None):
self._ensure_cached()
return np.asarray(self.array, dtype=dtype)

def get_duck_array(self):
return self.array.get_duck_array()

def __getitem__(self, key):
return type(self)(_wrap_numpy_scalars(self.array[key]))

Expand Down Expand Up @@ -1436,6 +1460,9 @@ def __array__(self, dtype: DTypeLike = None) -> np.ndarray:
array = array.astype("object")
return np.asarray(array.values, dtype=dtype)

def get_duck_array(self) -> np.ndarray:
return np.asarray(self)

@property
def shape(self) -> tuple[int, ...]:
return (len(self.array),)
Expand Down Expand Up @@ -1572,9 +1599,9 @@ def _repr_inline_(self, max_width: int) -> str:
return format_array_flat(self._get_array_subset(), max_width)

def _repr_html_(self) -> str:
from .formatting import short_numpy_repr
from .formatting import short_array_repr

array_repr = short_numpy_repr(self._get_array_subset())
array_repr = short_array_repr(self._get_array_subset())
return f"<pre>{escape(array_repr)}</pre>"

def copy(self, deep: bool = True) -> PandasMultiIndexingAdapter:
Expand Down
4 changes: 4 additions & 0 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,8 @@ def _in_memory(self):
def data(self):
if is_duck_array(self._data):
return self._data
elif isinstance(self._data, indexing.ExplicitlyIndexed):
return self._data.get_duck_array()
else:
return self.values

Expand Down Expand Up @@ -463,6 +465,8 @@ def load(self, **kwargs):
"""
if is_duck_dask_array(self._data):
self._data = as_compatible_data(self._data.compute(**kwargs))
elif isinstance(self._data, indexing.ExplicitlyIndexed):
self._data = self._data.get_duck_array()
elif not is_duck_array(self._data):
self._data = np.asarray(self._data)
return self
Expand Down
3 changes: 3 additions & 0 deletions xarray/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@ class InaccessibleArray(utils.NDArrayMixin, ExplicitlyIndexed):
def __init__(self, array):
self.array = array

def get_duck_array(self):
raise UnexpectedDataAccess("Tried accessing data")

def __getitem__(self, key):
raise UnexpectedDataAccess("Tried accessing data")

Expand Down
10 changes: 5 additions & 5 deletions xarray/tests/test_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ def test_set_numpy_options() -> None:
assert np.get_printoptions() == original_options


def test_short_numpy_repr() -> None:
def test_short_array_repr() -> None:
cases = [
np.random.randn(500),
np.random.randn(20, 20),
Expand All @@ -510,16 +510,16 @@ def test_short_numpy_repr() -> None:
]
# number of lines:
# for default numpy repr: 167, 140, 254, 248, 599
# for short_numpy_repr: 1, 7, 24, 19, 25
# for short_array_repr: 1, 7, 24, 19, 25
for array in cases:
num_lines = formatting.short_numpy_repr(array).count("\n") + 1
num_lines = formatting.short_array_repr(array).count("\n") + 1
assert num_lines < 30

# threshold option (default: 200)
array2 = np.arange(100)
assert "..." not in formatting.short_numpy_repr(array2)
assert "..." not in formatting.short_array_repr(array2)
with xr.set_options(display_values_threshold=10):
assert "..." in formatting.short_numpy_repr(array2)
assert "..." in formatting.short_array_repr(array2)


def test_large_array_repr_length() -> None:
Expand Down