Skip to content

Commit

Permalink
Fix isel performance regression (#3319)
Browse files Browse the repository at this point in the history
* Fix isel performance regression

xref GH2227

Before:
    indexing.BooleanIndexing.time_indexing      898±0ms

After:
    indexing.BooleanIndexing.time_indexing      401±0ms

* mypy fix
  • Loading branch information
shoyer authored Sep 18, 2019
1 parent fddced0 commit df25933
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 6 deletions.
13 changes: 13 additions & 0 deletions asv_bench/benchmarks/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,16 @@ def setup(self, key):
requires_dask()
super().setup(key)
self.ds = self.ds.chunk({"x": 100, "y": 50, "t": 50})


class BooleanIndexing:
    """Benchmark ``Dataset.isel`` along ``time`` with a comparison-derived indexer.

    Reference: https://github.com/pydata/xarray/issues/2227
    """

    def setup(self):
        """Create the dataset and the indexer consumed by ``time_indexing``."""
        size = 10_000_000
        threshold = 50_000

        # Two independent arrays, one for the data variable and one for the
        # coordinate, built in the same order as before.
        data_vars = {"a": ("time", np.arange(size))}
        coords = {"time": np.arange(size)}

        self.ds = xr.Dataset(data_vars, coords=coords)
        # Computed once here so the timed method only measures the isel call.
        self.time_filter = self.ds.time > threshold

    def time_indexing(self):
        """Timed body: index ``self.ds`` along ``time`` with the stored filter."""
        self.ds.isel(time=self.time_filter)
12 changes: 6 additions & 6 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1781,7 +1781,7 @@ def _validate_indexers(
elif isinstance(v, Dataset):
raise TypeError("cannot use a Dataset as an indexer")
elif isinstance(v, Sequence) and len(v) == 0:
v = IndexVariable((k,), np.zeros((0,), dtype="int64"))
v = Variable((k,), np.zeros((0,), dtype="int64"))
else:
v = np.asarray(v)

Expand All @@ -1795,16 +1795,13 @@ def _validate_indexers(
if v.ndim == 0:
v = Variable((), v)
elif v.ndim == 1:
v = IndexVariable((k,), v)
v = Variable((k,), v)
else:
raise IndexError(
"Unlabeled multi-dimensional array cannot be "
"used for indexing: {}".format(k)
)

if v.ndim == 1:
v = v.to_index_variable()

indexers_list.append((k, v))

return indexers_list
Expand Down Expand Up @@ -2367,7 +2364,10 @@ def interp(
if kwargs is None:
kwargs = {}
coords = either_dict_or_kwargs(coords, coords_kwargs, "interp")
indexers = OrderedDict(self._validate_indexers(coords))
indexers = OrderedDict(
(k, v.to_index_variable() if isinstance(v, Variable) and v.ndim == 1 else v)
for k, v in self._validate_indexers(coords)
)

obj = self if assume_sorted else self.sortby([k for k in coords])

Expand Down

0 comments on commit df25933

Please sign in to comment.