From df259331c5b66088f67738338e0b1b3f940e09c2 Mon Sep 17 00:00:00 2001
From: Stephan Hoyer
Date: Wed, 18 Sep 2019 11:33:15 -0700
Subject: [PATCH] Fix isel performance regression (#3319)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fix isel performance regression

xref GH2227

Before:

indexing.BooleanIndexing.time_indexing 898±0ms

After:

indexing.BooleanIndexing.time_indexing 401±0ms

* mypy fix
---
 asv_bench/benchmarks/indexing.py | 13 +++++++++++++
 xarray/core/dataset.py           | 12 ++++++------
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
index cd212895d99..c4cfbbbdfdf 100644
--- a/asv_bench/benchmarks/indexing.py
+++ b/asv_bench/benchmarks/indexing.py
@@ -125,3 +125,16 @@ def setup(self, key):
         requires_dask()
         super().setup(key)
         self.ds = self.ds.chunk({"x": 100, "y": 50, "t": 50})
+
+
+class BooleanIndexing:
+    # https://github.com/pydata/xarray/issues/2227
+    def setup(self):
+        self.ds = xr.Dataset(
+            {"a": ("time", np.arange(10_000_000))},
+            coords={"time": np.arange(10_000_000)},
+        )
+        self.time_filter = self.ds.time > 50_000
+
+    def time_indexing(self):
+        self.ds.isel(time=self.time_filter)
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 3ba1bd9e3d8..270ab81d264 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -1781,7 +1781,7 @@ def _validate_indexers(
             elif isinstance(v, Dataset):
                 raise TypeError("cannot use a Dataset as an indexer")
             elif isinstance(v, Sequence) and len(v) == 0:
-                v = IndexVariable((k,), np.zeros((0,), dtype="int64"))
+                v = Variable((k,), np.zeros((0,), dtype="int64"))
             else:
                 v = np.asarray(v)
 
@@ -1795,16 +1795,13 @@ def _validate_indexers(
                 if v.ndim == 0:
                     v = Variable((), v)
                 elif v.ndim == 1:
-                    v = IndexVariable((k,), v)
+                    v = Variable((k,), v)
                 else:
                     raise IndexError(
                         "Unlabeled multi-dimensional array cannot be "
                         "used for indexing: {}".format(k)
                     )
 
-            if v.ndim == 1:
-                v = v.to_index_variable()
-
             indexers_list.append((k, v))
 
         return indexers_list
@@ -2367,7 +2364,10 @@ def interp(
         if kwargs is None:
             kwargs = {}
         coords = either_dict_or_kwargs(coords, coords_kwargs, "interp")
-        indexers = OrderedDict(self._validate_indexers(coords))
+        indexers = OrderedDict(
+            (k, v.to_index_variable() if isinstance(v, Variable) and v.ndim == 1 else v)
+            for k, v in self._validate_indexers(coords)
+        )
 
         obj = self if assume_sorted else self.sortby([k for k in coords])
 