From 5056b4540486aef3b234f250b303c588dd53c108 Mon Sep 17 00:00:00 2001 From: Keewis Date: Fri, 2 Jul 2021 00:50:59 +0200 Subject: [PATCH 1/9] add the "use_bottleneck" option --- xarray/core/options.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xarray/core/options.py b/xarray/core/options.py index 7104e12c29f..71358916243 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -14,6 +14,7 @@ FILE_CACHE_MAXSIZE = "file_cache_maxsize" KEEP_ATTRS = "keep_attrs" WARN_FOR_UNCLOSED_FILES = "warn_for_unclosed_files" +USE_BOTTLENECK = "use_bottleneck" OPTIONS = { @@ -31,6 +32,7 @@ FILE_CACHE_MAXSIZE: 128, KEEP_ATTRS: "default", WARN_FOR_UNCLOSED_FILES: False, + USE_BOTTLENECK: True, } _JOIN_OPTIONS = frozenset(["inner", "outer", "left", "right", "exact"]) @@ -54,6 +56,7 @@ def _positive_integer(value): FILE_CACHE_MAXSIZE: _positive_integer, KEEP_ATTRS: lambda choice: choice in [True, False, "default"], WARN_FOR_UNCLOSED_FILES: lambda value: isinstance(value, bool), + USE_BOTTLENECK: lambda choice: choice in [True, False], } @@ -122,6 +125,9 @@ class set_options: attrs, ``False`` to always discard them, or ``'default'`` to use original logic that attrs should only be kept in unambiguous circumstances. Default: ``'default'``. + - ``use_bottleneck``: allow using bottleneck. Either ``True`` to accelerate + operations using bottleneck if it is installed or ``False`` to never use it. + Default: ``True`` - ``display_style``: display style to use in jupyter for xarray objects. Default: ``'html'``. Other options are ``'text'``. 
- ``display_expand_attrs``: whether to expand the attributes section for From f13620877c6583bae608e4f82e340e90c84dd6ea Mon Sep 17 00:00:00 2001 From: Keewis Date: Fri, 2 Jul 2021 00:51:58 +0200 Subject: [PATCH 2/9] conditionally disable bottleneck where possible --- xarray/core/nputils.py | 3 +++ xarray/core/rolling.py | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 3aaed08575a..7d5398374aa 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -4,6 +4,8 @@ import pandas as pd from numpy.core.multiarray import normalize_axis_index # type: ignore[attr-defined] +from .options import OPTIONS + try: import bottleneck as bn @@ -138,6 +140,7 @@ def f(values, axis=None, **kwargs): if ( _USE_BOTTLENECK + and OPTIONS["USE_BOTTLENECK"] and isinstance(values, np.ndarray) and bn_func is not None and not isinstance(axis, tuple) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index b87dcda24b0..285897e483c 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -7,7 +7,7 @@ from . 
import dtypes, duck_array_ops, utils from .arithmetic import CoarsenArithmetic -from .options import _get_keep_attrs +from .options import OPTIONS, _get_keep_attrs from .pycompat import is_duck_dask_array from .utils import either_dict_or_kwargs @@ -535,7 +535,8 @@ def _numpy_or_bottleneck_reduce( del kwargs["dim"] if ( - bottleneck_move_func is not None + OPTIONS["USE_BOTTLENECK"] + and bottleneck_move_func is not None and not is_duck_dask_array(self.obj.data) and len(self.dim) == 1 ): From 3623f8de5727c3431ee8a7c059482bb7db9ea0cd Mon Sep 17 00:00:00 2001 From: Keewis Date: Fri, 2 Jul 2021 01:58:48 +0200 Subject: [PATCH 3/9] fix the option name --- xarray/core/nputils.py | 2 +- xarray/core/rolling.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 7d5398374aa..3e0f550dd30 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -140,7 +140,7 @@ def f(values, axis=None, **kwargs): if ( _USE_BOTTLENECK - and OPTIONS["USE_BOTTLENECK"] + and OPTIONS["use_bottleneck"] and isinstance(values, np.ndarray) and bn_func is not None and not isinstance(axis, tuple) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 285897e483c..6c2019c36f4 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -535,7 +535,7 @@ def _numpy_or_bottleneck_reduce( del kwargs["dim"] if ( - OPTIONS["USE_BOTTLENECK"] + OPTIONS["use_bottleneck"] and bottleneck_move_func is not None and not is_duck_dask_array(self.obj.data) and len(self.dim) == 1 From 83ff0bb00ae0cc6034b2d46344b77227256326b6 Mon Sep 17 00:00:00 2001 From: Keewis Date: Wed, 21 Jul 2021 19:22:08 +0200 Subject: [PATCH 4/9] add a entry to whats-new.rst --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8045e5d486f..af4bd60b85a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -56,6 +56,8 @@ New Features - Allow removal of the coordinate attribute 
``coordinates`` on variables by setting ``.attrs['coordinates']= None`` (:issue:`5510`). By `Elle Smith `_. +- Add an option to disable the use of ``bottleneck`` (:pull:`5560`). + By `Justus Magin <https://github.com/keewis>`_. Breaking changes ~~~~~~~~~~~~~~~~ From 514db3a7090a1dc546d91412831aa10cc8f5d51e Mon Sep 17 00:00:00 2001 From: Keewis Date: Fri, 30 Jul 2021 11:23:50 +0200 Subject: [PATCH 5/9] also check use_bottleneck in ffill and bfill --- xarray/core/missing.py | 8 +++++++- xarray/tests/test_missing.py | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 6b5742104e4..99b18268a8c 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -12,7 +12,7 @@ from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc from .duck_array_ops import datetime_to_numeric, push, timedelta_to_numeric -from .options import _get_keep_attrs +from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_version, is_duck_dask_array from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables @@ -405,6 +405,9 @@ def _bfill(arr, n=None, axis=-1): def ffill(arr, dim=None, limit=None): """forward fill missing values""" + if not OPTIONS["use_bottleneck"]: + raise RuntimeError("ffill requires bottleneck to be enabled") + axis = arr.get_axis_num(dim) # work around for bottleneck 178 @@ -422,6 +425,9 @@ def ffill(arr, dim=None, limit=None): def bfill(arr, dim=None, limit=None): """backfill missing values""" + if not OPTIONS["use_bottleneck"]: + raise RuntimeError("bfill requires bottleneck to be enabled") + axis = arr.get_axis_num(dim) # work around for bottleneck 178 diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index e2dfac04222..10c7823caf7 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -392,6 +392,30 @@ def test_ffill(): assert_equal(actual, expected) +def 
test_ffill_use_bottleneck(): + da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") + with xr.set_options(use_bottleneck=False): + with pytest.raises(RuntimeError): + da.ffill("x") + + da = da.chunk({"x": 1}) + with xr.set_options(use_bottleneck=False): + with pytest.raises(RuntimeError): + da.ffill("x") + + +def test_bfill_use_bottleneck(): + da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") + with xr.set_options(use_bottleneck=False): + with pytest.raises(RuntimeError): + da.bfill("x") + + da = da.chunk({"x": 1}) + with xr.set_options(use_bottleneck=False): + with pytest.raises(RuntimeError): + da.bfill("x") + + @requires_bottleneck @requires_dask @pytest.mark.parametrize("method", ["ffill", "bfill"]) From ad91a083faedf556c0ae2aed7b0e9456223178ce Mon Sep 17 00:00:00 2001 From: Keewis Date: Fri, 30 Jul 2021 12:52:25 +0200 Subject: [PATCH 6/9] check for use_bottleneck in rank --- xarray/core/dataset.py | 3 +++ xarray/core/variable.py | 5 ++++- xarray/tests/test_dataset.py | 6 ++++++ xarray/tests/test_variable.py | 6 ++++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f3378ad9eda..b0193dffb55 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6198,6 +6198,9 @@ def rank(self, dim, pct=False, keep_attrs=None): ranked : Dataset Variables that do not depend on `dim` are dropped. 
""" + if not OPTIONS["use_bottleneck"]: + raise RuntimeError("rank requires bottleneck to be enabled") + if dim not in self.dims: raise ValueError(f"Dataset does not contain the dimension: {dim}") diff --git a/xarray/core/variable.py b/xarray/core/variable.py index f57c4495a8a..670b8d0a2c6 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -27,7 +27,7 @@ from .common import AbstractArray from .indexes import PandasIndex, wrap_pandas_index from .indexing import BasicIndexer, OuterIndexer, VectorizedIndexer, as_indexable -from .options import _get_keep_attrs +from .options import OPTIONS, _get_keep_attrs from .pycompat import ( cupy_array_type, dask_array_type, @@ -2016,6 +2016,9 @@ def rank(self, dim, pct=False): -------- Dataset.rank, DataArray.rank """ + if not OPTIONS["use_bottleneck"]: + raise RuntimeError("rank requires bottleneck to be enabled") + import bottleneck as bn data = self.data diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 317602c889f..560ebe819d6 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4969,6 +4969,12 @@ def test_rank(self): with pytest.raises(ValueError, match=r"does not contain"): x.rank("invalid_dim") + def test_rank_use_bottleneck(self): + ds = Dataset({"a": ("x", [0, np.nan, 2]), "b": ("y", [4, 6, 3, 4])}) + with xr.set_options(use_bottleneck=False): + with pytest.raises(RuntimeError): + ds.rank("x") + def test_count(self): ds = Dataset({"x": ("a", [np.nan, 1]), "y": 0, "z": np.nan}) expected = Dataset({"x": 1, "y": 1, "z": 0}) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 0247892931a..d8f31ec32f2 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1718,6 +1718,12 @@ def test_rank_dask_raises(self): with pytest.raises(TypeError, match=r"arrays stored as dask"): v.rank("x") + def test_rank_use_bottleneck(self): + v = Variable(["x"], [3.0, 1.0, np.nan, 2.0, 4.0]) + with 
set_options(use_bottleneck=False): + with pytest.raises(RuntimeError): + v.rank("x") + @requires_bottleneck def test_rank(self): import bottleneck as bn From 26b6015f8fbcf5e70b5c4e1cee2d1a317e070918 Mon Sep 17 00:00:00 2001 From: Keewis Date: Fri, 30 Jul 2021 13:58:15 +0200 Subject: [PATCH 7/9] split out the dask tests --- xarray/tests/test_missing.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 10c7823caf7..1ebcd9ac6f7 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -398,6 +398,10 @@ def test_ffill_use_bottleneck(): with pytest.raises(RuntimeError): da.ffill("x") + +@requires_dask +def test_ffill_use_bottleneck_dask(): + da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") da = da.chunk({"x": 1}) with xr.set_options(use_bottleneck=False): with pytest.raises(RuntimeError): @@ -410,6 +414,10 @@ def test_bfill_use_bottleneck(): with pytest.raises(RuntimeError): da.bfill("x") + +@requires_dask +def test_bfill_use_bottleneck_dask(): + da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") da = da.chunk({"x": 1}) with xr.set_options(use_bottleneck=False): with pytest.raises(RuntimeError): From 3c04ee19d90678b2475170cbe1fc34bce03dac9b Mon Sep 17 00:00:00 2001 From: Keewis Date: Wed, 11 Aug 2021 21:36:07 +0200 Subject: [PATCH 8/9] make sure bottleneck is not used for reduce functions --- xarray/tests/test_variable.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 686667d0a17..c51313fcf95 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1673,6 +1673,23 @@ def test_reduce(self): with pytest.raises(ValueError, match=r"cannot supply both"): v.mean(dim="x", axis=0) + @requires_bottleneck + def test_reduce_use_bottleneck(self, monkeypatch): + def raise_if_called(*args, **kwargs): + raise RuntimeError("should not 
have been called") + + import bottleneck as bn + + monkeypatch.setattr(bn, "nanmin", raise_if_called) + + v = Variable("x", [0.0, np.nan, 1.0]) + with pytest.raises(RuntimeError, match="should not have been called"): + with set_options(use_bottleneck=True): + v.min() + + with set_options(use_bottleneck=False): + v.min() + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( From 76a22f3f683fcfd331005ebc29e650f0954dd603 Mon Sep 17 00:00:00 2001 From: Keewis Date: Thu, 12 Aug 2021 15:32:38 +0200 Subject: [PATCH 9/9] explain how to enable bottleneck in the error messages --- xarray/core/dataset.py | 5 ++++- xarray/core/missing.py | 10 ++++++++-- xarray/core/variable.py | 5 ++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 07acba737b2..4bfc1ccbdf1 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6229,7 +6229,10 @@ def rank(self, dim, pct=False, keep_attrs=None): Variables that do not depend on `dim` are dropped. """ if not OPTIONS["use_bottleneck"]: - raise RuntimeError("rank requires bottleneck to be enabled") + raise RuntimeError( + "rank requires bottleneck to be enabled." + " Call `xr.set_options(use_bottleneck=True)` to enable it." + ) if dim not in self.dims: raise ValueError(f"Dataset does not contain the dimension: {dim}") diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 99b18268a8c..36983a227b9 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -406,7 +406,10 @@ def _bfill(arr, n=None, axis=-1): def ffill(arr, dim=None, limit=None): """forward fill missing values""" if not OPTIONS["use_bottleneck"]: - raise RuntimeError("ffill requires bottleneck to be enabled") + raise RuntimeError( + "ffill requires bottleneck to be enabled." + " Call `xr.set_options(use_bottleneck=True)` to enable it." 
+ ) axis = arr.get_axis_num(dim) @@ -426,7 +429,10 @@ def ffill(arr, dim=None, limit=None): def bfill(arr, dim=None, limit=None): """backfill missing values""" if not OPTIONS["use_bottleneck"]: - raise RuntimeError("bfill requires bottleneck to be enabled") + raise RuntimeError( + "bfill requires bottleneck to be enabled." + " Call `xr.set_options(use_bottleneck=True)` to enable it." + ) axis = arr.get_axis_num(dim) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 40398a31847..6b971389de7 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2053,7 +2053,10 @@ def rank(self, dim, pct=False): Dataset.rank, DataArray.rank """ if not OPTIONS["use_bottleneck"]: - raise RuntimeError("rank requires bottleneck to be enabled") + raise RuntimeError( + "rank requires bottleneck to be enabled." + " Call `xr.set_options(use_bottleneck=True)` to enable it." + ) import bottleneck as bn