Add deterministic dask tokenization for sparse arrays.

Summary of this patch (free text before the first "diff --git" line is
skipped by patch tools):

* requirements/tests.txt: add dask[array] to the test requirements and
  terminate the file with a newline.
* sparse/_coo/core.py: add a __dask_tokenize__ method that returns
  dask.base.normalize_token() of the tuple (type, coords, data, shape,
  fill_value), so the token depends on array content. dask is imported
  inside the method rather than at module import time.
* sparse/tests/test_dask_interop.py: new test checking that two
  sparse.COO arrays built from the same inputs tokenize identically and
  that changing one data value changes the token.

diff --git a/requirements/tests.txt b/requirements/tests.txt
index 0c580890..7c28c17d 100644
--- a/requirements/tests.txt
+++ b/requirements/tests.txt
@@ -1,3 +1,4 @@
+dask[array]
 pytest>=3.5
 pytest-black
-pytest-cov
\ No newline at end of file
+pytest-cov
diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py
index bc5cc93f..74820fb4 100644
--- a/sparse/_coo/core.py
+++ b/sparse/_coo/core.py
@@ -284,6 +284,14 @@ def __setstate__(self, state):
         self.coords, self.data, self.shape, self.fill_value = state
         self._cache = None
 
+    def __dask_tokenize__(self):
+        "Produce a deterministic, content-based hash for dask."
+        from dask.base import normalize_token
+
+        return normalize_token(
+            (type(self), self.coords, self.data, self.shape, self.fill_value)
+        )
+
     def copy(self, deep=True):
         """Return a copy of the array.
 
diff --git a/sparse/tests/test_dask_interop.py b/sparse/tests/test_dask_interop.py
new file mode 100644
index 00000000..4ff58156
--- /dev/null
+++ b/sparse/tests/test_dask_interop.py
@@ -0,0 +1,11 @@
+from dask.base import tokenize
+import sparse
+
+
+def test_deterministic_token():
+    a = sparse.COO(data=[1, 2, 3], coords=[10, 20, 30], shape=(40,))
+    b = sparse.COO(data=[1, 2, 3], coords=[10, 20, 30], shape=(40,))
+    assert tokenize(a) == tokenize(b)
+    # One of these things is not like the other....
+    c = sparse.COO(data=[1, 2, 4], coords=[10, 20, 30], shape=(40,))
+    assert tokenize(a) != tokenize(c)