Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fail with NotImplementedError if backend Zarr is missing nchunks_initialized #491

Merged
merged 2 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ omit =
cubed/runtime/executors/dask*.py
cubed/runtime/executors/lithops.py
cubed/runtime/executors/modal*.py
cubed/storage/backends/tensorstore.py
cubed/vendor/*
3 changes: 1 addition & 2 deletions .github/workflows/tensorstore-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ jobs:

- name: Run tests
run: |
# exclude tests that rely on the nchunks_initialized array attribute
pytest -k "not test_resume"
pytest -v
env:
CUBED_STORAGE_NAME: tensorstore
4 changes: 4 additions & 0 deletions cubed/runtime/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ def already_computed(
target = nodes[output].get("target", None)
if target is not None:
target = open_if_lazy_zarr_array(target)
if not hasattr(target, "nchunks_initialized"):
raise NotImplementedError(
f"Zarr array type {type(target)} does not support resume since it doesn't have a 'nchunks_initialized' property"
)
# this check can be expensive since it has to list the directory to find nchunks_initialized
if target.ndim == 0 or target.nchunks_initialized != target.nchunks:
return False
Expand Down
27 changes: 19 additions & 8 deletions cubed/tests/test_executor_features.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
import platform

import fsspec
Expand Down Expand Up @@ -161,14 +162,24 @@ def test_resume(spec, executor):
num_created_arrays = 1 # c
assert task_counter.value == num_created_arrays + 4

# since c has already been computed, when computing d only 4 tasks are run, instead of 8
task_counter = TaskCounter()
d.compute(
executor=executor, callbacks=[task_counter], optimize_graph=False, resume=True
)
# the create arrays tasks are run again, even though they exist
num_created_arrays = 2 # c, d
assert task_counter.value == num_created_arrays + 4
if not hasattr(c.zarray, "nchunks_initialized"):
# We expect resume to fail if there is no 'nchunks_initialized' property on the Zarr array
cm = pytest.raises(NotImplementedError)
else:
cm = contextlib.nullcontext()

with cm:
# since c has already been computed, when computing d only 4 tasks are run, instead of 8
task_counter = TaskCounter()
d.compute(
executor=executor,
callbacks=[task_counter],
optimize_graph=False,
resume=True,
)
# the create arrays tasks are run again, even though they exist
num_created_arrays = 2 # c, d
assert task_counter.value == num_created_arrays + 4


@pytest.mark.parametrize("compute_arrays_in_parallel", [True, False])
Expand Down
Loading