fix: availability checks for basins
paulmueller committed Oct 23, 2023
1 parent aa3ee69 commit 37b1d6b
Showing 4 changed files with 49 additions and 6 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG
@@ -1,3 +1,5 @@
0.54.2
- fix: availability checks for basins
0.54.1
- fix: properly check for basin availability before getting features
0.54.0
15 changes: 9 additions & 6 deletions dclab/rtdc_dataset/core.py
@@ -244,19 +244,22 @@ def _get_basin_feature_data(
                 if basin_type is not None and basin_type != bn.basin_type:
                     # User asked for specific basin type
                     continue
-                if feat in bn.features:
-                    try:
                 # There are all kinds of errors that may happen here.
+                # Note that `bn.features` can already trigger an
+                # availability check that may raise a ValueError.
                 # TODO:
                 # Introduce some kind of callback so the user knows
                 # why the data are not available. The current solution
                 # (fail silently) is not sufficiently transparent,
                 # especially when considering networking issues.
+                try:
+                    if feat in bn.features:
                         data = bn.get_feature_data(feat)
-                    except BaseException:
-                        # Basin data not available
-                        pass
-                    break
+                        # The data are available, we may abort the search.
+                        break
+                except BaseException:
+                    # Basin data not available
+                    pass
         return data

     @staticmethod
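For context, a minimal self-contained sketch (not dclab code; BrokenBasin, GoodBasin and get_basin_feature_data below are made-up stand-ins) of the failure mode this hunk addresses: when the membership check `feat in bn.features` can itself raise, it has to live inside the try block, otherwise a single unreachable basin aborts the whole feature lookup instead of being skipped.

class BrokenBasin:
    @property
    def features(self):
        # e.g. a networking problem or a missing file surfaces here
        raise ValueError("basin is not available")

    def get_feature_data(self, feat):
        raise ValueError("basin is not available")


class GoodBasin:
    features = ["deform"]

    def get_feature_data(self, feat):
        return [0.1, 0.2, 0.3]


def get_basin_feature_data(basins, feat):
    data = None
    for bn in basins:
        try:
            # `bn.features` may already raise, so it must be inside the try
            if feat in bn.features:
                data = bn.get_feature_data(feat)
                break
        except BaseException:
            # basin not available, try the next one
            pass
    return data


# The broken basin is skipped silently and the good one still serves the data.
assert get_basin_feature_data([BrokenBasin(), GoodBasin()], "deform") == [0.1, 0.2, 0.3]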
35 changes: 35 additions & 0 deletions tests/test_rtdc_fmt_hdf5_basins.py
@@ -105,6 +105,41 @@ def test_basin_not_available():
_ = bn.ds


def test_basin_nothing_available():
    h5path = retrieve_data("fmt-hdf5_fl_wide-channel_2023.zip")
    h5path_small = h5path.with_name("smaller.rtdc")

    # Dataset creation
    with h5py.File(h5path) as src, h5py.File(h5path_small, "w") as dst:
        # first, copy all the scalar features to the new file
        rtdc_dataset.rtdc_copy(src_h5file=src,
                               dst_h5file=dst,
                               features="scalar")
        # Next, store the basin information in the new dataset
        bdat = {
            "type": "file",
            "format": "hdf5",
            "paths": [
                "fake.rtdc",  # fake path
            ]
        }
        blines = json.dumps(bdat, indent=2).split("\n")
        basins = dst.require_group("basins")
        with RTDCWriter(dst, mode="append") as hw:
            hw.write_text(basins, "my_basin", blines)
        # sanity checks
        assert "deform" in dst["events"]
        assert "image" not in dst["events"]

    h5path.unlink()

    # Now open the scalar dataset and check whether basins are missing
    with new_dataset(h5path_small) as ds:
        assert "image" not in ds
        assert not ds.features_basin
        _ = ds["index"]


def test_basin_features_path_absolute():
"""Create a dataset that refers to a basin in a relative path"""
h5path = retrieve_data("fmt-hdf5_fl_wide-channel_2023.zip")
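As a side note, the basin definition that test_basin_nothing_available stores via hw.write_text() is simply the JSON-serialized bdat dictionary split into lines. A standalone sketch (not part of the diff) of what ends up in the "basins/my_basin" dataset, with the deliberately nonexistent path "fake.rtdc":

import json

bdat = {
    "type": "file",
    "format": "hdf5",
    "paths": [
        "fake.rtdc",  # deliberately nonexistent
    ]
}
print(json.dumps(bdat, indent=2))
# {
#   "type": "file",
#   "format": "hdf5",
#   "paths": [
#     "fake.rtdc"
#   ]
# }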
3 changes: 3 additions & 0 deletions tests/test_rtdc_fmt_s3_basin.py
@@ -112,6 +112,9 @@ def test_basin_not_available(url):
    # Open the dataset and check whether basin is missing
    with new_dataset(h5path) as ds:
        assert not ds.features_basin
        # This is a very subtle test for checking whether invalid basins
        # are just ignored:
        _ = ds["index"]

    # Also test that on a lower level
    bn = S3Basin("https://example.com/nonexistentbucket/nonexistentkey")
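A hedged sketch of the lower-level check referenced above, assuming the S3Basin import path shown below and that basins expose is_available() like the file-based basins do; the point is that an unreachable basin should report itself as unavailable rather than raise during the check.

from dclab.rtdc_dataset.fmt_s3 import S3Basin  # assumed import path

bn = S3Basin("https://example.com/nonexistentbucket/nonexistentkey")
# An unreachable object should be flagged as unavailable, not raise here.
assert not bn.is_available()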
