Skip to content

Commit

Permalink
fix: Check for basin availability when checking run identifier
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed May 31, 2024
1 parent 48deb8d commit 4d51239
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
0.59.1
- fix: ValueError when exporting data from basin-mapped dataset
- fix: Check for basin availability when checking run identifier
0.59.0
- feat: support basins with blown indices
- enh: increase verbosity when failing to resolve basins
Expand Down
12 changes: 11 additions & 1 deletion dclab/rtdc_dataset/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def _get_basin_feature_data(
"""
data = None
if self.basins:
for bn in self.basins:
for bn in list(self.basins):
if basin_type is not None and basin_type != bn.basin_type:
# User asked for specific basin type
continue
Expand All @@ -295,6 +295,16 @@ def _get_basin_feature_data(
except (KeyError, OSError, PermissionError, RecursionError):
# Basin data not available
pass
except feat_basin.BasinNotAvailableError:
# remove the basin from the list
# TODO:
# Check whether this has an actual effect. It could be
# that due to some iterative process `self`
# gets re-initialized and we have to go through this
# again.
self._basins.remove(bn)
warnings.warn(
f"Removed unavailable basin {bn} from {self}")
except BaseException:
warnings.warn(f"Could not access {feat} in {self}:\n"
f"{traceback.format_exc()}")
Expand Down
19 changes: 14 additions & 5 deletions dclab/rtdc_dataset/feat_basin.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import numbers
import threading
from typing import Dict, List, Literal
import warnings
import weakref

import numpy as np
Expand All @@ -19,6 +20,11 @@ class BasinmapFeatureMissingError(KeyError):
pass


class BasinNotAvailableError(BaseException):
    """Signal that the data of a basin cannot be accessed.

    The class derives from :class:`BaseException` rather than
    :class:`Exception`, so generic ``except Exception`` handlers do
    not catch it; code that wants to react to an unavailable basin
    has to catch this class explicitly.
    """


class BasinAvailabilityChecker(threading.Thread):
"""Helper thread for checking basin availability in the background"""
def __init__(self, basin, *args, **kwargs):
Expand Down Expand Up @@ -147,10 +153,8 @@ def __repr__(self):

def _assert_measurement_identifier(self):
"""Make sure the basin matches the measurement identifier
This method caches its result, i.e. only the first call is slow.
"""
if not self.verify_basin(run_identifier=True, availability=False):
if not self.verify_basin(run_identifier=True):
raise KeyError(f"Measurement identifier of basin {self.ds} "
f"({self.get_measurement_identifier()}) does "
f"not match {self.measurement_identifier}!")
Expand Down Expand Up @@ -195,7 +199,7 @@ def ds(self):
"""The :class:`.RTDCBase` instance represented by the basin"""
if self._ds is None:
if not self.is_available():
raise ValueError(f"Basin {self} is not available!")
raise BasinNotAvailableError(f"Basin {self} is not available!")
self._ds = self.load_dataset(self.location, **self.kwargs)
return self._ds

Expand Down Expand Up @@ -278,7 +282,12 @@ def load_dataset(self, location, **kwargs):
ds_bn = ds
return ds_bn

def verify_basin(self, availability=True, run_identifier=True):
def verify_basin(self, run_identifier=True, availability=True):
if not availability:
warnings.warn("The keyword argument 'availability' is "
"deprecated, because it can lead to long waiting "
"times with many unavailable basins.",
DeprecationWarning)
if availability:
check_avail = self.is_available()
else:
Expand Down
4 changes: 2 additions & 2 deletions tests/test_rtdc_fmt_dcor_basin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from dclab import new_dataset, RTDCWriter
from dclab.rtdc_dataset.fmt_dcor import DCORBasin, RTDC_DCOR

from dclab.rtdc_dataset.feat_basin import BasinNotAvailableError

from helper_methods import DCOR_AVAILABLE, retrieve_data

Expand Down Expand Up @@ -101,7 +101,7 @@ def test_basin_not_available(url):
bn = DCORBasin("https://dcor.mpl.mpg.de/api/3/action/dcserv?id="
"00000000-0000-0000-0000-000000000000")
assert not bn.is_available()
with pytest.raises(ValueError, match="is not available"):
with pytest.raises(BasinNotAvailableError, match="is not available"):
_ = bn.ds


Expand Down
3 changes: 2 additions & 1 deletion tests/test_rtdc_fmt_hdf5_basins.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import dclab
from dclab import new_dataset, rtdc_dataset, RTDCWriter
from dclab.rtdc_dataset.fmt_hdf5.basin import HDF5Basin
from dclab.rtdc_dataset.feat_basin import BasinNotAvailableError


from helper_methods import retrieve_data
Expand Down Expand Up @@ -101,7 +102,7 @@ def test_basin_not_available():
# Also test that on a lower level
bn = HDF5Basin(h5path)
assert not bn.is_available()
with pytest.raises(ValueError, match="is not available"):
with pytest.raises(BasinNotAvailableError, match="is not available"):
_ = bn.ds


Expand Down
3 changes: 2 additions & 1 deletion tests/test_rtdc_fmt_http_basin.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import dclab
from dclab import new_dataset, RTDCWriter
from dclab.rtdc_dataset.fmt_http import HTTPBasin, RTDC_HTTP
from dclab.rtdc_dataset.feat_basin import BasinNotAvailableError


from helper_methods import DCOR_AVAILABLE, retrieve_data
Expand Down Expand Up @@ -131,7 +132,7 @@ def test_basin_not_available(url):
# Also test that on a lower level
bn = HTTPBasin("https://example.com/nonexistentbucket/nonexistentkey")
assert not bn.is_available()
with pytest.raises(ValueError, match="is not available"):
with pytest.raises(BasinNotAvailableError, match="is not available"):
_ = bn.ds


Expand Down
4 changes: 3 additions & 1 deletion tests/test_rtdc_fmt_s3_basin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from dclab import new_dataset, RTDCWriter
from dclab.rtdc_dataset.fmt_s3 import S3Basin, RTDC_S3
from dclab.rtdc_dataset.feat_basin import BasinNotAvailableError


from helper_methods import DCOR_AVAILABLE, retrieve_data

Expand Down Expand Up @@ -98,7 +100,7 @@ def test_basin_not_available(url):
# Also test that on a lower level
bn = S3Basin("https://example.com/nonexistentbucket/nonexistentkey")
assert not bn.is_available()
with pytest.raises(ValueError, match="is not available"):
with pytest.raises(BasinNotAvailableError, match="is not available"):
_ = bn.ds


Expand Down

0 comments on commit 4d51239

Please sign in to comment.