Allows low-level HTTP fetching errors to be ignored
- useful for a Dask client as a parallel option
gmaze committed Sep 17, 2024
1 parent 2f2e2dc commit 0095fe6
Showing 2 changed files with 53 additions and 20 deletions.
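
For context, the new errors= option follows the familiar 'raise'/'ignore' convention. The sketch below (a hypothetical fetch_one helper, not argopy code; URLs are placeholders) shows why ignoring per-URL failures matters when downloads are mapped across Dask workers: a single bad URL should not abort the whole batch.

# Minimal sketch, not argopy code: a hypothetical fetcher mirroring the
# errors='raise'/'ignore' switch introduced by this commit.
import logging
from functools import partial
from urllib.request import urlopen
from urllib.error import URLError

log = logging.getLogger(__name__)

def fetch_one(url: str, errors: str = "raise"):
    """'raise' propagates failures; 'ignore' logs them and returns None."""
    try:
        with urlopen(url, timeout=10) as resp:
            return resp.read()
    except URLError as e:
        if errors == "raise":
            raise
        log.error("Error: %s raised from: %s", e, url)
        return None

if __name__ == "__main__":
    from dask.distributed import Client

    client = Client()  # local Dask cluster
    urls = ["https://example.com/a.nc", "https://example.com/b.nc"]  # placeholders
    # With errors='ignore', one dead URL no longer aborts the whole batch;
    # failed fetches simply come back as None and can be filtered out.
    futures = client.map(partial(fetch_one, errors="ignore"), urls)
    results = [r for r in client.gather(futures) if r is not None]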
10 changes: 5 additions & 5 deletions argopy/data_fetchers/erddap_data.py
@@ -432,11 +432,11 @@ def _bgc_vlist_avail(self):
         for p in params:
             if p.lower() in self._bgc_vlist_erddap:
                 results.append(p)
-            else:
-                log.error(
-                    "Removed '%s' because it's not available on the erddap, but it must !"
-                    % p
-                )
+            # else:
+            #     log.error(
+            #         "Removed '%s' because it is not available on the erddap server (%s), but it should !"
+            #         % (p, self._server)
+            #     )
 
         return results
63 changes: 48 additions & 15 deletions argopy/stores/filesystems.py
@@ -673,6 +673,7 @@ def download_url(
         n_attempt: int = 1,
         max_attempt: int = 5,
         cat_opts: dict = {},
+        errors: str = 'raise',
         *args,
         **kwargs,
     ):
@@ -683,19 +684,27 @@ def download_url(
         """
 
         def make_request(
-            ffs, url, n_attempt: int = 1, max_attempt: int = 5, cat_opts: dict = {}
+            ffs, url, n_attempt: int = 1, max_attempt: int = 5, cat_opts: dict = {}, errors: str = 'raise',
         ):
             data = None
             if n_attempt <= max_attempt:
                 try:
                     data = ffs.cat_file(url, **cat_opts)
+                except FileNotFoundError as e:
+                    if errors == 'raise':
+                        raise e
+                    elif errors == 'ignore':
+                        log.error('FileNotFoundError raised from: %s' % url)
                 except aiohttp.ClientResponseError as e:
                     if e.status == 413:
-                        log.debug(
-                            "Error %i (Payload Too Large) raised with %s"
-                            % (e.status, url)
-                        )
-                        raise
+                        if errors == 'raise':
+                            raise e
+                        elif errors == 'ignore':
+                            log.error(
+                                "Error %i (Payload Too Large) raised with %s"
+                                % (e.status, url)
+                            )
+
                     elif e.status == 429:
                         retry_after = int(e.headers.get("Retry-After", 5))
                         log.debug(
@@ -707,14 +716,26 @@ def make_request(
                     else:
                         # Handle other client response errors
                         print(f"Error: {e}")
-                except aiohttp.ClientError:
-                    # Handle other request exceptions
-                    # print(f"Error: {e}")
-                    raise
+                except aiohttp.ClientError as e:
+                    if errors == 'raise':
+                        raise e
+                    elif errors == 'ignore':
+                        log.error(f"Error: {e}")
+
+                except fsspec.FSTimeoutError as e:
+                    if errors == 'raise':
+                        raise e
+                    elif errors == 'ignore':
+                        log.error(f"Error: {e}")
             else:
-                raise ValueError(
-                    f"Error: All attempts failed to download this url: {url}"
-                )
+                if errors == 'raise':
+                    raise ValueError(
+                        f"Error: All attempts failed to download this url: {url}"
+                    )
+                elif errors == 'ignore':
+                    log.error(f"Error: All attempts failed to download this url: {url}")
 
             return data, n_attempt
 
         url = self.curateurl(url)
@@ -724,14 +745,18 @@ def make_request(
             n_attempt=n_attempt,
             max_attempt=max_attempt,
             cat_opts=cat_opts,
+            errors=errors,
         )
 
         if data is None:
-            raise FileNotFoundError(url)
+            if errors == 'raise':
+                raise FileNotFoundError(url)
+            elif errors == 'ignore':
+                log.error("FileNotFoundError: %s" % url)
 
         return data
 
-    def open_dataset(self, url, **kwargs):
+    def open_dataset(self, url, errors: str = 'raise', **kwargs):
         """Open and decode a xarray dataset from an url
 
         Parameters
@@ -749,6 +774,14 @@ def open_dataset(self, url, **kwargs):
         if "download_url_opts" in kwargs:
             dwn_opts.update(kwargs["download_url_opts"])
         data = self.download_url(url, **dwn_opts)
+        log.info(dwn_opts)
+
+        if data is None:
+            if errors == 'raise':
+                raise DataNotFound(url)
+            elif errors == 'ignore':
+                log.error("DataNotFound: %s" % url)
+                return None
 
         if data[0:3] != b"CDF":
             raise TypeError(
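
Taken together, callers can now opt out of exceptions on a per-request basis. A hedged usage sketch based on the methods changed above (httpstore is the HTTP store defined in argopy/stores/filesystems.py; the constructor defaults and URLs below are assumptions, not from this commit):

# Hedged sketch: exercising the new errors= option on the store methods
# changed in this commit (URLs are placeholders).
from argopy.stores import httpstore

fs = httpstore()  # assumes the default constructor is sufficient

# errors='ignore': a failed fetch is logged and None is returned
# instead of raising FileNotFoundError.
data = fs.download_url("https://example.com/missing.nc", errors="ignore")
if data is None:
    print("fetch failed, skipping this URL")

# errors='raise' (the default) preserves the previous behavior.
ds = fs.open_dataset("https://example.com/profile.nc")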
