Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle BentoML errors & clean up failed models #1527

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ authors:
given-names: Miquel
orcid: https://orcid.org/0000-0002-9906-6936
title: 'Ersilia Model Hub: a repository of AI/ML models for neglected tropical diseases'
version: 0.1.40
version: 0.1.41
doi: 10.5281/zenodo.7274645
date-released: ''
url: https://github.com/ersilia-os/ersilia
4 changes: 2 additions & 2 deletions codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"givenName": "Miquel"
}
],
"codeRepository": "https://github.com/ersilia-os/ersilia/v0.1.40",
"codeRepository": "https://github.com/ersilia-os/ersilia/v0.1.41",
"dateCreated": "2021-01-01",
"dateModified": "2024-10-01",
"datePublished": "2022-10-06",
Expand Down Expand Up @@ -221,7 +221,7 @@
],
"url": "https://ersilia.io",
"downloadUrl": "https://github.com/ersilia-os/ersilia/archive/refs/tags/v0.1.37.tar.gz",
"version": "0.1.40",
"version": "0.1.41",
"relatedLink": "https://ersilia.gitbook.io",
"developmentStatus": "active",
"issueTracker": "https://github.com/ersilia-os/ersilia/issues"
Expand Down
2 changes: 1 addition & 1 deletion ersilia/_static_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = "0.1.40"
version = "0.1.41"
21 changes: 15 additions & 6 deletions ersilia/core/base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import os
import subprocess
from pathlib import Path

from .. import logger
from ..default import EOS
from ..tools.bentoml.exceptions import BentoMLException
from ..utils.config import Config, Credentials
from ..utils.paths import resolve_pack_method
from ..utils.terminal import run_command

home = str(Path.home())

Expand Down Expand Up @@ -70,6 +71,7 @@ def _get_bentoml_location(self, model_id):
tag = self._get_latest_bentoml_tag(model_id)
path = os.path.join(self._bentoml_dir, model_id)
if not os.path.exists(path):
self.logger.debug(f"BentoML path not found: {path}")
return None
if tag is not None:
return os.path.join(path, tag)
Expand All @@ -80,6 +82,7 @@ def _get_bundle_location(self, model_id):
tag = self._get_latest_bundle_tag(model_id)
path = os.path.join(self._bundles_dir, model_id)
if not os.path.exists(path):
self.logger.debug(f"Bundle path not found: {path}")
return None
if tag is not None:
return os.path.join(path, tag)
Expand All @@ -90,16 +93,21 @@ def _get_bento_location(self, model_id):
bundle_path = self._get_bundle_location(model_id)
if resolve_pack_method(bundle_path) != "bentoml":
return None
cmd = ["bentoml", "get", "%s:latest" % model_id, "--print-location", "--quiet"]
result = subprocess.run(cmd, stdout=subprocess.PIPE)
result = result.stdout.decode("utf-8").rstrip()
return result

cmd = ["bentoml", "get", f"{model_id}:latest", "--print-location", "--quiet"]
stdout, stderr, returncode = run_command(cmd, quiet=True)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Love this implementation, thanks!

if returncode != 0:
self.logger.error(f"BentoML command failed: {stderr}")
raise BentoMLException(f"BentoML error: {stderr}")
return stdout.strip()

def _is_ready(self, model_id):
"""Check whether a model exists in the local computer"""
try:
self._get_latest_bundle_tag(model_id)
except:
except Exception as e:
self.logger.debug(f"Model {model_id} not ready: {str(e)}")
return False
path = os.path.join(self._abs_path(self._dest_dir), model_id)
if not os.path.exists(path):
Expand All @@ -108,5 +116,6 @@ def _is_ready(self, model_id):

def _has_credentials(self):
if self.cred is None:
self.logger.warning("No credentials found.")
return False
return True
63 changes: 37 additions & 26 deletions ersilia/hub/content/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
import csv
import json
import os
import shutil
import subprocess

from ... import ErsiliaBase
from ...db.hubdata.interfaces import JsonModelsInterface
from ...default import BENTOML_PATH, MODEL_SOURCE_FILE, TableConstants
from ...default import MODEL_SOURCE_FILE, TableConstants
from ...tools.bentoml.exceptions import BentoMLException
from ...utils.identifiers.model import ModelIdentifier
from ...utils.terminal import run_command
from .card import ModelCard

try:
Expand Down Expand Up @@ -357,26 +357,37 @@ def bentoml(self) -> CatalogTable:
The catalog table containing the models available as BentoServices.
"""
try:
result = subprocess.run(
["bentoml", "list"], stdout=subprocess.PIPE, env=os.environ, timeout=10
)
except Exception:
shutil.rmtree(BENTOML_PATH)
return None
result = [r for r in result.stdout.decode("utf-8").split("\n") if r]
if len(result) == 1:
return
columns = ["BENTO_SERVICE", "AGE", "APIS", "ARTIFACTS"]
header = result[0]
values = result[1:]
cut_idxs = []
for col in columns:
cut_idxs += [header.find(col)]
R = []
for row in values:
r = []
for i, idx in enumerate(zip(cut_idxs, cut_idxs[1:] + [None])):
r += [row[idx[0] : idx[1]].rstrip()]
R += [[r[0].split(":")[0]] + r]
columns = ["Identifier"] + columns
return CatalogTable(data=R, columns=columns)
stdout, stderr, returncode = run_command(["bentoml", "list"], quiet=True)
if returncode != 0:
raise BentoMLException(f"BentoML list failed: {stderr}")

# Process stdout to build CatalogTable
output_lines = stdout.split("\n")
if not output_lines or len(output_lines) == 1:
return CatalogTable(data=[], columns=[]) # Return empty table

# Extract columns and values
columns = ["BENTO_SERVICE", "AGE", "APIS", "ARTIFACTS"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can move this to defaults.py as something like BENTOML_COLS.

header = output_lines[0]
values = output_lines[1:]

# Parse table data
cut_idxs = [header.find(col) for col in columns]
R = []
for row in values:
r = []
for i, idx in enumerate(zip(cut_idxs, cut_idxs[1:] + [None])):
r.append(
row[idx[0] : idx[1]].rstrip()
if idx[1]
else row[idx[0] :].rstrip()
)
R.append([r[0].split(":")[0]] + r)

return CatalogTable(data=R, columns=["Identifier"] + columns)

except BentoMLException:
raise
except Exception as e:
self.logger.error(f"Unexpected error: {str(e)}")
raise BentoMLException(f"Failed to fetch BentoML models: {str(e)}")
71 changes: 39 additions & 32 deletions ersilia/hub/fetch/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from ...hub.delete.delete import ModelFullDeleter
from ...hub.fetch.actions.template_resolver import TemplateResolver
from ...setup.requirements import check_bentoml
from ...tools.bentoml.exceptions import BentoMLException
from ...utils.exceptions_utils.fetch_exceptions import (
NotInstallableWithBentoML,
NotInstallableWithFastAPI,
Expand Down Expand Up @@ -187,6 +188,7 @@ def _fetch_from_fastapi(self):
def _fetch_from_bentoml(self):
self.logger.debug("Fetching using BentoML")
self.check_bentoml()

fetch = importlib.import_module("ersilia.hub.fetch.fetch_bentoml")
mf = fetch.ModelFetcherFromBentoML(
config_json=self.config_json,
Expand All @@ -199,10 +201,11 @@ def _fetch_from_bentoml(self):
force_from_github=self.force_from_github,
force_from_s3=self.force_from_s3,
)

# Check if the model can be installed with BentoML
if mf.seems_installable(model_id=self.model_id):
mf.fetch(model_id=self.model_id)
else:
self.logger.debug("Not installable with BentoML")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can retain this log line.

raise NotInstallableWithBentoML(model_id=self.model_id)

@throw_ersilia_exception()
Expand Down Expand Up @@ -363,36 +366,40 @@ async def fetch(self, model_id: str) -> bool:
fetcher = ModelFetcher(config_json=config)
success = await fetcher.fetch(model_id="eosxxxx")
"""
fr = await self._fetch(model_id)
if fr.fetch_success:
try:
fr = await self._fetch(model_id)
if not fr.fetch_success:
return fr

self._standard_csv_example(model_id)
self.logger.debug("Writing model source to file")
model_source_file = os.path.join(
self._model_path(model_id), MODEL_SOURCE_FILE
)
try:
self._standard_csv_example(model_id)
except StandardModelExampleError:
self.logger.debug("Standard model example failed, deleting artifacts")
do_delete = yes_no_input(
"Do you want to delete the model artifacts? [Y/n]",
default_answer="Y",
)
if do_delete:
md = ModelFullDeleter(overwrite=False)
md.delete(model_id)
return FetchResult(
fetch_success=False,
reason="Could not successfully run a standard example from the model.",
)
else:
self.logger.debug("Writing model source to file")
model_source_file = os.path.join(
self._model_path(model_id), MODEL_SOURCE_FILE
)
try:
os.makedirs(self._model_path(model_id), exist_ok=True)
except OSError as error:
self.logger.error(f"Error during folder creation: {error}")
with open(model_source_file, "w") as f:
f.write(self.model_source)
return FetchResult(
fetch_success=True, reason="Model fetched successfully"
os.makedirs(self._model_path(model_id), exist_ok=True)
except OSError as error:
self.logger.error(f"Error during folder creation: {error}")
with open(model_source_file, "w") as f:
f.write(self.model_source)

return FetchResult(fetch_success=True, reason="Model fetched successfully")

except (StandardModelExampleError, BentoMLException) as err:
self.logger.debug(f"{type(err).__name__} occurred: {str(err)}")
do_delete = yes_no_input(
"Do you want to delete the model artifacts? [Y/n]",
default_answer="Y",
)
if do_delete:
md = ModelFullDeleter(overwrite=False)
md.delete(model_id)
self.logger.info(
f"✅ Model '{model_id}' artifacts have been successfully deleted."
)
else:
return fr

reason = (
str(err) if str(err) else "An unknown error occurred during fetching."
)
return FetchResult(fetch_success=False, reason=reason)
return fr
8 changes: 7 additions & 1 deletion ersilia/tools/bentoml/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
class BentoMLException(Exception):
    """
    Exception raised for errors in the BentoML tool.

    Parameters
    ----------
    message : str
        A custom error message describing the issue.
    """

    def __init__(self, message: str):
        # Store the message as the exception's sole argument so that
        # str(exc) yields the message and callers can catch it normally.
        super().__init__(message)


class BentoMLConfigException(Exception):
Expand Down
46 changes: 26 additions & 20 deletions ersilia/utils/terminal.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import shutil
import subprocess
import sys

try:
from inputimeout import TimeoutOccurred, inputimeout
Expand Down Expand Up @@ -38,35 +39,40 @@ def is_quiet():

def run_command(cmd, quiet=None):
    """
    Run a shell command and return its captured output.

    Parameters
    ----------
    cmd : str or list
        The command to run. A string is executed through the shell;
        a list is executed directly (preferred — no shell involved,
        so no shell-injection risk).
    quiet : bool, optional
        Whether to suppress echoing of the command's output.
        Defaults to ``is_quiet()``.

    Returns
    -------
    tuple of (str, str, int)
        Stripped stdout, stripped stderr, and the process return code.
        Callers are responsible for checking the return code; this
        function never raises on a non-zero exit.
    """
    if quiet is None:
        quiet = is_quiet()

    # NOTE(review): string commands keep shell=True for backward
    # compatibility with existing call sites; never pass untrusted
    # input as a string command.
    result = subprocess.run(
        cmd,
        shell=isinstance(cmd, str),
        capture_output=True,  # equivalent to stdout=PIPE, stderr=PIPE
        text=True,  # outputs arrive already decoded as str
        env=os.environ,
    )

    stdout = result.stdout.strip()
    stderr = result.stderr.strip()

    # Echo the captured streams unless running quietly, so interactive
    # users still see the command's output.
    if not quiet:
        if stdout:
            print(stdout)
        if stderr:
            print(stderr, file=sys.stderr)

    return stdout, stderr, result.returncode


def run_command_check_output(cmd):
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ersilia"
version = "0.1.40"
version = "0.1.41"
description = "A hub of AI/ML models for open source drug discovery and global health"
license = "GPLv3"
authors = ["Ersilia Open Source Initiative <hello@ersilia.io>"]
Expand Down Expand Up @@ -52,7 +52,7 @@ numpy = "<=1.26.4"
aiofiles = "<=24.1.0"
aiohttp = ">=3.10.11"
nest_asyncio = "<=1.6.0"
isaura = { version = "0.1.40", optional = true }
isaura = { version = "0.1.41", optional = true }
pytest = { version = "^7.4.0", optional = true }
pytest-asyncio = { version = "<=0.24.0", optional = true }
pytest-benchmark = { version = "<=4.0.0", optional = true }
Expand Down
Loading