Handle BentoML errors & clean up failed models #1527
base: master
Changes from 6 commits
Version file:

@@ -1 +1 @@
-version = "0.1.40"
+version = "0.1.41"
Model catalog (BentoML listing):

@@ -3,13 +3,13 @@
 import csv
 import json
 import os
-import shutil
-import subprocess
 
 from ... import ErsiliaBase
 from ...db.hubdata.interfaces import JsonModelsInterface
-from ...default import BENTOML_PATH, MODEL_SOURCE_FILE, TableConstants
+from ...default import MODEL_SOURCE_FILE, TableConstants
+from ...tools.bentoml.exceptions import BentoMLException
 from ...utils.identifiers.model import ModelIdentifier
+from ...utils.terminal import run_command
 from .card import ModelCard
 
 try:
@@ -357,26 +357,37 @@ def bentoml(self) -> CatalogTable:
             The catalog table containing the models available as BentoServices.
         """
         try:
-            result = subprocess.run(
-                ["bentoml", "list"], stdout=subprocess.PIPE, env=os.environ, timeout=10
-            )
-        except Exception:
-            shutil.rmtree(BENTOML_PATH)
-            return None
-        result = [r for r in result.stdout.decode("utf-8").split("\n") if r]
-        if len(result) == 1:
-            return
-        columns = ["BENTO_SERVICE", "AGE", "APIS", "ARTIFACTS"]
-        header = result[0]
-        values = result[1:]
-        cut_idxs = []
-        for col in columns:
-            cut_idxs += [header.find(col)]
-        R = []
-        for row in values:
-            r = []
-            for i, idx in enumerate(zip(cut_idxs, cut_idxs[1:] + [None])):
-                r += [row[idx[0] : idx[1]].rstrip()]
-            R += [[r[0].split(":")[0]] + r]
-        columns = ["Identifier"] + columns
-        return CatalogTable(data=R, columns=columns)
+            stdout, stderr, returncode = run_command(["bentoml", "list"], quiet=True)
+            if returncode != 0:
+                raise BentoMLException(f"BentoML list failed: {stderr}")
+
+            # Process stdout to build CatalogTable
+            output_lines = stdout.split("\n")
+            if not output_lines or len(output_lines) == 1:
+                return CatalogTable(data=[], columns=[])  # Return empty table
+
+            # Extract columns and values
+            columns = ["BENTO_SERVICE", "AGE", "APIS", "ARTIFACTS"]
+            header = output_lines[0]
+            values = output_lines[1:]
+
+            # Parse table data
+            cut_idxs = [header.find(col) for col in columns]
+            R = []
+            for row in values:
+                r = []
+                for i, idx in enumerate(zip(cut_idxs, cut_idxs[1:] + [None])):
+                    r.append(
+                        row[idx[0] : idx[1]].rstrip()
+                        if idx[1]
+                        else row[idx[0] :].rstrip()
+                    )
+                R.append([r[0].split(":")[0]] + r)
+
+            return CatalogTable(data=R, columns=["Identifier"] + columns)
+
+        except BentoMLException:
+            raise
+        except Exception as e:
+            self.logger.error(f"Unexpected error: {str(e)}")
+            raise BentoMLException(f"Failed to fetch BentoML models: {str(e)}")

Review comment (on the `columns = ["BENTO_SERVICE", "AGE", "APIS", "ARTIFACTS"]` line): We can move this to …
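A side note on the parsing this hunk keeps: `bentoml list` prints a fixed-width table, and each cell is recovered by slicing rows at the offsets where the column titles appear in the header. A standalone sketch of that technique (the sample output and model id are invented for illustration):

```python
# Fixed-width table parsing, as used in bentoml() above.
# The sample output is hypothetical; real `bentoml list` output varies.
sample = (
    "BENTO_SERVICE          AGE          APIS                  ARTIFACTS\n"
    "eos4e40:20230101120000  2 days ago  run<BentoServiceApi>  model<Artifact>"
)

columns = ["BENTO_SERVICE", "AGE", "APIS", "ARTIFACTS"]
lines = [ln for ln in sample.split("\n") if ln]
header, values = lines[0], lines[1:]

# Each column starts where its title occurs in the header row;
# a cell is the slice between consecutive start offsets (None = end of line).
cut_idxs = [header.find(col) for col in columns]
rows = []
for row in values:
    cells = [
        row[start:end].rstrip()
        for start, end in zip(cut_idxs, cut_idxs[1:] + [None])
    ]
    # Prepend the bare model identifier (the part before ":").
    rows.append([cells[0].split(":")[0]] + cells)

print(rows[0][0])  # -> eos4e40
```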
Model fetcher:

@@ -10,6 +10,7 @@
 from ...hub.delete.delete import ModelFullDeleter
 from ...hub.fetch.actions.template_resolver import TemplateResolver
 from ...setup.requirements import check_bentoml
+from ...tools.bentoml.exceptions import BentoMLException
 from ...utils.exceptions_utils.fetch_exceptions import (
     NotInstallableWithBentoML,
     NotInstallableWithFastAPI,
@@ -187,6 +188,7 @@ def _fetch_from_fastapi(self):
     def _fetch_from_bentoml(self):
         self.logger.debug("Fetching using BentoML")
         self.check_bentoml()
+
         fetch = importlib.import_module("ersilia.hub.fetch.fetch_bentoml")
         mf = fetch.ModelFetcherFromBentoML(
             config_json=self.config_json,
@@ -199,10 +201,11 @@ def _fetch_from_bentoml(self):
             force_from_github=self.force_from_github,
             force_from_s3=self.force_from_s3,
         )
+
+        # Check if the model can be installed with BentoML
         if mf.seems_installable(model_id=self.model_id):
             mf.fetch(model_id=self.model_id)
         else:
-            self.logger.debug("Not installable with BentoML")
             raise NotInstallableWithBentoML(model_id=self.model_id)
 
     @throw_ersilia_exception()

Review comment (on the removed `self.logger.debug("Not installable with BentoML")` line): We can retain this log line.
@@ -363,36 +366,40 @@ async def fetch(self, model_id: str) -> bool:
         fetcher = ModelFetcher(config_json=config)
         success = await fetcher.fetch(model_id="eosxxxx")
         """
-        fr = await self._fetch(model_id)
-        if fr.fetch_success:
-            try:
-                self._standard_csv_example(model_id)
-            except StandardModelExampleError:
-                self.logger.debug("Standard model example failed, deleting artifacts")
-                do_delete = yes_no_input(
-                    "Do you want to delete the model artifacts? [Y/n]",
-                    default_answer="Y",
-                )
-                if do_delete:
-                    md = ModelFullDeleter(overwrite=False)
-                    md.delete(model_id)
-                return FetchResult(
-                    fetch_success=False,
-                    reason="Could not successfully run a standard example from the model.",
-                )
-            else:
-                self.logger.debug("Writing model source to file")
-                model_source_file = os.path.join(
-                    self._model_path(model_id), MODEL_SOURCE_FILE
-                )
-                try:
-                    os.makedirs(self._model_path(model_id), exist_ok=True)
-                except OSError as error:
-                    self.logger.error(f"Error during folder creation: {error}")
-                with open(model_source_file, "w") as f:
-                    f.write(self.model_source)
-                return FetchResult(
-                    fetch_success=True, reason="Model fetched successfully"
-                )
-        return fr
+        try:
+            fr = await self._fetch(model_id)
+            if not fr.fetch_success:
+                return fr
+
+            self._standard_csv_example(model_id)
+            self.logger.debug("Writing model source to file")
+            model_source_file = os.path.join(
+                self._model_path(model_id), MODEL_SOURCE_FILE
+            )
+            try:
+                os.makedirs(self._model_path(model_id), exist_ok=True)
+            except OSError as error:
+                self.logger.error(f"Error during folder creation: {error}")
+            with open(model_source_file, "w") as f:
+                f.write(self.model_source)
+
+            return FetchResult(fetch_success=True, reason="Model fetched successfully")
+
+        except (StandardModelExampleError, BentoMLException) as err:
+            self.logger.debug(f"{type(err).__name__} occurred: {str(err)}")
+            do_delete = yes_no_input(
+                "Do you want to delete the model artifacts? [Y/n]",
+                default_answer="Y",
+            )
+            if do_delete:
+                md = ModelFullDeleter(overwrite=False)
+                md.delete(model_id)
+                self.logger.info(
+                    f"✅ Model '{model_id}' artifacts have been successfully deleted."
+                )
+            else:
+                return fr
+
+            reason = (
+                str(err) if str(err) else "An unknown error occurred during fetching."
+            )
+            return FetchResult(fetch_success=False, reason=reason)
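Building on the docstring example in this hunk, a minimal sketch of what callers now observe on failure (`ModelFetcher`, `FetchResult`, and the placeholder id `eosxxxx` come from the diff itself; the import path and `config_json=None` are assumptions):

```python
import asyncio

# Import path assumed from the package layout implied by the diff.
from ersilia.hub.fetch.fetch import ModelFetcher

async def main():
    fetcher = ModelFetcher(config_json=None)  # assumed: default config
    fr = await fetcher.fetch(model_id="eosxxxx")  # placeholder id from the docstring
    # With this PR, BentoML and standard-example failures no longer escape as
    # raw exceptions; they come back as FetchResult(fetch_success=False, reason=...)
    # after the user has been offered cleanup of the partial artifacts.
    if not fr.fetch_success:
        print(f"Fetch failed: {fr.reason}")

asyncio.run(main())
```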
Terminal utilities:

@@ -3,6 +3,7 @@
 import os
 import shutil
 import subprocess
+import sys
 
 try:
     from inputimeout import TimeoutOccurred, inputimeout
@@ -38,35 +39,40 @@ def is_quiet():
 
 def run_command(cmd, quiet=None):
     """
-    Run a shell command.
+    Run a shell command and return stdout, stderr, and return code.
 
     Parameters
     ----------
     cmd : str or list
         The command to run.
     quiet : bool, optional
-        Whether to run the command in quiet mode. Default is None.
+        Whether to run the command in quiet mode. Defaults to `is_quiet()`.
     """
     if quiet is None:
         quiet = is_quiet()
-    if type(cmd) == str:
-        if quiet:
-            with open(os.devnull, "w") as fp:
-                subprocess.Popen(
-                    cmd, stdout=fp, stderr=fp, shell=True, env=os.environ
-                ).wait()
-        else:
-            subprocess.Popen(cmd, shell=True, env=os.environ).wait()
-    else:
-        if quiet:
-            subprocess.check_call(
-                cmd,
-                stdout=subprocess.DEVNULL,
-                stderr=subprocess.DEVNULL,
-                env=os.environ,
-            )
-        else:
-            subprocess.check_call(cmd, env=os.environ)
+
+    # Run the command and capture outputs
+    result = subprocess.run(
+        cmd,
+        shell=isinstance(cmd, str),
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        env=os.environ,
+    )
+
+    # Decode outputs
+    stdout = result.stdout.strip()
+    stderr = result.stderr.strip()
+    returncode = result.returncode
+
+    # Log outputs if not in quiet mode
+    if not quiet:
+        if stdout:
+            print(stdout)
+        if stderr:
+            print(stderr, file=sys.stderr)
+
+    return stdout, stderr, returncode
 
 
 def run_command_check_output(cmd):

Review comment (on the `stdout=subprocess.PIPE` line): Why are we piping both stdout and stderr? Why not set both to …
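For reference on the piping question: on Python 3.7+, `capture_output=True` is shorthand for setting both streams to `subprocess.PIPE`, so the two calls below are equivalent:

```python
import subprocess

# Equivalent on Python 3.7+: capture_output=True just sets both pipes.
a = subprocess.run(
    ["echo", "hello"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)
b = subprocess.run(["echo", "hello"], capture_output=True, text=True)
assert a.stdout == b.stdout == "hello\n"
```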
Review comment: Love this implementation, thanks!
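A quick usage sketch of the new `run_command` contract (the command shown is illustrative; the module path follows the `from ...utils.terminal import run_command` import in the catalog diff):

```python
from ersilia.utils.terminal import run_command

# run_command now returns a (stdout, stderr, returncode) triple
# instead of just waiting on the process.
stdout, stderr, returncode = run_command(["git", "--version"], quiet=True)
if returncode != 0:
    raise RuntimeError(f"Command failed: {stderr}")
print(stdout)  # e.g. "git version 2.43.0"
```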