ci: pre-commit autoupdate [pre-commit.ci] #1091

Merged · 2 commits · Oct 1, 2024
Changes from all commits
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -8,7 +8,7 @@ default_language_version:
   python: python3.11 # NOTE: sync with .python-version-default
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.6.7"
+    rev: "v0.6.8"
     hooks:
       - id: ruff
         alias: r
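Note: version bumps like this one are exactly what pre-commit.ci automates. Assuming pre-commit is installed locally, the same update can be reproduced and checked by hand:

  $ pre-commit autoupdate        # rewrite each hook's rev to its latest tag
  $ pre-commit run --all-files   # run the updated hooks across the whole repo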
191 changes: 77 additions & 114 deletions src/openllm/__main__.py
@@ -9,57 +9,48 @@
 import questionary
 import typer

-from openllm.accelerator_spec import (DeploymentTarget, can_run,
-                                      get_local_machine_spec)
+from openllm.accelerator_spec import DeploymentTarget, can_run, get_local_machine_spec
 from openllm.analytic import DO_NOT_TRACK, OpenLLMTyper
 from openllm.clean import app as clean_app
 from openllm.cloud import deploy as cloud_deploy
 from openllm.cloud import ensure_cloud_context, get_cloud_machine_spec
-from openllm.common import (CHECKED, INTERACTIVE, VERBOSE_LEVEL, BentoInfo,
-                            output)
+from openllm.common import CHECKED, INTERACTIVE, VERBOSE_LEVEL, BentoInfo, output
 from openllm.local import run as local_run
 from openllm.local import serve as local_serve
 from openllm.model import app as model_app
 from openllm.model import ensure_bento, list_bento
 from openllm.repo import app as repo_app

 app = OpenLLMTyper(
-    help="`openllm hello` to get started. "
-    "OpenLLM is a CLI tool to manage and deploy open source LLMs and"
-    " get an OpenAI API compatible chat server in seconds."
+    help='`openllm hello` to get started. '
+    'OpenLLM is a CLI tool to manage and deploy open source LLMs and'
+    ' get an OpenAI API compatible chat server in seconds.'
 )

-app.add_typer(repo_app, name="repo")
-app.add_typer(model_app, name="model")
-app.add_typer(clean_app, name="clean")
+app.add_typer(repo_app, name='repo')
+app.add_typer(model_app, name='model')
+app.add_typer(clean_app, name='clean')


 def _select_bento_name(models: list[BentoInfo], target: DeploymentTarget):
     from tabulate import tabulate

     options = []
-    model_infos = [
-        (model.repo.name, model.name, can_run(model, target)) for model in models
-    ]
+    model_infos = [(model.repo.name, model.name, can_run(model, target)) for model in models]
     model_name_groups = defaultdict(lambda: 0.0)
     for repo, name, score in model_infos:
         model_name_groups[repo, name] += score
-    table_data = [
-        (name, repo, CHECKED if score > 0 else "")
-        for (repo, name), score in model_name_groups.items()
-    ]
+    table_data = [(name, repo, CHECKED if score > 0 else '') for (repo, name), score in model_name_groups.items()]
     if not table_data:
-        output("No model found", style="red")
+        output('No model found', style='red')
         raise typer.Exit(1)
-    table = tabulate(table_data, headers=["model", "repo", "locally runnable"]).split(
-        "\n"
-    )
-    headers = f"{table[0]}\n {table[1]}"
+    table = tabulate(table_data, headers=['model', 'repo', 'locally runnable']).split('\n')
+    headers = f'{table[0]}\n {table[1]}'

     options.append(questionary.Separator(headers))
     for table_data, table_line in zip(table_data, table[2:]):
         options.append(questionary.Choice(table_line, value=table_data[:2]))
-    selected = questionary.select("Select a model", options).ask()
+    selected = questionary.select('Select a model', options).ask()
     if selected is None:
         raise typer.Exit(1)
     return selected
@@ -69,26 +60,24 @@ def _select_bento_version(models, target, bento_name, repo):
     from tabulate import tabulate

     model_infos = [
-        [model, can_run(model, target)]
-        for model in models
-        if model.name == bento_name and model.repo.name == repo
+        [model, can_run(model, target)] for model in models if model.name == bento_name and model.repo.name == repo
     ]

     table_data = [
-        [model.tag, CHECKED if score > 0 else ""]
+        [model.tag, CHECKED if score > 0 else '']
         for model, score in model_infos
         if model.name == bento_name and model.repo.name == repo
     ]
     if not table_data:
-        output(f"No model found for {bento_name} in {repo}", style="red")
+        output(f'No model found for {bento_name} in {repo}', style='red')
         raise typer.Exit(1)
-    table = tabulate(table_data, headers=["version", "locally runnable"]).split("\n")
+    table = tabulate(table_data, headers=['version', 'locally runnable']).split('\n')

     options = []
-    options.append(questionary.Separator(f"{table[0]}\n {table[1]}"))
+    options.append(questionary.Separator(f'{table[0]}\n {table[1]}'))
     for table_data, table_line in zip(model_infos, table[2:]):
         options.append(questionary.Choice(table_line, value=table_data))
-    selected = questionary.select("Select a version", options).ask()
+    selected = questionary.select('Select a version', options).ask()
     if selected is None:
         raise typer.Exit(1)
     return selected
@@ -100,26 +89,26 @@ def _select_target(bento, targets):
     options = []
     targets.sort(key=lambda x: can_run(bento, x), reverse=True)
     if not targets:
-        output("No available instance type, check your bentocloud account", style="red")
+        output('No available instance type, check your bentocloud account', style='red')
         raise typer.Exit(1)

     table = tabulate(
         [
             [
                 target.name,
                 target.accelerators_repr,
-                f"${target.price}",
-                CHECKED if can_run(bento, target) else "insufficient res.",
+                f'${target.price}',
+                CHECKED if can_run(bento, target) else 'insufficient res.',
             ]
             for target in targets
         ],
-        headers=["instance type", "accelerator", "price/hr", "deployable"],
-    ).split("\n")
-    options.append(questionary.Separator(f"{table[0]}\n {table[1]}"))
+        headers=['instance type', 'accelerator', 'price/hr', 'deployable'],
+    ).split('\n')
+    options.append(questionary.Separator(f'{table[0]}\n {table[1]}'))

     for target, line in zip(targets, table[2:]):
-        options.append(questionary.Choice(f"{line}", value=target))
-    selected = questionary.select("Select an instance type", options).ask()
+        options.append(questionary.Choice(f'{line}', value=target))
+    selected = questionary.select('Select an instance type', options).ask()
     if selected is None:
         raise typer.Exit(1)
     return selected
@@ -128,115 +117,94 @@ def _select_target(bento, targets):
 def _select_action(bento: BentoInfo, score):
     if score > 0:
         options = [
-            questionary.Separator("Available actions"),
-            questionary.Choice(
-                "0. Run the model in terminal", value="run", shortcut_key="0"
-            ),
-            questionary.Separator(f" $ openllm run {bento}"),
-            questionary.Separator(" "),
+            questionary.Separator('Available actions'),
+            questionary.Choice('0. Run the model in terminal', value='run', shortcut_key='0'),
+            questionary.Separator(f' $ openllm run {bento}'),
+            questionary.Separator(' '),
+            questionary.Choice('1. Serve the model locally and get a chat server', value='serve', shortcut_key='1'),
+            questionary.Separator(f' $ openllm serve {bento}'),
+            questionary.Separator(' '),
             questionary.Choice(
-                "1. Serve the model locally and get a chat server",
-                value="serve",
-                shortcut_key="1",
+                '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2'
             ),
-            questionary.Separator(f" $ openllm serve {bento}"),
-            questionary.Separator(" "),
-            questionary.Choice(
-                "2. Deploy the model to bentocloud and get a scalable chat server",
-                value="deploy",
-                shortcut_key="2",
-            ),
-            questionary.Separator(f" $ openllm deploy {bento}"),
+            questionary.Separator(f' $ openllm deploy {bento}'),
         ]
     else:
         options = [
-            questionary.Separator("Available actions"),
+            questionary.Separator('Available actions'),
             questionary.Choice(
-                "0. Run the model in terminal",
-                value="run",
-                disabled="insufficient res.",
-                shortcut_key="0",
+                '0. Run the model in terminal', value='run', disabled='insufficient res.', shortcut_key='0'
             ),
-            questionary.Separator(f" $ openllm run {bento}"),
-            questionary.Separator(" "),
+            questionary.Separator(f' $ openllm run {bento}'),
+            questionary.Separator(' '),
             questionary.Choice(
-                "1. Serve the model locally and get a chat server",
-                value="serve",
-                disabled="insufficient res.",
-                shortcut_key="1",
+                '1. Serve the model locally and get a chat server',
+                value='serve',
+                disabled='insufficient res.',
+                shortcut_key='1',
             ),
-            questionary.Separator(f" $ openllm serve {bento}"),
-            questionary.Separator(" "),
+            questionary.Separator(f' $ openllm serve {bento}'),
+            questionary.Separator(' '),
             questionary.Choice(
-                "2. Deploy the model to bentocloud and get a scalable chat server",
-                value="deploy",
-                shortcut_key="2",
+                '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2'
             ),
-            questionary.Separator(f" $ openllm deploy {bento}"),
+            questionary.Separator(f' $ openllm deploy {bento}'),
         ]
-    action = questionary.select("Select an action", options).ask()
+    action = questionary.select('Select an action', options).ask()
     if action is None:
         raise typer.Exit(1)
-    if action == "run":
+    if action == 'run':
         try:
             port = random.randint(30000, 40000)
             local_run(bento, port=port)
         finally:
-            output("\nUse this command to run the action again:", style="green")
-            output(f" $ openllm run {bento}", style="orange")
-    elif action == "serve":
+            output('\nUse this command to run the action again:', style='green')
+            output(f' $ openllm run {bento}', style='orange')
+    elif action == 'serve':
         try:
             local_serve(bento)
         finally:
-            output("\nUse this command to run the action again:", style="green")
-            output(f" $ openllm serve {bento}", style="orange")
-    elif action == "deploy":
+            output('\nUse this command to run the action again:', style='green')
+            output(f' $ openllm serve {bento}', style='orange')
+    elif action == 'deploy':
         ensure_cloud_context()
         targets = get_cloud_machine_spec()
         target = _select_target(bento, targets)
         try:
             cloud_deploy(bento, target)
         finally:
-            output("\nUse this command to run the action again:", style="green")
-            output(
-                f" $ openllm deploy {bento} --instance-type {target.name}",
-                style="orange",
-            )
+            output('\nUse this command to run the action again:', style='green')
+            output(f' $ openllm deploy {bento} --instance-type {target.name}', style='orange')


-@app.command(help="get started interactively")
+@app.command(help='get started interactively')
 def hello():
     INTERACTIVE.set(True)
     # VERBOSE_LEVEL.set(20)

     target = get_local_machine_spec()
-    output(f" Detected Platform: {target.platform}", style="green")
+    output(f' Detected Platform: {target.platform}', style='green')
     if target.accelerators:
-        output(" Detected Accelerators: ", style="green")
+        output(' Detected Accelerators: ', style='green')
         for a in target.accelerators:
-            output(f" - {a.model} {a.memory_size}GB", style="green")
+            output(f' - {a.model} {a.memory_size}GB', style='green')
     else:
-        output(" Detected Accelerators: None", style="yellow")
+        output(' Detected Accelerators: None', style='yellow')

     models = list_bento()
     if not models:
-        output(
-            "No model found, you probably need to update the model repo:", style="red"
-        )
-        output(" $ openllm repo update", style="orange")
+        output('No model found, you probably need to update the model repo:', style='red')
+        output(' $ openllm repo update', style='orange')
         raise typer.Exit(1)

     bento_name, repo = _select_bento_name(models, target)
     bento, score = _select_bento_version(models, target, bento_name, repo)
     _select_action(bento, score)


-@app.command(help="start an OpenAI API compatible chat server and chat in browser")
+@app.command(help='start an OpenAI API compatible chat server and chat in browser')
 def serve(
-    model: Annotated[str, typer.Argument()] = "",
-    repo: Optional[str] = None,
-    port: int = 3000,
-    verbose: bool = False,
+    model: Annotated[str, typer.Argument()] = '', repo: Optional[str] = None, port: int = 3000, verbose: bool = False
 ):
     if verbose:
         VERBOSE_LEVEL.set(20)
@@ -245,9 +213,9 @@ def serve(
     local_serve(bento, port=port)


-@app.command(help="run the model and chat in terminal")
+@app.command(help='run the model and chat in terminal')
 def run(
-    model: Annotated[str, typer.Argument()] = "",
+    model: Annotated[str, typer.Argument()] = '',
     repo: Optional[str] = None,
     port: Optional[int] = None,
     timeout: int = 600,
@@ -262,11 +230,9 @@ def run(
     local_run(bento, port=port, timeout=timeout)


-@app.command(
-    help="deploy an production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)"
-)
+@app.command(help='deploy an production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)')
 def deploy(
-    model: Annotated[str, typer.Argument()] = "",
+    model: Annotated[str, typer.Argument()] = '',
     instance_type: Optional[str] = None,
     repo: Optional[str] = None,
     verbose: bool = False,
@@ -281,23 +247,20 @@ def deploy(
     targets = filter(lambda x: can_run(bento, x) > 0, targets)
     targets = sorted(targets, key=lambda x: can_run(bento, x), reverse=True)
     if not targets:
-        output("No available instance type, check your bentocloud account", style="red")
+        output('No available instance type, check your bentocloud account', style='red')
         raise typer.Exit(1)
     target = targets[0]
-    output(f"Recommended instance type: {target.name}", style="green")
+    output(f'Recommended instance type: {target.name}', style='green')
     cloud_deploy(bento, target)


 @app.callback(invoke_without_command=True)
 def typer_callback(
     verbose: int = 0,
     do_not_track: bool = typer.Option(
-        False,
-        "--do-not-track",
-        help="Whether to disable usage tracking",
-        envvar=DO_NOT_TRACK,
+        False, '--do-not-track', help='Whether to disable usage tracking', envvar=DO_NOT_TRACK
     ),
-    version: bool = typer.Option(False, "--version", "-v", help="Show version"),
+    version: bool = typer.Option(False, '--version', '-v', help='Show version'),
 ):
     if verbose:
         VERBOSE_LEVEL.set(verbose)
@@ -310,5 +273,5 @@ def typer_callback(
         os.environ[DO_NOT_TRACK] = str(True)


-if __name__ == "__main__":
+if __name__ == '__main__':
     app()
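Note: nearly all of the __main__.py churn is mechanical reformatting applied alongside the ruff bump (double quotes to single quotes, short wrapped lines collapsed onto longer ones) rather than behavior changes. A ruff formatter configuration producing this style would look roughly like the sketch below; the line length is inferred from the diff, not confirmed from the repo:

  # pyproject.toml (illustrative sketch, not the repo's actual file)
  [tool.ruff]
  line-length = 119

  [tool.ruff.format]
  quote-style = "single"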