diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index fdb9d6b82..8559e8a29 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,7 +8,7 @@ default_language_version:
   python: python3.11 # NOTE: sync with .python-version-default
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.5.1"
+    rev: "v0.5.2"
     hooks:
       - id: ruff
         alias: r
diff --git a/src/openllm/__main__.py b/src/openllm/__main__.py
index e66c34312..5d9a8d678 100644
--- a/src/openllm/__main__.py
+++ b/src/openllm/__main__.py
@@ -22,42 +22,35 @@ from openllm.repo import app as repo_app
 
 app = OpenLLMTyper(
-    help="`openllm hello` to get started. "
-    "OpenLLM is a CLI tool to manage and deploy open source LLMs and"
-    " get an OpenAI API compatible chat server in seconds."
+    help='`openllm hello` to get started. '
+    'OpenLLM is a CLI tool to manage and deploy open source LLMs and'
+    ' get an OpenAI API compatible chat server in seconds.'
 )
 
-app.add_typer(repo_app, name="repo")
-app.add_typer(model_app, name="model")
-app.add_typer(clean_app, name="clean")
+app.add_typer(repo_app, name='repo')
+app.add_typer(model_app, name='model')
+app.add_typer(clean_app, name='clean')
 
 
 def _select_bento_name(models: list[BentoInfo], target: DeploymentTarget):
     from tabulate import tabulate
 
     options = []
-    model_infos = [
-        (model.repo.name, model.name, can_run(model, target)) for model in models
-    ]
+    model_infos = [(model.repo.name, model.name, can_run(model, target)) for model in models]
     model_name_groups = defaultdict(lambda: 0.0)
     for repo, name, score in model_infos:
         model_name_groups[(repo, name)] += score
-    table_data = [
-        (name, repo, CHECKED if score > 0 else "")
-        for (repo, name), score in model_name_groups.items()
-    ]
+    table_data = [(name, repo, CHECKED if score > 0 else '') for (repo, name), score in model_name_groups.items()]
     if not table_data:
-        output("No model found", style="red")
+        output('No model found', style='red')
         raise typer.Exit(1)
-    table = tabulate(table_data, headers=["model", "repo", "locally runnable"]).split(
-        "\n"
-    )
-    headers = f"{table[0]}\n {table[1]}"
+    table = tabulate(table_data, headers=['model', 'repo', 'locally runnable']).split('\n')
+    headers = f'{table[0]}\n {table[1]}'
     options.append(questionary.Separator(headers))
     for table_data, table_line in zip(table_data, table[2:]):
         options.append(questionary.Choice(table_line, value=table_data[:2]))
-    selected = questionary.select("Select a model", options).ask()
+    selected = questionary.select('Select a model', options).ask()
     if selected is None:
         raise typer.Exit(1)
     return selected
@@ -67,26 +60,24 @@ def _select_bento_version(models, target, bento_name, repo):
    from tabulate import tabulate
 
     model_infos = [
-        [model, can_run(model, target)]
-        for model in models
-        if model.name == bento_name and model.repo.name == repo
+        [model, can_run(model, target)] for model in models if model.name == bento_name and model.repo.name == repo
     ]
     table_data = [
-        [model.tag, CHECKED if score > 0 else ""]
+        [model.tag, CHECKED if score > 0 else '']
         for model, score in model_infos
         if model.name == bento_name and model.repo.name == repo
     ]
     if not table_data:
-        output(f"No model found for {bento_name} in {repo}", style="red")
+        output(f'No model found for {bento_name} in {repo}', style='red')
         raise typer.Exit(1)
-    table = tabulate(table_data, headers=["version", "locally runnable"]).split("\n")
+    table = tabulate(table_data, headers=['version', 'locally runnable']).split('\n')
     options = []
options.append(questionary.Separator(f"{table[0]}\n {table[1]}")) + options.append(questionary.Separator(f'{table[0]}\n {table[1]}')) for table_data, table_line in zip(model_infos, table[2:]): options.append(questionary.Choice(table_line, value=table_data)) - selected = questionary.select("Select a version", options).ask() + selected = questionary.select('Select a version', options).ask() if selected is None: raise typer.Exit(1) return selected @@ -98,7 +89,7 @@ def _select_target(bento, targets): options = [] targets.sort(key=lambda x: can_run(bento, x), reverse=True) if not targets: - output("No available instance type, check your bentocloud account", style="red") + output('No available instance type, check your bentocloud account', style='red') raise typer.Exit(1) table = tabulate( @@ -106,18 +97,18 @@ def _select_target(bento, targets): [ target.name, target.accelerators_repr, - f"${target.price}", - CHECKED if can_run(bento, target) else "insufficient res.", + f'${target.price}', + CHECKED if can_run(bento, target) else 'insufficient res.', ] for target in targets ], - headers=["instance type", "accelerator", "price/hr", "deployable"], - ).split("\n") - options.append(questionary.Separator(f"{table[0]}\n {table[1]}")) + headers=['instance type', 'accelerator', 'price/hr', 'deployable'], + ).split('\n') + options.append(questionary.Separator(f'{table[0]}\n {table[1]}')) for target, line in zip(targets, table[2:]): - options.append(questionary.Choice(f"{line}", value=target)) - selected = questionary.select("Select an instance type", options).ask() + options.append(questionary.Choice(f'{line}', value=target)) + selected = questionary.select('Select an instance type', options).ask() if selected is None: raise typer.Exit(1) return selected @@ -126,102 +117,84 @@ def _select_target(bento, targets): def _select_action(bento: BentoInfo, score): if score > 0: options = [ - questionary.Separator("Available actions"), - questionary.Choice( - "0. Run the model in terminal", value="run", shortcut_key="0" - ), - questionary.Separator(f" $ openllm run {bento}"), - questionary.Separator(" "), + questionary.Separator('Available actions'), + questionary.Choice('0. Run the model in terminal', value='run', shortcut_key='0'), + questionary.Separator(f' $ openllm run {bento}'), + questionary.Separator(' '), + questionary.Choice('1. Serve the model locally and get a chat server', value='serve', shortcut_key='1'), + questionary.Separator(f' $ openllm serve {bento}'), + questionary.Separator(' '), questionary.Choice( - "1. Serve the model locally and get a chat server", - value="serve", - shortcut_key="1", + '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2' ), - questionary.Separator(f" $ openllm serve {bento}"), - questionary.Separator(" "), - questionary.Choice( - "2. Deploy the model to bentocloud and get a scalable chat server", - value="deploy", - shortcut_key="2", - ), - questionary.Separator(f" $ openllm deploy {bento}"), + questionary.Separator(f' $ openllm deploy {bento}'), ] else: options = [ - questionary.Separator("Available actions"), + questionary.Separator('Available actions'), questionary.Choice( - "0. Run the model in terminal", - value="run", - disabled="insufficient res.", - shortcut_key="0", + '0. 
+                '0. Run the model in terminal', value='run', disabled='insufficient res.', shortcut_key='0'
             ),
-            questionary.Separator(f" $ openllm run {bento}"),
-            questionary.Separator(" "),
+            questionary.Separator(f' $ openllm run {bento}'),
+            questionary.Separator(' '),
             questionary.Choice(
-                "1. Serve the model locally and get a chat server",
-                value="serve",
-                disabled="insufficient res.",
-                shortcut_key="1",
+                '1. Serve the model locally and get a chat server',
+                value='serve',
+                disabled='insufficient res.',
+                shortcut_key='1',
             ),
-            questionary.Separator(f" $ openllm serve {bento}"),
-            questionary.Separator(" "),
+            questionary.Separator(f' $ openllm serve {bento}'),
+            questionary.Separator(' '),
             questionary.Choice(
-                "2. Deploy the model to bentocloud and get a scalable chat server",
-                value="deploy",
-                shortcut_key="2",
+                '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2'
             ),
-            questionary.Separator(f" $ openllm deploy {bento}"),
+            questionary.Separator(f' $ openllm deploy {bento}'),
         ]
-    action = questionary.select("Select an action", options).ask()
+    action = questionary.select('Select an action', options).ask()
     if action is None:
         raise typer.Exit(1)
-    if action == "run":
+    if action == 'run':
         try:
             port = random.randint(30000, 40000)
             local_run(bento, port=port)
         finally:
-            output("\nUse this command to run the action again:", style="green")
-            output(f" $ openllm run {bento}", style="orange")
-    elif action == "serve":
+            output('\nUse this command to run the action again:', style='green')
+            output(f' $ openllm run {bento}', style='orange')
+    elif action == 'serve':
         try:
             local_serve(bento)
         finally:
-            output("\nUse this command to run the action again:", style="green")
-            output(f" $ openllm serve {bento}", style="orange")
-    elif action == "deploy":
+            output('\nUse this command to run the action again:', style='green')
+            output(f' $ openllm serve {bento}', style='orange')
+    elif action == 'deploy':
         ensure_cloud_context()
         targets = get_cloud_machine_spec()
         target = _select_target(bento, targets)
         try:
             cloud_deploy(bento, target)
         finally:
-            output("\nUse this command to run the action again:", style="green")
-            output(
-                f" $ openllm deploy {bento} --instance-type {target.name}",
-                style="orange",
-            )
+            output('\nUse this command to run the action again:', style='green')
+            output(f' $ openllm deploy {bento} --instance-type {target.name}', style='orange')
 
 
-@app.command(help="get started interactively")
+@app.command(help='get started interactively')
 def hello():
     INTERACTIVE.set(True)
     # VERBOSE_LEVEL.set(20)
 
     target = get_local_machine_spec()
-    output(f" Detected Platform: {target.platform}", style="green")
+    output(f' Detected Platform: {target.platform}', style='green')
     if target.accelerators:
-        output(" Detected Accelerators: ", style="green")
+        output(' Detected Accelerators: ', style='green')
         for a in target.accelerators:
-            output(f" - {a.model} {a.memory_size}GB", style="green")
+            output(f' - {a.model} {a.memory_size}GB', style='green')
     else:
-        output(" Detected Accelerators: None", style="yellow")
+        output(' Detected Accelerators: None', style='yellow')
 
     models = list_bento()
     if not models:
-        output(
-            "No model found, you probably need to update the model repo:", style="red"
-        )
-        output(" $ openllm repo update", style="orange")
+        output('No model found, you probably need to update the model repo:', style='red')
+        output(' $ openllm repo update', style='orange')
         raise typer.Exit(1)
 
     bento_name, repo = _select_bento_name(models, target)
@@ -229,12 +202,9 @@ def hello():
     _select_action(bento, score)
 
 
-@app.command(help="start an OpenAI API compatible chat server and chat in browser")
+@app.command(help='start an OpenAI API compatible chat server and chat in browser')
 def serve(
-    model: Annotated[str, typer.Argument()] = "",
-    repo: Optional[str] = None,
-    port: int = 3000,
-    verbose: bool = False,
+    model: Annotated[str, typer.Argument()] = '', repo: Optional[str] = None, port: int = 3000, verbose: bool = False
 ):
     if verbose:
         VERBOSE_LEVEL.set(20)
@@ -243,9 +213,9 @@ def serve(
     local_serve(bento, port=port)
 
 
-@app.command(help="run the model and chat in terminal")
+@app.command(help='run the model and chat in terminal')
 def run(
-    model: Annotated[str, typer.Argument()] = "",
+    model: Annotated[str, typer.Argument()] = '',
     repo: Optional[str] = None,
     port: Optional[int] = None,
     timeout: int = 600,
@@ -260,11 +230,9 @@ def run(
     local_run(bento, port=port, timeout=timeout)
 
 
-@app.command(
-    help="deploy an production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)"
-)
+@app.command(help='deploy a production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)')
 def deploy(
-    model: Annotated[str, typer.Argument()] = "",
+    model: Annotated[str, typer.Argument()] = '',
     instance_type: Optional[str] = None,
     repo: Optional[str] = None,
     verbose: bool = False,
@@ -279,10 +247,10 @@ def deploy(
     targets = filter(lambda x: can_run(bento, x) > 0, targets)
     targets = sorted(targets, key=lambda x: can_run(bento, x), reverse=True)
     if not targets:
-        output("No available instance type, check your bentocloud account", style="red")
+        output('No available instance type, check your bentocloud account', style='red')
         raise typer.Exit(1)
     target = targets[0]
-    output(f"Recommended instance type: {target.name}", style="green")
+    output(f'Recommended instance type: {target.name}', style='green')
     cloud_deploy(bento, target)
 
 
@@ -290,12 +258,9 @@ def deploy(
 def typer_callback(
     verbose: int = 0,
     do_not_track: bool = typer.Option(
-        False,
-        "--do-not-track",
-        help="Whether to disable usage tracking",
-        envvar=DO_NOT_TRACK,
+        False, '--do-not-track', help='Whether to disable usage tracking', envvar=DO_NOT_TRACK
     ),
-    version: bool = typer.Option(False, "--version", "-v", help="Show version"),
+    version: bool = typer.Option(False, '--version', '-v', help='Show version'),
 ):
     if verbose:
         VERBOSE_LEVEL.set(verbose)
@@ -308,5 +273,5 @@ def typer_callback(
     os.environ[DO_NOT_TRACK] = str(True)
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     app()
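
Note: the _select_bento_name, _select_bento_version, and _select_target helpers reformatted
above all share one pattern: render rows with tabulate, reuse the table's header lines as a
non-selectable questionary.Separator, and attach one questionary.Choice per rendered row, with
the underlying object as the choice's value. A minimal standalone sketch of that pattern,
assuming only that questionary and tabulate are installed (the repo/model rows, scores, and
the '*' marker are illustrative placeholders, not values from the OpenLLM codebase):

    # Sketch of the tabulate + questionary selection pattern used above.
    from collections import defaultdict

    import questionary
    from tabulate import tabulate

    # (repo, model, runnability score) triples; stand-ins for can_run() results.
    model_infos = [('default', 'llama3.1-8b', 1.0), ('default', 'phi3-mini', 0.0)]

    # Aggregate scores per (repo, name) pair, mirroring _select_bento_name.
    model_name_groups = defaultdict(lambda: 0.0)
    for repo, name, score in model_infos:
        model_name_groups[(repo, name)] += score

    # '*' stands in for the CHECKED marker used in the real code.
    table_data = [(name, repo, '*' if score > 0 else '') for (repo, name), score in model_name_groups.items()]
    table = tabulate(table_data, headers=['model', 'repo', 'locally runnable']).split('\n')

    # The two header lines become a separator that renders but cannot be
    # highlighted; each remaining rendered row becomes a selectable choice
    # carrying the underlying (name, repo) tuple as its value.
    options = [questionary.Separator(f'{table[0]}\n {table[1]}')]
    for row, line in zip(table_data, table[2:]):
        options.append(questionary.Choice(line, value=row[:2]))

    selected = questionary.select('Select a model', options).ask()  # None if cancelled

Separator entries are what let the prompt display aligned column headers directly above the
selectable rows.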
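
Likewise, the compacted typer.Option calls in typer_callback follow standard typer usage: the
default value comes first, then the flag declarations, then keyword arguments such as help and
envvar. A runnable sketch of that pattern; the app, the echoed output, and the
EXAMPLE_DO_NOT_TRACK environment variable are placeholders, not part of OpenLLM:

    # Sketch of the one-line typer.Option style used in typer_callback above.
    import typer

    app = typer.Typer()


    @app.command()
    def main(
        do_not_track: bool = typer.Option(
            False, '--do-not-track', help='Whether to disable usage tracking', envvar='EXAMPLE_DO_NOT_TRACK'
        ),
        version: bool = typer.Option(False, '--version', '-v', help='Show version'),
    ):
        # With a single command, typer runs it directly: `python sketch.py -v`
        # or `EXAMPLE_DO_NOT_TRACK=1 python sketch.py`.
        typer.echo(f'do_not_track={do_not_track}, version={version}')


    if __name__ == '__main__':
        app()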