Skip to content

Commit

Permalink
Support vllm openai api server (#694)
Browse files Browse the repository at this point in the history
* Support vllm openai api server

* make terraform lint happy

---------

Co-authored-by: Ming Zhu <mingzhuv@google.com>
  • Loading branch information
zmvictor and zmvictor committed Jun 5, 2024
1 parent 66ed112 commit f3e12b3
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 2 deletions.
1 change: 1 addition & 0 deletions benchmarks/inference-server/vllm/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ resource "kubernetes_manifest" "default" {
namespace = var.namespace
model_id = var.model_id
gpu_count = var.gpu_count
swap_space = var.swap_space
ksa = var.ksa
hugging_face_token_secret_list = local.hugging_face_token_secret == null ? [] : [local.hugging_face_token_secret]
}))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ spec:
ports:
- containerPort: 80
image: "vllm/vllm-openai:v0.3.3"
command: ["python3", "-m", "vllm.entrypoints.api_server"]
args: ["--model", "${model_id}", "--tensor-parallel-size", "${gpu_count}", "--port", "80"]
command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
args: ["--model", "${model_id}", "--tensor-parallel-size", "${gpu_count}", "--port", "80", "--swap-space", "${swap_space}", "--disable-log-requests"]
env:
- name: PORT
value: 80
Expand Down
11 changes: 11 additions & 0 deletions benchmarks/inference-server/vllm/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,17 @@ variable "gpu_count" {
}
}

# CPU swap space, in GiB per GPU, handed to the vLLM server through its
# `--swap-space` command-line flag by the deployment template.
#
# Two validations are applied at plan time so that a bad value fails fast in
# Terraform instead of surfacing later as a crashing container:
#   1. the value must not be negative;
#   2. the value must be a whole number. NOTE(review): the deployment template
#      pins the vllm/vllm-openai:v0.3.3 image, whose CLI is understood to parse
#      `--swap-space` as an integer; relax this rule if the image is upgraded
#      to a release that accepts fractional GiB.
variable "swap_space" {
  description = "The size (GiB) of CPU memory per GPU to use as swap space. See https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/llm.py#L65 for more details."
  type        = number
  nullable    = false
  default     = 4

  validation {
    condition = var.swap_space >= 0
    # Full sentence, capitalised and ending with a period: Terraform releases
    # before 1.2 reject validation messages that do not follow this shape.
    error_message = "Swap space must be greater than or equal to 0."
  }

  validation {
    # `type = number` also admits fractional values such as 4.5; comparing
    # against floor() rejects them without changing accepted integer inputs.
    condition     = floor(var.swap_space) == var.swap_space
    error_message = "Swap space must be a whole number of GiB."
  }
}

variable "ksa" {
description = "Kubernetes Service Account used for workload."
type = string
Expand Down

0 comments on commit f3e12b3

Please sign in to comment.