align vllm hpu version to latest vllm-fork (#1061)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
XinyaoWa authored Nov 7, 2024
1 parent 6263b51 commit e9b1645
Showing 4 changed files with 82 additions and 82 deletions.
4 changes: 2 additions & 2 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml
@@ -86,7 +86,7 @@ services:
       MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   vllm-service:
-    image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
+    image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
     container_name: vllm-gaudi-server
     ports:
       - "8007:80"
@@ -104,7 +104,7 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"
+    command: --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
   chatqna-gaudi-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
     container_name: chatqna-gaudi-backend-server
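Note: the vllm-service command no longer wraps the server in /bin/bash, presumably because the new vllm-hpu image built from vllm-fork's Dockerfile.hpu sets vLLM's OpenAI-compatible API server as its entrypoint, leaving only the server flags in compose. A minimal smoke test of the updated service, assuming the host port mapping 8007:80 above and that LLM_MODEL_ID names the served model (the prompt is hypothetical):

    curl http://localhost:8007/v1/completions \
      -H "Content-Type: application/json" \
      -d '{"model": "'"$LLM_MODEL_ID"'", "prompt": "What is Gaudi?", "max_tokens": 32}'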
12 changes: 6 additions & 6 deletions ChatQnA/docker_image_build/build.yaml
@@ -77,12 +77,6 @@ services:
       dockerfile: comps/llms/text-generation/vllm/langchain/Dockerfile
     extends: chatqna
     image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
-  llm-vllm-hpu:
-    build:
-      context: GenAIComps
-      dockerfile: comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu
-    extends: chatqna
-    image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
   llm-vllm-ray-hpu:
     build:
       context: GenAIComps
@@ -113,6 +107,12 @@ services:
       dockerfile: Dockerfile.cpu
     extends: chatqna
     image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
+  vllm-hpu:
+    build:
+      context: vllm-fork
+      dockerfile: Dockerfile.hpu
+    extends: chatqna
+    image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
   nginx:
     build:
       context: GenAIComps
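A sketch of building only the renamed image locally, under the same assumption the updated test below makes, namely that vllm-fork is cloned next to build.yaml so the vllm-fork build context resolves:

    cd ChatQnA/docker_image_build
    git clone https://github.com/HabanaAI/vllm-fork.git
    docker compose -f build.yaml build vllm-hpu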
3 changes: 2 additions & 1 deletion ChatQnA/tests/test_compose_vllm_on_gaudi.sh
@@ -17,9 +17,10 @@ ip_address=$(hostname -I | awk '{print $1}')
 function build_docker_images() {
     cd $WORKPATH/docker_image_build
     git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
+    git clone https://github.com/HabanaAI/vllm-fork.git

     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="chatqna chatqna-ui dataprep-redis retriever-redis llm-vllm-hpu nginx"
+    service_list="chatqna chatqna-ui dataprep-redis retriever-redis vllm-hpu nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
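A quick follow-up check, not part of this commit, to confirm the renamed image was produced under the expected tag:

    docker images "${REGISTRY:-opea}/vllm-hpu" --format '{{.Repository}}:{{.Tag}}'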