From c84b2cde2f55aa72d1f8be3910f36646149bc5ae Mon Sep 17 00:00:00 2001
From: lvliang-intel
Date: Sat, 23 Mar 2024 14:35:23 +0800
Subject: [PATCH] use official docker image (#10)

Signed-off-by: lvliang-intel
---
 ChatQnA/README.md                               | 11 ++++++++++-
 ChatQnA/serving/tgi_gaudi/launch_tgi_service.sh |  4 ++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/ChatQnA/README.md b/ChatQnA/README.md
index 6eb52245ad..682c49a2a1 100644
--- a/ChatQnA/README.md
+++ b/ChatQnA/README.md
@@ -3,7 +3,16 @@ This ChatQnA use case performs RAG using LangChain, Redis vectordb and Text Gene
 # Environment Setup
 To use [🤗 text-generation-inference](https://github.com/huggingface/text-generation-inference) on Habana Gaudi/Gaudi2, please follow these steps:
 
-## Build TGI Gaudi Docker Image
+## Prepare Docker
+
+Getting started is straightforward with the official Docker container. Simply pull the image using:
+
+```bash
+docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+```
+
+Alternatively, you can build the Docker image yourself with:
+
 ```bash
 bash ./serving/tgi_gaudi/build_docker.sh
 ```
diff --git a/ChatQnA/serving/tgi_gaudi/launch_tgi_service.sh b/ChatQnA/serving/tgi_gaudi/launch_tgi_service.sh
index 1f624f59fd..85a65093ea 100644
--- a/ChatQnA/serving/tgi_gaudi/launch_tgi_service.sh
+++ b/ChatQnA/serving/tgi_gaudi/launch_tgi_service.sh
@@ -27,9 +27,9 @@ volume=$PWD/data
 
 # Build the Docker run command based on the number of cards
 if [ "$num_cards" -eq 1 ]; then
-    docker_cmd="docker run -p $port_number:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_name"
+    docker_cmd="docker run -p $port_number:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi --model-id $model_name"
 else
-    docker_cmd="docker run -p $port_number:80 -v $volume:/data --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_name --sharded true --num-shard $num_cards"
+    docker_cmd="docker run -p $port_number:80 -v $volume:/data --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi --model-id $model_name --sharded true --num-shard $num_cards"
 fi
 
 # Execute the Docker run command