-
Notifications
You must be signed in to change notification settings - Fork 210
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update AgentQnA example for v1.1 release (#885)
Signed-off-by: minmin-intel <minmin.hou@intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
- Loading branch information
1 parent
ced68e1
commit 5eb3d28
Showing
17 changed files
with
212 additions
and
104 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Deployment on Xeon | ||
|
||
We deploy the retrieval tool on Xeon. For LLMs, we support OpenAI models via API calls. For instructions on using open-source LLMs, please refer to the deployment guide [here](../../../../README.md). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
25 changes: 25 additions & 0 deletions
25
AgentQnA/docker_compose/intel/hpu/gaudi/launch_tgi_gaudi.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# LLM related environment variables
# HF_CACHE_DIR and HUGGINGFACEHUB_API_TOKEN are taken from the caller's
# environment and re-exported here; LLM_MODEL_ID and NUM_SHARDS are fixed by
# this script. Presumably tgi_gaudi.yaml consumes them via ${...}
# substitution -- confirm against the compose file.
export HF_CACHE_DIR=${HF_CACHE_DIR}
if [[ -z "${HF_CACHE_DIR}" ]]; then
    # Without this guard an empty value would make `ls` list the current
    # directory and hide the misconfiguration.
    echo "HF_CACHE_DIR is not set; point it at the Hugging Face cache directory" >&2
    # `return` succeeds when the script is sourced; otherwise fall back to exit.
    return 1 2>/dev/null || exit 1
fi
# Show the cache directory contents as a quick sanity check.
ls -- "${HF_CACHE_DIR}"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
export NUM_SHARDS=4

# Start the service in the background; there is no point polling the logs
# if the container could not even be created.
if ! docker compose -f tgi_gaudi.yaml up -d; then
    echo "docker compose failed to start tgi_gaudi.yaml" >&2
    return 1 2>/dev/null || exit 1
fi

sleep 5s
echo "Waiting tgi gaudi ready"
# Poll the container logs every 5 seconds, at most max_attempts times, until
# the "Connected" marker shows up.
max_attempts=100
ready=false
n=0
until [[ "$n" -ge "$max_attempts" ]]; do
    # Snapshot stdout and stderr of the container into a local log file.
    docker logs tgi-server &> tgi-gaudi-service.log
    n=$((n+1))
    if grep -q Connected tgi-gaudi-service.log; then
        ready=true
        break
    fi
    sleep 5s
done

# Report a timeout as a failure instead of claiming success.
if [[ "$ready" != true ]]; then
    echo "tgi gaudi did not become ready after ${max_attempts} checks; see tgi-gaudi-service.log" >&2
    return 1 2>/dev/null || exit 1
fi

sleep 5s
echo "Service started successfully"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
services:
  # Single TGI (text-generation-inference) container built for Gaudi.
  tgi-server:
    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
    container_name: tgi-server
    ports:
      # Host port 8085 -> container port 80.
      - "8085:80"
    volumes:
      # Host directory named by HF_CACHE_DIR is mounted at /data.
      - ${HF_CACHE_DIR}:/data
    environment:
      # Proxy settings and the Hugging Face token are passed through from the
      # environment that runs docker compose.
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      # Boolean-looking values are quoted so the YAML parser keeps them as the
      # literal string "true" rather than converting them to a boolean.
      PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
      ENABLE_HPU_GRAPH: "true"
      LIMIT_HPU_GRAPH: "true"
      USE_FLASH_ATTENTION: "true"
      FLASH_ATTENTION_RECOMPUTE: "true"
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    # Model id and shard count are substituted from LLM_MODEL_ID / NUM_SHARDS.
    command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
import os | ||
|
||
import requests | ||
|
||
|
||
def generate_answer_agent_api(url, prompt):
    """Send ``prompt`` to the agent endpoint at ``url`` and return its answer.

    Args:
        url: Full URL of the agent endpoint; it receives a JSON POST.
        prompt: Question text, sent as the ``"query"`` field of the payload.

    Returns:
        The value stored under ``"text"`` in the JSON response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # NOTE(review): the empty "http" entry presumably keeps this request from
    # going through an HTTP proxy -- confirm against the deployment setup.
    proxies = {"http": ""}
    payload = {"query": prompt}
    response = requests.post(url, json=payload, proxies=proxies)
    # Surface HTTP failures as such, instead of a confusing KeyError or JSON
    # decoding error when the error body is parsed below.
    response.raise_for_status()
    return response.json()["text"]
|
||
|
||
if __name__ == "__main__":
    # Host and port of the agent come from the environment, falling back to
    # localhost:9095 when the variables are not set.
    host = os.getenv("ip_address", "localhost")
    port = os.getenv("agent_port", "9095")
    endpoint = f"http://{host}:{port}/v1/chat/completions"
    question = "Tell me about Michael Jackson song thriller"
    # Ask the sample question and print whatever the agent returns.
    print(generate_answer_agent_api(endpoint, question))
Oops, something went wrong.