-
Notifications
You must be signed in to change notification settings - Fork 199
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add new example of SearchQnA for GenAIExample (#448)
Signed-off-by: zhlsunshine <huailong.zhang@intel.com>
- Loading branch information
1 parent
e371b1e
commit 21b7d11
Showing
5 changed files
with
428 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
<h1 align="center" id="title">Deploy SearchQnA in a Kubernetes Cluster</h1> | ||
|
||
This document outlines the deployment process for a search-based question answering (SearchQnA) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.
|
||
Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install. | ||
|
||
If you have only Intel Xeon machines, use the `searchQnA_xeon.yaml` file; if you have a Gaudi cluster, use `searchQnA_gaudi.yaml` instead.
The example below uses Xeon.
|
||
## Deploy the RAG application | ||
|
||
1. Create the desired namespace if it does not already exist and deploy the application | ||
```bash | ||
# Namespace names must be lowercase (RFC 1123 label), so "CT" would be rejected.
export APP_NAMESPACE=searchqa
kubectl create ns "$APP_NAMESPACE"
sed -i "s|namespace: searchqa|namespace: $APP_NAMESPACE|g" ./searchQnA_xeon.yaml
# The web retriever step reads Google search credentials from the manifest;
# export GOOGLE_API_KEY and GOOGLE_CSE_ID before running these two lines.
sed -i "s|insert-your-google-api-key-here|$GOOGLE_API_KEY|g" ./searchQnA_xeon.yaml
sed -i "s|insert-your-google-cse-id-here|$GOOGLE_CSE_ID|g" ./searchQnA_xeon.yaml
kubectl apply -f ./searchQnA_xeon.yaml
``` | ||
|
||
2. Check if the application is up and ready | ||
```bash | ||
kubectl get pods -n $APP_NAMESPACE | ||
``` | ||
|
||
3. Deploy a client pod for testing | ||
```bash | ||
kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity | ||
``` | ||
|
||
4. Check that the client pod is ready
```bash | ||
kubectl get pods -n $APP_NAMESPACE | ||
``` | ||
|
||
5. Send request to application | ||
```bash | ||
export CLIENT_POD=$(kubectl get pod -n "$APP_NAMESPACE" -l app=client-test -o jsonpath='{.items..metadata.name}')
export accessUrl=$(kubectl get gmc -n "$APP_NAMESPACE" -o jsonpath="{.items[?(@.metadata.name=='searchqa')].status.accessUrl}")
# The response is printed to the terminal; append "> some_file.log" to keep a copy.
kubectl exec "$CLIENT_POD" -n "$APP_NAMESPACE" -- curl "$accessUrl" -X POST -d '{"text":"What is the latest news? Give me also the source link."}' -H 'Content-Type: application/json'
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# GMConnector describing the SearchQnA pipeline for Gaudi nodes.
# The "namespace: searchqa" line and the two "insert-your-google-..."
# placeholders are substituted with sed before the manifest is applied,
# so keep those strings unchanged.
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: gaudi
  name: searchqa
  namespace: searchqa
spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    root:
      routerType: Sequence
      steps:
        - name: Embedding
          internalService:
            serviceName: embedding-svc
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
        - name: TeiEmbeddingGaudi
          internalService:
            serviceName: tei-embedding-gaudi-svc
            isDownstreamService: true
        - name: WebRetriever
          data: $response
          internalService:
            serviceName: web-retriever-svc
            config:
              endpoint: /v1/web_retrieval
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
              GOOGLE_API_KEY: "insert-your-google-api-key-here"
              GOOGLE_CSE_ID: "insert-your-google-cse-id-here"
        - name: Reranking
          data: $response
          internalService:
            serviceName: reranking-svc
            config:
              endpoint: /v1/reranking
              TEI_RERANKING_ENDPOINT: tei-reranking-svc
        - name: TeiReranking
          internalService:
            serviceName: tei-reranking-svc
            config:
              endpoint: /rerank
            isDownstreamService: true
        - name: Llm
          data: $response
          internalService:
            serviceName: llm-svc
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-gaudi-svc
        - name: TgiGaudi
          internalService:
            serviceName: tgi-gaudi-svc
            config:
              endpoint: /generate
            isDownstreamService: true
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# GMConnector describing the SearchQnA pipeline for Xeon nodes.
# The "namespace: searchqa" line and the two "insert-your-google-..."
# placeholders are meant to be substituted (e.g. with sed) before the
# manifest is applied, so keep those strings unchanged.
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: xeon
  name: searchqa
  namespace: searchqa
spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    root:
      routerType: Sequence
      steps:
        - name: Embedding
          internalService:
            serviceName: embedding-svc
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
        - name: TeiEmbedding
          internalService:
            serviceName: tei-embedding-svc
            isDownstreamService: true
        - name: WebRetriever
          data: $response
          internalService:
            serviceName: web-retriever-svc
            config:
              endpoint: /v1/web_retrieval
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
              GOOGLE_API_KEY: "insert-your-google-api-key-here"
              GOOGLE_CSE_ID: "insert-your-google-cse-id-here"
        - name: Reranking
          data: $response
          internalService:
            serviceName: reranking-svc
            config:
              endpoint: /v1/reranking
              TEI_RERANKING_ENDPOINT: tei-reranking-svc
        - name: TeiReranking
          internalService:
            serviceName: tei-reranking-svc
            config:
              endpoint: /rerank
            isDownstreamService: true
        - name: Llm
          data: $response
          internalService:
            serviceName: llm-svc
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-service-m
        - name: Tgi
          internalService:
            serviceName: tgi-service-m
            config:
              endpoint: /generate
            isDownstreamService: true
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Installs and validates the SearchQnA example on a Kubernetes cluster.
# Usage: <script> install_SearchQnA | validate_SearchQnA
# Environment read by the functions in this script: APP_NAMESPACE,
# SYSTEM_NAMESPACE, GOOGLE_API_KEY, GOOGLE_CSE_ID.

# -x traces every command, -e aborts on the first unhandled failure.
set -xe
USER_ID=$(whoami)
# Directory that receives the response log written during validation.
LOG_PATH="/home/${USER_ID}/logs"
# NOTE(review): MOUNT_DIR and IMAGE_REPO are not referenced in the visible
# part of this script; presumably kept for parity with sibling test scripts.
MOUNT_DIR="/home/${USER_ID}/.cache/huggingface/hub"
IMAGE_REPO=${IMAGE_REPO:-}
|
||
#######################################
# Deploy the Gaudi SearchQnA pipeline and wait for its router to come up.
# Globals:
#   APP_NAMESPACE  (read) namespace to deploy into; must be set
#   GOOGLE_API_KEY (read) replaces the API-key placeholder in the manifest
#   GOOGLE_CSE_ID  (read) replaces the CSE-id placeholder in the manifest
# Arguments: none
# Outputs:   progress messages and the final pod listing on stdout
# Note:      edits ./searchQnA_gaudi.yaml in place, relative to the cwd.
#######################################
function install_searchqa() {
    : "${APP_NAMESPACE:?APP_NAMESPACE must be set}"
    local manifest=./searchQnA_gaudi.yaml
    local trace_on=false

    # Reuse the namespace when it already exists so a re-run does not abort.
    if ! kubectl get ns "$APP_NAMESPACE" > /dev/null 2>&1; then
        kubectl create ns "$APP_NAMESPACE"
    fi
    sed -i "s|namespace: searchqa|namespace: $APP_NAMESPACE|g" "$manifest"

    # Pause xtrace (when active) so the credentials are not echoed into logs.
    if [[ $- == *x* ]]; then
        trace_on=true
        set +x
    fi
    sed -i "s|insert-your-google-api-key-here|$GOOGLE_API_KEY|g" "$manifest"
    sed -i "s|insert-your-google-cse-id-here|$GOOGLE_CSE_ID|g" "$manifest"
    if [[ "$trace_on" == true ]]; then
        set -x
    fi

    kubectl apply -f "$manifest"

    # Wait until the router service is ready
    echo "Waiting for the searchqa router service to be ready..."
    wait_until_pod_ready "searchqa router" "$APP_NAMESPACE" "router-service"
    # Print the listing directly so the table layout is preserved.
    kubectl get pods -n "$APP_NAMESPACE"
}
|
||
#######################################
# Send one query to the deployed SearchQnA pipeline and check the reply.
# Creates a "client-test" deployment, waits for it, POSTs a question to the
# GMC access URL from inside that pod and stores the reply in
# $LOG_PATH/gmc_searchqa.log. The run passes only when the log contains the
# literal end-of-stream marker "[DONE]".
# Globals:
#   APP_NAMESPACE (read)    namespace of the deployment
#   LOG_PATH      (read)    directory for the response log
#   CLIENT_POD    (written) exported name of the client pod
#   accessUrl     (written) URL reported by the "searchqa" GMC resource
# Arguments: none
# Returns:   exits the shell with 1 when the request or the check fails
#######################################
function validate_searchqa() {
    local log_file="$LOG_PATH/gmc_searchqa.log"

    # deploy client pod for testing
    kubectl create deployment client-test -n "$APP_NAMESPACE" --image=python:3.8.13 -- sleep infinity

    # wait for client pod ready
    wait_until_pod_ready "client-test" "$APP_NAMESPACE" "client-test"
    # giving time to populating data
    sleep 60

    kubectl get pods -n "$APP_NAMESPACE"

    # send request to searchqa
    CLIENT_POD=$(kubectl get pod -n "$APP_NAMESPACE" -l app=client-test -o jsonpath='{.items..metadata.name}')
    export CLIENT_POD
    echo "$CLIENT_POD"
    accessUrl=$(kubectl get gmc -n "$APP_NAMESPACE" -o jsonpath="{.items[?(@.metadata.name=='searchqa')].status.accessUrl}")

    # The redirect below fails if the log directory is missing.
    mkdir -p "$LOG_PATH"
    # Test the command directly: with `set -e` a separate `$?` check after a
    # failing command would never be reached.
    if ! kubectl exec "$CLIENT_POD" -n "$APP_NAMESPACE" -- curl "$accessUrl" -X POST -d '{"text":"What is the latest news? Give me also the source link."}' -H 'Content-Type: application/json' > "$log_file"; then
        echo "searchqa failed, please check the logs in ${LOG_PATH}!"
        exit 1
    fi

    echo "Checking response results, make sure the output is reasonable. "
    # -F matches the literal text; as a regex "[DONE]" would be a bracket
    # expression that matches any single D, O, N or E.
    if [[ -f "$log_file" ]] && grep -qF "[DONE]" "$log_file"; then
        echo "Response check succeed!"
    else
        echo "Response check failed, please check the logs in artifacts!"
        if [[ -f "$log_file" ]]; then
            cat "$log_file"
        fi
        exit 1
    fi
}
|
||
#######################################
# Poll until a pod reports Ready, giving up after 30 retries 10 s apart.
# Arguments:
#   $1 - human-readable name used in progress messages
#   $2 - namespace to look in
#   $3 - app label value handed to is_pod_ready
# Outputs:   progress messages and pod listings on stdout
# Returns:   0 once ready; exits the shell with 1 after the last retry
#######################################
function wait_until_pod_ready() {
    local description=$1
    local namespace=$2
    local app_label=$3
    local max_retries=30
    local retry_count=0

    echo "Waiting for the ${description} to be ready..."
    while ! is_pod_ready "$namespace" "$app_label"; do
        if (( retry_count >= max_retries )); then
            echo "${description} is not ready after waiting for a significant amount of time"
            # Best-effort diagnostics: a failed log fetch must not pre-empt
            # the explicit exit below.
            get_gmc_controller_logs || true
            exit 1
        fi
        echo "${description} is not ready yet. Retrying in 10 seconds..."
        sleep 10
        kubectl get pods -n "$namespace"
        retry_count=$((retry_count + 1))
    done
}
|
||
#######################################
# Report whether the selected pod has its Ready condition set to "True".
# Arguments:
#   $1 - namespace to query
#   $2 - value of the pod's "app" label; the special value "gmc-controller"
#        queries the namespace without a label selector
# Returns:   0 when kubectl prints exactly "True", 1 otherwise
#######################################
function is_pod_ready() {
    local namespace=$1
    local app_label=$2
    local pod_status

    if [[ "$app_label" == "gmc-controller" ]]; then
        pod_status=$(kubectl get pods -n "$namespace" -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}')
    else
        pod_status=$(kubectl get pods -n "$namespace" -l "app=$app_label" -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}')
    fi

    # The test's own status is the function's return value.
    [[ "$pod_status" == "True" ]]
}
|
||
#######################################
# Print the logs of the GMC controller pod for troubleshooting.
# Globals:   SYSTEM_NAMESPACE (read) namespace the controller runs in
# Arguments: none
# Outputs:   status messages and the pod's logs on stdout
# Returns:   1 when no pod labelled control-plane=gmc-controller is found
#######################################
function get_gmc_controller_logs() {
    local pod_name

    # Fetch the name of the pod labelled control-plane=gmc-controller.
    # A failed lookup is treated as "not found" so the message below is
    # printed instead of `set -e` aborting on the assignment.
    pod_name=$(kubectl get pods -n "$SYSTEM_NAMESPACE" -l control-plane=gmc-controller -o jsonpath='{.items[0].metadata.name}') || pod_name=""

    # Check if the pod name was found
    if [[ -z "$pod_name" ]]; then
        echo "No pod found with app-name gmc-controller in namespace $SYSTEM_NAMESPACE"
        return 1
    fi

    # Get the logs of the found pod
    echo "Fetching logs for pod $pod_name in namespace $SYSTEM_NAMESPACE..."
    kubectl logs "$pod_name" -n "$SYSTEM_NAMESPACE"
}
|
||
# Entry point: the first argument selects which step to run. Both steps run
# from SearchQnA/kubernetes because the manifest paths are relative.
if [ $# -eq 0 ]; then
    echo "Usage: $0 <function_name>" >&2
    exit 1
fi

case "$1" in
    install_SearchQnA)
        pushd SearchQnA/kubernetes
        install_searchqa
        popd
        ;;
    validate_SearchQnA)
        pushd SearchQnA/kubernetes
        validate_searchqa
        popd
        ;;
    *)
        # Fail loudly so a mistyped step name cannot pass as a success.
        echo "Unknown function: $1" >&2
        exit 1
        ;;
esac
Oops, something went wrong.