diff --git a/SearchQnA/kubernetes/README.md b/SearchQnA/kubernetes/README.md new file mode 100644 index 0000000000..6f8848bb42 --- /dev/null +++ b/SearchQnA/kubernetes/README.md @@ -0,0 +1,40 @@ +

Deploy SearchQnA in a Kubernetes Cluster

+ +This document outlines the deployment process for a search-based question-answering (SearchQnA) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines. + +Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install. + +If you have only Intel Xeon machines, use the searchQnA_xeon.yaml file; if you have a Gaudi cluster, use searchQnA_gaudi.yaml. +The example below uses Xeon. + +## Deploy the SearchQnA application + +1. Create the desired namespace if it does not already exist and deploy the application +```bash +export APP_NAMESPACE=ct +kubectl create ns $APP_NAMESPACE +sed -i "s|namespace: searchqa|namespace: $APP_NAMESPACE|g" ./searchQnA_xeon.yaml +kubectl apply -f ./searchQnA_xeon.yaml +``` + +2. Check if the application is up and ready +```bash +kubectl get pods -n $APP_NAMESPACE +``` + +3. Deploy a client pod for testing +```bash +kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity +``` + +4. Check that the client pod is ready +```bash + kubectl get pods -n $APP_NAMESPACE +``` + +5. Send a request to the application +```bash +export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) +export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='searchqa')].status.accessUrl}") +kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"text":"What is the latest news? 
Give me also the source link."}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_searchqa.log +``` diff --git a/SearchQnA/kubernetes/searchQnA_gaudi.yaml b/SearchQnA/kubernetes/searchQnA_gaudi.yaml new file mode 100644 index 0000000000..8f70ea0fe7 --- /dev/null +++ b/SearchQnA/kubernetes/searchQnA_gaudi.yaml @@ -0,0 +1,65 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: gmc.opea.io/v1alpha3 +kind: GMConnector +metadata: + labels: + app.kubernetes.io/name: gmconnector + app.kubernetes.io/managed-by: kustomize + gmc/platform: gaudi + name: searchqa + namespace: searchqa +spec: + routerConfig: + name: router + serviceName: router-service + nodes: + root: + routerType: Sequence + steps: + - name: Embedding + internalService: + serviceName: embedding-svc + config: + endpoint: /v1/embeddings + TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc + - name: TeiEmbeddingGaudi + internalService: + serviceName: tei-embedding-gaudi-svc + isDownstreamService: true + - name: WebRetriever + data: $response + internalService: + serviceName: web-retriever-svc + config: + endpoint: /v1/web_retrieval + TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc + GOOGLE_API_KEY: "insert-your-google-api-key-here" + GOOGLE_CSE_ID: "insert-your-google-cse-id-here" + - name: Reranking + data: $response + internalService: + serviceName: reranking-svc + config: + endpoint: /v1/reranking + TEI_RERANKING_ENDPOINT: tei-reranking-svc + - name: TeiReranking + internalService: + serviceName: tei-reranking-svc + config: + endpoint: /rerank + isDownstreamService: true + - name: Llm + data: $response + internalService: + serviceName: llm-svc + config: + endpoint: /v1/chat/completions + TGI_LLM_ENDPOINT: tgi-gaudi-svc + - name: TgiGaudi + internalService: + serviceName: tgi-gaudi-svc + config: + endpoint: /generate + isDownstreamService: true diff --git a/SearchQnA/kubernetes/searchQnA_xeon.yaml b/SearchQnA/kubernetes/searchQnA_xeon.yaml new file mode 100644 index 
0000000000..85d445ce59 --- /dev/null +++ b/SearchQnA/kubernetes/searchQnA_xeon.yaml @@ -0,0 +1,65 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: gmc.opea.io/v1alpha3 +kind: GMConnector +metadata: + labels: + app.kubernetes.io/name: gmconnector + app.kubernetes.io/managed-by: kustomize + gmc/platform: xeon + name: searchqa + namespace: searchqa +spec: + routerConfig: + name: router + serviceName: router-service + nodes: + root: + routerType: Sequence + steps: + - name: Embedding + internalService: + serviceName: embedding-svc + config: + endpoint: /v1/embeddings + TEI_EMBEDDING_ENDPOINT: tei-embedding-svc + - name: TeiEmbedding + internalService: + serviceName: tei-embedding-svc + isDownstreamService: true + - name: WebRetriever + data: $response + internalService: + serviceName: web-retriever-svc + config: + endpoint: /v1/web_retrieval + TEI_EMBEDDING_ENDPOINT: tei-embedding-svc + GOOGLE_API_KEY: "insert-your-google-api-key-here" + GOOGLE_CSE_ID: "insert-your-google-cse-id-here" + - name: Reranking + data: $response + internalService: + serviceName: reranking-svc + config: + endpoint: /v1/reranking + TEI_RERANKING_ENDPOINT: tei-reranking-svc + - name: TeiReranking + internalService: + serviceName: tei-reranking-svc + config: + endpoint: /rerank + isDownstreamService: true + - name: Llm + data: $response + internalService: + serviceName: llm-svc + config: + endpoint: /v1/chat/completions + TGI_LLM_ENDPOINT: tgi-service-m + - name: Tgi + internalService: + serviceName: tgi-service-m + config: + endpoint: /generate + isDownstreamService: true diff --git a/SearchQnA/tests/test_gmc_on_gaudi.sh b/SearchQnA/tests/test_gmc_on_gaudi.sh new file mode 100755 index 0000000000..86a474d571 --- /dev/null +++ b/SearchQnA/tests/test_gmc_on_gaudi.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe +USER_ID=$(whoami) +LOG_PATH=/home/$(whoami)/logs 
+MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub +IMAGE_REPO=${IMAGE_REPO:-} + +function install_searchqa() { + kubectl create ns $APP_NAMESPACE + sed -i "s|namespace: searchqa|namespace: $APP_NAMESPACE|g" ./searchQnA_gaudi.yaml + sed -i "s|insert-your-google-api-key-here|$GOOGLE_API_KEY|g" ./searchQnA_gaudi.yaml + sed -i "s|insert-your-google-cse-id-here|$GOOGLE_CSE_ID|g" ./searchQnA_gaudi.yaml + kubectl apply -f ./searchQnA_gaudi.yaml + + # Wait until the router service is ready + echo "Waiting for the searchqa router service to be ready..." + wait_until_pod_ready "searchqa router" $APP_NAMESPACE "router-service" + output=$(kubectl get pods -n $APP_NAMESPACE) + echo $output +} + +function validate_searchqa() { + # deploy client pod for testing + kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity + + # wait for client pod ready + wait_until_pod_ready "client-test" $APP_NAMESPACE "client-test" + # allow time for data to be populated + sleep 60 + + kubectl get pods -n $APP_NAMESPACE + # send request to searchqa + export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) + echo "$CLIENT_POD" + accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='searchqa')].status.accessUrl}") + kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"text":"What is the latest news? Give me also the source link."}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_searchqa.log + exit_code=$? + if [ $exit_code -ne 0 ]; then + echo "searchqa failed, please check the logs in ${LOG_PATH}!" + exit 1 + fi + + echo "Checking response results, make sure the output is reasonable. 
" + local status=false + if [[ -f $LOG_PATH/gmc_searchqa.log ]] && \ + [[ $(grep -c "\[DONE\]" $LOG_PATH/gmc_searchqa.log) != 0 ]]; then + status=true + fi + if [ $status == false ]; then + if [[ -f $LOG_PATH/gmc_searchqa.log ]]; then + cat $LOG_PATH/gmc_searchqa.log + fi + echo "Response check failed, please check the logs in artifacts!" + cat $LOG_PATH/gmc_searchqa.log + exit 1 + else + echo "Response check succeed!" + fi +} + +function wait_until_pod_ready() { + echo "Waiting for the $1 to be ready..." + max_retries=30 + retry_count=0 + while ! is_pod_ready $2 $3; do + if [ $retry_count -ge $max_retries ]; then + echo "$1 is not ready after waiting for a significant amount of time" + get_gmc_controller_logs + exit 1 + fi + echo "$1 is not ready yet. Retrying in 10 seconds..." + sleep 10 + output=$(kubectl get pods -n $2) + echo $output + retry_count=$((retry_count + 1)) + done +} + +function is_pod_ready() { + if [ "$2" == "gmc-controller" ]; then + pod_status=$(kubectl get pods -n $1 -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}') + else + pod_status=$(kubectl get pods -n $1 -l app=$2 -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}') + fi + if [ "$pod_status" == "True" ]; then + return 0 + else + return 1 + fi +} + +function get_gmc_controller_logs() { + # Fetch the name of the pod labeled control-plane=gmc-controller in the specified namespace + pod_name=$(kubectl get pods -n $SYSTEM_NAMESPACE -l control-plane=gmc-controller -o jsonpath='{.items[0].metadata.name}') + + # Check if the pod name was found + if [ -z "$pod_name" ]; then + echo "No pod found with app-name gmc-controller in namespace $SYSTEM_NAMESPACE" + return 1 + fi + + # Get the logs of the found pod + echo "Fetching logs for pod $pod_name in namespace $SYSTEM_NAMESPACE..." 
+ kubectl logs $pod_name -n $SYSTEM_NAMESPACE +} + +if [ $# -eq 0 ]; then + echo "Usage: $0 {install_SearchQnA|validate_SearchQnA}" + exit 1 +fi + +case "$1" in + install_SearchQnA) + pushd SearchQnA/kubernetes + install_searchqa + popd + ;; + validate_SearchQnA) + pushd SearchQnA/kubernetes + validate_searchqa + popd + ;; + *) + echo "Unknown function: $1" + ;; +esac diff --git a/SearchQnA/tests/test_gmc_on_xeon.sh b/SearchQnA/tests/test_gmc_on_xeon.sh new file mode 100755 index 0000000000..8e577a2d91 --- /dev/null +++ b/SearchQnA/tests/test_gmc_on_xeon.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe +USER_ID=$(whoami) +LOG_PATH=/home/$(whoami)/logs +MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub +IMAGE_REPO=${IMAGE_REPO:-} + +function install_searchqa() { + kubectl create ns $APP_NAMESPACE + sed -i "s|namespace: searchqa|namespace: $APP_NAMESPACE|g" ./searchQnA_xeon.yaml + sed -i "s|insert-your-google-api-key-here|$GOOGLE_API_KEY|g" ./searchQnA_xeon.yaml + sed -i "s|insert-your-google-cse-id-here|$GOOGLE_CSE_ID|g" ./searchQnA_xeon.yaml + kubectl apply -f ./searchQnA_xeon.yaml + + # Wait until the router service is ready + echo "Waiting for the searchqa router service to be ready..." 
+ wait_until_pod_ready "searchqa router" $APP_NAMESPACE "router-service" + output=$(kubectl get pods -n $APP_NAMESPACE) + echo $output +} + +function validate_searchqa() { + # deploy client pod for testing + kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity + + # wait for client pod ready + wait_until_pod_ready "client-test" $APP_NAMESPACE "client-test" + # allow time for data to be populated + sleep 60 + + kubectl get pods -n $APP_NAMESPACE + # send request to searchqa + export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) + echo "$CLIENT_POD" + accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='searchqa')].status.accessUrl}") + kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"text":"What is the latest news? Give me also the source link."}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_searchqa.log + exit_code=$? + if [ $exit_code -ne 0 ]; then + echo "searchqa failed, please check the logs in ${LOG_PATH}!" + exit 1 + fi + + echo "Checking response results, make sure the output is reasonable. " + local status=false + if [[ -f $LOG_PATH/gmc_searchqa.log ]] && \ + [[ $(grep -c "\[DONE\]" $LOG_PATH/gmc_searchqa.log) != 0 ]]; then + status=true + fi + if [ $status == false ]; then + if [[ -f $LOG_PATH/gmc_searchqa.log ]]; then + cat $LOG_PATH/gmc_searchqa.log + fi + echo "Response check failed, please check the logs in artifacts!" + cat $LOG_PATH/gmc_searchqa.log + exit 1 + else + echo "Response check succeed!" + fi +} + +function wait_until_pod_ready() { + echo "Waiting for the $1 to be ready..." + max_retries=30 + retry_count=0 + while ! is_pod_ready $2 $3; do + if [ $retry_count -ge $max_retries ]; then + echo "$1 is not ready after waiting for a significant amount of time" + get_gmc_controller_logs + exit 1 + fi + echo "$1 is not ready yet. Retrying in 10 seconds..." 
+ sleep 10 + output=$(kubectl get pods -n $2) + echo $output + retry_count=$((retry_count + 1)) + done +} + +function is_pod_ready() { + if [ "$2" == "gmc-controller" ]; then + pod_status=$(kubectl get pods -n $1 -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}') + else + pod_status=$(kubectl get pods -n $1 -l app=$2 -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}') + fi + if [ "$pod_status" == "True" ]; then + return 0 + else + return 1 + fi +} + +function get_gmc_controller_logs() { + # Fetch the name of the pod labeled control-plane=gmc-controller in the specified namespace + pod_name=$(kubectl get pods -n $SYSTEM_NAMESPACE -l control-plane=gmc-controller -o jsonpath='{.items[0].metadata.name}') + + # Check if the pod name was found + if [ -z "$pod_name" ]; then + echo "No pod found with app-name gmc-controller in namespace $SYSTEM_NAMESPACE" + return 1 + fi + + # Get the logs of the found pod + echo "Fetching logs for pod $pod_name in namespace $SYSTEM_NAMESPACE..." + kubectl logs $pod_name -n $SYSTEM_NAMESPACE +} + +if [ $# -eq 0 ]; then + echo "Usage: $0 {install_SearchQnA|validate_SearchQnA}" + exit 1 +fi + +case "$1" in + install_SearchQnA) + pushd SearchQnA/kubernetes + install_searchqa + popd + ;; + validate_SearchQnA) + pushd SearchQnA/kubernetes + validate_searchqa + popd + ;; + *) + echo "Unknown function: $1" + ;; +esac