Skip to content

Commit

Permalink
Add image build job in docker compose e2e gaudi test in CI (#305)
Browse files Browse the repository at this point in the history
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
  • Loading branch information
daisy-ycguo authored Jun 21, 2024
1 parent 01eed84 commit 4fecd6a
Show file tree
Hide file tree
Showing 8 changed files with 244 additions and 50 deletions.
40 changes: 22 additions & 18 deletions .github/workflows/manifest-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ jobs:
uses: ./.github/workflows/reuse-get-test-matrix.yml
with:
diff_excluded_files: '.github|deprecated|docker|assets|*.md|*.txt'
xeon_server_label: 'k8s'
gaudi_server_label: ""
xeon_server_label: 'xeon'
gaudi_server_label: 'gaudi'

mega-image-build:
needs: job1
Expand All @@ -34,12 +34,13 @@ jobs:
with:
image_tag: ${{ github.event.pull_request.head.sha }}
mega_service: "${{ matrix.example }}"
runner_label: "docker-build-${{ matrix.hardware }}"

manifest-test:
needs: [job1, mega-image-build]
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
runs-on: ${{ matrix.hardware }}
runs-on: "k8s-${{ matrix.hardware }}"
continue-on-error: true
steps:
- name: E2e test manifest
Expand All @@ -62,37 +63,40 @@ jobs:
echo "NAMESPACE=$lower_example-$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
echo "ROLLOUT_TIMEOUT_SECONDS=1800s" >> $GITHUB_ENV
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
echo "continue_test=true" >> $GITHUB_ENV
echo "should_cleanup=false" >> $GITHUB_ENV
echo "skip_validate=true" >> $GITHUB_ENV
echo "NAMESPACE=$NAMESPACE"
- name: Initialize manifest testing
run: |
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_xeon.sh init_${{ matrix.example }}
- name: Kubectl install
id: install
run: |
echo "should_cleanup=true" >> $GITHUB_ENV
kubectl create ns $NAMESPACE
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_xeon.sh install_${{ matrix.example }} $NAMESPACE
echo "Testing ${{ matrix.example }}, waiting for pod ready..."
if kubectl rollout status deployment --namespace "$NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Testing manifests ${{ matrix.example }}, waiting for pod ready done!"
echo "skip_validate=false" >> $GITHUB_ENV
if [[ ! -f ${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh ]]; then
echo "No test script found, exist test!"
exit 0
else
echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!"
exit 1
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh init_${{ matrix.example }}
echo "should_cleanup=true" >> $GITHUB_ENV
kubectl create ns $NAMESPACE
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh install_${{ matrix.example }} $NAMESPACE
echo "Testing ${{ matrix.example }}, waiting for pod ready..."
if kubectl rollout status deployment --namespace "$NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Testing manifests ${{ matrix.example }}, waiting for pod ready done!"
echo "skip_validate=false" >> $GITHUB_ENV
else
echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!"
exit 1
fi
sleep 60
fi
sleep 60
- name: Validate e2e test
if: always()
run: |
if $skip_validate; then
echo "Skip validate"
else
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_xeon.sh validate_${{ matrix.example }} $NAMESPACE
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh validate_${{ matrix.example }} $NAMESPACE
fi
- name: Kubectl uninstall
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/scripts/build_push.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,7 @@ function docker_build() {
echo "Building ${IMAGE_REPO}opea/$1:$IMAGE_TAG using Dockerfile $DOCKERFILE_PATH"
# if https_proxy and http_proxy are set, pass them to docker build
if [ -z "$https_proxy" ]; then
#docker build --no-cache -t ${IMAGE_REPO}opea/$1:$IMAGE_TAG -f $DOCKERFILE_PATH .
docker build -t ${IMAGE_REPO}opea/$1:$IMAGE_TAG -f $DOCKERFILE_PATH .
docker build --no-cache -t ${IMAGE_REPO}opea/$1:$IMAGE_TAG -f $DOCKERFILE_PATH .
else
docker build --no-cache -t ${IMAGE_REPO}opea/$1:$IMAGE_TAG --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $DOCKERFILE_PATH .
fi
Expand Down
15 changes: 9 additions & 6 deletions ChatQnA/tests/test_chatqna_on_gaudi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,12 @@ function start_services() {

sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env

# Replace the container name with a test-specific name
echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
sed -i "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" docker_compose.yaml
sed -i "s#image: opea/chatqna-ui:latest#image: opea/chatqna-ui:${IMAGE_TAG}#g" docker_compose.yaml
sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" docker_compose.yaml
# Start Docker Containers
# TODO: Replace the container name with a test-specific name

docker compose -f docker_compose.yaml up -d
n=0
until [[ "$n" -ge 200 ]]; do
Expand Down Expand Up @@ -213,13 +216,13 @@ function main() {

stop_docker
begin_time=$(date +%s)
build_docker_images
start_time=$(date +%s)
# build_docker_images
# start_time=$(date +%s)
start_services
end_time=$(date +%s)
minimal_duration=$((end_time-start_time))
# minimal_duration=$((end_time-start_time))
maximal_duration=$((end_time-begin_time))
echo "Mega service start minimal duration is "$minimal_duration"s, maximal duration(including docker image build) is "$maximal_duration"s"
echo "Mega service start duration is "$maximal_duration"s"

validate_microservices
validate_megaservice
Expand Down
111 changes: 111 additions & 0 deletions ChatQnA/tests/test_manifest_on_gaudi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Abort on the first failing command (-e) and trace every command (-x).
set -e -x

# Account running the test; the paths below are derived from it.
USER_ID="$(whoami)"
# Directory where the captured megaservice response is written.
LOG_PATH="/home/${USER_ID}/logs"
# Host directory substituted for the model mount path in the manifests.
MOUNT_DIR="/home/${USER_ID}/.cache/huggingface/hub"
# Optional registry prefix (empty when unset) and image tag (defaults to "latest").
IMAGE_REPO="${IMAGE_REPO:-}"
IMAGE_TAG="${IMAGE_TAG:-latest}"

#######################################
# Prepare the ChatQnA manifests in the current directory for a test run.
# Every *.yaml file found under "." is rewritten in place:
#   "path: /mnt/models"             -> "path: $MOUNT_DIR"
#   "image: opea/chatqna:latest"    -> "image: opea/chatqna:$IMAGE_TAG"
#   "image: opea/"                  -> "image: ${IMAGE_REPO}opea/"
#   "${HUGGINGFACEHUB_API_TOKEN}"   -> contents of the token file
# Globals:
#   USER_ID, MOUNT_DIR, IMAGE_REPO, IMAGE_TAG (read)
#   HF_TOKEN_FILE (read, optional) - path of the Hugging Face token file;
#     defaults to /home/$USER_ID/.cache/huggingface/token
# Returns:
#   0 on success; non-zero when the token file is unreadable or a
#   substitution fails.
#######################################
function init_chatqna() {
  local token_file="${HF_TOKEN_FILE:-/home/${USER_ID}/.cache/huggingface/token}"
  if [[ ! -r "$token_file" ]]; then
    echo "Hugging Face token file $token_file is missing or unreadable." >&2
    return 1
  fi

  # The expressions run in order on each line: the megaservice tag is
  # rewritten first because its pattern expects the bare "opea/" prefix,
  # which the last expression then extends with the registry.
  find . -name '*.yaml' -type f -exec sed -i \
    -e "s#path: /mnt/models#path: ${MOUNT_DIR}#g" \
    -e "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" \
    -e "s#image: opea/#image: ${IMAGE_REPO}opea/#g" \
    {} +

  # Insert the token inside a subshell with tracing switched off so the
  # secret never shows up in "set -x" output; the caller's shell options
  # are left untouched.
  (
    set +x
    token=$(<"$token_file")
    find . -name '*.yaml' -type f -exec sed -i \
      "s#\${HUGGINGFACEHUB_API_TOKEN}#${token}#g" {} +
  )
}

#######################################
# Deploy the ChatQnA manifests from the current directory into $NAMESPACE.
# First rewrites "default.svc" to "$NAMESPACE.svc" in every *.yaml file
# under ".", then applies the microservice manifests, waits, and finally
# applies the megaservice manifest.
# Globals:
#   NAMESPACE (read, required) - target Kubernetes namespace
#   INSTALL_WAIT_SECONDS (read, optional) - pause between applying the
#     microservices and the megaservice; defaults to 60
# Returns:
#   0 on success; non-zero if a substitution or kubectl call fails.
#######################################
function install_chatqna {
  : "${NAMESPACE:?NAMESPACE must be set before calling install_chatqna}"
  local -r wait_seconds="${INSTALL_WAIT_SECONDS:-60}"

  # Replace namespace "default" with the real namespace. The dot is escaped
  # so that only the literal "default.svc" is rewritten.
  find . -name '*.yaml' -type f -exec sed -i \
    "s#default\.svc#${NAMESPACE}.svc#g" {} +

  # Apply every microservice manifest, in this order, to the cluster.
  local -a yaml_files=(
    "qna_configmap_gaudi"
    "redis-vector-db"
    "tei_embedding_gaudi_service"
    "tei_reranking_service"
    "tgi_gaudi_service"
    "retriever"
    "embedding"
    "reranking"
    "llm"
  )
  local yaml_file
  for yaml_file in "${yaml_files[@]}"; do
    kubectl apply -f "${yaml_file}.yaml" -n "$NAMESPACE"
  done

  # Pause before the megaservice is created.
  sleep "$wait_seconds"

  # NOTE(review): the megaservice manifest carries "xeon" in its name even
  # though this is the Gaudi test - confirm this is the intended file.
  kubectl apply -f chaqna-xeon-backend-server.yaml -n "$NAMESPACE"
}

#######################################
# Run a curl probe until it succeeds or the retry budget is used up.
# Arguments:
#   $1 - maximum number of attempts
#   $2 - seconds to sleep after a failed attempt
#   remaining arguments are passed to curl unchanged
# Returns:
#   0 as soon as curl succeeds, 1 if every attempt failed.
#######################################
function _retry_curl() {
  local -r attempts=$1 interval=$2
  shift 2
  local attempt
  for ((attempt = 1; attempt <= attempts; attempt++)); do
    if curl "$@"; then
      return 0
    fi
    sleep "$interval"
  done
  return 1
}

#######################################
# Validate a deployed ChatQnA pipeline: wait for the retriever and TGI
# microservices to answer, then query the megaservice and check that the
# response mentions "billion".
# Globals:
#   NAMESPACE (read) - namespace the services run in
#   LOG_PATH (read)  - directory for the captured megaservice response
#   SERVICE_NAME (read, optional) - megaservice service name; defaults to
#     chaqna-xeon-backend-server-svc
#   VALIDATE_MAX_RETRY (read, optional) - probe attempts, default 20
#   VALIDATE_RETRY_INTERVAL (read, optional) - seconds between attempts,
#     default 10
# Exits:
#   1 when a microservice never becomes ready, the megaservice request
#   fails, or the response check fails.
#######################################
function validate_chatqna() {
  local -r max_retry="${VALIDATE_MAX_RETRY:-20}"
  local -r retry_interval="${VALIDATE_RETRY_INTERVAL:-10}"
  local -r mega_service="${SERVICE_NAME:-chaqna-xeon-backend-server-svc}"

  # The retriever expects an embedding vector; a random 768-element one is
  # enough for a readiness probe.
  local test_embedding
  test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") || {
    echo "Failed to generate a test embedding with python3."
    exit 1
  }

  # Make sure microservice retriever is ready. Its result is checked on its
  # own so that a retriever failure cannot be hidden by the next probe.
  if ! _retry_curl "$max_retry" "$retry_interval" \
    "http://retriever-svc.${NAMESPACE}:7000/v1/retrieval" -X POST \
    -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \
    -H 'Content-Type: application/json'; then
    echo "Microservice retriever failed, exit with error."
    exit 1
  fi

  # Make sure microservice tgi-svc is ready.
  if ! _retry_curl "$max_retry" "$retry_interval" \
    "http://tgi-gaudi-svc.${NAMESPACE}:9009/generate" -X POST \
    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
    -H 'Content-Type: application/json'; then
    echo "Microservice failed, exit with error."
    exit 1
  fi

  # Check that the megaservice works. The log file name contains the
  # namespace to avoid conflicts among multiple runners.
  mkdir -p -- "$LOG_PATH"
  local -r logfile="${LOG_PATH}/curlmega_${NAMESPACE}.log"
  # Capture the status with "||" so that "set -e" does not abort the script
  # before the failure can be reported.
  local exit_code=0
  curl "http://${mega_service}.${NAMESPACE}:8888/v1/chatqna" \
    -H "Content-Type: application/json" \
    -d '{"messages": "What is the revenue of Nike in 2023?"}' > "$logfile" || exit_code=$?
  if [[ $exit_code -ne 0 ]]; then
    echo "Megaservice failed, please check the logs in $logfile!"
    exit 1
  fi

  echo "Checking response results, make sure the output is reasonable. "
  if [[ -f "$logfile" ]] && grep -q "billion" "$logfile"; then
    echo "Response check succeed!"
  else
    echo "Response check failed, please check the logs in artifacts!"
    exit 1
  fi
}

# Entry point: the first argument selects the step to run, the second one
# (required for install/validate) is the Kubernetes namespace.
if [[ $# -eq 0 ]]; then
  echo "Usage: $0 <function_name>"
  exit 1
fi

case "$1" in
  init_ChatQnA)
    pushd ChatQnA/kubernetes/manifests
    init_chatqna
    popd
    ;;
  install_ChatQnA)
    # Abort with a message when the namespace argument is missing.
    NAMESPACE="${2:?namespace argument is required for install_ChatQnA}"
    pushd ChatQnA/kubernetes/manifests
    install_chatqna
    popd
    ;;
  validate_ChatQnA)
    NAMESPACE="${2:?namespace argument is required for validate_ChatQnA}"
    SERVICE_NAME=chaqna-xeon-backend-server-svc
    validate_chatqna
    ;;
  *)
    echo "Unknown function: $1"
    # A mistyped step name must fail the CI job instead of passing silently.
    exit 1
    ;;
esac
25 changes: 5 additions & 20 deletions CodeGen/tests/test_codegen_on_gaudi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,6 @@ WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')

function build_docker_images() {
cd $WORKPATH
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps

docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile .

docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1

cd $WORKPATH/docker
docker build --no-cache -t opea/codegen:latest -f Dockerfile .

cd $WORKPATH/docker/ui
docker build --no-cache -t opea/codegen-ui:latest -f docker/Dockerfile .

docker images
}

function start_services() {
cd $WORKPATH/docker/gaudi

Expand All @@ -38,8 +20,12 @@ function start_services() {

sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env

# Replace the container name with a test-specific name
echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
sed -i "s#image: opea/codegen:latest#image: opea/codegen:${IMAGE_TAG}#g" docker_compose.yaml
sed -i "s#image: opea/codegen-ui:latest#image: opea/codegen-ui:${IMAGE_TAG}#g" docker_compose.yaml
sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" docker_compose.yaml
# Start Docker Containers
# TODO: Replace the container name with a test-specific name
docker compose -f docker_compose.yaml up -d

sleep 2m # Waits 2 minutes
Expand Down Expand Up @@ -141,7 +127,6 @@ function main() {

stop_docker

build_docker_images
start_services

validate_microservices
Expand Down
84 changes: 84 additions & 0 deletions CodeGen/tests/test_manifest_on_gaudi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Abort on the first failing command (-e) and trace every command (-x).
set -e -x

# Account running the test; the paths below are derived from it.
USER_ID="$(whoami)"
# Directory where the captured megaservice response is written.
LOG_PATH="/home/${USER_ID}/logs"
# Host directory substituted for the "/mnt" mount path in the manifests.
MOUNT_DIR="/home/${USER_ID}/.cache/huggingface/hub"
# Optional registry prefix (empty when unset) and image tag (defaults to "latest").
IMAGE_REPO="${IMAGE_REPO:-}"
IMAGE_TAG="${IMAGE_TAG:-latest}"

#######################################
# Prepare the CodeGen manifests in the current directory for a test run.
# Every *.yaml file found under "." is rewritten in place:
#   "path: /mnt"                          -> "path: $MOUNT_DIR"
#   "image: opea/codegen:latest"          -> "image: opea/codegen:$IMAGE_TAG"
#   "image: opea/"                        -> "image: ${IMAGE_REPO}opea/"
#   "insert-your-huggingface-token-here"  -> contents of the token file
# The image patterns accept an optional double quote before "opea/".
# NOTE(review): the original tag pattern only matched unquoted image values
# while the repository pattern only matched quoted ones; the manifests are
# not visible here, so both forms are accepted - confirm which one is used.
# Globals:
#   USER_ID, MOUNT_DIR, IMAGE_REPO, IMAGE_TAG (read)
#   HF_TOKEN_FILE (read, optional) - path of the Hugging Face token file;
#     defaults to /home/$USER_ID/.cache/huggingface/token
# Returns:
#   0 on success; non-zero when the token file is unreadable or a
#   substitution fails.
#######################################
function init_codegen() {
  local token_file="${HF_TOKEN_FILE:-/home/${USER_ID}/.cache/huggingface/token}"
  if [[ ! -r "$token_file" ]]; then
    echo "Hugging Face token file $token_file is missing or unreadable." >&2
    return 1
  fi

  # The expressions run in order on each line: the megaservice tag is
  # rewritten first because its pattern expects the bare "opea/" prefix,
  # which the last expression then extends with the registry. "\1" puts
  # back the optional opening quote captured by the pattern.
  find . -name '*.yaml' -type f -exec sed -i \
    -e "s#path: /mnt#path: ${MOUNT_DIR}#g" \
    -e "s#image: \(\"\{0,1\}\)opea/codegen:latest#image: \1opea/codegen:${IMAGE_TAG}#g" \
    -e "s#image: \(\"\{0,1\}\)opea/#image: \1${IMAGE_REPO}opea/#g" \
    {} +

  # Insert the token inside a subshell with tracing switched off so the
  # secret never shows up in "set -x" output; the caller's shell options
  # are left untouched.
  (
    set +x
    token=$(<"$token_file")
    find . -name '*.yaml' -type f -exec sed -i \
      "s#insert-your-huggingface-token-here#${token}#g" {} +
  )
}

#######################################
# Apply every manifest in the current directory to the target namespace.
# Globals:
#   NAMESPACE (read, required) - target Kubernetes namespace
# Returns:
#   The exit status of "kubectl apply".
#######################################
function install_codegen {
  : "${NAMESPACE:?NAMESPACE must be set before calling install_codegen}"
  echo "namespace is $NAMESPACE"
  kubectl apply -f . -n "$NAMESPACE"
}

#######################################
# Validate a deployed CodeGen megaservice: look up the service's cluster IP
# and first port, send one request to /v1/codegen, store the response in a
# log file and check that it contains "print".
# Globals:
#   SERVICE_NAME (read) - name of the Kubernetes service to query
#   NAMESPACE (read)    - namespace the service runs in
#   LOG_PATH (read)     - directory for the captured response
# Exits:
#   1 when the service lookup fails, the request fails, or the response
#   check fails.
#######################################
function validate_codegen() {
  local ip_address port
  ip_address=$(kubectl get svc "$SERVICE_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.clusterIP}') || {
    echo "Failed to get the cluster IP of service $SERVICE_NAME in namespace $NAMESPACE!"
    exit 1
  }
  port=$(kubectl get svc "$SERVICE_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.ports[0].port}') || {
    echo "Failed to get the port of service $SERVICE_NAME in namespace $NAMESPACE!"
    exit 1
  }
  echo "try to curl http://${ip_address}:${port}/v1/codegen..."

  # The log file name contains the namespace to avoid conflicts among
  # multiple runners.
  mkdir -p -- "$LOG_PATH"
  local -r logfile="${LOG_PATH}/curlmega_${NAMESPACE}.log"

  # Curl the Mega Service. The status is captured with "||" so that
  # "set -e" does not abort the script before the failure can be reported.
  local exit_code=0
  curl "http://${ip_address}:${port}/v1/codegen" -H "Content-Type: application/json" \
    -d '{"messages": "def print_hello_world():"}' > "$logfile" || exit_code=$?
  if [[ $exit_code -ne 0 ]]; then
    echo "Megaservice codegen failed, please check the logs in $logfile!"
    exit 1
  fi

  echo "Checking response results, make sure the output is reasonable. "
  if [[ -f "$logfile" ]] && grep -q "print" "$logfile"; then
    echo "Response check succeed!"
  else
    echo "Response check failed, please check the logs in artifacts!"
    # A bad response must fail the test run, not just be logged.
    exit 1
  fi
}

# Entry point: the first argument selects the step to run, the second one
# (required for install/validate) is the Kubernetes namespace.
if [[ $# -eq 0 ]]; then
  echo "Usage: $0 <function_name>"
  exit 1
fi

case "$1" in
  init_CodeGen)
    pushd CodeGen/kubernetes/manifests/gaudi
    init_codegen
    popd
    ;;
  install_CodeGen)
    # Abort with a message when the namespace argument is missing.
    NAMESPACE="${2:?namespace argument is required for install_CodeGen}"
    pushd CodeGen/kubernetes/manifests/gaudi
    install_codegen
    popd
    ;;
  validate_CodeGen)
    NAMESPACE="${2:?namespace argument is required for validate_CodeGen}"
    SERVICE_NAME=codegen
    validate_codegen
    ;;
  *)
    echo "Unknown function: $1"
    # A mistyped step name must fail the CI job instead of passing silently.
    exit 1
    ;;
esac
8 changes: 6 additions & 2 deletions CodeTrans/tests/test_codetrans_on_gaudi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,12 @@ function start_services() {

sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env

# Replace the container name with a test-specific name
echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
sed -i "s#image: opea/codetrans:latest#image: opea/codetrans:${IMAGE_TAG}#g" docker_compose.yaml
sed -i "s#image: opea/codetrans-ui:latest#image: opea/codetrans-ui:${IMAGE_TAG}#g" docker_compose.yaml
sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" docker_compose.yaml
# Start Docker Containers
# TODO: Replace the container name with a test-specific name
docker compose -f docker_compose.yaml up -d

sleep 2m # Waits 2 minutes
Expand Down Expand Up @@ -141,7 +145,7 @@ function main() {

stop_docker

build_docker_images
# build_docker_images
start_services

validate_microservices
Expand Down
Loading

0 comments on commit 4fecd6a

Please sign in to comment.