EdgeCraftRAG: Add E2E test cases for EdgeCraftRAG - local LLM and vllm (#1137)

Signed-off-by: Zhang, Rui <rui2.zhang@intel.com>
Signed-off-by: Mingyuan Qi <mingyuan.qi@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Mingyuan Qi <mingyuan.qi@intel.com>
3 people authored Nov 17, 2024
1 parent cbe952e commit 7949045
Showing 8 changed files with 524 additions and 3 deletions.
92 changes: 92 additions & 0 deletions EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml
@@ -0,0 +1,92 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  server:
    image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest}
    container_name: edgecraftrag-server
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_ENDPOINT: ${HF_ENDPOINT}
      vLLM_ENDPOINT: ${vLLM_ENDPOINT}
    volumes:
      - ${MODEL_PATH:-${PWD}}:/home/user/models
      - ${DOC_PATH:-${PWD}}:/home/user/docs
      - ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
      - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
    ports:
      - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010}
    devices:
      - /dev/dri:/dev/dri
    group_add:
      - ${VIDEOGROUPID:-44}
      - ${RENDERGROUPID:-109}
  ecrag:
    image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest}
    container_name: edgecraftrag
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
      PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
      PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
    ports:
      - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011}
    depends_on:
      - server
  ui:
    image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest}
    container_name: edgecraftrag-ui
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
      PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
      PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
      UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082}
      UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0}
    volumes:
      - ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
    ports:
      - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082}
    restart: always
    depends_on:
      - server
      - ecrag
  vllm-openvino-server:
    container_name: vllm-openvino-server
    image: opea/vllm-arc:latest
    ports:
      - ${VLLM_SERVICE_PORT:-8008}:80
    environment:
      HTTPS_PROXY: ${https_proxy}
      HTTP_PROXY: ${http_proxy}
      VLLM_OPENVINO_DEVICE: GPU
      HF_ENDPOINT: ${HF_ENDPOINT}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    volumes:
      - /dev/dri/by-path:/dev/dri/by-path
      - $HOME/.cache/huggingface:/root/.cache/huggingface
    devices:
      - /dev/dri
    group_add:
      - ${VIDEOGROUPID:-44}
      - ${RENDERGROUPID:-109}
    entrypoint: /bin/bash -c "\
      cd / && \
      export VLLM_CPU_KVCACHE_SPACE=50 && \
      export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
      python3 -m vllm.entrypoints.openai.api_server \
      --model '${LLM_MODEL}' \
      --max_model_len=4096 \
      --host 0.0.0.0 \
      --port 80"
networks:
  default:
    driver: bridge
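
A minimal way to bring this stack up, for reference (a sketch, not part of the commit; the values below are placeholders to adjust for your host):

# Sketch: placeholder values, not part of this commit.
export HOST_IP=$(hostname -I | awk '{print $1}')
export LLM_MODEL="Qwen/Qwen2-7B-Instruct"
export HUGGINGFACEHUB_API_TOKEN="<your-hf-token>"
export vLLM_ENDPOINT="http://${HOST_IP}:8008"
docker compose -f compose_vllm.yaml up -d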
53 changes: 53 additions & 0 deletions EdgeCraftRAG/tests/common.sh
@@ -0,0 +1,53 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

function validate_services() {
    local URL="$1"
    local EXPECTED_RESULT="$2"
    local SERVICE_NAME="$3"
    local DOCKER_NAME="$4"
    local INPUT_DATA="$5"

    echo "[ $SERVICE_NAME ] Validating $SERVICE_NAME service..."
    local RESPONSE=$(curl -s -w "%{http_code}" -o ${LOG_PATH}/${SERVICE_NAME}.log -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
    while [ ! -f ${LOG_PATH}/${SERVICE_NAME}.log ]; do
        sleep 1
    done
    local HTTP_STATUS="${RESPONSE: -3}"
    local CONTENT=$(cat ${LOG_PATH}/${SERVICE_NAME}.log)

    if [ "$HTTP_STATUS" -eq 200 ]; then
        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
            echo "[ $SERVICE_NAME ] Content is as expected."
        else
            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
            exit 1
        fi
    else
        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
        exit 1
    fi
    sleep 1s
}

function check_gpu_usage() {
    date > ${LOG_PATH}/gpu.log
    pci_address=$(lspci | grep -i '56a0' | awk '{print $1}')
    gpu_stats=$(sudo xpu-smi stats -d 0000:"$pci_address") # TODO: needs sudo
    gpu_utilization=$(echo "$gpu_stats" | grep -i "GPU Utilization" | awk -F'|' '{print $3}' | awk '{print $1}')
    memory_used=$(echo "$gpu_stats" | grep -i "GPU Memory Used" | awk -F'|' '{print $3}' | awk '{print $1}')
    memory_util=$(echo "$gpu_stats" | grep -i "GPU Memory Util" | awk -F'|' '{print $3}' | awk '{print $1}')

    echo "GPU Utilization (%): $gpu_utilization" >> ${LOG_PATH}/gpu.log
    echo "GPU Memory Used (MiB): $memory_used" >> ${LOG_PATH}/gpu.log
    echo "GPU Memory Util (%): $memory_util" >> ${LOG_PATH}/gpu.log

    if [ "$memory_used" -lt 1024 ]; then
        echo "GPU Memory Used is less than 1 GiB. Please check."
        exit 1
    fi
}
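
For reference, a typical call mirrors the usage in test_compose_on_arc.sh; a sketch (it assumes LOG_PATH is exported and the server container is already up):

# Sketch: LOG_PATH must be a writable directory; port 16010 is the compose default.
export LOG_PATH=./logs
validate_services \
    "${HOST_IP}:16010/v1/settings/pipelines" \
    "active" \
    "pipeline" \
    "edgecraftrag-server" \
    '@configs/test_pipeline_local_llm.json'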
3 changes: 3 additions & 0 deletions EdgeCraftRAG/tests/configs/test_data.json
@@ -0,0 +1,3 @@
{
  "text": "A test case for the rag pipeline. The test id is 1234567890. There are several tests in this test case. The first test is for node parser. There are 3 types of node parsers. Their names are Aa, Bb and Cc. The second test is for indexer. The indexer will do the indexing for the given nodes. The last test is for retriever. Retrieving text is based on similarity search."
}
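
The test scripts submit this file through validate_services; the equivalent raw request is roughly the following sketch (host and port assume the compose defaults):

# Sketch: assumes the EdgeCraftRAG server is reachable on the default pipeline port.
curl -X POST "http://${HOST_IP}:16010/v1/data" \
    -H 'Content-Type: application/json' \
    -d @configs/test_data.json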
44 changes: 44 additions & 0 deletions EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json
@@ -0,0 +1,44 @@
{
  "name": "rag_test_local_llm",
  "node_parser": {
    "chunk_size": 400,
    "chunk_overlap": 48,
    "parser_type": "simple"
  },
  "indexer": {
    "indexer_type": "faiss_vector",
    "embedding_model": {
      "model_id": "BAAI/bge-small-en-v1.5",
      "model_path": "./models/BAAI/bge-small-en-v1.5",
      "device": "auto",
      "weight": "INT4"
    }
  },
  "retriever": {
    "retriever_type": "vectorsimilarity",
    "retrieve_topk": 30
  },
  "postprocessor": [
    {
      "processor_type": "reranker",
      "top_n": 2,
      "reranker_model": {
        "model_id": "BAAI/bge-reranker-large",
        "model_path": "./models/BAAI/bge-reranker-large",
        "device": "auto",
        "weight": "INT4"
      }
    }
  ],
  "generator": {
    "model": {
      "model_id": "Qwen/Qwen2-7B-Instruct",
      "model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights",
      "device": "auto",
      "weight": "INT4"
    },
    "prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
    "inference_type": "local"
  },
  "active": "True"
}
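
Registering this pipeline amounts to posting the file to the pipeline settings endpoint, as the test script does; roughly (a sketch, with the endpoint path taken from test_compose_on_arc.sh):

# Sketch: the "active" field in the config activates the pipeline on creation.
curl -X POST "http://${HOST_IP}:16010/v1/settings/pipelines" \
    -H 'Content-Type: application/json' \
    -d @configs/test_pipeline_local_llm.json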
44 changes: 44 additions & 0 deletions EdgeCraftRAG/tests/configs/test_pipeline_vllm.json
@@ -0,0 +1,44 @@
{
  "name": "rag_test_vllm",
  "node_parser": {
    "chunk_size": 400,
    "chunk_overlap": 48,
    "parser_type": "simple"
  },
  "indexer": {
    "indexer_type": "faiss_vector",
    "embedding_model": {
      "model_id": "BAAI/bge-small-en-v1.5",
      "model_path": "./models/BAAI/bge-small-en-v1.5",
      "device": "auto",
      "weight": "INT4"
    }
  },
  "retriever": {
    "retriever_type": "vectorsimilarity",
    "retrieve_topk": 30
  },
  "postprocessor": [
    {
      "processor_type": "reranker",
      "top_n": 2,
      "reranker_model": {
        "model_id": "BAAI/bge-reranker-large",
        "model_path": "./models/BAAI/bge-reranker-large",
        "device": "auto",
        "weight": "INT4"
      }
    }
  ],
  "generator": {
    "model": {
      "model_id": "Qwen/Qwen2-7B-Instruct",
      "model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights",
      "device": "auto",
      "weight": "INT4"
    },
    "prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
    "inference_type": "vllm"
  },
  "active": "True"
}
113 changes: 113 additions & 0 deletions EdgeCraftRAG/tests/test_compose_on_arc.sh
@@ -0,0 +1,113 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -e
source ./common.sh

IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

ip_address=$(hostname -I | awk '{print $1}')
HOST_IP=$ip_address

COMPOSE_FILE="compose.yaml"
EC_RAG_SERVICE_PORT=16010
#MODEL_PATH="$WORKPATH/models"
MODEL_PATH="/home/media/models"
HF_ENDPOINT=https://hf-mirror.com


function build_docker_images() {
    cd $WORKPATH/docker_image_build
    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="server ui ecrag"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

    docker images && sleep 1s
}

function start_services() {
    export MODEL_PATH=${MODEL_PATH}
    export HOST_IP=${HOST_IP}
    export LLM_MODEL=${LLM_MODEL}
    export HF_ENDPOINT=${HF_ENDPOINT}
    export vLLM_ENDPOINT=${vLLM_ENDPOINT}
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export no_proxy="localhost,127.0.0.1,192.168.1.1"

    cd $WORKPATH/docker_compose/intel/gpu/arc

    # Start Docker containers
    docker compose -f $COMPOSE_FILE up -d > ${LOG_PATH}/start_services_with_compose.log
    sleep 20
}

function validate_rag() {
    cd $WORKPATH/tests

    # Set up the pipeline
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/settings/pipelines" \
        "active" \
        "pipeline" \
        "edgecraftrag-server" \
        '@configs/test_pipeline_local_llm.json'

    # Add data
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/data" \
        "Done" \
        "data" \
        "edgecraftrag-server" \
        '@configs/test_data.json'

    # Query (local-LLM pipeline, so failure logs come from the server container)
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/chatqna" \
        "1234567890" \
        "query" \
        "edgecraftrag-server" \
        '{"messages":"What is the test id?"}'
}

function validate_megaservice() {
    # Curl the Mega Service
    validate_services \
        "${HOST_IP}:16011/v1/chatqna" \
        "1234567890" \
        "query" \
        "edgecraftrag" \
        '{"messages":"What is the test id?"}'
}

function stop_docker() {
    cd $WORKPATH/docker_compose/intel/gpu/arc
    docker compose -f $COMPOSE_FILE down
}


function main() {
    mkdir -p $LOG_PATH

    stop_docker
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_services
    echo "EC_RAG service started" && sleep 1s

    validate_rag
    validate_megaservice

    stop_docker
    echo y | docker system prune
}

main
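
To run the suite by hand, something like the following sketch should work (the token value is a placeholder; the path assumes the repository layout above):

# Sketch: run from the repository checkout.
cd EdgeCraftRAG/tests
export HUGGINGFACEHUB_API_TOKEN="<your-hf-token>"
bash test_compose_on_arc.sh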