From 00c2f041bdf9965befd35f946eba4406331a1221 Mon Sep 17 00:00:00 2001
From: ligang
Date: Wed, 4 Dec 2024 07:36:57 +0800
Subject: [PATCH 1/5] [DocIndexRetriever] enable the without-rerank flavor

Add a without-rerank flavor for customers who plan to handle all
retrieved documents with an LLM and require high DocRetriever
performance.

Signed-off-by: Li Gang
---
 .../docker_compose/intel/cpu/xeon/README.md   |  10 ++
 .../cpu/xeon/compose_without_rerank.yaml      | 103 ++++++++++++++++++
 DocIndexRetriever/retrieval_tool.py           |  32 +++++-
 3 files changed, 144 insertions(+), 1 deletion(-)
 create mode 100644 DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml

diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md
index 58354babfa..04cf7ec254 100644
--- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md
+++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md
@@ -62,6 +62,16 @@ cd GenAIExamples/DocIndexRetriever/intel/cpu/xoen/
 docker compose up -d
 ```
 
+Two types of DocRetriever pipeline are supported: `DocRetriever with Rerank` and `DocRetriever without Rerank`. The `DocRetriever without Rerank` pipeline (Embedding and Retrieval only) is offered for users who plan to pass all retrieved documents to an LLM and require high DocRetriever performance.
+In that case, start the Docker containers with `compose_without_rerank.yaml`:
+```bash
+export host_ip="YOUR IP ADDR"
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
+cd GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/
+docker compose -f compose_without_rerank.yaml up -d
+```
+
 ## 4. Validation
 
 Add Knowledge Base via HTTP Links:
diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml
new file mode 100644
index 0000000000..be67c43dca
--- /dev/null
+++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml
@@ -0,0 +1,103 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+version: "3.8"
+
+services:
+  redis-vector-db:
+    image: redis/redis-stack:7.2.0-v9
+    container_name: redis-vector-db
+    ports:
+      - "6379:6379"
+      - "8001:8001"
+  dataprep-redis-service:
+    image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
+    container_name: dataprep-redis-server
+    depends_on:
+      - redis-vector-db
+    ports:
+      - "6007:6007"
+      - "6008:6008"
+      - "6009:6009"
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      REDIS_URL: redis://redis-vector-db:6379
+      REDIS_HOST: redis-vector-db
+      INDEX_NAME: ${INDEX_NAME:-rag-redis}
+      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+  tei-embedding-service:
+    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+    container_name: tei-embedding-server
+    ports:
+      - "6006:80"
+    volumes:
+      - "./data:/data"
+    shm_size: 1g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
+  embedding:
+    image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
+    container_name: embedding-tei-server
+    ports:
+      - "6000:6000"
+    ipc: host
+    depends_on:
+      - tei-embedding-service
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
+    restart: unless-stopped
+  retriever:
+    image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
+    container_name: retriever-redis-server
+    depends_on:
+      - redis-vector-db
+    ports:
+      - "7000:7000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      REDIS_URL: redis://redis-vector-db:6379
+      INDEX_NAME: ${INDEX_NAME:-rag-redis}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
+    restart: unless-stopped
+  doc-index-retriever-server:
+#    image: ${REGISTRY:-opea}/doc-index-retriever:${TAG:-latest}
+    image: opea/kb:dev
+    container_name: doc-index-retriever-server
+    depends_on:
+      - redis-vector-db
+      - tei-embedding-service
+      - embedding
+      - retriever
+    ports:
+      - "8889:8889"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-0.0.0.0}
+      EMBEDDING_SERVICE_HOST_IP: embedding
+      EMBEDDING_SERVICE_PORT: ${EMBEDDING_SERVER_PORT:-6000}
+      RETRIEVER_SERVICE_HOST_IP: retriever
+      LOGFLAG: ${LOGFLAG}
+    ipc: host
+    restart: always
+    command: --without-rerank
+
+networks:
+  default:
+    driver: bridge
diff --git a/DocIndexRetriever/retrieval_tool.py b/DocIndexRetriever/retrieval_tool.py
index d3adc8d352..d8651d33db 100644
--- a/DocIndexRetriever/retrieval_tool.py
+++ b/DocIndexRetriever/retrieval_tool.py
@@ -1,6 +1,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+import argparse
 import asyncio
 import os
 
@@ -53,7 +54,36 @@ def add_remote_service(self):
         self.megaservice.flow_to(retriever, rerank)
         self.gateway = RetrievalToolGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
 
+    def add_remote_service_without_rerank(self):
+        embedding = MicroService(
+            name="embedding",
+            host=EMBEDDING_SERVICE_HOST_IP,
+            port=EMBEDDING_SERVICE_PORT,
+            endpoint="/v1/embeddings",
+            use_remote_service=True,
+            service_type=ServiceType.EMBEDDING,
+        )
+        retriever = MicroService(
+            name="retriever",
+            host=RETRIEVER_SERVICE_HOST_IP,
+            port=RETRIEVER_SERVICE_PORT,
+            endpoint="/v1/retrieval",
+            use_remote_service=True,
+            service_type=ServiceType.RETRIEVER,
+        )
+
+        self.megaservice.add(embedding).add(retriever)
+        self.megaservice.flow_to(embedding, retriever)
+        self.gateway = RetrievalToolGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
 
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--without-rerank", action="store_true")
+
+    args = parser.parse_args()
+
     chatqna = RetrievalToolService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
-    chatqna.add_remote_service()
+    if args.without_rerank:
+        chatqna.add_remote_service_without_rerank()
+    else:
+        chatqna.add_remote_service()

From bb40f9645eca0a3f2d64e2add6b58e51379c3d12 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 4 Dec 2024 01:52:06 +0000
Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md | 1 +
 DocIndexRetriever/retrieval_tool.py                       | 1 +
 2 files changed, 2 insertions(+)

diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md
index 04cf7ec254..3ad27345cb 100644
--- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md
+++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md
@@ -64,6 +64,7 @@ docker compose up -d
 
 Two types of DocRetriever pipeline are supported: `DocRetriever with Rerank` and `DocRetriever without Rerank`. The `DocRetriever without Rerank` pipeline (Embedding and Retrieval only) is offered for users who plan to pass all retrieved documents to an LLM and require high DocRetriever performance.
 In that case, start the Docker containers with `compose_without_rerank.yaml`:
+
 ```bash
 export host_ip="YOUR IP ADDR"
 export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
diff --git a/DocIndexRetriever/retrieval_tool.py b/DocIndexRetriever/retrieval_tool.py
index d8651d33db..b53cf88860 100644
--- a/DocIndexRetriever/retrieval_tool.py
+++ b/DocIndexRetriever/retrieval_tool.py
@@ -76,6 +76,7 @@ def add_remote_service_without_rerank(self):
         self.megaservice.flow_to(embedding, retriever)
         self.gateway = RetrievalToolGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--without-rerank", action="store_true")

From 228ffb147438189ea4efb7575a65a06cc3c439cc Mon Sep 17 00:00:00 2001
From: Li Gang
Date: Thu, 5 Dec 2024 16:38:59 +0800
Subject: [PATCH 3/5] [DocIndexRetriever] Add test script for without-rerank flavor

Signed-off-by: Li Gang
---
 .../test_compose_on_xeon_without_rerank.sh    | 147 ++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 DocIndexRetriever/tests/test_compose_on_xeon_without_rerank.sh

diff --git a/DocIndexRetriever/tests/test_compose_on_xeon_without_rerank.sh b/DocIndexRetriever/tests/test_compose_on_xeon_without_rerank.sh
new file mode 100644
index 0000000000..0298a8a55b
--- /dev/null
+++ b/DocIndexRetriever/tests/test_compose_on_xeon_without_rerank.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -e
+IMAGE_REPO=${IMAGE_REPO:-"opea"}
+IMAGE_TAG=${IMAGE_TAG:-"latest"}
+echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
+echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
+export REGISTRY=${IMAGE_REPO}
+export TAG=${IMAGE_TAG}
+
+WORKPATH=$(dirname "$PWD")
+LOG_PATH="$WORKPATH/tests"
+ip_address=$(hostname -I | awk '{print $1}')
+
+function build_docker_images() {
+    echo "Building Docker Images...."
+    cd $WORKPATH/docker_image_build
+    if [ ! -d "GenAIComps" ] ; then
+        echo "Cloning GenAIComps repository"
+        git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
+    fi
+    service_list="dataprep-redis embedding-tei retriever-redis doc-index-retriever"
+    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
+
+    docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+    docker pull redis/redis-stack:7.2.0-v9
+    docker images && sleep 1s
+
+    echo "Docker images built!"
+}
+
+function start_services() {
+    echo "Starting Docker Services...."
+    cd $WORKPATH/docker_compose/intel/cpu/xeon
+    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
+    export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
+    export REDIS_URL="redis://${ip_address}:6379"
+    export INDEX_NAME="rag-redis"
+    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export MEGA_SERVICE_HOST_IP=${ip_address}
+    export EMBEDDING_SERVICE_HOST_IP=${ip_address}
+    export RETRIEVER_SERVICE_HOST_IP=${ip_address}
+
+    # Start Docker Containers
+    docker compose -f compose_without_rerank.yaml up -d
+    sleep 5m
+    echo "Docker services started!"
+}
+
+function validate() {
+    local CONTENT="$1"
+    local EXPECTED_RESULT="$2"
+    local SERVICE_NAME="$3"
+
+    if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
+        echo "[ $SERVICE_NAME ] Content is as expected: $CONTENT."
+        echo 0
+    else
+        echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
+        echo 1
+    fi
+}
+
+function validate_megaservice() {
+    echo "===========Ingest data=================="
+    local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep" \
+        -H "Content-Type: multipart/form-data" \
+        -F 'link_list=["https://opea.dev/"]')
+    local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-xeon")
+    echo "$EXIT_CODE"
+    local EXIT_CODE="${EXIT_CODE:0-1}"
+    echo "return value is $EXIT_CODE"
+    if [ "$EXIT_CODE" == "1" ]; then
+        docker logs dataprep-redis-server | tee -a ${LOG_PATH}/dataprep-redis-service-xeon.log
+        return 1
+    fi
+
+    # Curl the Mega Service
+    echo "================Testing retriever service: Text Request ================"
+    cd $WORKPATH/tests
+    local CONTENT=$(http_proxy="" curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
+        "text": "Explain the OPEA project?"
+    }')
+    # local CONTENT=$(python test.py --host_ip ${ip_address} --request_type text)
+    local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon")
+    echo "$EXIT_CODE"
+    local EXIT_CODE="${EXIT_CODE:0-1}"
+    echo "return value is $EXIT_CODE"
+    if [ "$EXIT_CODE" == "1" ]; then
+        echo "=============Embedding container log=================="
+        docker logs embedding-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        echo "=============Retriever container log=================="
+        docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        echo "=============Doc-index-retriever container log=================="
+        docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        exit 1
+    fi
+
+    echo "================Testing retriever service: ChatCompletion Request================"
+    cd $WORKPATH/tests
+    local CONTENT=$(python test.py --host_ip ${ip_address} --request_type chat_completion)
+    local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon")
+    echo "$EXIT_CODE"
+    local EXIT_CODE="${EXIT_CODE:0-1}"
+    echo "return value is $EXIT_CODE"
+    if [ "$EXIT_CODE" == "1" ]; then
+        echo "=============Embedding container log=================="
+        docker logs embedding-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        echo "=============Retriever container log=================="
+        docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        echo "=============Doc-index-retriever container log=================="
+        docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cd $WORKPATH/docker_compose/intel/cpu/xeon
+    container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
+    for container_name in $container_list; do
+        cid=$(docker ps -aq --filter "name=$container_name")
+        echo "Stopping container $container_name"
+        if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
+    done
+}
+
+function main() {
+
+    stop_docker
+    build_docker_images
+    echo "Dump current docker ps"
+    docker ps
+    start_time=$(date +%s)
+    start_services
+    end_time=$(date +%s)
+    duration=$((end_time-start_time))
+    echo "Mega service start duration is $duration s"
+    validate_megaservice
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main

From 7b1ceae0ba511a422776e8e344008c5573c521e3 Mon Sep 17 00:00:00 2001
From: Li Gang
Date: Fri, 6 Dec 2024 07:25:19 +0800
Subject: [PATCH 4/5] Rework patch for CI test scripts

Signed-off-by: Li Gang
---
 .../docker_compose/intel/cpu/xeon/compose_without_rerank.yaml  | 3 +--
 ...ithout_rerank.sh => test_compose_without_rerank_on_xeon.sh} | 0
 2 files changed, 1 insertion(+), 2 deletions(-)
 rename DocIndexRetriever/tests/{test_compose_on_xeon_without_rerank.sh => test_compose_without_rerank_on_xeon.sh} (100%)

diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml
index be67c43dca..986fcb41af 100644
--- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml
+++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml
@@ -75,8 +75,7 @@ services:
       TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
     restart: unless-stopped
   doc-index-retriever-server:
-#    image: ${REGISTRY:-opea}/doc-index-retriever:${TAG:-latest}
-    image: opea/kb:dev
+    image: ${REGISTRY:-opea}/doc-index-retriever:${TAG:-latest}
     container_name: doc-index-retriever-server
     depends_on:
       - redis-vector-db
diff --git a/DocIndexRetriever/tests/test_compose_on_xeon_without_rerank.sh b/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh
similarity index 100%
rename from DocIndexRetriever/tests/test_compose_on_xeon_without_rerank.sh
rename to DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh

From 2962713942da24283fa6bb05a3c7cf47185eae6e Mon Sep 17 00:00:00 2001
From: Li Gang
Date: Tue, 10 Dec 2024 09:46:24 +0800
Subject: [PATCH 5/5] Rework patch to work with newly merged changes

Signed-off-by: Li Gang
---
 DocIndexRetriever/retrieval_tool.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/DocIndexRetriever/retrieval_tool.py b/DocIndexRetriever/retrieval_tool.py
index 53ceede204..9581612a50 100644
--- a/DocIndexRetriever/retrieval_tool.py
+++ b/DocIndexRetriever/retrieval_tool.py
@@ -145,7 +145,6 @@ def add_remote_service_without_rerank(self):
 
         self.megaservice.add(embedding).add(retriever)
         self.megaservice.flow_to(embedding, retriever)
-        self.gateway = RetrievalToolGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
 
 
 if __name__ == "__main__":
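
For reference, the without-rerank pipeline can also be exercised by hand, mirroring the calls made in test_compose_without_rerank_on_xeon.sh. This is only a sketch, assuming the default ports from compose_without_rerank.yaml (6007 for dataprep, 8889 for the retrieval megaservice) and that host_ip is exported as in the README:

    # Ingest a document via the dataprep service (same link the test script uses)
    curl -X POST "http://${host_ip}:6007/v1/dataprep" \
        -H "Content-Type: multipart/form-data" \
        -F 'link_list=["https://opea.dev/"]'

    # Query the retrieval megaservice (embedding -> retriever, no rerank)
    curl http://${host_ip}:8889/v1/retrievaltool -X POST \
        -H "Content-Type: application/json" \
        -d '{"text": "Explain the OPEA project?"}'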