From 391c4a58014486443c27517f8926394bdacae49d Mon Sep 17 00:00:00 2001
From: Sharan Shirodkar <91109427+sharanshirodkar7@users.noreply.github.com>
Date: Tue, 17 Sep 2024 09:57:06 -0400
Subject: [PATCH] Prediction Guard LLM component (#674)

Signed-off-by: sharanshirodkar7
---
 .github/workflows/docker/compose/llms-compose-cd.yaml |  4 +
 .../predictionguard/Dockerfile                        | 15 ++++
 .../text-generation/predictionguard/README.md         | 54 ++++++++++++
 .../predictionguard/__init__.py                       |  2 +
 .../predictionguard/docker_compose_llm.yaml           | 20 +++++
 .../predictionguard/entrypoint.sh                     |  8 ++
 .../predictionguard/llm_predictionguard.py            | 86 +++++++++++++++++++
 .../predictionguard/requirements.txt                  | 12 +++
 ...st_llms_text-generation_predictionguard.sh         | 68 +++++++++++++++
 9 files changed, 269 insertions(+)
 create mode 100644 comps/llms/text-generation/predictionguard/Dockerfile
 create mode 100644 comps/llms/text-generation/predictionguard/README.md
 create mode 100644 comps/llms/text-generation/predictionguard/__init__.py
 create mode 100644 comps/llms/text-generation/predictionguard/docker_compose_llm.yaml
 create mode 100644 comps/llms/text-generation/predictionguard/entrypoint.sh
 create mode 100644 comps/llms/text-generation/predictionguard/llm_predictionguard.py
 create mode 100644 comps/llms/text-generation/predictionguard/requirements.txt
 create mode 100644 tests/llms/test_llms_text-generation_predictionguard.sh

diff --git a/.github/workflows/docker/compose/llms-compose-cd.yaml b/.github/workflows/docker/compose/llms-compose-cd.yaml
index cbf463bd10..84dc250c9f 100644
--- a/.github/workflows/docker/compose/llms-compose-cd.yaml
+++ b/.github/workflows/docker/compose/llms-compose-cd.yaml
@@ -23,3 +23,7 @@ services:
     build:
       dockerfile: comps/llms/text-generation/vllm/llama_index/dependency/Dockerfile.intel_hpu
     image: ${REGISTRY:-opea}/llm-vllm-llamaindex-hpu:${TAG:-latest}
+  llm-predictionguard:
+    build:
+      dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
+    image: ${REGISTRY:-opea}/llm-textgen-predictionguard:${TAG:-latest}
diff --git a/comps/llms/text-generation/predictionguard/Dockerfile b/comps/llms/text-generation/predictionguard/Dockerfile
new file mode 100644
index 0000000000..2994a9e043
--- /dev/null
+++ b/comps/llms/text-generation/predictionguard/Dockerfile
@@ -0,0 +1,15 @@
+# Copyright (C) 2024 Prediction Guard, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+FROM python:3.11-slim
+
+COPY comps /home/comps
+
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /home/comps/llms/text-generation/predictionguard/requirements.txt
+
+ENV PYTHONPATH=$PYTHONPATH:/home
+
+WORKDIR /home/comps/llms/text-generation/predictionguard
+
+ENTRYPOINT ["bash", "entrypoint.sh"]
\ No newline at end of file
diff --git a/comps/llms/text-generation/predictionguard/README.md b/comps/llms/text-generation/predictionguard/README.md
new file mode 100644
index 0000000000..e506793d95
--- /dev/null
+++ b/comps/llms/text-generation/predictionguard/README.md
@@ -0,0 +1,54 @@
+# Introduction
+
+[Prediction Guard](https://docs.predictionguard.com) lets you use hosted open-access LLMs, LVMs, and embedding functionality with seamlessly integrated safeguards. In addition to providing scalable access to open models, Prediction Guard allows you to configure factual consistency checks, toxicity filters, PII filters, and prompt injection blocking. Join the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd) and request an API key to get started.
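+
+This microservice is a thin wrapper around the Prediction Guard Python client. As a minimal sketch (assuming the `predictionguard` package from `requirements.txt` is installed and `PREDICTIONGUARD_API_KEY` is exported in your environment), the equivalent direct call looks roughly like this:
+
+```python
+from predictionguard import PredictionGuard
+
+# The client picks up PREDICTIONGUARD_API_KEY from the environment.
+client = PredictionGuard()
+
+response = client.chat.completions.create(
+    model="Hermes-2-Pro-Llama-3-8B",
+    messages=[{"role": "user", "content": "Tell me a joke."}],
+    max_tokens=100,
+    temperature=0.7,
+    top_p=0.9,
+    top_k=50,
+)
+print(response["choices"][0]["message"]["content"])
+```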
+
+# Get Started
+
+## Build Docker Image
+
+```bash
+cd ../../..
+docker build -t opea/llm-textgen-predictionguard:latest -f comps/llms/text-generation/predictionguard/Dockerfile .
+```
+
+## Run the Prediction Guard Microservice
+
+```bash
+docker run -d -p 9000:9000 -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY --name llm-textgen-predictionguard opea/llm-textgen-predictionguard:latest
+```
+
+# Consume the Prediction Guard Microservice
+
+See the [Prediction Guard docs](https://docs.predictionguard.com/) for available model options.
+
+## Without streaming
+
+```bash
+curl -X POST http://localhost:9000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Hermes-2-Pro-Llama-3-8B",
+    "query": "Tell me a joke.",
+    "max_new_tokens": 100,
+    "temperature": 0.7,
+    "top_p": 0.9,
+    "top_k": 50,
+    "streaming": false
+  }'
+```
+
+## With streaming
+
+```bash
+curl -N -X POST http://localhost:9000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Hermes-2-Pro-Llama-3-8B",
+    "query": "Tell me a joke.",
+    "max_new_tokens": 100,
+    "temperature": 0.7,
+    "top_p": 0.9,
+    "top_k": 50,
+    "streaming": true
+  }'
+```
diff --git a/comps/llms/text-generation/predictionguard/__init__.py b/comps/llms/text-generation/predictionguard/__init__.py
new file mode 100644
index 0000000000..a246c95e79
--- /dev/null
+++ b/comps/llms/text-generation/predictionguard/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Prediction Guard, Inc.
+# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/llms/text-generation/predictionguard/docker_compose_llm.yaml b/comps/llms/text-generation/predictionguard/docker_compose_llm.yaml
new file mode 100644
index 0000000000..bde9fa10a9
--- /dev/null
+++ b/comps/llms/text-generation/predictionguard/docker_compose_llm.yaml
@@ -0,0 +1,20 @@
+# Copyright (C) 2024 Prediction Guard, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  llm:
+    image: opea/llm-textgen-predictionguard:latest
+    container_name: llm-textgen-predictionguard
+    ports:
+      - "9000:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY}
+    restart: unless-stopped
+
+networks:
+  default:
+    driver: bridge
diff --git a/comps/llms/text-generation/predictionguard/entrypoint.sh b/comps/llms/text-generation/predictionguard/entrypoint.sh
new file mode 100644
index 0000000000..8220ff6399
--- /dev/null
+++ b/comps/llms/text-generation/predictionguard/entrypoint.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2024 Prediction Guard, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+#pip --no-cache-dir install -r requirements-runtime.txt
+
+python llm_predictionguard.py
diff --git a/comps/llms/text-generation/predictionguard/llm_predictionguard.py b/comps/llms/text-generation/predictionguard/llm_predictionguard.py
new file mode 100644
index 0000000000..ea70c11bc0
--- /dev/null
+++ b/comps/llms/text-generation/predictionguard/llm_predictionguard.py
@@ -0,0 +1,86 @@
+# Copyright (C) 2024 Prediction Guard, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+
+import time
+
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import StreamingResponse
+from predictionguard import PredictionGuard
+from pydantic import BaseModel
+
+from comps import (
+    GeneratedDoc,
+    LLMParamsDoc,
+    ServiceType,
+    opea_microservices,
+    register_microservice,
+    register_statistics,
+    statistics_dict,
+)
+
+client = PredictionGuard()
+app = FastAPI()
+
+
+@register_microservice(
+    name="opea_service@llm_predictionguard",
+    service_type=ServiceType.LLM,
+    endpoint="/v1/chat/completions",
+    host="0.0.0.0",
+    port=9000,
+)
+@register_statistics(names=["opea_service@llm_predictionguard"])
+def llm_generate(input: LLMParamsDoc):
+    start = time.time()
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful assistant. Your goal is to provide accurate, detailed, and safe responses to the user's queries.",
+        },
+        {"role": "user", "content": input.query},
+    ]
+
+    if input.streaming:
+
+        async def stream_generator():
+            chat_response = ""
+            for res in client.chat.completions.create(
+                model=input.model,
+                messages=messages,
+                max_tokens=input.max_new_tokens,
+                temperature=input.temperature,
+                top_p=input.top_p,
+                top_k=input.top_k,
+                stream=True,
+            ):
+                if "choices" in res["data"] and "delta" in res["data"]["choices"][0]:
+                    delta_content = res["data"]["choices"][0]["delta"]["content"]
+                    chat_response += delta_content
+                    yield f"data: {delta_content}\n\n"
+                else:
+                    yield "data: [DONE]\n\n"
+
+        statistics_dict["opea_service@llm_predictionguard"].append_latency(time.time() - start, None)
+        return StreamingResponse(stream_generator(), media_type="text/event-stream")
+    else:
+        try:
+            response = client.chat.completions.create(
+                model=input.model,
+                messages=messages,
+                max_tokens=input.max_new_tokens,
+                temperature=input.temperature,
+                top_p=input.top_p,
+                top_k=input.top_k,
+            )
+            response_text = response["choices"][0]["message"]["content"]
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+
+        statistics_dict["opea_service@llm_predictionguard"].append_latency(time.time() - start, None)
+        return GeneratedDoc(text=response_text, prompt=input.query)
+
+
+if __name__ == "__main__":
+    opea_microservices["opea_service@llm_predictionguard"].start()
diff --git a/comps/llms/text-generation/predictionguard/requirements.txt b/comps/llms/text-generation/predictionguard/requirements.txt
new file mode 100644
index 0000000000..6c9f8340fd
--- /dev/null
+++ b/comps/llms/text-generation/predictionguard/requirements.txt
@@ -0,0 +1,12 @@
+aiohttp
+docarray
+fastapi
+opentelemetry-api
+opentelemetry-exporter-otlp
+opentelemetry-sdk
+Pillow
+predictionguard
+prometheus-fastapi-instrumentator
+shortuuid
+transformers
+uvicorn
diff --git a/tests/llms/test_llms_text-generation_predictionguard.sh b/tests/llms/test_llms_text-generation_predictionguard.sh
new file mode 100644
index 0000000000..39a66bcf43
--- /dev/null
+++ b/tests/llms/test_llms_text-generation_predictionguard.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# Copyright (C) 2024 Prediction Guard, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+set -x  # Print commands and their arguments as they are executed
+
+WORKPATH=$(dirname "$PWD")
+ip_address=$(hostname -I | awk '{print $1}')  # Use the host's first IP address
+if [ -z "$ip_address" ]; then
+    ip_address="localhost"  # Default to localhost if no IP address is found
+fi
+
+function build_docker_images() {
+    cd $WORKPATH
+    echo $(pwd)
+    docker build --no-cache -t opea/llm-pg:comps -f comps/llms/text-generation/predictionguard/Dockerfile .
+    if [ $? -ne 0 ]; then
+        echo "opea/llm-pg build failed"
+        exit 1
+    else
+        echo "opea/llm-pg built successfully"
+    fi
+}
+
+function start_service() {
+    llm_service_port=9000
+    unset http_proxy
+    docker run -d --name=test-comps-llm-pg-server \
+        -e http_proxy= -e https_proxy= \
+        -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \
+        -p 9000:9000 --ipc=host opea/llm-pg:comps
+    sleep 60  # Sleep for 1 minute to allow the service to start
+}
+
+function validate_microservice() {
+    llm_service_port=9000
+    result=$(http_proxy="" curl http://${ip_address}:${llm_service_port}/v1/chat/completions \
+        -X POST \
+        -d '{"model": "Hermes-2-Pro-Llama-3-8B", "query": "What is AI?", "streaming": false, "max_new_tokens": 100, "temperature": 0.7, "top_p": 1.0, "top_k": 50}' \
+        -H 'Content-Type: application/json')
+
+    if [[ $result == *"text"* ]]; then
+        echo "Service response is correct."
+    else
+        echo "Result wrong. Received was $result"
+        docker logs test-comps-llm-pg-server
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cid=$(docker ps -aq --filter "name=test-comps-llm-pg-*")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+}
+
+function main() {
+    stop_docker
+
+    build_docker_images
+    start_service
+
+    validate_microservice
+
+    stop_docker
+    echo y | docker system prune
+}
+
+main
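
For reference, a minimal Python client for the streaming endpoint added above might look roughly like the following sketch. It assumes the `requests` package is installed on the client side and that the service is listening on localhost:9000 as configured in `docker_compose_llm.yaml`; the service emits server-sent-event lines of the form `data: <token>`.

```python
# Sketch of an SSE consumer for the /v1/chat/completions streaming endpoint.
import requests

payload = {
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "Tell me a joke.",
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "streaming": True,
}

with requests.post("http://localhost:9000/v1/chat/completions", json=payload, stream=True) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        # Lines look like "data: <token>"; a "data: [DONE]" line may mark the end.
        if not line or not line.startswith("data: "):
            continue
        chunk = line[len("data: "):]
        if chunk == "[DONE]":
            break
        print(chunk, end="", flush=True)
```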