Prediction Guard LLM component (#674)
Signed-off-by: sharanshirodkar7 <ssharanshirodkar7@gmail.com>
sharanshirodkar7 authored Sep 17, 2024
1 parent 191061b commit 391c4a5
Showing 9 changed files with 269 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/docker/compose/llms-compose-cd.yaml
@@ -23,3 +23,7 @@ services:
    build:
      dockerfile: comps/llms/text-generation/vllm/llama_index/dependency/Dockerfile.intel_hpu
    image: ${REGISTRY:-opea}/llm-vllm-llamaindex-hpu:${TAG:-latest}
  llm-predictionguard:
    build:
      dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
    image: ${REGISTRY:-opea}/llm-textgen-predictionguard:${TAG:-latest}
15 changes: 15 additions & 0 deletions comps/llms/text-generation/predictionguard/Dockerfile
@@ -0,0 +1,15 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

COPY comps /home/comps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/comps/llms/text-generation/predictionguard/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home

WORKDIR /home/comps/llms/text-generation/predictionguard

ENTRYPOINT ["bash", "entrypoint.sh"]
54 changes: 54 additions & 0 deletions comps/llms/text-generation/predictionguard/README.md
@@ -0,0 +1,54 @@
# Introduction

[Prediction Guard](https://docs.predictionguard.com) provides hosted, open-access LLMs, LVMs, and embedding functionality with seamlessly integrated safeguards. In addition to scalable access to open models, Prediction Guard lets you configure factual consistency checks, toxicity filters, PII filters, and prompt injection blocking. Join the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd) and request an API key to get started.
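
For context, the microservice added in this PR simply forwards chat parameters to the Prediction Guard Python SDK. Below is a minimal sketch of the equivalent direct SDK call, mirroring the calls made in `llm_predictionguard.py` later in this diff; it assumes the `predictionguard` package is installed, `PREDICTIONGUARD_API_KEY` is exported, and that `Hermes-2-Pro-Llama-3-8B` is still an available model (see the docs for current options).

```python
# Sketch only (not part of this PR): call Prediction Guard directly via its SDK.
# Assumes PREDICTIONGUARD_API_KEY is set in the environment and the model name is still offered.
from predictionguard import PredictionGuard

client = PredictionGuard()  # reads PREDICTIONGUARD_API_KEY from the environment

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Tell me a joke."},
]

# Same call shape the microservice uses internally.
response = client.chat.completions.create(
    model="Hermes-2-Pro-Llama-3-8B",
    messages=messages,
    max_tokens=100,
    temperature=0.7,
    top_p=0.9,
    top_k=50,
)
print(response["choices"][0]["message"]["content"])
```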

# Get Started

## Build Docker Image

```bash
cd ../../..
docker build -t opea/llm-textgen-predictionguard:latest -f comps/llms/text-generation/predictionguard/Dockerfile .
```

## Run the Prediction Guard Microservice

```bash
docker run -d -p 9000:9000 -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY --name llm-textgen-predictionguard opea/llm-textgen-predictionguard:latest
```

# Consume the Prediction Guard Microservice

See the [Prediction Guard docs](https://docs.predictionguard.com/) for available model options.

## Without streaming

```bash
curl -X POST http://localhost:9000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "Hermes-2-Pro-Llama-3-8B",
        "query": "Tell me a joke.",
        "max_new_tokens": 100,
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 50,
        "streaming": false
    }'
```

## With streaming

```bash
curl -N -X POST http://localhost:9000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "Hermes-2-Pro-Llama-3-8B",
        "query": "Tell me a joke.",
        "max_new_tokens": 100,
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 50,
        "streaming": true
    }'
```
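
For Python clients, a hedged sketch of the same two requests using the `requests` library is shown below. The endpoint, payload fields (including the `streaming` flag read by the service), and the `data: ...` server-sent-event framing come from the microservice code in this PR; the host/port, model name, and the exact shape of the non-streaming JSON response are assumptions you may need to adjust.

```python
# Sketch only: consume the microservice from Python with the requests library.
# Assumes the container from the previous section is running on localhost:9000.
import requests

URL = "http://localhost:9000/v1/chat/completions"
payload = {
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "Tell me a joke.",
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
}

# Non-streaming: the test script in this PR checks the response for a "text" field.
resp = requests.post(URL, json={**payload, "streaming": False}, timeout=120)
resp.raise_for_status()
print(resp.json().get("text", resp.text))

# Streaming: the service emits server-sent events of the form "data: <token>".
with requests.post(URL, json={**payload, "streaming": True}, stream=True, timeout=120) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue
        chunk = line[len("data: "):]
        if chunk == "[DONE]":
            break
        print(chunk, end="", flush=True)
```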
2 changes: 2 additions & 0 deletions comps/llms/text-generation/predictionguard/__init__.py
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0
20 changes: 20 additions & 0 deletions comps/llms/text-generation/predictionguard/docker_compose_llm.yaml
@@ -0,0 +1,20 @@
# Copyright (C) 2024 Prediction Guard, Inc
# SPDX-License-Identifier: Apache-2.0

services:
  llm:
    image: opea/llm-textgen-predictionguard:latest
    container_name: llm-textgen-predictionguard
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY}
    restart: unless-stopped

networks:
  default:
    driver: bridge
8 changes: 8 additions & 0 deletions comps/llms/text-generation/predictionguard/entrypoint.sh
@@ -0,0 +1,8 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

#pip --no-cache-dir install -r requirements-runtime.txt

python llm_predictionguard.py
86 changes: 86 additions & 0 deletions comps/llms/text-generation/predictionguard/llm_predictionguard.py
@@ -0,0 +1,86 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0


import time

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from predictionguard import PredictionGuard
from pydantic import BaseModel

from comps import (
    GeneratedDoc,
    LLMParamsDoc,
    ServiceType,
    opea_microservices,
    register_microservice,
    register_statistics,
    statistics_dict,
)

client = PredictionGuard()
app = FastAPI()


@register_microservice(
    name="opea_service@llm_predictionguard",
    service_type=ServiceType.LLM,
    endpoint="/v1/chat/completions",
    host="0.0.0.0",
    port=9000,
)
@register_statistics(names=["opea_service@llm_predictionguard"])
def llm_generate(input: LLMParamsDoc):
    start = time.time()

    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant. Your goal is to provide accurate, detailed, and safe responses to the user's queries.",
        },
        {"role": "user", "content": input.query},
    ]

    if input.streaming:

        async def stream_generator():
            chat_response = ""
            for res in client.chat.completions.create(
                model=input.model,
                messages=messages,
                max_tokens=input.max_new_tokens,
                temperature=input.temperature,
                top_p=input.top_p,
                top_k=input.top_k,
                stream=True,
            ):
                if "choices" in res["data"] and "delta" in res["data"]["choices"][0]:
                    delta_content = res["data"]["choices"][0]["delta"]["content"]
                    chat_response += delta_content
                    yield f"data: {delta_content}\n\n"
                else:
                    yield "data: [DONE]\n\n"

        statistics_dict["opea_service@llm_predictionguard"].append_latency(time.time() - start, None)
        return StreamingResponse(stream_generator(), media_type="text/event-stream")
    else:
        try:
            response = client.chat.completions.create(
                model=input.model,
                messages=messages,
                max_tokens=input.max_new_tokens,
                temperature=input.temperature,
                top_p=input.top_p,
                top_k=input.top_k,
            )
            response_text = response["choices"][0]["message"]["content"]
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e))

        statistics_dict["opea_service@llm_predictionguard"].append_latency(time.time() - start, None)
        return GeneratedDoc(text=response_text, prompt=input.query)


if __name__ == "__main__":
    opea_microservices["opea_service@llm_predictionguard"].start()
12 changes: 12 additions & 0 deletions comps/llms/text-generation/predictionguard/requirements.txt
@@ -0,0 +1,12 @@
aiohttp
docarray
fastapi
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
Pillow
predictionguard
prometheus-fastapi-instrumentator
shortuuid
transformers
uvicorn
68 changes: 68 additions & 0 deletions tests/llms/test_llms_text-generation_predictionguard.sh
@@ -0,0 +1,68 @@
#!/bin/bash
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

set -x # Print commands and their arguments as they are executed

WORKPATH=$(dirname "$PWD")
ip_address=$(hostname -I | awk '{print $1}') # Adjust to a more reliable command
if [ -z "$ip_address" ]; then
ip_address="localhost" # Default to localhost if IP address is empty
fi

function build_docker_images() {
    cd $WORKPATH
    echo $(pwd)
    docker build --no-cache -t opea/llm-pg:comps -f comps/llms/text-generation/predictionguard/Dockerfile .
    if [ $? -ne 0 ]; then
        echo "opea/llm-pg build failed"
        exit 1
    else
        echo "opea/llm-pg built successfully"
    fi
}

function start_service() {
    llm_service_port=9000
    unset http_proxy
    docker run -d --name=test-comps-llm-pg-server \
        -e http_proxy= -e https_proxy= \
        -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \
        -p 9000:9000 --ipc=host opea/llm-pg:comps
    sleep 60  # Sleep for 1 minute to allow the service to start
}

function validate_microservice() {
    llm_service_port=9000
    result=$(http_proxy="" curl http://${ip_address}:${llm_service_port}/v1/chat/completions \
        -X POST \
        -d '{"model": "Hermes-2-Pro-Llama-3-8B", "query": "What is AI?", "streaming": false, "max_new_tokens": 100, "temperature": 0.7, "top_p": 1.0, "top_k": 50}' \
        -H 'Content-Type: application/json')

    if [[ $result == *"text"* ]]; then
        echo "Service response is correct."
    else
        echo "Result wrong. Received was $result"
        docker logs test-comps-llm-pg-server
        exit 1
    fi
}

function stop_docker() {
    cid=$(docker ps -aq --filter "name=test-comps-llm-pg-*")
    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
    stop_docker

    build_docker_images
    start_service

    validate_microservice

    stop_docker
    echo y | docker system prune
}

main
