Prediction Guard LLM component (#674)
Signed-off-by: sharanshirodkar7 <ssharanshirodkar7@gmail.com>
1 parent: 191061b
Commit: 391c4a5
Showing 9 changed files with 269 additions and 0 deletions.
15 changes: 15 additions & 0 deletions
comps/llms/text-generation/predictionguard/Dockerfile
@@ -0,0 +1,15 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

COPY comps /home/comps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/comps/llms/text-generation/predictionguard/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home

WORKDIR /home/comps/llms/text-generation/predictionguard

ENTRYPOINT ["bash", "entrypoint.sh"]
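Note that `COPY comps /home/comps` expects the build context to be the repository root, which is why the build command in the README below starts with `cd ../../..`.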
54 changes: 54 additions & 0 deletions
comps/llms/text-generation/predictionguard/README.md
@@ -0,0 +1,54 @@
# Introduction

[Prediction Guard](https://docs.predictionguard.com) allows you to utilize hosted open-access LLMs, LVMs, and embedding functionality with seamlessly integrated safeguards. In addition to providing scalable access to open models, Prediction Guard allows you to configure factual consistency checks, toxicity filters, PII filters, and prompt injection blocking. Join the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd) and request an API key to get started.
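Before wiring the microservice in, you can sanity-check your API key against the hosted API with the same client this component uses. A minimal sketch, mirroring the calls made in `llm_predictionguard.py` below (the model name is one of the hosted options; see the docs for the current list):

```python
# Minimal direct-SDK check; PREDICTIONGUARD_API_KEY must be set in the
# environment, exactly as the microservice expects.
from predictionguard import PredictionGuard

client = PredictionGuard()
response = client.chat.completions.create(
    model="Hermes-2-Pro-Llama-3-8B",
    messages=[{"role": "user", "content": "Tell me a joke."}],
    max_tokens=100,
)
print(response["choices"][0]["message"]["content"])
```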
# Get Started

## Build Docker Image

```bash
cd ../../..
docker build -t opea/llm-textgen-predictionguard:latest -f comps/llms/text-generation/predictionguard/Dockerfile .
```

## Run the Prediction Guard Microservice

```bash
docker run -d -p 9000:9000 -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY --name llm-textgen-predictionguard opea/llm-textgen-predictionguard:latest
```

# Consume the Prediction Guard Microservice

See the [Prediction Guard docs](https://docs.predictionguard.com/) for available model options.

## Without streaming

```bash
curl -X POST http://localhost:9000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "Tell me a joke.",
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "streaming": false
  }'
```
## With streaming

```bash
curl -N -X POST http://localhost:9000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "Tell me a joke.",
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "streaming": true
  }'
```
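The streaming endpoint emits server-sent events in the `data: ...` format produced by `llm_predictionguard.py` below, ending with `data: [DONE]`. A minimal Python consumer sketch, assuming the `requests` package is installed and the service is running on localhost:9000:

```python
# Sketch of consuming the SSE stream from this microservice.
import requests

payload = {
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "Tell me a joke.",
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "streaming": True,
}

with requests.post(
    "http://localhost:9000/v1/chat/completions", json=payload, stream=True
) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line:
            continue  # blank separator lines between SSE events
        data = line.removeprefix("data: ")
        if data == "[DONE]":
            break
        print(data, end="", flush=True)
print()
```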
2 changes: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0
20 changes: 20 additions & 0 deletions
comps/llms/text-generation/predictionguard/docker_compose_llm.yaml
@@ -0,0 +1,20 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

services:
  llm:
    image: opea/llm-textgen-predictionguard:latest
    container_name: llm-textgen-predictionguard
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY}
    restart: unless-stopped

networks:
  default:
    driver: bridge
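With `PREDICTIONGUARD_API_KEY` exported in your shell, the same container can be started from this directory with `docker compose -f docker_compose_llm.yaml up -d`.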
8 changes: 8 additions & 0 deletions
comps/llms/text-generation/predictionguard/entrypoint.sh
@@ -0,0 +1,8 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

#pip --no-cache-dir install -r requirements-runtime.txt

python llm_predictionguard.py
86 changes: 86 additions & 0 deletions
comps/llms/text-generation/predictionguard/llm_predictionguard.py
@@ -0,0 +1,86 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0


import time

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from predictionguard import PredictionGuard
from pydantic import BaseModel

from comps import (
    GeneratedDoc,
    LLMParamsDoc,
    ServiceType,
    opea_microservices,
    register_microservice,
    register_statistics,
    statistics_dict,
)

# The Prediction Guard client reads PREDICTIONGUARD_API_KEY from the environment.
client = PredictionGuard()
app = FastAPI()


@register_microservice(
    name="opea_service@llm_predictionguard",
    service_type=ServiceType.LLM,
    endpoint="/v1/chat/completions",
    host="0.0.0.0",
    port=9000,
)
@register_statistics(names=["opea_service@llm_predictionguard"])
def llm_generate(input: LLMParamsDoc):
    start = time.time()

    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant. Your goal is to provide accurate, detailed, and safe responses to the user's queries.",
        },
        {"role": "user", "content": input.query},
    ]

    if input.streaming:

        async def stream_generator():
            chat_response = ""
            for res in client.chat.completions.create(
                model=input.model,
                messages=messages,
                max_tokens=input.max_new_tokens,
                temperature=input.temperature,
                top_p=input.top_p,
                top_k=input.top_k,
                stream=True,
            ):
                if "choices" in res["data"] and "delta" in res["data"]["choices"][0]:
                    delta_content = res["data"]["choices"][0]["delta"]["content"]
                    chat_response += delta_content
                    # Relay each token to the client as a server-sent event.
                    yield f"data: {delta_content}\n\n"
                else:
                    yield "data: [DONE]\n\n"

        statistics_dict["opea_service@llm_predictionguard"].append_latency(time.time() - start, None)
        return StreamingResponse(stream_generator(), media_type="text/event-stream")
    else:
        try:
            response = client.chat.completions.create(
                model=input.model,
                messages=messages,
                max_tokens=input.max_new_tokens,
                temperature=input.temperature,
                top_p=input.top_p,
                top_k=input.top_k,
            )
            response_text = response["choices"][0]["message"]["content"]
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e))

        statistics_dict["opea_service@llm_predictionguard"].append_latency(time.time() - start, None)
        return GeneratedDoc(text=response_text, prompt=input.query)


if __name__ == "__main__":
    opea_microservices["opea_service@llm_predictionguard"].start()
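For reference, here is a hypothetical pydantic sketch of the request shape the handler above reads. The real `LLMParamsDoc` is defined in `comps`, and the defaults below are illustrative assumptions taken from the README examples, not the actual definition:

```python
# Hypothetical sketch only; the real LLMParamsDoc lives in `comps`.
# Field names are taken from the handler above, defaults are assumptions.
from pydantic import BaseModel


class LLMParamsSketch(BaseModel):
    model: str                 # e.g. "Hermes-2-Pro-Llama-3-8B"
    query: str                 # the user prompt
    max_new_tokens: int = 100  # assumed default
    temperature: float = 0.7   # assumed default
    top_p: float = 0.9         # assumed default
    top_k: int = 50            # assumed default
    streaming: bool = False    # assumed default
```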
12 changes: 12 additions & 0 deletions
comps/llms/text-generation/predictionguard/requirements.txt
@@ -0,0 +1,12 @@
aiohttp
docarray
fastapi
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
Pillow
predictionguard
prometheus-fastapi-instrumentator
shortuuid
transformers
uvicorn
68 changes: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
#!/bin/bash
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

set -x  # Print commands and their arguments as they are executed

WORKPATH=$(dirname "$PWD")
ip_address=$(hostname -I | awk '{print $1}')  # Use the host's first reported IP address
if [ -z "$ip_address" ]; then
    ip_address="localhost"  # Default to localhost if no IP address is found
fi

function build_docker_images() {
    cd $WORKPATH
    echo $(pwd)
    docker build --no-cache -t opea/llm-pg:comps -f comps/llms/text-generation/predictionguard/Dockerfile .
    if [ $? -ne 0 ]; then
        echo "opea/llm-pg build failed"
        exit 1
    else
        echo "opea/llm-pg built successfully"
    fi
}

function start_service() {
    llm_service_port=9000
    unset http_proxy
    docker run -d --name=test-comps-llm-pg-server \
        -e http_proxy= -e https_proxy= \
        -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \
        -p 9000:9000 --ipc=host opea/llm-pg:comps
    sleep 60  # Sleep for 1 minute to allow the service to start
}

function validate_microservice() {
    llm_service_port=9000
    result=$(http_proxy="" curl http://${ip_address}:${llm_service_port}/v1/chat/completions \
        -X POST \
        -d '{"model": "Hermes-2-Pro-Llama-3-8B", "query": "What is AI?", "streaming": false, "max_new_tokens": 100, "temperature": 0.7, "top_p": 1.0, "top_k": 50}' \
        -H 'Content-Type: application/json')

    if [[ $result == *"text"* ]]; then
        echo "Service response is correct."
    else
        echo "Result wrong. Received: $result"
        docker logs test-comps-llm-pg-server
        exit 1
    fi
}

function stop_docker() {
    cid=$(docker ps -aq --filter "name=test-comps-llm-pg-*")
    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
    stop_docker

    build_docker_images
    start_service

    validate_microservice

    stop_docker
    echo y | docker system prune
}

main
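Note: the test assumes `PREDICTIONGUARD_API_KEY` is exported in the calling environment; without it the container starts, but the validation request will fail.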