Commit b6cfe69
1 parent: a646a31
Co-authored-by: Mikyo King <mikyo@arize.com>
Showing 5 changed files with 157 additions and 22 deletions.
```diff
@@ -11,6 +11,7 @@
   "instrumentator",
   "Instrumentor",
   "langchain",
+  "litellm",
   "llms",
   "nextjs",
   "openinference",
```
...entation/openinference-instrumentation-litellm/examples/litellm-proxy/README.md (63 additions, 0 deletions)
# LiteLLM Proxy Server

Use [LiteLLM Proxy](https://docs.litellm.ai/docs/simple_proxy) to log OpenAI, Azure, Vertex, and Bedrock calls (100+ LLMs) to Arize.

Use LiteLLM Proxy for:

- Calling 100+ LLMs (OpenAI, Azure, Vertex, Bedrock, etc.) in the OpenAI ChatCompletions & Completions format
- Automatically logging all requests to Arize AI
- Providing a central self-hosted server for calling LLMs and logging to Arize

## Step 1. Create a config for the LiteLLM proxy

LiteLLM requires a config with all your models defined. We will call this file `litellm_config.yaml`.

[Detailed docs on how to set up the LiteLLM config are here.](https://docs.litellm.ai/docs/proxy/configs)

```yaml
model_list:
  - model_name: gpt-4
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  success_callback: ["arize"] # 👈 Set Arize AI as a callback

environment_variables: # 👈 Set Arize AI env vars
  ARIZE_SPACE_KEY: "d0*****"
  ARIZE_API_KEY: "141a****"
```
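The same config can also be generated from Python, which is convenient when scripting setup. A minimal sketch, not part of this commit, assuming PyYAML is installed:

```python
# Hypothetical helper: write litellm_config.yaml from Python.
# The keys mirror the YAML shown above.
import yaml

config = {
    "model_list": [
        {
            "model_name": "gpt-4",
            "litellm_params": {
                "model": "openai/fake",
                "api_key": "fake-key",
                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
            },
        }
    ],
    "litellm_settings": {"success_callback": ["arize"]},
    "environment_variables": {
        "ARIZE_SPACE_KEY": "d0*****",
        "ARIZE_API_KEY": "141a****",
    },
}

with open("litellm_config.yaml", "w") as f:
    yaml.safe_dump(config, f, sort_keys=False)
```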
## Step 2. Start the LiteLLM proxy

```shell
docker run \
    -v $(pwd)/litellm_config.yaml:/app/config.yaml \
    -p 4000:4000 \
    ghcr.io/berriai/litellm:main-latest \
    --config /app/config.yaml --detailed_debug
```
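Before testing, you can check that the proxy came up. A quick sketch, assuming the proxy exposes the OpenAI-compatible `/v1/models` endpoint on `localhost:4000` and that `requests` is installed:

```python
# Hypothetical liveness check against the proxy started in Step 2.
import requests

resp = requests.get(
    "http://localhost:4000/v1/models",
    headers={"Authorization": "Bearer sk-1234"},  # any token works if no master key is set
)
print(resp.status_code, resp.json())
```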
## Step 3. Test it: make a `/chat/completions` request to the LiteLLM proxy

```shell
curl -i http://localhost:4000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer sk-1234" \
    -d '{
        "model": "gpt-4",
        "messages": [
            {"role": "user", "content": "Hello, Claude gm!"}
        ]
    }'
```
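Because the proxy speaks the OpenAI API, the same request works from the OpenAI Python SDK. A minimal sketch, assuming `openai>=1.0` is installed and the proxy from Step 2 is running locally:

```python
# Point the OpenAI client at the LiteLLM proxy instead of api.openai.com.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:4000/v1", api_key="sk-1234")

response = client.chat.completions.create(
    model="gpt-4",  # routed by the proxy according to litellm_config.yaml
    messages=[{"role": "user", "content": "Hello, Claude gm!"}],
)
print(response.choices[0].message.content)
```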
## Expected output on Arize AI

<img width="1283" alt="Xnapper-2024-07-23-17 07 34" src="https://github.com/user-attachments/assets/7460bc2b-7f4f-4ec4-b966-2bf33a26ded5">

## Additional Resources

- [LiteLLM Arize AI docs](https://docs.litellm.ai/docs/observability/arize_integration)
python/instrumentation/openinference-instrumentation-litellm/examples/litellm.py (69 additions, 0 deletions)
```python
import asyncio
import os

import litellm
import phoenix as px

# Set the OpenAI API key (replace with your actual key)
os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_API_KEY"

# Launch Phoenix app
session = px.launch_app()

# Import OpenTelemetry components
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter  # noqa: E402
from opentelemetry.sdk.trace import TracerProvider  # noqa: E402
from opentelemetry.sdk.trace.export import SimpleSpanProcessor  # noqa: E402

from openinference.instrumentation.litellm import LiteLLMInstrumentor  # noqa: E402

# Set up OpenTelemetry tracing
endpoint = "http://127.0.0.1:6006/v1/traces"
tracer_provider = TracerProvider()
tracer_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter(endpoint)))
LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider)

# Simple single-message completion call
litellm.completion(
    model="gpt-3.5-turbo", messages=[{"content": "What's the capital of China?", "role": "user"}]
)

# Multi-message conversation completion call with an added param
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"content": "Hello, I want to bake a cake", "role": "user"},
        {"content": "Hello, I can pull up some recipes for cakes.", "role": "assistant"},
        {"content": "No actually I want to make a pie", "role": "user"},
    ],
    temperature=0.7,
)

# Multi-message conversation acompletion call with added params
# (wrapped in asyncio.run so the async call works at module level)
asyncio.run(
    litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[
            {"content": "Hello, I want to bake a cake", "role": "user"},
            {"content": "Hello, I can pull up some recipes for cakes.", "role": "assistant"},
            {"content": "No actually I want to make a pie", "role": "user"},
        ],
        temperature=0.7,
        max_tokens=20,
    )
)

# Completion with retries
litellm.completion_with_retries(
    model="gpt-3.5-turbo",
    messages=[{"content": "What's the highest grossing film ever", "role": "user"}],
)

# Embedding call
litellm.embedding(model="text-embedding-ada-002", input=["good morning from litellm"])

# Asynchronous embedding call
asyncio.run(litellm.aembedding(model="text-embedding-ada-002", input=["good morning from litellm"]))

# Image generation call
litellm.image_generation(model="dall-e-2", prompt="cute baby otter")

# Asynchronous image generation call
asyncio.run(litellm.aimage_generation(model="dall-e-2", prompt="cute baby otter"))
```
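After the calls above run, the traces land in the local Phoenix app. A short follow-up sketch, not part of the committed example, assuming the launched `session` exposes a `url` attribute:

```python
# Open the Phoenix UI to inspect the collected LiteLLM spans.
print(f"View traces at {session.url}")

# Tear the instrumentation back down when finished.
LiteLLMInstrumentor().uninstrument()
```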