Skip to content

Commit

Permalink
feat: sglang integration
Browse files Browse the repository at this point in the history
  • Loading branch information
av committed Sep 13, 2024
1 parent d570b95 commit eb51529
Show file tree
Hide file tree
Showing 13 changed files with 141 additions and 3 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Harbor is a containerized LLM toolkit that allows you to run LLMs and additional

##### Backends

[Ollama](https://github.com/av/harbor/wiki/Services#ollama) ⦁︎ [llama.cpp](https://github.com/av/harbor/wiki/Services#llamacpp) ⦁︎ [vLLM](https://github.com/av/harbor/wiki/Services#vllm) ⦁︎ [TabbyAPI](https://github.com/av/harbor/wiki/Services#tabbyapi) ⦁︎ [Aphrodite Engine](https://github.com/av/harbor/wiki/Services#aphrodite-engine) ⦁︎ [mistral.rs](https://github.com/av/harbor/wiki/Services#mistralrs) ⦁︎ [openedai-speech](https://github.com/av/harbor/wiki/Services#openedai-speech) ⦁︎ [Parler](https://github.com/av/harbor/wiki/Services#parler) ⦁︎ [text-generation-inference](https://github.com/av/harbor/wiki/Services#text-generation-inference) ⦁︎ [LMDeploy](https://github.com/av/harbor/wiki/Services#lmdeploy) ⦁︎ [AirLLM](https://github.com/av/harbor/wiki/Services#airllm)
[Ollama](https://github.com/av/harbor/wiki/Services#ollama) ⦁︎ [llama.cpp](https://github.com/av/harbor/wiki/Services#llamacpp) ⦁︎ [vLLM](https://github.com/av/harbor/wiki/Services#vllm) ⦁︎ [TabbyAPI](https://github.com/av/harbor/wiki/Services#tabbyapi) ⦁︎ [Aphrodite Engine](https://github.com/av/harbor/wiki/Services#aphrodite-engine) ⦁︎ [mistral.rs](https://github.com/av/harbor/wiki/Services#mistralrs) ⦁︎ [openedai-speech](https://github.com/av/harbor/wiki/Services#openedai-speech) ⦁︎ [Parler](https://github.com/av/harbor/wiki/Services#parler) ⦁︎ [text-generation-inference](https://github.com/av/harbor/wiki/Services#text-generation-inference) ⦁︎ [LMDeploy](https://github.com/av/harbor/wiki/Services#lmdeploy) ⦁︎ [AirLLM](https://github.com/av/harbor/wiki/Services#airllm) ⦁︎ [SGLang](https://github.com/av/harbor/wiki/Services#sglang)

##### Satellites

Expand All @@ -33,7 +33,7 @@ harbor up searxng

# Run additional/alternative LLM Inference backends
# Open WebUI is automatically connected to them.
harbor up llamacpp tgi litellm vllm tabbyapi aphrodite
harbor up llamacpp tgi litellm vllm tabbyapi aphrodite sglang

# Run different Frontends
harbor up librechat chatui bionicgpt hollama
Expand All @@ -57,6 +57,7 @@ harbor aphrodite model google/gemma-2-2b-it
harbor tabbyapi model google/gemma-2-2b-it-exl2
harbor mistralrs model google/gemma-2-2b-it
harbor opint model google/gemma-2-2b-it
harbor sglang model google/gemma-2-2b-it

# Convenience tools for docker setup
harbor logs llamacpp
Expand Down
8 changes: 8 additions & 0 deletions aichat/configs/aichat.sglang.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# aichat client entry for the Harbor SGLang service.
clients:
  # SGLang is addressed through its OpenAI-compatible API.
  - type: openai-compatible
    name: sglang
    # "sglang" is the compose service name; 30000 is the port the server listens on.
    api_base: http://sglang:30000/v1
    api_key: sk-sglang
    models:
      # Quoted so an empty substitution yields "" rather than a YAML null.
      - name: "${HARBOR_AICHAT_MODEL}"

4 changes: 4 additions & 0 deletions aider/configs/aider.sglang.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Aider settings that point it at the Harbor SGLang service.
# "sglang" is the compose service name; the server listens on port 30000.
openai-api-base: "http://sglang:30000/v1"
openai-api-key: "sk-sglang"
# The model id is filled in from HARBOR_AIDER_MODEL.
model: "openai/${HARBOR_AIDER_MODEL}"
verify-ssl: false
23 changes: 23 additions & 0 deletions compose.sglang.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# SGLang inference backend for Harbor.
# Runs `python3 -m sglang.launch_server` from the lmsysorg/sglang image and
# publishes its port 30000 on HARBOR_SGLANG_HOST_PORT.
services:
  sglang:
    image: lmsysorg/sglang:${HARBOR_SGLANG_VERSION}
    container_name: ${HARBOR_CONTAINER_PREFIX}.sglang
    volumes:
      # Model/cache directories shared with the host so downloads are reused.
      - ${HARBOR_HF_CACHE}:/root/.cache/huggingface
      - ${HARBOR_VLLM_CACHE}:/root/.cache/vllm
    ports:
      # Quoted: port mappings should always be strings in Compose files.
      - "${HARBOR_SGLANG_HOST_PORT}:30000"
    environment:
      - HF_TOKEN=${HARBOR_HF_TOKEN}
    entrypoint: python3 -m sglang.launch_server
    # Folded scalar: the lines below are joined into one argument string.
    # The --port value must stay in sync with the container side of `ports`.
    command: >
      --model-path ${HARBOR_SGLANG_MODEL}
      --host 0.0.0.0
      --port 30000
      ${HARBOR_SGLANG_EXTRA_ARGS}
    # NOTE(review): ulimits and ipc settings presumably follow the upstream
    # SGLang docker recommendations - confirm before changing.
    ulimits:
      memlock: -1
      stack: 67108864
    ipc: host
    networks:
      - harbor-network
4 changes: 4 additions & 0 deletions compose.x.aider.sglang.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Cross-service override: mounts the SGLang-specific Aider config into the
# aider container.
services:
  aider:
    volumes:
      - ./aider/configs/aider.sglang.yml:/root/.aider/sglang.yml
9 changes: 9 additions & 0 deletions compose.x.sglang.nvidia.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Cross-service override: reserves every NVIDIA GPU for the sglang container.
services:
  sglang:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
4 changes: 4 additions & 0 deletions compose.x.webui.sglang.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Cross-service override: mounts the SGLang connection config into the
# webui container.
services:
  webui:
    volumes:
      - ./open-webui/configs/config.sglang.json:/app/configs/config.sglang.json
6 changes: 6 additions & 0 deletions default.env
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,12 @@ HARBOR_LMEVAL_EXTRA_ARGS=""
HARBOR_LMEVAL_MODEL_SPECIFIER=""
HARBOR_LMEVAL_MODEL_ARGS=""

# SGLang
# Host port published for the SGLang server (the container listens on 30000)
HARBOR_SGLANG_HOST_PORT=34091
# Tag of the lmsysorg/sglang image to run
HARBOR_SGLANG_VERSION="latest"
# Model passed to the server as --model-path
HARBOR_SGLANG_MODEL="google/gemma-2-2b-it"
# Extra arguments appended to the sglang.launch_server command line
HARBOR_SGLANG_EXTRA_ARGS=""

# ============================================
# Service Configuration.
# You can specify any of the service's own environment variables here.
Expand Down
30 changes: 30 additions & 0 deletions harbor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ show_help() {
echo " chatui - Configure HuggingFace ChatUI service"
echo " comfyui - Configure ComfyUI service"
echo " parler - Configure Parler service"
echo " sglang - Configure SGLang CLI"
echo " omnichain - Work with Omnichain service"
echo
echo "Service CLIs:"
Expand All @@ -55,6 +56,7 @@ show_help() {
echo " plandex - Launch Plandex CLI"
echo " cmdh - Run cmdh CLI"
echo " parllama - Launch Parllama - TUI for chatting with Ollama models"
echo " bench - Run and manage Harbor Bench"
echo " hf - Run the Harbor's Hugging Face CLI. Expanded with a few additional commands."
echo " hf dl - HuggingFaceModelDownloader CLI"
echo " hf parse-url - Parse file URL from Hugging Face"
Expand Down Expand Up @@ -2707,6 +2709,30 @@ run_lm_eval_command() {
lmeval "$@"
}

# Harbor-side management command for the sglang service (not the sglang CLI).
#
# Usage: run_sglang_command <command> [args...]
#   model [user/repo]  - get or set the model via the sglang.model setting
#   args  [args]       - get or set extra server args via sglang.extra.args
#   -h | --help | help - print usage (also shown when no command is given)
#
# Returns 0 for the commands above and 1 for an unrecognised command, so a
# typo is reported instead of being silently ignored.
run_sglang_command() {
    # ${1:-} keeps the lookup safe when the function is called with no arguments.
    case "${1:-}" in
    model)
        shift
        env_manager_alias sglang.model "$@"
        return 0
        ;;
    args)
        shift
        env_manager_alias sglang.extra.args "$@"
        return 0
        ;;
    -h|--help|help|"")
        echo "Please note that this is not sglang CLI, but a Harbor CLI to manage sglang service."
        echo
        echo "Usage: harbor sglang <command>"
        echo
        echo "Commands:"
        echo "  harbor sglang model [user/repo]   - Get or set the sglang model repository to run"
        echo "  harbor sglang args [args]         - Get or set extra args to pass to the sglang CLI"
        return 0
        ;;
    *)
        # Diagnostics go to stderr so stdout stays clean for scripted callers.
        echo "Unknown sglang command: '$1'" >&2
        echo "Run 'harbor sglang --help' for usage." >&2
        return 1
        ;;
    esac
}

# ========================================================================
# == Main script
# ========================================================================
Expand Down Expand Up @@ -2952,6 +2978,10 @@ main_entrypoint() {
shift
run_lm_eval_command "$@"
;;
sglang)
shift
run_sglang_command "$@"
;;
tunnel|t)
shift
establish_tunnel "$@"
Expand Down
18 changes: 18 additions & 0 deletions http-catalog/sglang.http
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Host-side address of the SGLang service (34091 is the default HARBOR_SGLANG_HOST_PORT)
@host = http://localhost:34091

###

# Query the OpenAI-compatible model listing endpoint
curl {{host}}/v1/models

###

# OpenAI-style chat completion request, capped at 30 generated tokens
curl {{host}}/v1/chat/completions -H 'Content-Type: application/json' -H "Authorization: Bearer sk-sglang" -d '{
"model": "anything",
"messages": [
{
"role": "user",
"content": "Bobby was born in Paris. How old is Bobby?"
}
],
"max_tokens": 30
}'
20 changes: 20 additions & 0 deletions librechat/librechat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -252,3 +252,23 @@ endpoints:
summaryModel: "togethercomputer/llama-2-7b-chat"
forcePrompt: false
modelDisplayLabel: "together.ai"


# SGLang
- name: "SGLang"
apiKey: "sk-sglang"
# "sglang" is the compose service hostname; the server listens on port 30000
baseURL: "http://sglang:30000/v1/chat/completions"
models:
default: [
""
]
# NOTE(review): the default list above holds only an empty placeholder, so the
# usable model names presumably come from this fetch - confirm against LibreChat docs.
fetch: true
titleConvo: true
titleModel: "current_model"
summarize: false
summaryModel: "current_model"
forcePrompt: false
modelDisplayLabel: "SGLang"
2 changes: 1 addition & 1 deletion open-webui/configs/config.airllm.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
{
"openai": {
"api_base_urls": [
"http://airllm:5000/v1"
Expand Down
11 changes: 11 additions & 0 deletions open-webui/configs/config.sglang.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
  "openai": {
    "api_base_urls": [
      "http://sglang:30000/v1"
    ],
    "api_keys": [
      "sk-sglang"
    ],
    "enabled": true
  }
}

0 comments on commit eb51529

Please sign in to comment.