Skip to content

Commit

Permalink
adding the triton docker build minimal example (#242)
Browse files Browse the repository at this point in the history
  • Loading branch information
amirarsalan90 authored Mar 12, 2024
1 parent b2eb080 commit eb4308c
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 0 deletions.
10 changes: 10 additions & 0 deletions examples/usage/triton/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Image for the SGLang + Triton example: the Triton Inference Server base
# image with SGLang installed from source in editable mode.
FROM nvcr.io/nvidia/tritonserver:24.01-py3

WORKDIR /opt

# A shallow clone is enough for an editable install and keeps the layer small.
RUN git clone --depth 1 https://github.com/sgl-project/sglang.git

# Containers started from this image begin in /opt/sglang.
WORKDIR /opt/sglang

# --no-cache-dir stops pip from storing downloaded wheels in the image layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -e "python[all]" && \
    pip install --no-cache-dir datasets
41 changes: 41 additions & 0 deletions examples/usage/triton/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# sglang_triton

Build the docker image:
```
docker build -t sglang-triton .
```

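Then start the container (run this from the directory that contains `models/`, so that `./models` is mounted at `/mnt/models`):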
```
docker run -ti --gpus=all --network=host --name sglang-triton -v ./models:/mnt/models sglang-triton
```

Inside the container, launch the SGLang server:
```
cd /opt/sglang
python3 -m sglang.launch_server --model-path mistralai/Mistral-7B-Instruct-v0.2 --port 30000 --mem-fraction-static 0.9
```

From another shell on the host, open a second session in the running container and start Triton:
```
docker exec -ti sglang-triton /bin/bash
cd /mnt
tritonserver --model-repository=/mnt/models
```


Send a request to the Triton server:
```
curl -X POST http://localhost:8000/v2/models/character_generation/generate \
-H "Content-Type: application/json" \
-d '{
"inputs": [
{
"name": "INPUT_TEXT",
"datatype": "STRING",
"shape": [1],
"data": ["Name1"]
}
]
}'
```
45 changes: 45 additions & 0 deletions examples/usage/triton/models/character_generation/1/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import triton_python_backend_utils as pb_utils
import numpy
import sglang as sgl
from sglang import function, set_default_backend
from sglang.srt.constrained import build_regex_from_object

from pydantic import BaseModel

# Base URL of the SGLang server used as the default backend for the sglang
# functions defined in this module.
_SGLANG_ENDPOINT = "http://localhost:30000"
set_default_backend(sgl.RuntimeEndpoint(_SGLANG_ENDPOINT))

# Schema of the character profile to generate. It is handed to
# build_regex_from_object() in character_gen to derive the regex passed to
# sgl.gen. Documented with comments rather than a docstring so the model's
# JSON schema stays exactly as it was.
class Character(BaseModel):
    # Character's name.
    name: str
    # Character's eye color.
    eye_color: str
    # House the character belongs to.
    house: str

@function
def character_gen(s, name):
    """Ask for a Harry Potter character's details and generate a profile.

    Appends a prompt naming the character to the program state ``s``, then
    appends a generation step stored under the key ``"json_output"``. The
    generation is limited to 256 tokens and is given a regex derived from
    the ``Character`` model.

    Args:
        s: The sglang program state that the prompt and generation are
            appended to.
        name: Name of the character to describe.
    """
    # Plain ``+`` concatenation is kept on purpose so ``name`` is combined
    # with the prompt text through the same operator as before.
    intro = (
        name
        + " is a character in Harry Potter. Please fill in the following information about this character.\n"
    )
    s += intro

    character_regex = build_regex_from_object(Character)
    s += sgl.gen("json_output", max_tokens=256, regex=character_regex)


class TritonPythonModel:
    """Triton Python-backend model that forwards character names to SGLang.

    Each request carries an ``INPUT_TEXT`` tensor of character names. The
    names are run as one batch through ``character_gen`` and the resulting
    texts are returned in an ``OUTPUT_TEXT`` tensor.
    """

    def initialize(self, args):
        """Log that the model was loaded; ``args`` is not used."""
        print("Initialized.")

    def execute(self, requests):
        """Run character generation for every request.

        Args:
            requests: The inference requests handed over by Triton.

        Returns:
            A list with exactly one ``pb_utils.InferenceResponse`` per
            request, in request order. A request without an ``INPUT_TEXT``
            tensor gets an error response instead of aborting the batch.
        """
        responses = []
        for request in requests:
            tensor_in = pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT")
            if tensor_in is None:
                # Triton expects one response per request in a list, so
                # report the problem for this request only and keep
                # processing the others instead of returning early.
                responses.append(
                    pb_utils.InferenceResponse(
                        output_tensors=[],
                        error=pb_utils.TritonError(
                            "Missing required input tensor 'INPUT_TEXT'"
                        ),
                    )
                )
                continue

            # Elements may be bytes objects; decode those as UTF-8 text.
            names = [
                item.decode("utf-8") if isinstance(item, bytes) else item
                for item in tensor_in.as_numpy().tolist()
            ]

            # One argument dict per name, executed as a single batch.
            states = character_gen.run_batch([{"name": name} for name in names])
            character_strs = [state.text() for state in states]

            tensor_out = pb_utils.Tensor(
                "OUTPUT_TEXT", numpy.array(character_strs, dtype=object)
            )
            responses.append(pb_utils.InferenceResponse(output_tensors=[tensor_out]))
        return responses
23 changes: 23 additions & 0 deletions examples/usage/triton/models/character_generation/config.pbtxt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Triton model configuration for the character_generation example.
# The name matches the model directory and the URL path used in the README.
name: "character_generation"
# Served by Triton's Python backend (see 1/model.py).
backend: "python"
# Character names to generate profiles for; model.py reads this tensor by name.
input [
  {
    name: "INPUT_TEXT"
    data_type: TYPE_STRING
    # -1 marks a variable-size dimension: any number of names per request.
    dims: [ -1 ]
  }
]
# Generated text, one entry per input name; model.py writes this tensor by name.
output [
  {
    name: "OUTPUT_TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
  }
]
# One model instance, placed on GPU 0.
# NOTE(review): model.py only talks to the SGLang server over HTTP, so
# KIND_CPU may be sufficient here; confirm before changing.
instance_group [
  {
    count: 1
    kind: KIND_GPU
    gpus: [ 0 ]
  }
]

0 comments on commit eb4308c

Please sign in to comment.