-
Notifications
You must be signed in to change notification settings - Fork 151
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #131 from ELS-RD/feat/add-t5-support
Feat/add t5 support
- Loading branch information
Showing
23 changed files
with
2,120 additions
and
167 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,38 @@ | ||
FROM nvcr.io/nvidia/tritonserver:22.07-py3 | ||
|
||
# see .dockerignore to check what is transferred | ||
COPY . ./ | ||
|
||
RUN pip3 install -U pip && \ | ||
pip3 install nvidia-pyindex && \ | ||
pip3 install ".[GPU]" -f https://download.pytorch.org/whl/cu116/torch_stable.html --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir && \ | ||
pip3 install sentence-transformers notebook pytorch-quantization ipywidgets | ||
# Install the Python toolchain from the distribution packages.
# `update`, `install` and the cleanup are chained with `&&` inside one RUN so
# that a failure in any step aborts the build and the package index does not
# persist in the image layer. Without the `&&` before `apt-get clean`, the
# words `apt-get` and `clean` would be passed to `apt-get install` as package
# names.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
        python3-dev \
        python3-distutils \
        python3-pip \
        python3-venv && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
|
||
# Numeric IDs for the unprivileged account; both default to 1000 and can be
# overridden at build time with --build-arg UID=... / --build-arg GID=...
ARG UID=1000
ARG GID=1000
# Create group `ubuntu` and user `ubuntu` (home /home/ubuntu, bash shell,
# supplementary group sudo). The expansions are quoted so that an empty or
# malformed build argument fails loudly instead of shifting the remaining
# arguments.
RUN addgroup --gid "${GID}" ubuntu && \
    useradd -d /home/ubuntu -ms /bin/bash -g ubuntu -G sudo -u "${UID}" ubuntu
# Switch to the ubuntu user: every following instruction, and the container by
# default, runs as this user.
USER ubuntu
|
||
# Working directory for the pip installs below.
WORKDIR /build
# 1. upgrade pip itself
# 2. force-reinstall the pre-release (nightly, cu117) PyTorch wheel
# 3. add the notebook / sentence-transformers / quantization extras
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir -U pip && \
    pip3 install --no-cache-dir --force-reinstall --pre \
        --index-url https://download.pytorch.org/whl/nightly/cu117 \
        torch && \
    pip3 install --no-cache-dir \
        sentence-transformers notebook pytorch-quantization ipywidgets
|
||
# /syncback lives directly under /, which the unprivileged `ubuntu` user
# (active since the earlier `USER ubuntu`) is not allowed to write to. Create
# the directory as root, owned by ubuntu:ubuntu, then drop privileges again so
# the rest of the build and the container keep running as `ubuntu`.
USER root
RUN install -d -o ubuntu -g ubuntu /syncback
USER ubuntu
WORKDIR /transformer_deploy
|
||
# Copy only the packaging metadata and the package marker files ahead of the
# full source tree, so the dependency-install layer that follows is rebuilt
# only when one of these files changes.
COPY ./setup.py ./requirements.txt ./requirements_gpu.txt ./
COPY ./src/__init__.py ./src/
COPY ./src/transformer_deploy/__init__.py ./src/transformer_deploy/
|
||
# Install the base requirements, then nvidia-pyindex, then the GPU
# requirements. nvidia-pyindex is installed before requirements_gpu.txt,
# presumably so that NVIDIA-hosted packages listed there can be resolved;
# keep this order. --no-cache-dir is passed on every call (as in the other
# pip invocations of this file) so pip's download cache is not stored in the
# image layer.
RUN pip3 install --no-cache-dir -r requirements.txt && \
    pip3 install --no-cache-dir nvidia-pyindex && \
    pip3 install --no-cache-dir -r requirements_gpu.txt
|
||
# Copy the remaining build context into the current WORKDIR. This is done last
# so that source edits do not invalidate the dependency layers above.
COPY . .
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
0.5.4 | ||
0.6.0 |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
"""Build an inference request with a binary BYTES input, save it, and send it.

The request body is a JSON header immediately followed by the raw tensor
bytes. The length of the JSON part is announced to the server through the
``Inference-Header-Content-Length`` HTTP header. The script:

1. builds the body and prints it,
2. writes it to ``t5_query_body.bin`` in the current working directory,
3. prints an equivalent ``curl`` command,
4. sends the same body with ``requests`` and prints the server response.
"""
import json
import struct

import requests


text: str = "My name is Wolfgang and I live in Berlin"

context_text: bytes = text.encode("UTF-8")

# BYTES element layout: 4-byte little-endian length prefix, then the raw bytes.
context_text_struct: bytes = struct.pack("<I", len(context_text)) + context_text

len_context_text_struct = len(context_text_struct)

data_struct = context_text_struct

request_data = {
    "inputs": [
        {
            "name": "TEXT",
            "shape": [1],
            "datatype": "BYTES",
            # size in bytes of the binary payload appended after the JSON header
            "parameters": {"binary_data_size": len_context_text_struct},
        },
    ],
    "outputs": [{"name": "OUTPUT_TEXT", "parameters": {"binary_data": False}}],
}

# Serialize the JSON header exactly once so that the length announced in the
# HTTP header always matches the bytes that are actually sent.
header_json: bytes = json.dumps(request_data).encode()

data = header_json + data_struct

print(data)


with open("t5_query_body.bin", "wb") as f:
    f.write(data)


# NOTE(review): the file above is written to the current working directory,
# while the curl command below reads it from demo/generative-model/ - confirm
# from which directory this script and the curl command are meant to be run.
curl = f"""
curl -X POST http://localhost:8000/v2/models/t5-dec-if-node_onnx_generate/versions/1/infer \
--data-binary "@demo/generative-model/t5_query_body.bin" \
--header "Inference-Header-Content-Length: {len(header_json)}"
"""
print(curl)


# `@file` is curl syntax; requests would send that string literally, so the
# body bytes are passed directly. The header value must be the decimal length
# as text, not a raw byte encoding of the integer.
res = requests.post(
    url="http://localhost:8000/v2/models/t5-dec-if-node_onnx_generate/versions/1/infer",
    data=data,
    headers={"Inference-Header-Content-Length": str(len(header_json))},
)
# Surface HTTP errors instead of silently discarding the response.
res.raise_for_status()
print(res.text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
onnxruntime==1.12.0 | ||
onnxruntime==1.13.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
onnxruntime-gpu==1.12.0 | ||
onnxruntime-gpu==1.13.1 | ||
nvidia-tensorrt==8.4.1.5 | ||
onnx_graphsurgeon | ||
polygraphy | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.