diff --git a/docker/Dockerfile b/docker/Dockerfile index a6d6382d7a..333f585047 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -47,6 +47,7 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \ python$PYTHON_VERSION-venv \ openjdk-17-jdk \ curl \ + git \ && rm -rf /var/lib/apt/lists/* # Make the virtual environment and "activating" it by adding it first to the path. @@ -62,26 +63,29 @@ RUN export USE_CUDA=1 ARG CUDA_VERSION="" -RUN TORCH_VER=$(curl --silent --location https://pypi.org/pypi/torch/json | python -c "import sys, json, pkg_resources; releases = json.load(sys.stdin)['releases']; print(sorted(releases, key=pkg_resources.parse_version)[-1])") && \ - TORCH_VISION_VER=$(curl --silent --location https://pypi.org/pypi/torchvision/json | python -c "import sys, json, pkg_resources; releases = json.load(sys.stdin)['releases']; print(sorted(releases, key=pkg_resources.parse_version)[-1])") && \ +RUN git clone --depth 1 https://github.com/pytorch/serve.git + +WORKDIR "serve" + +RUN \ if echo "$BASE_IMAGE" | grep -q "cuda:"; then \ # Install CUDA version specific binary when CUDA version is specified as a build arg if [ "$CUDA_VERSION" ]; then \ - python -m pip install --no-cache-dir torch==$TORCH_VER+$CUDA_VERSION torchvision==$TORCH_VISION_VER+$CUDA_VERSION -f https://download.pytorch.org/whl/torch_stable.html; \ - # Install the binary with the latest CUDA version support + python ./ts_scripts/install_dependencies.py --cuda $CUDA_VERSION; \ + # Install the binary with the latest CPU image on a CUDA base image else \ - python -m pip install --no-cache-dir torch torchvision; \ + python ./ts_scripts/install_dependencies.py; \ fi; \ - python -m pip install --no-cache-dir -r https://raw.githubusercontent.com/pytorch/serve/master/requirements/common.txt; \ # Install the CPU binary else \ - python -m pip install --no-cache-dir torch==$TORCH_VER+cpu torchvision==$TORCH_VISION_VER+cpu -f https://download.pytorch.org/whl/torch_stable.html; \ + python ./ts_scripts/install_dependencies.py; \ fi -RUN python -m pip install --no-cache-dir captum torchtext torchserve torch-model-archiver pyyaml +# Make sure latest version of torchserve is uploaded before running this +RUN python -m pip install --no-cache-dir torchserve torch-model-archiver torch-workflow-archiver # Final image for production -FROM ${BASE_IMAGE} AS runtime-image +FROM ${BASE_IMAGE} AS runtime-image # Re-state ARG PYTHON_VERSION to make it active in this build-stage (uses default define at the top) ARG PYTHON_VERSION ENV PYTHONUNBUFFERED TRUE diff --git a/examples/Huggingface_Transformers/README.md b/examples/Huggingface_Transformers/README.md index 4a24f1fe09..c278973f55 100644 --- a/examples/Huggingface_Transformers/README.md +++ b/examples/Huggingface_Transformers/README.md @@ -10,7 +10,7 @@ We borrowed ideas to write a custom handler for transformers from tutorial prese To get started [install Torchserve](https://github.com/pytorch/serve) and then - `pip install transformers==4.6.0` + `pip install -r requirements.txt` ### Objectives 1. How to package a transformer into a torch model archive (.mar) file (eager mode or Torchscript) with `torch-model-archiver` @@ -315,7 +315,6 @@ When a json file is passed as a request format to the curl, Torchserve unwraps t In the setup_config.json, specify `"BetterTransformer":true,`. -Note: make sure to install [HuggingFace Optimum] `pip install optimum` [Better Transformer](https://pytorch.org/blog/a-better-transformer-for-fast-transformer-encoder-inference/) from PyTorch is integrated into [Huggingface Optimum](https://huggingface.co/docs/optimum/bettertransformer/overview) that bring major speedups for many of encoder models on different modalities (text, image, audio). It is a one liner API that we have also added in the `Transformer_handler_generalized.py` in this example as well. That as shown above you just need to set `"BetterTransformer":true,` in the setup_config.json. diff --git a/examples/Huggingface_Transformers/Transformer_handler_generalized.py b/examples/Huggingface_Transformers/Transformer_handler_generalized.py index b469311464..61893d87e4 100644 --- a/examples/Huggingface_Transformers/Transformer_handler_generalized.py +++ b/examples/Huggingface_Transformers/Transformer_handler_generalized.py @@ -83,14 +83,10 @@ def initialize(self, ctx): logger.warning("Missing the operation mode.") # Using the Better Transformer integration to speedup the inference if self.setup_config["BetterTransformer"]: - try: - from optimum.bettertransformer import BetterTransformer + from optimum.bettertransformer import BetterTransformer + try: self.model = BetterTransformer.transform(self.model) - except ImportError as error: - logger.warning( - "HuggingFace Optimum is not installed. Proceeding without BetterTransformer" - ) except RuntimeError as error: logger.warning( "HuggingFace Optimum is not supporting this model,for the list of supported models, please refer to this doc,https://huggingface.co/docs/optimum/bettertransformer/overview" @@ -377,7 +373,6 @@ def get_insights(self, input_batch, text, target): self.setup_config["mode"] == "sequence_classification" or self.setup_config["mode"] == "token_classification" ): - attributions, delta = self.lig.attribute( inputs=input_ids, baselines=ref_input_ids, diff --git a/examples/Huggingface_Transformers/requirements.txt b/examples/Huggingface_Transformers/requirements.txt new file mode 100644 index 0000000000..196e970d9a --- /dev/null +++ b/examples/Huggingface_Transformers/requirements.txt @@ -0,0 +1,2 @@ +transformers +optimum diff --git a/model-archiver/model_archiver/version.txt b/model-archiver/model_archiver/version.txt index 39e898a4f9..a3df0a6959 100644 --- a/model-archiver/model_archiver/version.txt +++ b/model-archiver/model_archiver/version.txt @@ -1 +1 @@ -0.7.1 +0.8.0 diff --git a/ts/version.txt b/ts/version.txt index 39e898a4f9..a3df0a6959 100644 --- a/ts/version.txt +++ b/ts/version.txt @@ -1 +1 @@ -0.7.1 +0.8.0 diff --git a/workflow-archiver/workflow_archiver/version.txt b/workflow-archiver/workflow_archiver/version.txt index b0032849c8..a45be46276 100644 --- a/workflow-archiver/workflow_archiver/version.txt +++ b/workflow-archiver/workflow_archiver/version.txt @@ -1 +1 @@ -0.2.7 +0.2.8