From fd71f0733c70f992ef54b2252458f9eeaea7fdda Mon Sep 17 00:00:00 2001 From: Matthias Reso <13337103+mreso@users.noreply.github.com> Date: Mon, 23 Sep 2024 23:57:24 +0000 Subject: [PATCH] Rename vllm dockerfile --- README.md | 4 ++-- docker/{Dockerfile.llm => Dockerfile.vllm} | 0 docs/llm_deployment.md | 4 ++-- examples/large_models/vllm/llama3/Readme.md | 2 +- examples/large_models/vllm/lora/Readme.md | 2 +- examples/large_models/vllm/mistral/Readme.md | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) rename docker/{Dockerfile.llm => Dockerfile.vllm} (100%) diff --git a/README.md b/README.md index 766b3f4e45..b056a2bca1 100644 --- a/README.md +++ b/README.md @@ -74,9 +74,9 @@ curl -X POST -d '{"model":"meta-llama/Meta-Llama-3-8B-Instruct", "prompt":"Hello ```bash #export token= -docker build --pull . -f docker/Dockerfile.llm -t ts/llm +docker build --pull . -f docker/Dockerfile.vllm -t ts/vllm -docker run --rm -ti --shm-size 10g --gpus all -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:8080 -v data:/data ts/llm --model_id meta-llama/Meta-Llama-3-8B-Instruct --disable_token_auth +docker run --rm -ti --shm-size 10g --gpus all -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:8080 -v data:/data ts/vllm --model_id meta-llama/Meta-Llama-3-8B-Instruct --disable_token_auth # Try it out curl -X POST -d '{"model":"meta-llama/Meta-Llama-3-8B-Instruct", "prompt":"Hello, my name is", "max_tokens": 200}' --header "Content-Type: application/json" "http://localhost:8080/predictions/model/1.0/v1/completions" diff --git a/docker/Dockerfile.llm b/docker/Dockerfile.vllm similarity index 100% rename from docker/Dockerfile.llm rename to docker/Dockerfile.vllm diff --git a/docs/llm_deployment.md b/docs/llm_deployment.md index 282dd558fe..2a7bfc8742 100644 --- a/docs/llm_deployment.md +++ b/docs/llm_deployment.md @@ -11,7 +11,7 @@ The launcher can either be used standalone or in combination with our provided T To launch the docker we first need to build it: ```bash -docker build . 
-f docker/Dockerfile.llm -t ts/llm +docker build . -f docker/Dockerfile.vllm -t ts/vllm ``` Models are usually loaded from the HuggingFace hub and are cached in a [docker volume](https://docs.docker.com/storage/volumes/) for faster reload. @@ -22,7 +22,7 @@ export token= You can then go ahead and launch a TorchServe instance serving your selected model: ```bash -docker run --rm -ti --shm-size 1g --gpus all -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:8080 -v data:/data ts/llm --model_id meta-llama/Meta-Llama-3-8B-Instruct --disable_token_auth +docker run --rm -ti --shm-size 1g --gpus all -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:8080 -v data:/data ts/vllm --model_id meta-llama/Meta-Llama-3-8B-Instruct --disable_token_auth ``` To change the model you just need to exchange the identifier given to the `--model_id` parameter. diff --git a/examples/large_models/vllm/llama3/Readme.md b/examples/large_models/vllm/llama3/Readme.md index fb80f7a3e3..ac8ea048d4 100644 --- a/examples/large_models/vllm/llama3/Readme.md +++ b/examples/large_models/vllm/llama3/Readme.md @@ -9,7 +9,7 @@ To leverage the power of vLLM we first need to install it using pip in our develo ```bash python -m pip install -r ../requirements.txt ``` -For later deployments we can make vLLM part of the deployment environment by adding the requirements.txt while building the model archive in step 2 (see [here](../../../../model-archiver/README.md#model-specific-custom-python-requirements) for details) or we can make it part of a docker image like [here](../../../../docker/Dockerfile.llm). +For later deployments we can make vLLM part of the deployment environment by adding the requirements.txt while building the model archive in step 2 (see [here](../../../../model-archiver/README.md#model-specific-custom-python-requirements) for details) or we can make it part of a docker image like [here](../../../../docker/Dockerfile.vllm). 
### Step 1: Download Model from HuggingFace diff --git a/examples/large_models/vllm/lora/Readme.md b/examples/large_models/vllm/lora/Readme.md index c592f23a73..5171fd9014 100644 --- a/examples/large_models/vllm/lora/Readme.md +++ b/examples/large_models/vllm/lora/Readme.md @@ -9,7 +9,7 @@ To leverage the power of vLLM we first need to install it using pip in our develo ```bash python -m pip install -r ../requirements.txt ``` -For later deployments we can make vLLM part of the deployment environment by adding the requirements.txt while building the model archive in step 2 (see [here](../../../../model-archiver/README.md#model-specific-custom-python-requirements) for details) or we can make it part of a docker image like [here](../../../../docker/Dockerfile.llm). +For later deployments we can make vLLM part of the deployment environment by adding the requirements.txt while building the model archive in step 2 (see [here](../../../../model-archiver/README.md#model-specific-custom-python-requirements) for details) or we can make it part of a docker image like [here](../../../../docker/Dockerfile.vllm). ### Step 1: Download Model from HuggingFace diff --git a/examples/large_models/vllm/mistral/Readme.md b/examples/large_models/vllm/mistral/Readme.md index 4816adcae5..d7c504a54c 100644 --- a/examples/large_models/vllm/mistral/Readme.md +++ b/examples/large_models/vllm/mistral/Readme.md @@ -9,7 +9,7 @@ To leverage the power of vLLM we first need to install it using pip in our develo ```bash python -m pip install -r ../requirements.txt ``` -For later deployments we can make vLLM part of the deployment environment by adding the requirements.txt while building the model archive in step 2 (see [here](../../../../model-archiver/README.md#model-specific-custom-python-requirements) for details) or we can make it part of a docker image like [here](../../../../docker/Dockerfile.llm). 
+For later deployments we can make vLLM part of the deployment environment by adding the requirements.txt while building the model archive in step 2 (see [here](../../../../model-archiver/README.md#model-specific-custom-python-requirements) for details) or we can make it part of a docker image like [here](../../../../docker/Dockerfile.vllm). ### Step 1: Download Model from HuggingFace