From e9552034063c01c0366a2876152ac0506c0e9e95 Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Tue, 6 Feb 2024 18:17:22 -0800
Subject: [PATCH 1/7] Enable incremental build

---
 setup.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 9cc4aea0ea75a..2109c7cab1328 100644
--- a/setup.py
+++ b/setup.py
@@ -23,6 +23,10 @@
 # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)
 
 
+if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
+    torch_cpp_ext._TORCH_PATH = os.environ["VLLM_INCREMENTAL_BUILD_TORCH_PATH"]
+
+
 def _is_hip() -> bool:
     return torch.version.hip is not None
 
@@ -432,6 +436,27 @@ def get_requirements() -> List[str]:
     ext_modules = []
     package_data["vllm"].append("*.so")
 
+
+if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
+    # This is an optional hack to allow incremental compilation.
+    class VllmBuildExtension(BuildExtension):
+        def __init__(self, *args, **kwargs):
+            old_write_ninja_file = torch_cpp_ext._write_ninja_file
+            def write_ninja_file(*args, **kwargs):
+                result = old_write_ninja_file(*args, **kwargs)
+                # Back date the build file to the unix epoch
+                os.utime("/tmp/vllmcompile/build.ninja", (0, 0))
+                return result
+            torch_cpp_ext._write_ninja_file = write_ninja_file
+            super().__init__(*args, **kwargs)
+
+        def build_extensions(self) -> None:
+            self.build_temp = "/tmp/vllmcompile/"
+            super().build_extensions()
+else:
+    VllmBuildExtension = BuildExtension
+
+
 setuptools.setup(
     name="vllm",
     version=get_vllm_version(),
@@ -459,6 +484,7 @@ def get_requirements() -> List[str]:
     python_requires=">=3.8",
     install_requires=get_requirements(),
     ext_modules=ext_modules,
-    cmdclass={"build_ext": BuildExtension} if not _is_neuron() else {},
+    cmdclass={"build_ext": VllmBuildExtension} if not _is_neuron() else {},
     package_data=package_data,
 )
+

From 0ad56b8cf60a4b0fdb7c958636fccc48f5db6afb Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Tue, 6 Feb 2024 18:35:12 -0800
Subject: [PATCH 2/7] yapf

---
 setup.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/setup.py b/setup.py
index 2109c7cab1328..ac8951f19e2b9 100644
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,6 @@
 ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx942"}
 # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)
 
-
 if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
     torch_cpp_ext._TORCH_PATH = os.environ["VLLM_INCREMENTAL_BUILD_TORCH_PATH"]
 
@@ -436,17 +435,19 @@ def get_requirements() -> List[str]:
     ext_modules = []
     package_data["vllm"].append("*.so")
 
-
 if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
     # This is an optional hack to allow incremental compilation.
     class VllmBuildExtension(BuildExtension):
+
         def __init__(self, *args, **kwargs):
             old_write_ninja_file = torch_cpp_ext._write_ninja_file
+
             def write_ninja_file(*args, **kwargs):
                 result = old_write_ninja_file(*args, **kwargs)
                 # Back date the build file to the unix epoch
                 os.utime("/tmp/vllmcompile/build.ninja", (0, 0))
                 return result
+
             torch_cpp_ext._write_ninja_file = write_ninja_file
             super().__init__(*args, **kwargs)
 
@@ -456,7 +457,6 @@ def build_extensions(self) -> None:
 else:
     VllmBuildExtension = BuildExtension
 
-
 setuptools.setup(
     name="vllm",
     version=get_vllm_version(),
@@ -487,4 +487,3 @@ def build_extensions(self) -> None:
     cmdclass={"build_ext": VllmBuildExtension} if not _is_neuron() else {},
     package_data=package_data,
 )
-

From 90dbcb50d13b2fc032ce21077b2186cca03a04c8 Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Tue, 6 Feb 2024 18:48:28 -0800
Subject: [PATCH 3/7] remove hack

---
 setup.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/setup.py b/setup.py
index ac8951f19e2b9..a22d0a6180fec 100644
--- a/setup.py
+++ b/setup.py
@@ -440,15 +440,6 @@ def get_requirements() -> List[str]:
     class VllmBuildExtension(BuildExtension):
 
         def __init__(self, *args, **kwargs):
-            old_write_ninja_file = torch_cpp_ext._write_ninja_file
-
-            def write_ninja_file(*args, **kwargs):
-                result = old_write_ninja_file(*args, **kwargs)
-                # Back date the build file to the unix epoch
-                os.utime("/tmp/vllmcompile/build.ninja", (0, 0))
-                return result
-
-            torch_cpp_ext._write_ninja_file = write_ninja_file
             super().__init__(*args, **kwargs)
 
         def build_extensions(self) -> None:

From 5d94f52d215a835af4453b7ca8188eb81b484bc0 Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Wed, 7 Feb 2024 10:26:49 -0800
Subject: [PATCH 4/7] add documentation

---
 setup.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/setup.py b/setup.py
index a22d0a6180fec..5c2371ded4256 100644
--- a/setup.py
+++ b/setup.py
@@ -22,6 +22,12 @@
 ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx942"}
 # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)
 
+# You can use
+# VLLM_INCREMENTAL_BUILD_TORCH_PATH=`python -c "import torch; print(torch.__path__[0])"` pip install -e . --verbose
+# to do an incremental build of vLLM. Alternatively, you can use
+# `python setup.py develop` without extra environment variables, which
+# has even less overhead, but is unfortunately deprecated upstream
+# (https://github.com/pypa/setuptools/issues/917).
 if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
     torch_cpp_ext._TORCH_PATH = os.environ["VLLM_INCREMENTAL_BUILD_TORCH_PATH"]
 

From 476c12047c457701576e3519e171fb7a86d0215f Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Wed, 7 Feb 2024 10:43:49 -0800
Subject: [PATCH 5/7] add docs

---
 docs/source/getting_started/installation.rst | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/docs/source/getting_started/installation.rst b/docs/source/getting_started/installation.rst
index 911c3d8f9a4ab..65cf92b5ad21e 100644
--- a/docs/source/getting_started/installation.rst
+++ b/docs/source/getting_started/installation.rst
@@ -67,3 +67,14 @@ You can also build and install vLLM from source:
 
     $ # Use `--ipc=host` to make sure the shared memory is large enough.
     $ docker run --gpus all -it --rm --ipc=host nvcr.io/nvidia/pytorch:23.10-py3
+
+.. note::
+    If you are developing on the C++ code in vLLM, you can use the following flag to
+    do incremental compilation.
+
+    .. code-block:: console
+
+        $ VLLM_INCREMENTAL_BUILD_TORCH_PATH=`python -c "import torch; print(torch.__path__[0])"` pip install -e . --verbose
+
+    Alternatively, you can use ``python setup.py develop`` which is even faster but
+    deprecated by setuptools.

From ec1941f3d8a0ad31d5bb25395a476d3edb69d486 Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Wed, 7 Feb 2024 11:42:30 -0800
Subject: [PATCH 6/7] update

---
 docs/source/getting_started/installation.rst |  9 +++---
 setup.py                                      | 30 ++++---------------
 2 files changed, 10 insertions(+), 29 deletions(-)

diff --git a/docs/source/getting_started/installation.rst b/docs/source/getting_started/installation.rst
index 65cf92b5ad21e..77b0ae65838a8 100644
--- a/docs/source/getting_started/installation.rst
+++ b/docs/source/getting_started/installation.rst
@@ -69,12 +69,11 @@ You can also build and install vLLM from source:
     $ docker run --gpus all -it --rm --ipc=host nvcr.io/nvidia/pytorch:23.10-py3
 
 .. note::
-    If you are developing on the C++ code in vLLM, you can use the following flag to
-    do incremental compilation.
+    If you are developing the C++ backend of vLLM, consider building vLLM with
 
     .. code-block:: console
 
-        $ VLLM_INCREMENTAL_BUILD_TORCH_PATH=`python -c "import torch; print(torch.__path__[0])"` pip install -e . --verbose
+        $ python setup.py develop
 
-    Alternatively, you can use ``python setup.py develop`` which is even faster but
-    deprecated by setuptools.
+    since it will give you incremental builds. The downside is that this method
+    is `deprecated by setuptools <https://github.com/pypa/setuptools/issues/917>`_.
diff --git a/setup.py b/setup.py
index 5c2371ded4256..a3684cb8acd32 100644
--- a/setup.py
+++ b/setup.py
@@ -15,6 +15,11 @@
 
 ROOT_DIR = os.path.dirname(__file__)
 
+# If you are developing the C++ backend of vLLM, consider building vLLM with
+# `python setup.py develop` since it will give you incremental builds.
+# The downside is that this method is deprecated, see
+# https://github.com/pypa/setuptools/issues/917
+
 MAIN_CUDA_VERSION = "12.1"
 
 # Supported NVIDIA GPU architectures.
@@ -22,16 +27,6 @@
 ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx942"}
 # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)
 
-# You can use
-# VLLM_INCREMENTAL_BUILD_TORCH_PATH=`python -c "import torch; print(torch.__path__[0])"` pip install -e . --verbose
-# to do an incremental build of vLLM. Alternatively, you can use
-# `python setup.py develop` without extra environment variables, which
-# has even less overhead, but is unfortunately deprecated upstream
-# (https://github.com/pypa/setuptools/issues/917).
-if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
-    torch_cpp_ext._TORCH_PATH = os.environ["VLLM_INCREMENTAL_BUILD_TORCH_PATH"]
-
-
 def _is_hip() -> bool:
     return torch.version.hip is not None
 
@@ -441,19 +436,6 @@ def get_requirements() -> List[str]:
     ext_modules = []
     package_data["vllm"].append("*.so")
 
-if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
-    # This is an optional hack to allow incremental compilation.
-    class VllmBuildExtension(BuildExtension):
-
-        def __init__(self, *args, **kwargs):
-            super().__init__(*args, **kwargs)
-
-        def build_extensions(self) -> None:
-            self.build_temp = "/tmp/vllmcompile/"
-            super().build_extensions()
-else:
-    VllmBuildExtension = BuildExtension
-
 setuptools.setup(
     name="vllm",
     version=get_vllm_version(),
@@ -481,6 +463,6 @@ def build_extensions(self) -> None:
     python_requires=">=3.8",
     install_requires=get_requirements(),
     ext_modules=ext_modules,
-    cmdclass={"build_ext": VllmBuildExtension} if not _is_neuron() else {},
+    cmdclass={"build_ext": BuildExtension} if not _is_neuron() else {},
     package_data=package_data,
 )

From 2c33d74cfe4f2b2735a5e57611e9e95d38e41df7 Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Wed, 7 Feb 2024 11:43:04 -0800
Subject: [PATCH 7/7] format

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index a3684cb8acd32..60efed0720ff1 100644
--- a/setup.py
+++ b/setup.py
@@ -27,6 +27,7 @@
 ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx942"}
 # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)
 
+
 def _is_hip() -> bool:
     return torch.version.hip is not None
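
Note: the sketch below is not part of the patch series. It is a minimal standalone
reconstruction of the incremental-build hook that PATCH 1 introduces and PATCH 6
later removes, assuming torch.utils.cpp_extension exposes the private _TORCH_PATH
and _write_ninja_file attributes exactly as the diffs use them. The BUILD_TEMP
constant is introduced here only for readability; the patches hard-code the
/tmp/vllmcompile/ path directly.

    import os

    import torch.utils.cpp_extension as torch_cpp_ext
    from torch.utils.cpp_extension import BuildExtension

    BUILD_TEMP = "/tmp/vllmcompile/"  # fixed build directory reused across builds

    # Point torch's extension machinery at a stable torch install so the paths
    # recorded in the generated ninja file do not change between invocations.
    if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
        torch_cpp_ext._TORCH_PATH = os.environ["VLLM_INCREMENTAL_BUILD_TORCH_PATH"]


    class VllmBuildExtension(BuildExtension):

        def __init__(self, *args, **kwargs):
            old_write_ninja_file = torch_cpp_ext._write_ninja_file

            def write_ninja_file(*args, **kwargs):
                result = old_write_ninja_file(*args, **kwargs)
                # Back date the regenerated build.ninja to the unix epoch, as
                # PATCH 1 does, so the freshly rewritten file never looks newer
                # than the object files already sitting in BUILD_TEMP.
                os.utime(os.path.join(BUILD_TEMP, "build.ninja"), (0, 0))
                return result

            torch_cpp_ext._write_ninja_file = write_ninja_file
            super().__init__(*args, **kwargs)

        def build_extensions(self) -> None:
            # Build into a fixed directory instead of the throwaway temp dir a
            # normal pip build would use, so ninja can skip up-to-date objects.
            self.build_temp = BUILD_TEMP
            super().build_extensions()

This class is what PATCH 1 wires into setuptools.setup() via
cmdclass={"build_ext": VllmBuildExtension}; PATCH 6 reverts to the plain
BuildExtension and instead documents `python setup.py develop` as the way to get
incremental builds.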