From e9552034063c01c0366a2876152ac0506c0e9e95 Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Tue, 6 Feb 2024 18:17:22 -0800
Subject: [PATCH 1/7] Enable incremental build

---
 setup.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 9cc4aea0ea75a..2109c7cab1328 100644
--- a/setup.py
+++ b/setup.py
@@ -23,6 +23,10 @@
 # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)
 
 
+if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
+    torch_cpp_ext._TORCH_PATH = os.environ["VLLM_INCREMENTAL_BUILD_TORCH_PATH"]
+
+
 def _is_hip() -> bool:
     return torch.version.hip is not None
 
@@ -432,6 +436,27 @@ def get_requirements() -> List[str]:
     ext_modules = []
     package_data["vllm"].append("*.so")
 
+
+if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
+    # This is an optional hack to allow incremental compilation.
+    class VllmBuildExtension(BuildExtension):
+        def __init__(self, *args, **kwargs):
+            old_write_ninja_file = torch_cpp_ext._write_ninja_file
+            def write_ninja_file(*args, **kwargs):
+                result = old_write_ninja_file(*args, **kwargs)
+                # Back date the build file to the unix epoch
+                os.utime("/tmp/vllmcompile/build.ninja", (0, 0))
+                return result
+            torch_cpp_ext._write_ninja_file = write_ninja_file
+            super().__init__(*args, **kwargs)
+
+        def build_extensions(self) -> None:
+            self.build_temp = "/tmp/vllmcompile/"
+            super().build_extensions()
+else:
+    VllmBuildExtension = BuildExtension
+
+
 setuptools.setup(
     name="vllm",
     version=get_vllm_version(),
@@ -459,6 +484,7 @@ def get_requirements() -> List[str]:
     python_requires=">=3.8",
     install_requires=get_requirements(),
     ext_modules=ext_modules,
-    cmdclass={"build_ext": BuildExtension} if not _is_neuron() else {},
+    cmdclass={"build_ext": VllmBuildExtension} if not _is_neuron() else {},
     package_data=package_data,
 )
+

From 0ad56b8cf60a4b0fdb7c958636fccc48f5db6afb Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Tue, 6 Feb 2024 18:35:12 -0800
Subject: [PATCH 2/7] yapf

---
 setup.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/setup.py b/setup.py
index 2109c7cab1328..ac8951f19e2b9 100644
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,6 @@
 ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx942"}
 # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)
 
-
 if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
     torch_cpp_ext._TORCH_PATH = os.environ["VLLM_INCREMENTAL_BUILD_TORCH_PATH"]
 
@@ -436,17 +435,19 @@ def get_requirements() -> List[str]:
     ext_modules = []
     package_data["vllm"].append("*.so")
 
-
 if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
     # This is an optional hack to allow incremental compilation.
     class VllmBuildExtension(BuildExtension):
+
         def __init__(self, *args, **kwargs):
             old_write_ninja_file = torch_cpp_ext._write_ninja_file
+
             def write_ninja_file(*args, **kwargs):
                 result = old_write_ninja_file(*args, **kwargs)
                 # Back date the build file to the unix epoch
                 os.utime("/tmp/vllmcompile/build.ninja", (0, 0))
                 return result
+
             torch_cpp_ext._write_ninja_file = write_ninja_file
             super().__init__(*args, **kwargs)
 
@@ -456,7 +457,6 @@ def build_extensions(self) -> None:
 else:
     VllmBuildExtension = BuildExtension
 
-
 setuptools.setup(
     name="vllm",
     version=get_vllm_version(),
@@ -487,4 +487,3 @@ def build_extensions(self) -> None:
     cmdclass={"build_ext": VllmBuildExtension} if not _is_neuron() else {},
     package_data=package_data,
 )
-

From 90dbcb50d13b2fc032ce21077b2186cca03a04c8 Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Tue, 6 Feb 2024 18:48:28 -0800
Subject: [PATCH 3/7] remove hack

---
 setup.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/setup.py b/setup.py
index ac8951f19e2b9..a22d0a6180fec 100644
--- a/setup.py
+++ b/setup.py
@@ -440,15 +440,6 @@ def get_requirements() -> List[str]:
     class VllmBuildExtension(BuildExtension):
 
         def __init__(self, *args, **kwargs):
-            old_write_ninja_file = torch_cpp_ext._write_ninja_file
-
-            def write_ninja_file(*args, **kwargs):
-                result = old_write_ninja_file(*args, **kwargs)
-                # Back date the build file to the unix epoch
-                os.utime("/tmp/vllmcompile/build.ninja", (0, 0))
-                return result
-
-            torch_cpp_ext._write_ninja_file = write_ninja_file
             super().__init__(*args, **kwargs)
 
         def build_extensions(self) -> None:

From 5d94f52d215a835af4453b7ca8188eb81b484bc0 Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Wed, 7 Feb 2024 10:26:49 -0800
Subject: [PATCH 4/7] add documentation

---
 setup.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/setup.py b/setup.py
index a22d0a6180fec..5c2371ded4256 100644
--- a/setup.py
+++ b/setup.py
@@ -22,6 +22,12 @@
 ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx942"}
 # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)
 
+# You can use
+# VLLM_INCREMENTAL_BUILD_TORCH_PATH=`python -c "import torch; print(torch.__path__[0])"` pip install -e . --verbose
+# to do an incremental build of vLLM. Alternatively, you can use
+# `python setup.py develop` without extra environment variables, which
+# has even less overhead, but is unfortunately deprecated upstream
+# (https://github.com/pypa/setuptools/issues/917).
 if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
     torch_cpp_ext._TORCH_PATH = os.environ["VLLM_INCREMENTAL_BUILD_TORCH_PATH"]
 

From 476c12047c457701576e3519e171fb7a86d0215f Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Wed, 7 Feb 2024 10:43:49 -0800
Subject: [PATCH 5/7] add docs

---
 docs/source/getting_started/installation.rst | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/docs/source/getting_started/installation.rst b/docs/source/getting_started/installation.rst
index 911c3d8f9a4ab..65cf92b5ad21e 100644
--- a/docs/source/getting_started/installation.rst
+++ b/docs/source/getting_started/installation.rst
@@ -67,3 +67,14 @@ You can also build and install vLLM from source:
 
     $ # Use `--ipc=host` to make sure the shared memory is large enough.
     $ docker run --gpus all -it --rm --ipc=host nvcr.io/nvidia/pytorch:23.10-py3
+
+.. note::
+    If you are developing on the C++ code in vLLM, you can use the following flag to
+    do incremental compilation.
+
+    .. code-block:: console
+
+        $ VLLM_INCREMENTAL_BUILD_TORCH_PATH=`python -c "import torch; print(torch.__path__[0])"` pip install -e . --verbose
+
+    Alternatively, you can use ``python setup.py develop`` which is even faster but
+    deprecated by setuptools.

From ec1941f3d8a0ad31d5bb25395a476d3edb69d486 Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Wed, 7 Feb 2024 11:42:30 -0800
Subject: [PATCH 6/7] update

---
 docs/source/getting_started/installation.rst |  9 +++---
 setup.py                                      | 30 ++++---------------
 2 files changed, 10 insertions(+), 29 deletions(-)

diff --git a/docs/source/getting_started/installation.rst b/docs/source/getting_started/installation.rst
index 65cf92b5ad21e..77b0ae65838a8 100644
--- a/docs/source/getting_started/installation.rst
+++ b/docs/source/getting_started/installation.rst
@@ -69,12 +69,11 @@ You can also build and install vLLM from source:
     $ docker run --gpus all -it --rm --ipc=host nvcr.io/nvidia/pytorch:23.10-py3
 
 .. note::
-    If you are developing on the C++ code in vLLM, you can use the following flag to
-    do incremental compilation.
+    If you are developing the C++ backend of vLLM, consider building vLLM with
 
     .. code-block:: console
 
-        $ VLLM_INCREMENTAL_BUILD_TORCH_PATH=`python -c "import torch; print(torch.__path__[0])"` pip install -e . --verbose
+        $ python setup.py develop
 
-    Alternatively, you can use ``python setup.py develop`` which is even faster but
-    deprecated by setuptools.
+    since it will give you incremental builds. The downside is that this method
+    is `deprecated by setuptools <https://github.com/pypa/setuptools/issues/917>`_.
diff --git a/setup.py b/setup.py
index 5c2371ded4256..a3684cb8acd32 100644
--- a/setup.py
+++ b/setup.py
@@ -15,6 +15,11 @@
 
 ROOT_DIR = os.path.dirname(__file__)
 
+# If you are developing the C++ backend of vLLM, consider building vLLM with
+# `python setup.py develop` since it will give you incremental builds.
+# The downside is that this method is deprecated, see
+# https://github.com/pypa/setuptools/issues/917
+
 MAIN_CUDA_VERSION = "12.1"
 
 # Supported NVIDIA GPU architectures.
@@ -22,16 +27,6 @@
 ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx942"}
 # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)
 
-# You can use
-# VLLM_INCREMENTAL_BUILD_TORCH_PATH=`python -c "import torch; print(torch.__path__[0])"` pip install -e . --verbose
-# to do an incremental build of vLLM. Alternatively, you can use
-# `python setup.py develop` without extra environment variables, which
-# has even less overhead, but is unfortunately deprecated upstream
-# (https://github.com/pypa/setuptools/issues/917).
-if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
-    torch_cpp_ext._TORCH_PATH = os.environ["VLLM_INCREMENTAL_BUILD_TORCH_PATH"]
-
-
 def _is_hip() -> bool:
     return torch.version.hip is not None
 
@@ -441,19 +436,6 @@ def get_requirements() -> List[str]:
     ext_modules = []
     package_data["vllm"].append("*.so")
 
-if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
-    # This is an optional hack to allow incremental compilation.
-    class VllmBuildExtension(BuildExtension):
-
-        def __init__(self, *args, **kwargs):
-            super().__init__(*args, **kwargs)
-
-        def build_extensions(self) -> None:
-            self.build_temp = "/tmp/vllmcompile/"
-            super().build_extensions()
-else:
-    VllmBuildExtension = BuildExtension
-
 setuptools.setup(
     name="vllm",
     version=get_vllm_version(),
@@ -481,6 +463,6 @@ def build_extensions(self) -> None:
     python_requires=">=3.8",
     install_requires=get_requirements(),
     ext_modules=ext_modules,
-    cmdclass={"build_ext": VllmBuildExtension} if not _is_neuron() else {},
+    cmdclass={"build_ext": BuildExtension} if not _is_neuron() else {},
     package_data=package_data,
 )

From 2c33d74cfe4f2b2735a5e57611e9e95d38e41df7 Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Wed, 7 Feb 2024 11:43:04 -0800
Subject: [PATCH 7/7] format

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index a3684cb8acd32..60efed0720ff1 100644
--- a/setup.py
+++ b/setup.py
@@ -27,6 +27,7 @@
 ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx942"}
 # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)
 
+
 def _is_hip() -> bool:
     return torch.version.hip is not None
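
Note: the sketch below is not part of the patch series. It is a minimal standalone
reconstruction of the incremental-build hook that PATCH 1 introduces and PATCH 6
later removes, assuming torch.utils.cpp_extension exposes the private _TORCH_PATH
and _write_ninja_file attributes exactly as the diffs use them. The BUILD_TEMP
constant is introduced here only for readability; the patches hard-code the
/tmp/vllmcompile/ path directly.

    import os

    import torch.utils.cpp_extension as torch_cpp_ext
    from torch.utils.cpp_extension import BuildExtension

    BUILD_TEMP = "/tmp/vllmcompile/"  # fixed build directory reused across builds

    # Point torch's extension machinery at a stable torch install so the paths
    # recorded in the generated ninja file do not change between invocations.
    if "VLLM_INCREMENTAL_BUILD_TORCH_PATH" in os.environ:
        torch_cpp_ext._TORCH_PATH = os.environ["VLLM_INCREMENTAL_BUILD_TORCH_PATH"]


    class VllmBuildExtension(BuildExtension):

        def __init__(self, *args, **kwargs):
            old_write_ninja_file = torch_cpp_ext._write_ninja_file

            def write_ninja_file(*args, **kwargs):
                result = old_write_ninja_file(*args, **kwargs)
                # Back date the regenerated build.ninja to the unix epoch, as
                # PATCH 1 does, so the freshly rewritten file never looks newer
                # than the object files already sitting in BUILD_TEMP.
                os.utime(os.path.join(BUILD_TEMP, "build.ninja"), (0, 0))
                return result

            torch_cpp_ext._write_ninja_file = write_ninja_file
            super().__init__(*args, **kwargs)

        def build_extensions(self) -> None:
            # Build into a fixed directory instead of the throwaway temp dir a
            # normal pip build would use, so ninja can skip up-to-date objects.
            self.build_temp = BUILD_TEMP
            super().build_extensions()

This class is what PATCH 1 wires into setuptools.setup() via
cmdclass={"build_ext": VllmBuildExtension}; PATCH 6 reverts to the plain
BuildExtension and instead documents `python setup.py develop` as the way to get
incremental builds.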