diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9c88c31c83da1..f8d6a2be9feae 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -195,9 +195,13 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
   FetchContent_Declare(
         cutlass
         GIT_REPOSITORY https://github.com/nvidia/cutlass.git
-        # CUTLASS 3.5.1
-        GIT_TAG 06b21349bcf6ddf6a1686a47a137ad1446579db9 
+        GIT_TAG v3.5.1
         GIT_PROGRESS TRUE
+
+        # Speed up the CUTLASS download by fetching only the GIT_TAG revision, not the full history.
+        # Important: with GIT_SHALLOW enabled, GIT_TAG must be a branch name or a tag.
+        # If GIT_TAG above is ever changed to a commit hash, set GIT_SHALLOW to FALSE.
+        GIT_SHALLOW TRUE
   )
   FetchContent_MakeAvailable(cutlass)
 
@@ -231,6 +235,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
           "-gencode arch=compute_90a,code=sm_90a")
   endif()
 
+
   #
   # Machete kernels
 
@@ -289,6 +294,12 @@ define_gpu_extension_target(
   USE_SABI 3
   WITH_SOABI)
 
+# When CUTLASS is compiled with NVCC >= 12.5, it by default goes through
+# cudaGetDriverEntryPointByVersion rather than calling the driver API directly.
+# This causes problems when linking against earlier versions of CUDA.
+# Defining this macro sidesteps the issue by making CUTLASS call the driver directly.
+target_compile_definitions(_C PRIVATE CUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1)
+
 #
 # _moe_C extension
 #