This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit: format
tlrmchlsmth committed Jul 16, 2024
1 parent adb6713 · commit b733a84
Showing 3 changed files with 7 additions and 5 deletions.
vllm/attention/backends/placeholder_attn.py (6 changes: 4 additions & 2 deletions)

```diff
@@ -1,8 +1,10 @@
 from dataclasses import dataclass
-from typing import (List, Optional, Tuple, Type)
+from typing import List, Optional, Tuple, Type
 
+import torch
+
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                               AttentionMetadata)
-import torch
 
 # Placeholder attention backend for models like Mamba that don't have attention.
 # Mainly exists to sidestep get_attn_backend.
```
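For context on what the formatter is doing in this hunk: the imports are regrouped into the conventional standard-library / third-party / first-party order (the isort-style layout that vLLM's formatting tooling appears to enforce), and the redundant parentheses around the short `typing` import are dropped. Below is a minimal reconstruction of the resulting import block, with the grouping spelled out in comments that are added here purely for illustration:

```python
# Reconstruction of the file header after this change; the grouping comments
# are illustrative only and not part of the actual file.

# Group 1: standard library.
from dataclasses import dataclass
from typing import List, Optional, Tuple, Type

# Group 2: third-party packages, separated from the other groups by blank lines.
import torch

# Group 3: first-party (vllm) imports.
from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                              AttentionMetadata)
```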
vllm/engine/llm_engine.py (2 changes: 1 addition & 1 deletion)

```diff
@@ -261,7 +261,7 @@ def __init__(
 
         if not self.model_config.embedding_mode:
             # For all decoders including attention-free models like mamba,
-            # this must call _initialize_kv_caches, as this is where model
+            # this must call _initialize_kv_caches, as this is where model
             # warmup and CUDA graphs creation happens.
             self._initialize_kv_caches()
```

The removed and added lines render identically here; the change appears to be whitespace-only (for example, trailing whitespace stripped by the formatter).
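The comment touched above encodes a real constraint: `_initialize_kv_caches` is also where model warmup and CUDA graph capture happen, so attention-free models such as Mamba must still go through it. A simplified, hypothetical sketch of that control flow follows; everything except the names `_initialize_kv_caches` and `embedding_mode` is invented for illustration and is not vLLM's actual API:

```python
# Hypothetical, simplified sketch of the initialization pattern described in
# the comment above; this is not vLLM's actual LLMEngine implementation.
class EngineSketch:

    def __init__(self, embedding_mode: bool, attention_free: bool) -> None:
        self.embedding_mode = embedding_mode
        self.attention_free = attention_free
        if not self.embedding_mode:
            # Called for every decoder, including attention-free models,
            # because warmup and CUDA graph capture are driven from here.
            self._initialize_kv_caches()

    def _initialize_kv_caches(self) -> None:
        # An attention-free model may end up with an empty or placeholder
        # cache, but the surrounding warmup/graph-capture steps still run.
        num_blocks = 0 if self.attention_free else 1024  # stand-in sizing
        self._warm_up_model(num_blocks)

    def _warm_up_model(self, num_blocks: int) -> None:
        # Stand-in for model warmup and CUDA graph capture.
        print(f"warmup with {num_blocks} KV-cache blocks")


# Example: an attention-free (Mamba-style) decoder still triggers warmup.
EngineSketch(embedding_mode=False, attention_free=True)
```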
vllm/worker/model_runner.py (4 changes: 2 additions & 2 deletions)

```diff
@@ -23,8 +23,8 @@
 FLASHINFER_WORKSPACE_BUFFER_SIZE = 0
 
 from vllm.attention import AttentionMetadata, get_attn_backend
-from vllm.attention.backends.placeholder_attn import PlaceholderAttentionBackend
-
+from vllm.attention.backends.placeholder_attn import (
+    PlaceholderAttentionBackend)
 from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
                          ModelConfig, MultiModalConfig, ParallelConfig,
                          PromptAdapterConfig, SchedulerConfig)
```
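The model_runner.py hunk is purely mechanical: the single-line import exceeds the project's line-length limit, so the formatter wraps it in parentheses, the continuation style PEP 8 prefers over backslashes. Both forms bind the same name at import time; the snippet below simply restates the two forms side by side:

```python
# Over-length form that was removed (kept as a comment so this snippet itself
# stays within a typical line-length limit):
# from vllm.attention.backends.placeholder_attn import PlaceholderAttentionBackend

# Wrapped form that was added; the parentheses let the import continue on the
# next line without a backslash, with no change in behavior.
from vllm.attention.backends.placeholder_attn import (
    PlaceholderAttentionBackend)
```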
