From b733a840010c054f3bb069e49335e9c7926d5a35 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith <tyler@neuralmagic.com>
Date: Tue, 16 Jul 2024 18:26:46 +0000
Subject: [PATCH] Format: sort imports, wrap long import, strip trailing whitespace

---
 vllm/attention/backends/placeholder_attn.py | 6 ++++--
 vllm/engine/llm_engine.py                   | 2 +-
 vllm/worker/model_runner.py                 | 4 ++--
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/vllm/attention/backends/placeholder_attn.py b/vllm/attention/backends/placeholder_attn.py
index 6bc766ba4e3f7..f5728756c6e5d 100644
--- a/vllm/attention/backends/placeholder_attn.py
+++ b/vllm/attention/backends/placeholder_attn.py
@@ -1,8 +1,10 @@
 from dataclasses import dataclass
-from typing import (List, Optional, Tuple, Type)
+from typing import List, Optional, Tuple, Type
+
+import torch
+
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                               AttentionMetadata)
-import torch
 
 # Placeholder attention backend for models like Mamba that don't have attention.
 # Mainly exists to sidestep get_attn_backend.
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index c43f7fcb85484..f1ce03171ebf7 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -261,7 +261,7 @@ def __init__(
 
         if not self.model_config.embedding_mode:
             # For all decoders including attention-free models like mamba,
-            # this must call _initialize_kv_caches, as this is where model 
+            # this must call _initialize_kv_caches, as this is where model
             # warmup and CUDA graphs creation happens.
             self._initialize_kv_caches()
 
diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py
index 459798e418c30..2f4a0657c3f1a 100644
--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -23,8 +23,8 @@
     FLASHINFER_WORKSPACE_BUFFER_SIZE = 0
 
 from vllm.attention import AttentionMetadata, get_attn_backend
-from vllm.attention.backends.placeholder_attn import PlaceholderAttentionBackend
-
+from vllm.attention.backends.placeholder_attn import (
+    PlaceholderAttentionBackend)
 from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
                          ModelConfig, MultiModalConfig, ParallelConfig,
                          PromptAdapterConfig, SchedulerConfig)