From b733a840010c054f3bb069e49335e9c7926d5a35 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Tue, 16 Jul 2024 18:26:46 +0000 Subject: [PATCH] format --- vllm/attention/backends/placeholder_attn.py | 6 ++++-- vllm/engine/llm_engine.py | 2 +- vllm/worker/model_runner.py | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/vllm/attention/backends/placeholder_attn.py b/vllm/attention/backends/placeholder_attn.py index 6bc766ba4e3f7..f5728756c6e5d 100644 --- a/vllm/attention/backends/placeholder_attn.py +++ b/vllm/attention/backends/placeholder_attn.py @@ -1,8 +1,10 @@ from dataclasses import dataclass -from typing import (List, Optional, Tuple, Type) +from typing import List, Optional, Tuple, Type + +import torch + from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, AttentionMetadata) -import torch # Placeholder attention backend for models like Mamba that don't have attention. # Mainly exists to sidestep get_attn_backend. diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index c43f7fcb85484..f1ce03171ebf7 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -261,7 +261,7 @@ def __init__( if not self.model_config.embedding_mode: # For all decoders including attention-free models like mamba, - # this must call _initialize_kv_caches, as this is where model + # this must call _initialize_kv_caches, as this is where model # warmup and CUDA graphs creation happens. self._initialize_kv_caches() diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 459798e418c30..2f4a0657c3f1a 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -23,8 +23,8 @@ FLASHINFER_WORKSPACE_BUFFER_SIZE = 0 from vllm.attention import AttentionMetadata, get_attn_backend -from vllm.attention.backends.placeholder_attn import PlaceholderAttentionBackend - +from vllm.attention.backends.placeholder_attn import ( + PlaceholderAttentionBackend) from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig, ModelConfig, MultiModalConfig, ParallelConfig, PromptAdapterConfig, SchedulerConfig)