From 8ca895484117e55c66c8b5643929866e634e5ce3 Mon Sep 17 00:00:00 2001
From: yudian0504 <138860534+yudian0504@users.noreply.github.com>
Date: Tue, 22 Oct 2024 01:33:30 +0800
Subject: [PATCH] [Bugfix][Misc]: fix graph capture for decoder (#9549)

---
 vllm/worker/model_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py
index f98fb7e4f01df..8b74f06e77be0 100644
--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -828,7 +828,7 @@ def build(self) -> ModelInputForGPU:
 
         cuda_graph_pad_size = self._get_cuda_graph_pad_size(
             num_seqs=len(seq_lens),
-            max_decode_seq_len=max_encoder_seq_len,
+            max_decode_seq_len=max_decode_seq_len,
             max_encoder_seq_len=max_encoder_seq_len)
 
         batch_size = len(input_tokens)