diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index f98fb7e4f01df..8b74f06e77be0 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -828,7 +828,7 @@ def build(self) -> ModelInputForGPU: cuda_graph_pad_size = self._get_cuda_graph_pad_size( num_seqs=len(seq_lens), - max_decode_seq_len=max_encoder_seq_len, + max_decode_seq_len=max_decode_seq_len, max_encoder_seq_len=max_encoder_seq_len) batch_size = len(input_tokens)