From 293fbd0786e5e41ccd545303b5f5d47006ada7c1 Mon Sep 17 00:00:00 2001
From: yaofengchen
Date: Thu, 19 Sep 2024 02:44:21 +0000
Subject: [PATCH] fix ascend atten_mask

---
 lmdeploy/pytorch/backends/ascend/op_backend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lmdeploy/pytorch/backends/ascend/op_backend.py b/lmdeploy/pytorch/backends/ascend/op_backend.py
index 721d4b8bd5..faee68a75e 100644
--- a/lmdeploy/pytorch/backends/ascend/op_backend.py
+++ b/lmdeploy/pytorch/backends/ascend/op_backend.py
@@ -101,7 +101,7 @@ def update_step_context(cls, step_context):
         for i in range(step_context.q_start_loc.size(0)):
             q_seq_len = int(step_context.q_seqlens[i])
             kv_seq_len = int(step_context.kv_seqlens[i])
-            if not step_context.is_decoding:
+            if not (step_context.is_decoding or is_unpaged_prefill):
                 single_attention_mask = torch.logical_not(
                     torch.tril(
                         torch.ones(step_context.q_seqlens[i],
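
For reference, below is a minimal runnable sketch of the causal mask the guarded
branch builds. The q_seq_len/kv_seq_len values are hypothetical examples chosen
for illustration; only the logical_not/tril/ones construction and the diagonal
offset are taken from the hunk itself.

import torch

# Hypothetical sizes for illustration only.
q_seq_len, kv_seq_len = 4, 6

# Same construction as the hunk: True marks key/value positions that a
# query token must NOT attend to. The diagonal offset aligns the last
# query token with the last key/value token, so a prefill that extends
# an existing KV cache still gets a correct causal mask.
single_attention_mask = torch.logical_not(
    torch.tril(
        torch.ones(q_seq_len, kv_seq_len, dtype=torch.bool),
        diagonal=kv_seq_len - q_seq_len,
    ))

# After this patch the branch is skipped both for decoding and for
# unpaged prefill (the added is_unpaged_prefill condition), so the mask
# is only materialized on the remaining, paged-prefill path.
print(single_attention_mask)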