From efb5d13f5b9cfb084ca2dc33777c391076dc201f Mon Sep 17 00:00:00 2001
From: eaidova
Date: Fri, 30 Aug 2024 09:14:41 +0400
Subject: [PATCH] fix attention mask for glm4

---
 optimum/exporters/openvino/model_patcher.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py
index 6e65f4f11a..8cb745bd72 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -308,10 +308,9 @@ def _chatglm2_core_attention_forward(self, query_layer, key_layer, value_layer,
 
 
 def _glm4_core_attention_forward(self, query_layer, key_layer, value_layer, attention_mask):
-    attention_mask = ~attention_mask
-    context_layer = torch.nn.functional.scaled_dot_product_attention(
-        query_layer, key_layer, value_layer, attention_mask.to(torch.float32)
-    )
+    causal_mask = torch.zeros_like(attention_mask, dtype=torch.float32)
+    causal_mask.masked_fill_(attention_mask, float("-inf"))
+    context_layer = torch.nn.functional.scaled_dot_product_attention(query_layer, key_layer, value_layer, causal_mask)
     context_layer = context_layer.transpose(1, 2).contiguous()
     new_context_layer_shape = context_layer.size()[:-2] + (self.hidden_size_per_partition,)
     context_layer = context_layer.reshape(*new_context_layer_shape)
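
Note (not part of the patch): torch.nn.functional.scaled_dot_product_attention treats a floating-point attn_mask as an additive bias, so the old code's ~attention_mask cast to float32 produced a 0.0/1.0 bias that never actually excluded the masked positions. The patched code builds an additive mask with -inf where the GLM-4 boolean mask is True (i.e. where attention must be blocked). The sketch below, with made-up tensor shapes, illustrates why the additive -inf mask is equivalent to passing SDPA a boolean mask in its own convention (True = attend); it is an illustration only, not code from the repository.

    import torch

    batch, heads, q_len, kv_len, head_dim = 1, 2, 4, 4, 8
    query = torch.randn(batch, heads, q_len, head_dim)
    key = torch.randn(batch, heads, kv_len, head_dim)
    value = torch.randn(batch, heads, kv_len, head_dim)

    # GLM-4-style boolean mask: True marks positions that must NOT be attended.
    attention_mask = torch.triu(torch.ones(q_len, kv_len, dtype=torch.bool), diagonal=1)

    # Old behaviour: inverting and casting to float yields a 1.0/0.0 tensor, which
    # SDPA adds to the attention scores, so nothing is really masked.
    wrong_bias = (~attention_mask).to(torch.float32)

    # Patched behaviour: additive mask, 0.0 where attention is allowed, -inf where blocked.
    causal_mask = torch.zeros_like(attention_mask, dtype=torch.float32)
    causal_mask.masked_fill_(attention_mask, float("-inf"))

    out_fixed = torch.nn.functional.scaled_dot_product_attention(query, key, value, causal_mask)
    # Reference: SDPA's own boolean convention is True = "may attend", hence ~attention_mask.
    out_ref = torch.nn.functional.scaled_dot_product_attention(query, key, value, ~attention_mask)
    print(torch.allclose(out_fixed, out_ref, atol=1e-6))  # True: the additive -inf mask matches

The additive float mask is used in the patch (rather than passing the inverted boolean mask directly) presumably to keep the mask dtype explicit and export-friendly for OpenVINO tracing; that rationale is an assumption, not stated in the commit.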