[LLM] Fix Qwen2 (#8584)
* fix output_router_logits

* fix with __future__
DrownFish19 authored Jun 12, 2024
1 parent 4609d07 commit 5bdf751
Showing 1 changed file with 6 additions and 13 deletions.

paddlenlp/transformers/qwen2/modeling.py
@@ -18,6 +18,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Paddle Qwen2 model."""
+from __future__ import annotations

 import math
 import warnings
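A note on the hunk above: the commit message's "fix with __future__" refers to the added from __future__ import annotations line, which turns on postponed evaluation of annotations (PEP 563), presumably because some type hints in this module reference names that are not resolvable at import time. A minimal toy sketch of the effect (not PaddleNLP code; the annotated names are made up and never need to exist at runtime):

from __future__ import annotations


def build(config: Qwen2Config) -> Qwen2Model:  # hints stored as strings, never evaluated
    ...


print(build.__annotations__)  # {'config': 'Qwen2Config', 'return': 'Qwen2Model'}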
@@ -187,11 +188,11 @@ def scaled_dot_product_attention(
     else:
         # [ bz, seqlen, nhead, head_dim] -> [bs, nhead, seq_len, head_dim]
         query_states = paddle.transpose(query_states, [0, 2, 1, 3])
-        # merge with the next tranpose
+        # merge with the next transpose
         key_states = paddle.transpose(key_states, [0, 2, 1, 3])
         value_states = paddle.transpose(value_states, [0, 2, 1, 3])

-        # matmul and devide by sqrt(head_dim)
+        # matmul and divide by sqrt(head_dim)
         attn_weights = paddle.matmul(query_states / math.sqrt(head_dim), key_states.transpose([0, 1, 3, 2]))

         if attn_weights.shape != [bsz, num_heads, q_len, kv_seq_len]:
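For readers unfamiliar with this branch of scaled_dot_product_attention, a minimal standalone sketch of the shape handling shown above (assumes paddle is installed; the tensor sizes are arbitrary):

import math

import paddle

bsz, q_len, num_heads, head_dim = 2, 8, 4, 16
q = paddle.randn([bsz, q_len, num_heads, head_dim])
k = paddle.randn([bsz, q_len, num_heads, head_dim])

# [bsz, seq, heads, head_dim] -> [bsz, heads, seq, head_dim], then scaled QK^T
q = paddle.transpose(q, [0, 2, 1, 3])
k = paddle.transpose(k, [0, 2, 1, 3])
attn_weights = paddle.matmul(q / math.sqrt(head_dim), k.transpose([0, 1, 3, 2]))
print(attn_weights.shape)  # [2, 4, 8, 8], i.e. [bsz, num_heads, q_len, kv_seq_len]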
@@ -1127,7 +1128,7 @@ def forward(self, prediction_scores, masked_lm_labels):
         if self.enable_parallel_cross_entropy:
             if prediction_scores.shape[-1] == self.config.vocab_size:
                 warnings.warn(
-                    f"enable_parallel_cross_entropy, the vocab_size should be splited: {prediction_scores.shape[-1]}, {self.config.vocab_size}"
+                    f"enable_parallel_cross_entropy, the vocab_size should be splitted: {prediction_scores.shape[-1]}, {self.config.vocab_size}"
                 )
                 self.loss_func = paddle.nn.CrossEntropyLoss(reduction="none", ignore_index=self.ignore_index)

@@ -1202,14 +1203,7 @@ def get_decoder(self):
         return self.qwen2

     def prepare_inputs_for_generation(
-        self,
-        input_ids,
-        use_cache=False,
-        past_key_values=None,
-        attention_mask=None,
-        inputs_embeds=None,
-        output_router_logits=False,
-        **kwargs
+        self, input_ids, use_cache=False, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs
     ):
         batch_size, seq_length = input_ids.shape
         position_ids = kwargs.get("position_ids", paddle.arange(seq_length).expand((batch_size, seq_length)))
@@ -1230,7 +1224,6 @@ def prepare_inputs_for_generation(
                 "past_key_values": past_key_values,
                 "use_cache": use_cache,
                 "attention_mask": attention_mask,
-                "output_router_logits": output_router_logits,
             }
         )
         return model_inputs
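The two hunks above are the "fix output_router_logits" part of the commit: the parameter and the matching entry in model_inputs are dropped, presumably because router logits are a mixture-of-experts concept and the dense Qwen2 forward does not accept that keyword. A toy illustration (not PaddleNLP code; the forward signature below is made up) of why a stray key in the generation inputs is a problem:

def forward(input_ids, attention_mask=None, use_cache=False):
    return input_ids


model_inputs = {"input_ids": [1, 2, 3], "attention_mask": None, "output_router_logits": False}
try:
    forward(**model_inputs)
except TypeError as err:
    print(err)  # forward() got an unexpected keyword argument 'output_router_logits'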
@@ -1325,7 +1318,7 @@ def forward(
         hidden_states = outputs[0]

         # if labels is None,means we need full output, instead of tensor_parallel_output
-        # tensor_parallel_output is togather with ParallelCrossEntropy
+        # tensor_parallel_output is together with ParallelCrossEntropy
         tensor_parallel_output = (
             self.config.tensor_parallel_output and labels is not None and self.config.tensor_parallel_degree > 1
         )
