From 60a475e157d2e9617a83b7aea3c8221dc1a880bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AA=91=E9=A9=AC=E5=B0=8F=E7=8C=AB?= <1435130236@qq.com> Date: Wed, 7 Sep 2022 13:32:51 +0800 Subject: [PATCH] [ModelingOutput]update roformer unittest (#3159) * add roformer unittest * add roformer unittest * update test_modeling * use relative import * reduce model config to accelerate testing * remove input_embedding from pretrained model * revert slow tag * update local branch * update get_vocab method * update get_vocab method * update test_chinese method * change absolute import * update unittest * update chinese test case * add roformer more output testing Co-authored-by: Guo Sheng Co-authored-by: liu zhengxi <380185688@qq.com> --- paddlenlp/transformers/roformer/modeling.py | 6 + tests/transformers/roformer/test_modeling.py | 205 ++++++++++++------- 2 files changed, 133 insertions(+), 78 deletions(-) diff --git a/paddlenlp/transformers/roformer/modeling.py b/paddlenlp/transformers/roformer/modeling.py index d4aa86629372..68d0b9e8522c 100644 --- a/paddlenlp/transformers/roformer/modeling.py +++ b/paddlenlp/transformers/roformer/modeling.py @@ -709,6 +709,12 @@ def get_input_embeddings(self) -> nn.Embedding: def set_input_embeddings(self, embedding: nn.Embedding): self.embeddings.word_embeddings = embedding + def get_input_embeddings(self) -> nn.Embedding: + return self.embeddings.word_embeddings + + def set_input_embeddings(self, embedding: nn.Embedding): + self.embeddings.word_embeddings = embedding + class RoFormerForQuestionAnswering(RoFormerPretrainedModel): r""" diff --git a/tests/transformers/roformer/test_modeling.py b/tests/transformers/roformer/test_modeling.py index 23a46ddc3c93..bc9c7e3945e5 100644 --- a/tests/transformers/roformer/test_modeling.py +++ b/tests/transformers/roformer/test_modeling.py @@ -16,14 +16,17 @@ import unittest from typing import Optional, Tuple from dataclasses import dataclass, fields, Field +from parameterized import parameterized_class import paddle +from paddle import Tensor -from paddlenlp.transformers import ( - RoFormerModel, RoFormerPretrainedModel, RoFormerForPretraining, - RoFormerForSequenceClassification, RoFormerForTokenClassification, - RoFormerForQuestionAnswering, RoFormerForMultipleChoice, - RoFormerForMaskedLM) +from paddlenlp.transformers import (RoFormerModel, RoFormerPretrainedModel, + RoFormerForSequenceClassification, + RoFormerForTokenClassification, + RoFormerForQuestionAnswering, + RoFormerForMultipleChoice, + RoFormerForMaskedLM) from ..test_modeling_common import ids_tensor, floats_tensor, random_attention_mask, ModelTesterMixin from ...testing_utils import slow @@ -67,6 +70,7 @@ class RoFormerModelTestConfig(RoFormerModelTestModelConfig): is_training: bool = False use_input_mask: bool = False use_token_type_ids: bool = True + type_sequence_label_size = 3 # used for sequence classification num_classes: int = 3 @@ -102,27 +106,43 @@ def prepare_config_and_inputs(self): if self.config.use_token_type_ids: token_type_ids = ids_tensor([config.batch_size, config.seq_length], config.type_vocab_size) + sequence_labels = None + token_labels = None + choice_labels = None + + if self.parent.use_labels: + sequence_labels = ids_tensor([self.batch_size], + self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], + self.num_classes) + choice_labels = ids_tensor([self.batch_size], self.num_choices) config = self.get_config() - return config, input_ids, token_type_ids, input_mask + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels def get_config(self) -> dict: return self.config.model_kwargs - def create_and_check_model( - self, - config, - input_ids, - token_type_ids, - input_mask, - ): + def __getattr__(self, key: str): + if not hasattr(self.config, key): + raise AttributeError(f'attribute <{key}> not exist') + return getattr(self.config, key) + + def create_and_check_model(self, config, input_ids: Tensor, + token_type_ids: Tensor, input_mask: Tensor, + sequence_labels: Tensor, token_labels: Tensor, + choice_labels: Tensor): model = RoFormerModel(**config) model.eval() result = model(input_ids, attention_mask=input_mask, - token_type_ids=token_type_ids) - result = model(input_ids, token_type_ids=token_type_ids) - result = model(input_ids) + token_type_ids=token_type_ids, + return_dict=self.parent.return_dict) + result = model(input_ids, + token_type_ids=token_type_ids, + return_dict=self.parent.return_dict) + result = model(input_ids, return_dict=self.parent.return_dict) + self.parent.assertEqual(result[0].shape, [ self.config.batch_size, self.config.seq_length, self.config.hidden_size @@ -130,13 +150,12 @@ def create_and_check_model( self.parent.assertEqual( result[1].shape, [self.config.batch_size, self.config.hidden_size]) - def create_and_check_for_multiple_choice( - self, - config, - input_ids, - token_type_ids, - input_mask, - ): + def create_and_check_for_multiple_choice(self, config, input_ids: Tensor, + token_type_ids: Tensor, + input_mask: Tensor, + sequence_labels: Tensor, + token_labels: Tensor, + choice_labels: Tensor): model = RoFormerForMultipleChoice(RoFormerModel(**config), num_choices=self.config.num_choices) model.eval() @@ -151,89 +170,113 @@ def create_and_check_for_multiple_choice( input_mask = input_mask.unsqueeze(1).expand( [-1, self.config.num_choices, -1]) - result = model( - multiple_choice_inputs_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - ) - self.parent.assertEqual( - result.shape, [self.config.batch_size, self.config.num_choices]) + result = model(multiple_choice_inputs_ids, + attention_mask=input_mask, + token_type_ids=token_type_ids, + labels=choice_labels, + return_dict=self.parent.return_dict) + + if paddle.is_tensor(result): + result = [result] + elif choice_labels is not None: + result = result[1:] - def create_and_check_for_question_answering(self, config, input_ids, - token_type_ids, input_mask): + self.parent.assertEqual( + result[0].shape, [self.config.batch_size, self.config.num_choices]) + + def create_and_check_for_question_answering(self, config, input_ids: Tensor, + token_type_ids: Tensor, + input_mask: Tensor, + sequence_labels: Tensor, + token_labels: Tensor, + choice_labels: Tensor): model = RoFormerForQuestionAnswering(RoFormerModel(**config)) model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - ) + result = model(input_ids, + attention_mask=input_mask, + token_type_ids=token_type_ids, + start_positions=sequence_labels, + end_positions=sequence_labels, + return_dict=self.parent.return_dict) + + if paddle.is_tensor(result): + result = [result] + elif choice_labels is not None: + result = result[1:] + self.parent.assertEqual( result[0].shape, [self.config.batch_size, self.config.seq_length]) self.parent.assertEqual( result[1].shape, [self.config.batch_size, self.config.seq_length]) def create_and_check_for_token_classification( - self, - config, - input_ids, - token_type_ids, - input_mask, - ): + self, config, input_ids: Tensor, token_type_ids: Tensor, + input_mask: Tensor, sequence_labels: Tensor, token_labels: Tensor, + choice_labels: Tensor): model = RoFormerForTokenClassification(RoFormerModel(**config), num_classes=self.num_classes) model.eval() result = model(input_ids, attention_mask=input_mask, - token_type_ids=token_type_ids) - self.parent.assertEqual(result.shape, [ + token_type_ids=token_type_ids, + labels=token_labels, + return_dict=self.parent.return_dict) + if paddle.is_tensor(result): + result = [result] + elif choice_labels is not None: + result = result[1:] + + self.parent.assertEqual(result[0].shape, [ self.config.batch_size, self.config.seq_length, self.config.num_classes ]) - def create_and_check_for_masked_lm( - self, - config, - input_ids, - token_type_ids, - input_mask, - ): + def create_and_check_for_masked_lm(self, config, input_ids: Tensor, + token_type_ids: Tensor, + input_mask: Tensor, + sequence_labels: Tensor, + token_labels: Tensor, + choice_labels: Tensor): model = RoFormerForMaskedLM(RoFormerModel(**config)) model.eval() result = model(input_ids, attention_mask=input_mask, - token_type_ids=token_type_ids) - self.parent.assertEqual(result.shape, [ + token_type_ids=token_type_ids, + labels=token_labels, + return_dict=self.parent.return_dict) + if paddle.is_tensor(result): + result = [result] + elif choice_labels is not None: + result = result[1:] + + self.parent.assertEqual(result[0].shape, [ self.config.batch_size, self.config.seq_length, self.config.vocab_size ]) def create_and_check_for_sequence_classification( - self, - config, - input_ids, - token_type_ids, - input_mask, - ): + self, config, input_ids: Tensor, token_type_ids: Tensor, + input_mask: Tensor, sequence_labels: Tensor, token_labels: Tensor, + choice_labels: Tensor): model = RoFormerForSequenceClassification( RoFormerModel(**config), num_classes=self.config.num_classes) model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - ) + result = model(input_ids, + attention_mask=input_mask, + token_type_ids=token_type_ids, + labels=sequence_labels, + return_dict=self.parent.return_dict) + if paddle.is_tensor(result): + result = [result] + elif choice_labels is not None: + result = result[1:] self.parent.assertEqual( - result.shape, [self.config.batch_size, self.config.num_classes]) + result[0].shape, [self.config.batch_size, self.config.num_classes]) def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - ) = config_and_inputs + (config, input_ids, token_type_ids, input_mask, _, _, + _) = config_and_inputs inputs_dict = { "input_ids": input_ids, "token_type_ids": token_type_ids, @@ -242,15 +285,21 @@ def prepare_config_and_inputs_for_common(self): return config, inputs_dict +@parameterized_class(("return_dict", "use_labels"), [ + [False, False], + [False, True], + [True, False], + [True, True], +]) class RoFormerModelTest(ModelTesterMixin, unittest.TestCase): base_model_class = RoFormerModel + use_labels = False + return_dict = False - all_model_classes = ( - RoFormerModel, - RoFormerForMultipleChoice, - RoFormerForPretraining, - RoFormerForSequenceClassification, - ) + all_model_classes = (RoFormerModel, RoFormerForSequenceClassification, + RoFormerForTokenClassification, + RoFormerForQuestionAnswering, + RoFormerForMultipleChoice, RoFormerForMaskedLM) def setUp(self): self.model_tester = RoFormerModelTester(self)