From 60a475e157d2e9617a83b7aea3c8221dc1a880bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=AA=91=E9=A9=AC=E5=B0=8F=E7=8C=AB?= <1435130236@qq.com>
Date: Wed, 7 Sep 2022 13:32:51 +0800
Subject: [PATCH] [ModelingOutput]update roformer unittest (#3159)

* add roformer unittest

* add roformer unittest

* update test_modeling

* use relative import

* reduce model config to accelerate testing

* remove input_embedding from pretrained model

* revert slow tag

* update local branch

* update get_vocab method

* update get_vocab method

* update test_chinese method

* change absolute import

* update unittest

* update chinese test case

* add roformer more output testing

Co-authored-by: Guo Sheng <guosheng@baidu.com>
Co-authored-by: liu zhengxi <380185688@qq.com>
---
 paddlenlp/transformers/roformer/modeling.py  |   6 +
 tests/transformers/roformer/test_modeling.py | 205 ++++++++++++-------
 2 files changed, 133 insertions(+), 78 deletions(-)

diff --git a/paddlenlp/transformers/roformer/modeling.py b/paddlenlp/transformers/roformer/modeling.py
index d4aa86629372..68d0b9e8522c 100644
--- a/paddlenlp/transformers/roformer/modeling.py
+++ b/paddlenlp/transformers/roformer/modeling.py
@@ -709,6 +709,12 @@ def get_input_embeddings(self) -> nn.Embedding:
     def set_input_embeddings(self, embedding: nn.Embedding):
         self.embeddings.word_embeddings = embedding
 
+    def get_input_embeddings(self) -> nn.Embedding:  # NOTE(review): a def with this same name appears just above — looks like a duplicate; confirm and drop one
+        return self.embeddings.word_embeddings  # hands back the word-embedding layer held by self.embeddings
+
+    def set_input_embeddings(self, embedding: nn.Embedding):  # NOTE(review): repeats the identical method defined immediately above; the later definition wins
+        self.embeddings.word_embeddings = embedding  # swaps in the supplied word-embedding layer
+
 
 class RoFormerForQuestionAnswering(RoFormerPretrainedModel):
     r"""
diff --git a/tests/transformers/roformer/test_modeling.py b/tests/transformers/roformer/test_modeling.py
index 23a46ddc3c93..bc9c7e3945e5 100644
--- a/tests/transformers/roformer/test_modeling.py
+++ b/tests/transformers/roformer/test_modeling.py
@@ -16,14 +16,17 @@
 import unittest
 from typing import Optional, Tuple
 from dataclasses import dataclass, fields, Field
+from parameterized import parameterized_class
 
 import paddle
+from paddle import Tensor
 
-from paddlenlp.transformers import (
-    RoFormerModel, RoFormerPretrainedModel, RoFormerForPretraining,
-    RoFormerForSequenceClassification, RoFormerForTokenClassification,
-    RoFormerForQuestionAnswering, RoFormerForMultipleChoice,
-    RoFormerForMaskedLM)
+from paddlenlp.transformers import (RoFormerModel, RoFormerPretrainedModel,
+                                    RoFormerForSequenceClassification,
+                                    RoFormerForTokenClassification,
+                                    RoFormerForQuestionAnswering,
+                                    RoFormerForMultipleChoice,
+                                    RoFormerForMaskedLM)
 
 from ..test_modeling_common import ids_tensor, floats_tensor, random_attention_mask, ModelTesterMixin
 from ...testing_utils import slow
@@ -67,6 +70,7 @@ class RoFormerModelTestConfig(RoFormerModelTestModelConfig):
     is_training: bool = False
     use_input_mask: bool = False
     use_token_type_ids: bool = True
+    type_sequence_label_size = 3
 
     # used for sequence classification
     num_classes: int = 3
@@ -102,27 +106,43 @@ def prepare_config_and_inputs(self):
         if self.config.use_token_type_ids:
             token_type_ids = ids_tensor([config.batch_size, config.seq_length],
                                         config.type_vocab_size)
+        sequence_labels = None
+        token_labels = None
+        choice_labels = None
+
+        if self.parent.use_labels:
+            sequence_labels = ids_tensor([self.batch_size],
+                                         self.type_sequence_label_size)
+            token_labels = ids_tensor([self.batch_size, self.seq_length],
+                                      self.num_classes)
+            choice_labels = ids_tensor([self.batch_size], self.num_choices)
 
         config = self.get_config()
-        return config, input_ids, token_type_ids, input_mask
+        return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
 
     def get_config(self) -> dict:
         return self.config.model_kwargs
 
-    def create_and_check_model(
-        self,
-        config,
-        input_ids,
-        token_type_ids,
-        input_mask,
-    ):
+    def __getattr__(self, key: str):  # fallback lookup: forward unknown attributes to self.config
+        if key == "config" or not hasattr(self.config, key):  # "config" only lands here when unset; never recurse on it
+            raise AttributeError(f'attribute <{key}> not exist')
+        return getattr(self.config, key)
+
+    def create_and_check_model(self, config, input_ids: Tensor,
+                               token_type_ids: Tensor, input_mask: Tensor,
+                               sequence_labels: Tensor, token_labels: Tensor,
+                               choice_labels: Tensor):
         model = RoFormerModel(**config)
         model.eval()
         result = model(input_ids,
                        attention_mask=input_mask,
-                       token_type_ids=token_type_ids)
-        result = model(input_ids, token_type_ids=token_type_ids)
-        result = model(input_ids)
+                       token_type_ids=token_type_ids,
+                       return_dict=self.parent.return_dict)
+        result = model(input_ids,
+                       token_type_ids=token_type_ids,
+                       return_dict=self.parent.return_dict)
+        result = model(input_ids, return_dict=self.parent.return_dict)
+
         self.parent.assertEqual(result[0].shape, [
             self.config.batch_size, self.config.seq_length,
             self.config.hidden_size
@@ -130,13 +150,12 @@ def create_and_check_model(
         self.parent.assertEqual(
             result[1].shape, [self.config.batch_size, self.config.hidden_size])
 
-    def create_and_check_for_multiple_choice(
-        self,
-        config,
-        input_ids,
-        token_type_ids,
-        input_mask,
-    ):
+    def create_and_check_for_multiple_choice(self, config, input_ids: Tensor,
+                                             token_type_ids: Tensor,
+                                             input_mask: Tensor,
+                                             sequence_labels: Tensor,
+                                             token_labels: Tensor,
+                                             choice_labels: Tensor):
         model = RoFormerForMultipleChoice(RoFormerModel(**config),
                                           num_choices=self.config.num_choices)
         model.eval()
@@ -151,89 +170,113 @@ def create_and_check_for_multiple_choice(
             input_mask = input_mask.unsqueeze(1).expand(
                 [-1, self.config.num_choices, -1])
 
-        result = model(
-            multiple_choice_inputs_ids,
-            attention_mask=input_mask,
-            token_type_ids=token_type_ids,
-        )
-        self.parent.assertEqual(
-            result.shape, [self.config.batch_size, self.config.num_choices])
+        result = model(multiple_choice_inputs_ids,
+                       attention_mask=input_mask,
+                       token_type_ids=token_type_ids,
+                       labels=choice_labels,
+                       return_dict=self.parent.return_dict)
+
+        if paddle.is_tensor(result):
+            result = [result]
+        elif choice_labels is not None:
+            result = result[1:]
 
-    def create_and_check_for_question_answering(self, config, input_ids,
-                                                token_type_ids, input_mask):
+        self.parent.assertEqual(
+            result[0].shape, [self.config.batch_size, self.config.num_choices])
+
+    def create_and_check_for_question_answering(self, config, input_ids: Tensor,
+                                                token_type_ids: Tensor,
+                                                input_mask: Tensor,
+                                                sequence_labels: Tensor,
+                                                token_labels: Tensor,
+                                                choice_labels: Tensor):
         model = RoFormerForQuestionAnswering(RoFormerModel(**config))
         model.eval()
-        result = model(
-            input_ids,
-            attention_mask=input_mask,
-            token_type_ids=token_type_ids,
-        )
+        result = model(input_ids,
+                       attention_mask=input_mask,
+                       token_type_ids=token_type_ids,
+                       start_positions=sequence_labels,
+                       end_positions=sequence_labels,
+                       return_dict=self.parent.return_dict)
+
+        if paddle.is_tensor(result):
+            result = [result]
+        elif sequence_labels is not None:  # branch on the labels this call actually passed
+            result = result[1:]  # presumably skips the leading loss entry — confirm against the model output
+
         self.parent.assertEqual(
             result[0].shape, [self.config.batch_size, self.config.seq_length])
         self.parent.assertEqual(
             result[1].shape, [self.config.batch_size, self.config.seq_length])
 
     def create_and_check_for_token_classification(
-        self,
-        config,
-        input_ids,
-        token_type_ids,
-        input_mask,
-    ):
+            self, config, input_ids: Tensor, token_type_ids: Tensor,
+            input_mask: Tensor, sequence_labels: Tensor, token_labels: Tensor,
+            choice_labels: Tensor):
         model = RoFormerForTokenClassification(RoFormerModel(**config),
                                                num_classes=self.num_classes)
         model.eval()
         result = model(input_ids,
                        attention_mask=input_mask,
-                       token_type_ids=token_type_ids)
-        self.parent.assertEqual(result.shape, [
+                       token_type_ids=token_type_ids,
+                       labels=token_labels,
+                       return_dict=self.parent.return_dict)
+        if paddle.is_tensor(result):
+            result = [result]
+        elif token_labels is not None:  # branch on the labels this call actually passed
+            result = result[1:]  # presumably skips the leading loss entry — confirm against the model output
+
+        self.parent.assertEqual(result[0].shape, [
             self.config.batch_size, self.config.seq_length,
             self.config.num_classes
         ])
 
-    def create_and_check_for_masked_lm(
-        self,
-        config,
-        input_ids,
-        token_type_ids,
-        input_mask,
-    ):
+    def create_and_check_for_masked_lm(self, config, input_ids: Tensor,
+                                       token_type_ids: Tensor,
+                                       input_mask: Tensor,
+                                       sequence_labels: Tensor,
+                                       token_labels: Tensor,
+                                       choice_labels: Tensor):
         model = RoFormerForMaskedLM(RoFormerModel(**config))
         model.eval()
         result = model(input_ids,
                        attention_mask=input_mask,
-                       token_type_ids=token_type_ids)
-        self.parent.assertEqual(result.shape, [
+                       token_type_ids=token_type_ids,
+                       labels=token_labels,
+                       return_dict=self.parent.return_dict)
+        if paddle.is_tensor(result):
+            result = [result]
+        elif token_labels is not None:  # branch on the labels this call actually passed
+            result = result[1:]  # presumably skips the leading loss entry — confirm against the model output
+
+        self.parent.assertEqual(result[0].shape, [
             self.config.batch_size, self.config.seq_length,
             self.config.vocab_size
         ])
 
     def create_and_check_for_sequence_classification(
-        self,
-        config,
-        input_ids,
-        token_type_ids,
-        input_mask,
-    ):
+            self, config, input_ids: Tensor, token_type_ids: Tensor,
+            input_mask: Tensor, sequence_labels: Tensor, token_labels: Tensor,
+            choice_labels: Tensor):
         model = RoFormerForSequenceClassification(
             RoFormerModel(**config), num_classes=self.config.num_classes)
         model.eval()
-        result = model(
-            input_ids,
-            attention_mask=input_mask,
-            token_type_ids=token_type_ids,
-        )
+        result = model(input_ids,
+                       attention_mask=input_mask,
+                       token_type_ids=token_type_ids,
+                       labels=sequence_labels,
+                       return_dict=self.parent.return_dict)
+        if paddle.is_tensor(result):
+            result = [result]
+        elif sequence_labels is not None:  # branch on the labels this call actually passed
+            result = result[1:]  # presumably skips the leading loss entry — confirm against the model output
         self.parent.assertEqual(
-            result.shape, [self.config.batch_size, self.config.num_classes])
+            result[0].shape, [self.config.batch_size, self.config.num_classes])
 
     def prepare_config_and_inputs_for_common(self):
         config_and_inputs = self.prepare_config_and_inputs()
-        (
-            config,
-            input_ids,
-            token_type_ids,
-            input_mask,
-        ) = config_and_inputs
+        (config, input_ids, token_type_ids, input_mask, _, _,
+         _) = config_and_inputs
         inputs_dict = {
             "input_ids": input_ids,
             "token_type_ids": token_type_ids,
@@ -242,15 +285,21 @@ def prepare_config_and_inputs_for_common(self):
         return config, inputs_dict
 
 
+@parameterized_class(("return_dict", "use_labels"), [
+    [False, False],
+    [False, True],
+    [True, False],
+    [True, True],
+])
 class RoFormerModelTest(ModelTesterMixin, unittest.TestCase):
     base_model_class = RoFormerModel
+    use_labels = False
+    return_dict = False
 
-    all_model_classes = (
-        RoFormerModel,
-        RoFormerForMultipleChoice,
-        RoFormerForPretraining,
-        RoFormerForSequenceClassification,
-    )
+    all_model_classes = (RoFormerModel, RoFormerForSequenceClassification,
+                         RoFormerForTokenClassification,
+                         RoFormerForQuestionAnswering,
+                         RoFormerForMultipleChoice, RoFormerForMaskedLM)
 
     def setUp(self):
         self.model_tester = RoFormerModelTester(self)