Fixing issue #1137 and adding TorchScript archiving commands + fixing initialization for TorchScript models. #1141

Merged · 9 commits · Jul 6, 2021
examples/Huggingface_Transformers/Download_Transformer_models.py

@@ -49,8 +49,9 @@ def transformers_model_dowloader(mode,pretrained_model_name,num_labels,do_lower_
     dummy_input = "This is a dummy input for torch jit trace"
     inputs = tokenizer.encode_plus(dummy_input,max_length = int(max_length),pad_to_max_length = True, add_special_tokens = True, return_tensors = 'pt')
     input_ids = inputs["input_ids"].to(device)
+    attention_mask = inputs["attention_mask"].to(device)
     model.to(device).eval()
-    traced_model = torch.jit.trace(model, [input_ids])
+    traced_model = torch.jit.trace(model, (input_ids, attention_mask))
     torch.jit.save(traced_model,os.path.join(NEW_DIR, "traced_model.pt"))
     return
 if __name__== "__main__":
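For context, here is a minimal standalone sketch of the tracing flow this hunk changes. The model name and sequence length are assumptions for illustration (the real script reads them from setup_config.json), and `torchscript=True` is what makes the Hugging Face model return traceable tuples:

```python
# Hedged sketch of the tracing step; model name and max_length are assumed.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "bert-base-uncased"  # assumption for illustration
model = AutoModelForSequenceClassification.from_pretrained(model_name, torchscript=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
inputs = tokenizer.encode_plus(
    "This is a dummy input for torch jit trace",
    max_length=150,  # assumed; should match the padding length used at serve time
    pad_to_max_length=True,
    add_special_tokens=True,
    return_tensors="pt",
)
input_ids = inputs["input_ids"].to(device)
attention_mask = inputs["attention_mask"].to(device)

model.to(device).eval()
# Tracing with both tensors bakes a two-argument forward into the graph,
# so the handler can pass real attention masks for padded batches.
traced_model = torch.jit.trace(model, (input_ids, attention_mask))
torch.jit.save(traced_model, "traced_model.pt")
```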
examples/Huggingface_Transformers/README.md (21 additions, 4 deletions)

@@ -74,10 +74,17 @@ For examples of how to configure a model for a use case and what the input format

 ## Sequence Classification
 
-### Create model archive
+### Create model archive eager mode
 
 ```
-torch-model-archiver --model-name BERTSeqClassification --version 1.0 --serialized-file Transformer_model/pytorch_model.bin --handler ./Transformer_handler_generalized.py --extra-files "Transformer_model/config.json,./setup_config.json,./Seq_classification_artifacts/index_to_name.json" --model-file model.py
+torch-model-archiver --model-name BERTSeqClassification --version 1.0 --serialized-file Transformer_model/pytorch_model.bin --handler ./Transformer_handler_generalized.py --extra-files "Transformer_model/config.json,./setup_config.json,./Seq_classification_artifacts/index_to_name.json"
 
 ```
 
+### Create model archive Torchscript mode
+
+```
+torch-model-archiver --model-name BERTSeqClassification --version 1.0 --serialized-file Transformer_model/traced_model.pt --handler ./Transformer_handler_generalized.py --extra-files "./setup_config.json,./Seq_classification_artifacts/index_to_name.json"
+
+```

@@ -119,11 +126,16 @@
 rm -r Transformer_model
 python Download_Transformer_models.py
 ```
 
-### Create model archive
+### Create model archive eager mode
 ```
 torch-model-archiver --model-name BERTTokenClassification --version 1.0 --serialized-file Transformer_model/pytorch_model.bin --handler ./Transformer_handler_generalized.py --extra-files "Transformer_model/config.json,./setup_config.json,./Token_classification_artifacts/index_to_name.json"
 ```
 
+### Create model archive Torchscript mode
+```
+torch-model-archiver --model-name BERTTokenClassification --version 1.0 --serialized-file Transformer_model/traced_model.pt --handler ./Transformer_handler_generalized.py --extra-files "./setup_config.json,./Token_classification_artifacts/index_to_name.json"
+```
+
 ### Register the model
 
 ```
@@ -157,11 +169,16 @@
 rm -r Transformer_model
 python Download_Transformer_models.py
 ```
 
-### Create model archive
+### Create model archive eager mode
 ```
 torch-model-archiver --model-name BERTQA --version 1.0 --serialized-file Transformer_model/pytorch_model.bin --handler ./Transformer_handler_generalized.py --extra-files "Transformer_model/config.json,./setup_config.json"
 ```
 
+### Create model archive Torchscript mode
+```
+torch-model-archiver --model-name BERTQA --version 1.0 --serialized-file Transformer_model/traced_model.pt --handler ./Transformer_handler_generalized.py --extra-files "./setup_config.json"
+```
+
 ### Register the model
 
 ```
examples/Huggingface_Transformers/Transformer_handler_generalized.py

@@ -55,7 +55,7 @@ def initialize(self, ctx):
         # Loading the model and tokenizer from checkpoint and config files based on the user's choice of mode
         # further setup config can be added.
         if self.setup_config["save_mode"] == "torchscript":
-            self.model = torch.jit.load(model_pt_path)
+            self.model = torch.jit.load(model_pt_path, map_location=self.device)
         elif self.setup_config["save_mode"] == "pretrained":
             if self.setup_config["mode"] == "sequence_classification":
                 self.model = AutoModelForSequenceClassification.from_pretrained(
@@ -67,6 +67,8 @@ def initialize(self, ctx):
                 self.model = AutoModelForTokenClassification.from_pretrained(model_dir)
             else:
                 logger.warning("Missing the operation mode.")
+            self.model.to(self.device)
+
         else:
             logger.warning("Missing the checkpoint or state_dict.")

@@ -80,7 +82,6 @@ def initialize(self, ctx):
                 do_lower_case=self.setup_config["do_lower_case"],
             )
 
-        self.model.to(self.device)
         self.model.eval()
 
         logger.info(
@@ -106,7 +107,8 @@ def preprocess(self, requests):
         Returns:
             list : The preprocess function returns a list of Tensor for the size of the word tokens.
         """
-        input_batch = None
+        input_ids_batch = None
+        attention_mask_batch = None
         for idx, data in enumerate(requests):
             input_text = data.get("data")
             if input_text is None:
@@ -137,12 +139,17 @@ def preprocess(self, requests):
                 context = question_context["context"]
                 inputs = self.tokenizer.encode_plus(question, context, max_length=int(max_length), pad_to_max_length=True, add_special_tokens=True, return_tensors="pt")
             input_ids = inputs["input_ids"].to(self.device)
+            attention_mask = inputs["attention_mask"].to(self.device)
             # making a batch out of the recieved requests
+            # attention masks are passed for cases where input tokens are padded.
             if input_ids.shape is not None:
-                if input_batch is None:
-                    input_batch = input_ids
+                if input_ids_batch is None:
+                    input_ids_batch = input_ids
+                    attention_mask_batch = attention_mask
                 else:
-                    input_batch = torch.cat((input_batch, input_ids), 0)
-        return input_batch
+                    input_ids_batch = torch.cat((input_ids_batch, input_ids), 0)
+                    attention_mask_batch = torch.cat((attention_mask_batch, attention_mask), 0)
+        return (input_ids_batch, attention_mask_batch)
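A hedged illustration of the batching pattern above, using hypothetical token ids: fixed-length padded requests are concatenated along dimension 0, and their attention masks are kept in lockstep so the model can ignore pad positions:

```python
# Hypothetical tensors illustrating the preprocess batching above.
import torch

ids_a  = torch.tensor([[101, 7592, 102,   0]])  # request 1, one pad token
mask_a = torch.tensor([[  1,    1,   1,   0]])  # pad position masked out
ids_b  = torch.tensor([[101, 2088, 999, 102]])  # request 2, no padding
mask_b = torch.tensor([[  1,    1,   1,   1]])

input_ids_batch = torch.cat((ids_a, ids_b), 0)         # shape (2, 4)
attention_mask_batch = torch.cat((mask_a, mask_b), 0)  # shape (2, 4)
```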

     def inference(self, input_batch):
         """Predict the class (or classes) of the received text using the
@@ -152,11 +159,11 @@ def inference(self, input_batch):
         Returns:
             list : It returns a list of the predicted value for the input text
         """
-
+        input_ids_batch, attention_mask_batch = input_batch
         inferences = []
         # Handling inference for sequence_classification.
         if self.setup_config["mode"] == "sequence_classification":
-            predictions = self.model(input_batch)
+            predictions = self.model(input_ids_batch, attention_mask_batch)
             print("This the output size from the Seq classification model", predictions[0].size())
             print("This the output from the Seq classification model", predictions)

@@ -175,7 +182,7 @@ def inference(self, input_batch):
                 answer_start_scores = outputs.start_logits
                 answer_end_scores = outputs.end_logits
             else:
-                answer_start_scores, answer_end_scores = self.model(input_batch)
+                answer_start_scores, answer_end_scores = self.model(input_ids_batch, attention_mask_batch)
             print("This the output size for answer start scores from the question answering model", answer_start_scores.size())
             print("This the output for answer start scores from the question answering model", answer_start_scores)
             print("This the output size for answer end scores from the question answering model", answer_end_scores.size())
@@ -188,19 +195,19 @@ def inference(self, input_batch):
                 answer_start = torch.argmax(answer_start_scores_one_seq)
                 answer_end_scores_one_seq = answer_end_scores[i].unsqueeze(0)
                 answer_end = torch.argmax(answer_end_scores_one_seq) + 1
-                prediction = self.tokenizer.convert_tokens_to_string(self.tokenizer.convert_ids_to_tokens(input_batch[i].tolist()[answer_start:answer_end]))
+                prediction = self.tokenizer.convert_tokens_to_string(self.tokenizer.convert_ids_to_tokens(input_ids_batch[i].tolist()[answer_start:answer_end]))
                 inferences.append(prediction)
                 logger.info("Model predicted: '%s'", prediction)
         # Handling inference for token_classification.
         elif self.setup_config["mode"]== "token_classification":
-            outputs = self.model(input_batch)[0]
+            outputs = self.model(input_ids_batch, attention_mask_batch)[0]
             print("This the output size from the token classification model", outputs.size())
             print("This the output from the token classification model",outputs)
             num_rows = outputs.shape[0]
             for i in range(num_rows):
                 output = outputs[i].unsqueeze(0)
                 predictions = torch.argmax(output, dim=2)
-                tokens = self.tokenizer.tokenize(self.tokenizer.decode(input_batch[i]))
+                tokens = self.tokenizer.tokenize(self.tokenizer.decode(input_ids_batch[i]))
                 if self.mapping:
                     label_list = self.mapping["label_list"]
                     label_list = label_list.strip('][').split(', ')
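A hedged illustration of the token-classification post-processing above, with a hypothetical three-label scheme: argmax over the label dimension yields one label id per token, which the index_to_name mapping then translates to names:

```python
# Hypothetical logits: batch of 1, 4 tokens, 3 candidate labels.
import torch

outputs = torch.randn(1, 4, 3)
predictions = torch.argmax(outputs, dim=2)  # shape (1, 4): one label id per token
label_list = ["O", "B-PER", "I-PER"]        # assumed mapping for illustration
labels = [label_list[i] for i in predictions[0].tolist()]
```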
examples/Huggingface_Transformers/model.py (0 additions, 6 deletions)

This file was deleted: the eager-mode archiver command above no longer passes it via --model-file.