Fixing issue #1137 and adding TorchScript archiving commands + fixing initialization for TorchScript models. #1141

Merged · 9 commits · Jul 6, 2021
examples/Huggingface_Transformers/Download_Transformer_models.py

@@ -49,8 +49,9 @@ def transformers_model_dowloader(mode,pretrained_model_name,num_labels,do_lower_
     dummy_input = "This is a dummy input for torch jit trace"
     inputs = tokenizer.encode_plus(dummy_input,max_length = int(max_length),pad_to_max_length = True, add_special_tokens = True, return_tensors = 'pt')
     input_ids = inputs["input_ids"].to(device)
+    attention_mask = inputs["attention_mask"].to(device)
     model.to(device).eval()
-    traced_model = torch.jit.trace(model, [input_ids])
+    traced_model = torch.jit.trace(model, (input_ids, attention_mask))
     torch.jit.save(traced_model,os.path.join(NEW_DIR, "traced_model.pt"))
     return
 if __name__== "__main__":
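For context, here is a minimal standalone sketch of the tracing flow this hunk changes. The model name and sequence length are assumptions for illustration (the real script reads them from setup_config.json), and `torchscript=True` is what makes the Hugging Face model return traceable tuples:

```python
# Hedged sketch of the tracing step; model name and max_length are assumed.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "bert-base-uncased"  # assumption for illustration
model = AutoModelForSequenceClassification.from_pretrained(model_name, torchscript=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
inputs = tokenizer.encode_plus(
    "This is a dummy input for torch jit trace",
    max_length=150,  # assumed; should match the padding length used at serve time
    pad_to_max_length=True,
    add_special_tokens=True,
    return_tensors="pt",
)
input_ids = inputs["input_ids"].to(device)
attention_mask = inputs["attention_mask"].to(device)

model.to(device).eval()
# Tracing with both tensors bakes a two-argument forward into the graph,
# so the handler can pass real attention masks for padded batches.
traced_model = torch.jit.trace(model, (input_ids, attention_mask))
torch.jit.save(traced_model, "traced_model.pt")
```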
examples/Huggingface_Transformers/README.md (21 additions, 4 deletions)

@@ -74,10 +74,17 @@ For examples of how to configure a model for a use case and what the input format

 ## Sequence Classification
 
-### Create model archive
+### Create model archive eager mode
 
 ```
-torch-model-archiver --model-name BERTSeqClassification --version 1.0 --serialized-file Transformer_model/pytorch_model.bin --handler ./Transformer_handler_generalized.py --extra-files "Transformer_model/config.json,./setup_config.json,./Seq_classification_artifacts/index_to_name.json" --model-file model.py
+torch-model-archiver --model-name BERTSeqClassification --version 1.0 --serialized-file Transformer_model/pytorch_model.bin --handler ./Transformer_handler_generalized.py --extra-files "Transformer_model/config.json,./setup_config.json,./Seq_classification_artifacts/index_to_name.json"
 
 ```
 
+### Create model archive Torchscript mode
+
+```
+torch-model-archiver --model-name BERTSeqClassification --version 1.0 --serialized-file Transformer_model/traced_model.pt --handler ./Transformer_handler_generalized.py --extra-files "./setup_config.json,./Seq_classification_artifacts/index_to_name.json"
+
+```

@@ -119,11 +126,16 @@
 rm -r Transformer_model
 python Download_Transformer_models.py
 ```
 
-### Create model archive
+### Create model archive eager mode
 ```
 torch-model-archiver --model-name BERTTokenClassification --version 1.0 --serialized-file Transformer_model/pytorch_model.bin --handler ./Transformer_handler_generalized.py --extra-files "Transformer_model/config.json,./setup_config.json,./Token_classification_artifacts/index_to_name.json"
 ```
 
+### Create model archive Torchscript mode
+```
+torch-model-archiver --model-name BERTTokenClassification --version 1.0 --serialized-file Transformer_model/traced_model.pt --handler ./Transformer_handler_generalized.py --extra-files "./setup_config.json,./Token_classification_artifacts/index_to_name.json"
+```
+
 ### Register the model
 
 ```
@@ -157,11 +169,16 @@
 rm -r Transformer_model
 python Download_Transformer_models.py
 ```
 
-### Create model archive
+### Create model archive eager mode
 ```
 torch-model-archiver --model-name BERTQA --version 1.0 --serialized-file Transformer_model/pytorch_model.bin --handler ./Transformer_handler_generalized.py --extra-files "Transformer_model/config.json,./setup_config.json"
 ```
 
+### Create model archive Torchscript mode
+```
+torch-model-archiver --model-name BERTQA --version 1.0 --serialized-file Transformer_model/traced_model.pt --handler ./Transformer_handler_generalized.py --extra-files "./setup_config.json"
+```
+
 ### Register the model
 
 ```
examples/Huggingface_Transformers/Transformer_handler_generalized.py

@@ -55,7 +55,7 @@ def initialize(self, ctx):
         # Loading the model and tokenizer from checkpoint and config files based on the user's choice of mode
         # further setup config can be added.
         if self.setup_config["save_mode"] == "torchscript":
-            self.model = torch.jit.load(model_pt_path)
+            self.model = torch.jit.load(model_pt_path, map_location=self.device)
         elif self.setup_config["save_mode"] == "pretrained":
             if self.setup_config["mode"] == "sequence_classification":
                 self.model = AutoModelForSequenceClassification.from_pretrained(
@@ -67,6 +67,8 @@ def initialize(self, ctx):
                 self.model = AutoModelForTokenClassification.from_pretrained(model_dir)
             else:
                 logger.warning("Missing the operation mode.")
+            self.model.to(self.device)
+
         else:
             logger.warning("Missing the checkpoint or state_dict.")

@@ -80,7 +82,6 @@ def initialize(self, ctx):
                 do_lower_case=self.setup_config["do_lower_case"],
             )
 
-        self.model.to(self.device)
         self.model.eval()
 
         logger.info(
@@ -106,7 +107,8 @@ def preprocess(self, requests):
         Returns:
             list : The preprocess function returns a list of Tensor for the size of the word tokens.
         """
-        input_batch = None
+        input_ids_batch = None
+        attention_mask_batch = None
         for idx, data in enumerate(requests):
             input_text = data.get("data")
             if input_text is None:
@@ -137,12 +139,17 @@ def preprocess(self, requests):
                 context = question_context["context"]
                 inputs = self.tokenizer.encode_plus(question, context, max_length=int(max_length), pad_to_max_length=True, add_special_tokens=True, return_tensors="pt")
             input_ids = inputs["input_ids"].to(self.device)
+            attention_mask = inputs["attention_mask"].to(self.device)
             # making a batch out of the recieved requests
+            # attention masks are passed for cases where input tokens are padded.
             if input_ids.shape is not None:
-                if input_batch is None:
-                    input_batch = input_ids
+                if input_ids_batch is None:
+                    input_ids_batch = input_ids
+                    attention_mask_batch = attention_mask
                 else:
-                    input_batch = torch.cat((input_batch, input_ids), 0)
-        return input_batch
+                    input_ids_batch = torch.cat((input_ids_batch, input_ids), 0)
+                    attention_mask_batch = torch.cat((attention_mask_batch, attention_mask), 0)
+        return (input_ids_batch, attention_mask_batch)
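A hedged illustration of the batching pattern above, using hypothetical token ids: fixed-length padded requests are concatenated along dimension 0, and their attention masks are kept in lockstep so the model can ignore pad positions:

```python
# Hypothetical tensors illustrating the preprocess batching above.
import torch

ids_a  = torch.tensor([[101, 7592, 102,   0]])  # request 1, one pad token
mask_a = torch.tensor([[  1,    1,   1,   0]])  # pad position masked out
ids_b  = torch.tensor([[101, 2088, 999, 102]])  # request 2, no padding
mask_b = torch.tensor([[  1,    1,   1,   1]])

input_ids_batch = torch.cat((ids_a, ids_b), 0)         # shape (2, 4)
attention_mask_batch = torch.cat((mask_a, mask_b), 0)  # shape (2, 4)
```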

     def inference(self, input_batch):
         """Predict the class (or classes) of the received text using the
@@ -152,11 +159,11 @@ def inference(self, input_batch):
         Returns:
             list : It returns a list of the predicted value for the input text
         """
-
+        input_ids_batch, attention_mask_batch = input_batch
         inferences = []
         # Handling inference for sequence_classification.
         if self.setup_config["mode"] == "sequence_classification":
-            predictions = self.model(input_batch)
+            predictions = self.model(input_ids_batch, attention_mask_batch)
             print("This the output size from the Seq classification model", predictions[0].size())
             print("This the output from the Seq classification model", predictions)

@@ -175,7 +182,7 @@ def inference(self, input_batch):
                 answer_start_scores = outputs.start_logits
                 answer_end_scores = outputs.end_logits
             else:
-                answer_start_scores, answer_end_scores = self.model(input_batch)
+                answer_start_scores, answer_end_scores = self.model(input_ids_batch, attention_mask_batch)
             print("This the output size for answer start scores from the question answering model", answer_start_scores.size())
             print("This the output for answer start scores from the question answering model", answer_start_scores)
             print("This the output size for answer end scores from the question answering model", answer_end_scores.size())
@@ -188,19 +195,19 @@ def inference(self, input_batch):
                 answer_start = torch.argmax(answer_start_scores_one_seq)
                 answer_end_scores_one_seq = answer_end_scores[i].unsqueeze(0)
                 answer_end = torch.argmax(answer_end_scores_one_seq) + 1
-                prediction = self.tokenizer.convert_tokens_to_string(self.tokenizer.convert_ids_to_tokens(input_batch[i].tolist()[answer_start:answer_end]))
+                prediction = self.tokenizer.convert_tokens_to_string(self.tokenizer.convert_ids_to_tokens(input_ids_batch[i].tolist()[answer_start:answer_end]))
                 inferences.append(prediction)
                 logger.info("Model predicted: '%s'", prediction)
         # Handling inference for token_classification.
         elif self.setup_config["mode"]== "token_classification":
-            outputs = self.model(input_batch)[0]
+            outputs = self.model(input_ids_batch, attention_mask_batch)[0]
             print("This the output size from the token classification model", outputs.size())
             print("This the output from the token classification model",outputs)
             num_rows = outputs.shape[0]
             for i in range(num_rows):
                 output = outputs[i].unsqueeze(0)
                 predictions = torch.argmax(output, dim=2)
-                tokens = self.tokenizer.tokenize(self.tokenizer.decode(input_batch[i]))
+                tokens = self.tokenizer.tokenize(self.tokenizer.decode(input_ids_batch[i]))
                 if self.mapping:
                     label_list = self.mapping["label_list"]
                     label_list = label_list.strip('][').split(', ')
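A hedged illustration of the token-classification post-processing above, with a hypothetical three-label scheme: argmax over the label dimension yields one label id per token, which the index_to_name mapping then translates to names:

```python
# Hypothetical logits: batch of 1, 4 tokens, 3 candidate labels.
import torch

outputs = torch.randn(1, 4, 3)
predictions = torch.argmax(outputs, dim=2)  # shape (1, 4): one label id per token
label_list = ["O", "B-PER", "I-PER"]        # assumed mapping for illustration
labels = [label_list[i] for i in predictions[0].tolist()]
```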
examples/Huggingface_Transformers/model.py (0 additions, 6 deletions)

This file was deleted: the eager-mode archiver command above no longer passes it via --model-file.