diff --git a/examples/pytorch/question-answering/run_qa.py b/examples/pytorch/question-answering/run_qa.py
index 57b0cb04e949..391e5b7a3998 100755
--- a/examples/pytorch/question-answering/run_qa.py
+++ b/examples/pytorch/question-answering/run_qa.py
@@ -283,6 +283,7 @@ def main():
         cache_dir=model_args.cache_dir,
         revision=model_args.model_revision,
         use_auth_token=True if model_args.use_auth_token else None,
+        ort=training_args.ort,
     )
     tokenizer = AutoTokenizer.from_pretrained(
         model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
diff --git a/src/transformers/models/roberta/modeling_roberta.py b/src/transformers/models/roberta/modeling_roberta.py
index cf535a719c8b..c078b14b4c5d 100644
--- a/src/transformers/models/roberta/modeling_roberta.py
+++ b/src/transformers/models/roberta/modeling_roberta.py
@@ -1414,6 +1414,7 @@ class RobertaForQuestionAnswering(RobertaPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
         self.num_labels = config.num_labels
+        self.ort = config.ort

         self.roberta = RobertaModel(config, add_pooling_layer=False)
         self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
@@ -1480,7 +1481,7 @@ def forward(
             if len(end_positions.size()) > 1:
                 end_positions = end_positions.squeeze(-1)
             # sometimes the start/end positions are outside our model inputs, we ignore these terms
-            ignored_index = start_logits.size(1)
+            ignored_index = start_logits.size(1) if not self.ort else 344
             start_positions.clamp_(0, ignored_index)
             end_positions.clamp_(0, ignored_index)
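
For reference, a minimal standalone sketch of the changed clamping path. `clamp_qa_positions` is a hypothetical helper, not part of the PR; the constant 344 is simply the value hard-coded in the diff above, presumably chosen to keep `ignored_index` static when training with ONNX Runtime instead of reading the dynamic sequence length from `start_logits.size(1)`.

import torch

def clamp_qa_positions(start_logits, start_positions, end_positions, ort=False):
    # Mirrors the edited block in RobertaForQuestionAnswering.forward: label positions
    # that fall outside the model inputs are clamped to `ignored_index`, which the loss
    # later skips via CrossEntropyLoss(ignore_index=ignored_index).
    # With `ort` enabled, the diff swaps the dynamic start_logits.size(1) for the
    # constant 344.
    ignored_index = start_logits.size(1) if not ort else 344
    start_positions = start_positions.clamp(0, ignored_index)
    end_positions = end_positions.clamp(0, ignored_index)
    return start_positions, end_positions, ignored_index

# Example: on 384-token inputs, an out-of-range position of 500 is clamped to
# ignored_index (384 normally, 344 when ort=True).
logits = torch.randn(2, 384)
starts = torch.tensor([12, 500])
ends = torch.tensor([30, 500])
print(clamp_qa_positions(logits, starts, ends, ort=True))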