From 0844df5e753dd174c95a3f0dd30c6b4df648e89b Mon Sep 17 00:00:00 2001
From: Lalit Pagaria
Date: Mon, 19 Oct 2020 22:46:10 +0200
Subject: [PATCH] Bumping up transformers version to 3.3.1 (#579)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* [RAG] Bumping up transformers version to 3.3.x

* Use Pytorch's native LayerNorm code, with default eps as 1e-12.
  Refer https://github.com/huggingface/transformers/pull/1089

Signed-off-by: lalitpagaria

* Using apex's FusedLayerNorm if available instead of Pytorch LayerNorm

* Remove pooling layer before converting to transformers

Co-authored-by: Bogdan Kostić
---
 farm/conversion/transformers.py  | 6 ++++++
 farm/modeling/prediction_head.py | 8 +++++++-
 requirements.txt                 | 2 +-
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/farm/conversion/transformers.py b/farm/conversion/transformers.py
index 6f076513f..60f05e2e0 100644
--- a/farm/conversion/transformers.py
+++ b/farm/conversion/transformers.py
@@ -182,6 +182,8 @@ def _convert_to_transformers_classification_regression(adaptive_model, predictio
     def _convert_to_transformers_qa(adaptive_model, prediction_head):
         # TODO add more infos to config
 
+        # remove pooling layer
+        adaptive_model.language_model.model.pooler = None
         # init model
         transformers_model = AutoModelForQuestionAnswering.from_config(adaptive_model.language_model.model.config)
         # transfer weights for language model + prediction head
@@ -193,6 +195,8 @@ def _convert_to_transformers_qa(adaptive_model, prediction_head):
 
     @staticmethod
     def _convert_to_transformers_lm(adaptive_model, prediction_head):
+        # remove pooling layer
+        adaptive_model.language_model.model.pooler = None
         # init model
         transformers_model = AutoModelWithLMHead.from_config(adaptive_model.language_model.model.config)
         # transfer weights for language model + prediction head
@@ -211,6 +215,8 @@ def _convert_to_transformers_lm(adaptive_model, prediction_head):
 
     @staticmethod
     def _convert_to_transformers_ner(adaptive_model, prediction_head):
+        # remove pooling layer
+        adaptive_model.language_model.model.pooler = None
         # add more info to config
         adaptive_model.language_model.model.config.num_labels = prediction_head.num_labels
         adaptive_model.language_model.model.config.id2label = {id: label for id, label in
diff --git a/farm/modeling/prediction_head.py b/farm/modeling/prediction_head.py
index 301dd805a..80810cf2f 100644
--- a/farm/modeling/prediction_head.py
+++ b/farm/modeling/prediction_head.py
@@ -4,7 +4,7 @@
 import numpy as np
 from pathlib import Path
 
-from transformers.modeling_bert import BertForPreTraining, BertLayerNorm, ACT2FN
+from transformers.modeling_bert import BertForPreTraining, ACT2FN
 from transformers.modeling_auto import AutoModelForQuestionAnswering, AutoModelForTokenClassification, AutoModelForSequenceClassification
 from typing import List
 
@@ -18,6 +18,12 @@
 logger = logging.getLogger(__name__)
 
 
+try:
+    from apex.normalization.fused_layer_norm import FusedLayerNorm as BertLayerNorm
+except (ImportError, AttributeError) as e:
+    logger.info("Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .")
+    BertLayerNorm = torch.nn.LayerNorm
+
 class PredictionHead(nn.Module):
     """
     Takes word embeddings from a language model and generates logits for a given task. Can also convert logits
diff --git a/requirements.txt b/requirements.txt
index a08883dfb..3c4403a26 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,7 +17,7 @@ sklearn
 seqeval
 mlflow==1.0.0
 # huggingface repository
-transformers==3.1.0
+transformers==3.3.1
 # accessing dictionary elements with dot notation
 dotmap==1.3.0
 # for inference-rest-apis
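
Note: the apex/PyTorch LayerNorm fallback added to farm/modeling/prediction_head.py above boils down to the pattern sketched below. This is a minimal standalone sketch, not FARM code; the hidden size 768 and the layer_norm variable are illustrative assumptions.

    import logging

    import torch

    logger = logging.getLogger(__name__)

    try:
        # prefer apex's fused CUDA LayerNorm kernel when apex is installed
        from apex.normalization.fused_layer_norm import FusedLayerNorm as BertLayerNorm
    except (ImportError, AttributeError):
        # otherwise fall back to PyTorch's native LayerNorm
        logger.info("Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .")
        BertLayerNorm = torch.nn.LayerNorm

    # either class is constructed the same way; BERT uses eps=1e-12 (hidden size here is illustrative)
    layer_norm = BertLayerNorm(768, eps=1e-12)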