Bumping up transformers version to 3.3.1 (#579)
* [RAG] Bumping up transformers version to 3.3.x

* Use PyTorch's native LayerNorm code, with the default eps of 1e-12. See huggingface/transformers#1089

Signed-off-by: lalitpagaria <pagaria.lalit@gmail.com>

* Use apex's FusedLayerNorm, if available, instead of PyTorch's LayerNorm

* Remove pooling layer before converting to transformers

Co-authored-by: Bogdan Kostić <bogdankostic@web.de>
lalitpagaria and bogdankostic authored Oct 19, 2020
1 parent 2ab42d2 commit 0844df5
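
For context on the LayerNorm bullets above: by transformers 3.x, BertLayerNorm had become a plain alias for torch.nn.LayerNorm (see huggingface/transformers#1089), so the native module with BERT's eps of 1e-12 is a drop-in replacement. A minimal sketch, assuming a BERT-base hidden size of 768:

    import torch

    hidden_size = 768  # placeholder; BERT-base hidden size
    layer_norm = torch.nn.LayerNorm(hidden_size, eps=1e-12)  # matches the old BertLayerNorm

    x = torch.randn(2, 16, hidden_size)  # (batch, seq_len, hidden)
    out = layer_norm(x)                  # normalizes over the last dimension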
Showing 3 changed files with 14 additions and 2 deletions.
farm/conversion/transformers.py (6 additions, 0 deletions)
@@ -182,6 +182,8 @@ def _convert_to_transformers_classification_regression(adaptive_model, prediction_head):
    def _convert_to_transformers_qa(adaptive_model, prediction_head):
        # TODO add more infos to config

+       # remove pooling layer
+       adaptive_model.language_model.model.pooler = None
        # init model
        transformers_model = AutoModelForQuestionAnswering.from_config(adaptive_model.language_model.model.config)
        # transfer weights for language model + prediction head
@@ -193,6 +195,8 @@ def _convert_to_transformers_qa(adaptive_model, prediction_head):

    @staticmethod
    def _convert_to_transformers_lm(adaptive_model, prediction_head):
+       # remove pooling layer
+       adaptive_model.language_model.model.pooler = None
        # init model
        transformers_model = AutoModelWithLMHead.from_config(adaptive_model.language_model.model.config)
        # transfer weights for language model + prediction head
@@ -211,6 +215,8 @@ def _convert_to_transformers_lm(adaptive_model, prediction_head):

    @staticmethod
    def _convert_to_transformers_ner(adaptive_model, prediction_head):
+       # remove pooling layer
+       adaptive_model.language_model.model.pooler = None
        # add more info to config
        adaptive_model.language_model.model.config.num_labels = prediction_head.num_labels
        adaptive_model.language_model.model.config.id2label = {id: label for id, label in
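
All three hunks above make the same two-line change: the language model's pooler is set to None before the transformers model is built from the config, so that pooler weights these heads never consume are not carried across. A condensed sketch of the shared pattern, reusing the names from the diff (the adaptive_model argument stands in for a FARM AdaptiveModel):

    from transformers.modeling_auto import AutoModelForQuestionAnswering

    def convert_to_transformers_qa(adaptive_model):
        # remove pooling layer: the QA head reads token-level hidden states,
        # not the pooled [CLS] output
        adaptive_model.language_model.model.pooler = None
        # init model from the existing config; weight transfer follows as in the diff
        return AutoModelForQuestionAnswering.from_config(adaptive_model.language_model.model.config)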
farm/modeling/prediction_head.py (7 additions, 1 deletion)
@@ -4,7 +4,7 @@
import numpy as np

from pathlib import Path
-from transformers.modeling_bert import BertForPreTraining, BertLayerNorm, ACT2FN
+from transformers.modeling_bert import BertForPreTraining, ACT2FN
from transformers.modeling_auto import AutoModelForQuestionAnswering, AutoModelForTokenClassification, AutoModelForSequenceClassification
from typing import List

@@ -18,6 +18,12 @@

logger = logging.getLogger(__name__)

+try:
+    from apex.normalization.fused_layer_norm import FusedLayerNorm as BertLayerNorm
+except (ImportError, AttributeError) as e:
+    logger.info("Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .")
+    BertLayerNorm = torch.nn.LayerNorm


class PredictionHead(nn.Module):
    """ Takes word embeddings from a language model and generates logits for a given task. Can also convert logits
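
The try/except above makes apex a soft dependency: FusedLayerNorm takes the same constructor arguments as torch.nn.LayerNorm, so downstream code can use whichever class the import resolved to. A usage sketch, assuming the fallback block above is in scope:

    norm = BertLayerNorm(768, eps=1e-12)  # 768 is a placeholder hidden size
    print(type(norm))  # apex's FusedLayerNorm if installed, torch.nn.LayerNorm otherwise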
requirements.txt (1 addition, 1 deletion)
@@ -17,7 +17,7 @@ sklearn
seqeval
mlflow==1.0.0
# huggingface repository
-transformers==3.1.0
+transformers==3.3.1
# accessing dictionary elements with dot notation
dotmap==1.3.0
# for inference-rest-apis
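
After reinstalling against the updated pin, the version can be sanity-checked from Python (a trivial check, not part of this commit):

    import transformers
    assert transformers.__version__ == "3.3.1", transformers.__version__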
