From fc0f4bdcbe4baa0bd9859a23cc4fc097088a0fca Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Tue, 19 Oct 2021 15:43:08 -0700 Subject: [PATCH] rebase upstream and re-run make style/quality --- docs/source/index.rst | 108 +++++++++++--------- src/transformers/models/esm/modeling_esm.py | 8 +- 2 files changed, 64 insertions(+), 52 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 7310003699bc43..b74de3bf4dc61a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -189,146 +189,158 @@ Supported models 29. :doc:`ELECTRA ` (from Google Research/Stanford University) released with the paper `ELECTRA: Pre-training text encoders as discriminators rather than generators `__ by Kevin Clark, Minh-Thang Luong, Quoc V. Le, Christopher D. Manning. -28. :doc:`ESM ` (from Facebook Research) released with the paper `Biological structure and function +30. :doc:`ESM ` (from Facebook Research) released with the paper `Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences `__ by Alexander Rives, Joshua Meier, Tom Sercu, Siddharth Goyal, Zeming Lin, Jason Liu, Demi Guo, Myle Ott, C. Lawrence Zitnick, Jerry Ma, and Rob Fergus. -29. :doc:`FlauBERT ` (from CNRS) released with the paper `FlauBERT: Unsupervised Language Model +31. :doc:`FlauBERT ` (from CNRS) released with the paper `FlauBERT: Unsupervised Language Model Pre-training for French `__ by Hang Le, Loïc Vial, Jibril Frej, Vincent Segonne, Maximin Coavoux, Benjamin Lecouteux, Alexandre Allauzen, Benoît Crabbé, Laurent Besacier, Didier Schwab. -30. :doc:`FNet ` (from Google Research) released with the paper `FNet: Mixing Tokens with Fourier +32. :doc:`FNet ` (from Google Research) released with the paper `FNet: Mixing Tokens with Fourier Transforms `__ by James Lee-Thorp, Joshua Ainslie, Ilya Eckstein, Santiago Ontanon. -31. :doc:`Funnel Transformer ` (from CMU/Google Brain) released with the paper `Funnel-Transformer: +33. :doc:`Funnel Transformer ` (from CMU/Google Brain) released with the paper `Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing `__ by Zihang Dai, Guokun Lai, Yiming Yang, Quoc V. Le. -32. :doc:`GPT ` (from OpenAI) released with the paper `Improving Language Understanding by Generative +34. :doc:`GPT ` (from OpenAI) released with the paper `Improving Language Understanding by Generative Pre-Training `__ by Alec Radford, Karthik Narasimhan, Tim Salimans and Ilya Sutskever. -33. :doc:`GPT-2 ` (from OpenAI) released with the paper `Language Models are Unsupervised Multitask +35. :doc:`GPT-2 ` (from OpenAI) released with the paper `Language Models are Unsupervised Multitask Learners `__ by Alec Radford*, Jeffrey Wu*, Rewon Child, David Luan, Dario Amodei** and Ilya Sutskever**. -34. :doc:`GPT-J ` (from EleutherAI) released in the repository `kingoflolz/mesh-transformer-jax +36. :doc:`GPT-J ` (from EleutherAI) released in the repository `kingoflolz/mesh-transformer-jax `__ by Ben Wang and Aran Komatsuzaki. -35. :doc:`GPT Neo ` (from EleutherAI) released in the repository `EleutherAI/gpt-neo +37. :doc:`GPT Neo ` (from EleutherAI) released in the repository `EleutherAI/gpt-neo `__ by Sid Black, Stella Biderman, Leo Gao, Phil Wang and Connor Leahy. -36. :doc:`Hubert ` (from Facebook) released with the paper `HuBERT: Self-Supervised Speech +38. 
:doc:`Hubert ` (from Facebook) released with the paper `HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units `__ by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed. -37. :doc:`I-BERT ` (from Berkeley) released with the paper `I-BERT: Integer-only BERT Quantization +39. :doc:`I-BERT ` (from Berkeley) released with the paper `I-BERT: Integer-only BERT Quantization `__ by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer. -38. :doc:`LayoutLM ` (from Microsoft Research Asia) released with the paper `LayoutLM: Pre-training +40. :doc:`LayoutLM ` (from Microsoft Research Asia) released with the paper `LayoutLM: Pre-training of Text and Layout for Document Image Understanding `__ by Yiheng Xu, Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, Ming Zhou. -39. :doc:`LayoutLMv2 ` (from Microsoft Research Asia) released with the paper `LayoutLMv2: +41. :doc:`LayoutLMv2 ` (from Microsoft Research Asia) released with the paper `LayoutLMv2: Multi-modal Pre-training for Visually-Rich Document Understanding `__ by Yang Xu, Yiheng Xu, Tengchao Lv, Lei Cui, Furu Wei, Guoxin Wang, Yijuan Lu, Dinei Florencio, Cha Zhang, Wanxiang Che, Min Zhang, Lidong Zhou. -40. :doc:`LayoutXLM ` (from Microsoft Research Asia) released with the paper `LayoutXLM: +42. :doc:`LayoutXLM ` (from Microsoft Research Asia) released with the paper `LayoutXLM: Multimodal Pre-training for Multilingual Visually-rich Document Understanding `__ by Yiheng Xu, Tengchao Lv, Lei Cui, Guoxin Wang, Yijuan Lu, Dinei Florencio, Cha Zhang, Furu Wei. -41. :doc:`LED ` (from AllenAI) released with the paper `Longformer: The Long-Document Transformer +43. :doc:`LED ` (from AllenAI) released with the paper `Longformer: The Long-Document Transformer `__ by Iz Beltagy, Matthew E. Peters, Arman Cohan. -42. :doc:`Longformer ` (from AllenAI) released with the paper `Longformer: The Long-Document +44. :doc:`Longformer ` (from AllenAI) released with the paper `Longformer: The Long-Document Transformer `__ by Iz Beltagy, Matthew E. Peters, Arman Cohan. -43. :doc:`LUKE ` (from Studio Ousia) released with the paper `LUKE: Deep Contextualized Entity +45. :doc:`LUKE ` (from Studio Ousia) released with the paper `LUKE: Deep Contextualized Entity Representations with Entity-aware Self-attention `__ by Ikuya Yamada, Akari Asai, Hiroyuki Shindo, Hideaki Takeda, Yuji Matsumoto. -44. :doc:`LXMERT ` (from UNC Chapel Hill) released with the paper `LXMERT: Learning Cross-Modality +46. :doc:`LXMERT ` (from UNC Chapel Hill) released with the paper `LXMERT: Learning Cross-Modality Encoder Representations from Transformers for Open-Domain Question Answering `__ by Hao Tan and Mohit Bansal. -45. :doc:`M2M100 ` (from Facebook) released with the paper `Beyond English-Centric Multilingual +47. :doc:`M2M100 ` (from Facebook) released with the paper `Beyond English-Centric Multilingual Machine Translation `__ by Angela Fan, Shruti Bhosale, Holger Schwenk, Zhiyi Ma, Ahmed El-Kishky, Siddharth Goyal, Mandeep Baines, Onur Celebi, Guillaume Wenzek, Vishrav Chaudhary, Naman Goyal, Tom Birch, Vitaliy Liptchinsky, Sergey Edunov, Edouard Grave, Michael Auli, Armand Joulin. -46. :doc:`MarianMT ` Machine translation models trained using `OPUS `__ data by +48. :doc:`MarianMT ` Machine translation models trained using `OPUS `__ data by Jörg Tiedemann. The `Marian Framework `__ is being developed by the Microsoft Translator Team. -47. 
:doc:`MBart ` (from Facebook) released with the paper `Multilingual Denoising Pre-training for +49. :doc:`MBart ` (from Facebook) released with the paper `Multilingual Denoising Pre-training for Neural Machine Translation `__ by Yinhan Liu, Jiatao Gu, Naman Goyal, Xian Li, Sergey Edunov, Marjan Ghazvininejad, Mike Lewis, Luke Zettlemoyer. -48. :doc:`MBart-50 ` (from Facebook) released with the paper `Multilingual Translation with Extensible +50. :doc:`MBart-50 ` (from Facebook) released with the paper `Multilingual Translation with Extensible Multilingual Pretraining and Finetuning `__ by Yuqing Tang, Chau Tran, Xian Li, Peng-Jen Chen, Naman Goyal, Vishrav Chaudhary, Jiatao Gu, Angela Fan. -49. :doc:`Megatron-BERT ` (from NVIDIA) released with the paper `Megatron-LM: Training +51. :doc:`Megatron-BERT ` (from NVIDIA) released with the paper `Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism `__ by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro. -50. :doc:`Megatron-GPT2 ` (from NVIDIA) released with the paper `Megatron-LM: Training +52. :doc:`Megatron-GPT2 ` (from NVIDIA) released with the paper `Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism `__ by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro. -51. :doc:`MPNet ` (from Microsoft Research) released with the paper `MPNet: Masked and Permuted +53. :doc:`MPNet ` (from Microsoft Research) released with the paper `MPNet: Masked and Permuted Pre-training for Language Understanding `__ by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu. -52. :doc:`MT5 ` (from Google AI) released with the paper `mT5: A massively multilingual pre-trained +54. :doc:`MT5 ` (from Google AI) released with the paper `mT5: A massively multilingual pre-trained text-to-text transformer `__ by Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, Colin Raffel. -53. :doc:`Pegasus ` (from Google) released with the paper `PEGASUS: Pre-training with Extracted +55. :doc:`Pegasus ` (from Google) released with the paper `PEGASUS: Pre-training with Extracted Gap-sentences for Abstractive Summarization `__ by Jingqing Zhang, Yao Zhao, Mohammad Saleh and Peter J. Liu. -54. :doc:`ProphetNet ` (from Microsoft Research) released with the paper `ProphetNet: Predicting +56. `PhoBERT `__ (from VinAI Research) released with + the paper `PhoBERT: Pre-trained language models for Vietnamese + `__ by Dat Quoc Nguyen and Anh Tuan Nguyen. +57. :doc:`ProphetNet ` (from Microsoft Research) released with the paper `ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training `__ by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou. -55. :doc:`Reformer ` (from Google Research) released with the paper `Reformer: The Efficient +58. :doc:`Reformer ` (from Google Research) released with the paper `Reformer: The Efficient Transformer `__ by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya. -56. :doc:`RemBERT ` (from Google Research) released with the paper `Rethinking embedding coupling in +59. :doc:`RemBERT ` (from Google Research) released with the paper `Rethinking embedding coupling in pre-trained language models `__ by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder. -57. :doc:`RoBERTa ` (from Facebook), released together with the paper a `Robustly Optimized BERT +60. 
:doc:`RoBERTa ` (from Facebook), released together with the paper a `Robustly Optimized BERT Pretraining Approach `__ by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, Veselin Stoyanov. -58. :doc:`RoFormer ` (from ZhuiyiTechnology), released together with the paper a `RoFormer: +61. :doc:`RoFormer ` (from ZhuiyiTechnology), released together with the paper a `RoFormer: Enhanced Transformer with Rotary Position Embedding `__ by Jianlin Su and Yu Lu and Shengfeng Pan and Bo Wen and Yunfeng Liu. -59. :doc:`SpeechEncoderDecoder ` -60. :doc:`SpeechToTextTransformer ` (from Facebook), released together with the paper +62. :doc:`SEW ` (from ASAPP) released with the paper `Performance-Efficiency Trade-offs in Unsupervised + Pre-training for Speech Recognition `__ by Felix Wu, Kwangyoun Kim, Jing Pan, Kyu + Han, Kilian Q. Weinberger, Yoav Artzi. +63. :doc:`SEW-D ` (from ASAPP) released with the paper `Performance-Efficiency Trade-offs in + Unsupervised Pre-training for Speech Recognition `__ by Felix Wu, Kwangyoun Kim, + Jing Pan, Kyu Han, Kilian Q. Weinberger, Yoav Artzi. +64. :doc:`SpeechToTextTransformer ` (from Facebook), released together with the paper `fairseq S2T: Fast Speech-to-Text Modeling with fairseq `__ by Changhan Wang, Yun Tang, Xutai Ma, Anne Wu, Dmytro Okhonko, Juan Pino. -61. :doc:`SpeechToTextTransformer2 ` (from Facebook), released together with the paper +65. :doc:`SpeechToTextTransformer2 ` (from Facebook), released together with the paper `Large-Scale Self- and Semi-Supervised Learning for Speech Translation `__ by Changhan Wang, Anne Wu, Juan Pino, Alexei Baevski, Michael Auli, Alexis Conneau. -62. :doc:`Splinter ` (from Tel Aviv University), released together with the paper `Few-Shot +66. :doc:`Splinter ` (from Tel Aviv University), released together with the paper `Few-Shot Question Answering by Pretraining Span Selection `__ by Ori Ram, Yuval Kirstain, Jonathan Berant, Amir Globerson, Omer Levy. -63. :doc:`SqueezeBert ` (from Berkeley) released with the paper `SqueezeBERT: What can computer +67. :doc:`SqueezeBert ` (from Berkeley) released with the paper `SqueezeBERT: What can computer vision teach NLP about efficient neural networks? `__ by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer. -64. :doc:`T5 ` (from Google AI) released with the paper `Exploring the Limits of Transfer Learning with a +68. :doc:`T5 ` (from Google AI) released with the paper `Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer `__ by Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu. -65. :doc:`T5v1.1 ` (from Google AI) released in the repository +69. :doc:`T5v1.1 ` (from Google AI) released in the repository `google-research/text-to-text-transfer-transformer `__ by Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu. -66. :doc:`TAPAS ` (from Google AI) released with the paper `TAPAS: Weakly Supervised Table Parsing via +70. :doc:`TAPAS ` (from Google AI) released with the paper `TAPAS: Weakly Supervised Table Parsing via Pre-training `__ by Jonathan Herzig, Paweł Krzysztof Nowak, Thomas Müller, Francesco Piccinno and Julian Martin Eisenschlos. -67. :doc:`Transformer-XL ` (from Google/CMU) released with the paper `Transformer-XL: +71. 
:doc:`Transformer-XL ` (from Google/CMU) released with the paper `Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context `__ by Zihang Dai*, Zhilin Yang*, Yiming Yang, Jaime Carbonell, Quoc V. Le, Ruslan Salakhutdinov. -68. :doc:`Vision Transformer (ViT) ` (from Google AI) released with the paper `An Image is Worth 16x16 +72. `TrOCR `__ (from Microsoft), released together + with the paper `TrOCR: Transformer-based Optical Character Recognition with Pre-trained Models + `__ by Minghao Li, Tengchao Lv, Lei Cui, Yijuan Lu, Dinei Florencio, Cha Zhang, + Zhoujun Li, Furu Wei. +73. :doc:`Vision Transformer (ViT) ` (from Google AI) released with the paper `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale `__ by Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby. -69. :doc:`VisualBERT ` (from UCLA NLP) released with the paper `VisualBERT: A Simple and +74. :doc:`VisualBERT ` (from UCLA NLP) released with the paper `VisualBERT: A Simple and Performant Baseline for Vision and Language `__ by Liunian Harold Li, Mark Yatskar, Da Yin, Cho-Jui Hsieh, Kai-Wei Chang. -70. :doc:`Wav2Vec2 ` (from Facebook AI) released with the paper `wav2vec 2.0: A Framework for +75. :doc:`Wav2Vec2 ` (from Facebook AI) released with the paper `wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations `__ by Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael Auli. -71. :doc:`XLM ` (from Facebook) released together with the paper `Cross-lingual Language Model +76. :doc:`XLM ` (from Facebook) released together with the paper `Cross-lingual Language Model Pretraining `__ by Guillaume Lample and Alexis Conneau. -72. :doc:`XLM-ProphetNet ` (from Microsoft Research) released with the paper `ProphetNet: +77. :doc:`XLM-ProphetNet ` (from Microsoft Research) released with the paper `ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training `__ by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou. -73. :doc:`XLM-RoBERTa ` (from Facebook AI), released together with the paper `Unsupervised +78. :doc:`XLM-RoBERTa ` (from Facebook AI), released together with the paper `Unsupervised Cross-lingual Representation Learning at Scale `__ by Alexis Conneau*, Kartikay Khandelwal*, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzmán, Edouard Grave, Myle Ott, Luke Zettlemoyer and Veselin Stoyanov. -74. :doc:`XLNet ` (from Google/CMU) released with the paper `​XLNet: Generalized Autoregressive +79. :doc:`XLNet ` (from Google/CMU) released with the paper `​XLNet: Generalized Autoregressive Pretraining for Language Understanding `__ by Zhilin Yang*, Zihang Dai*, Yiming Yang, Jaime Carbonell, Ruslan Salakhutdinov, Quoc V. Le. -75. :doc:`XLSR-Wav2Vec2 ` (from Facebook AI) released with the paper `Unsupervised +80. :doc:`XLSR-Wav2Vec2 ` (from Facebook AI) released with the paper `Unsupervised Cross-Lingual Representation Learning For Speech Recognition `__ by Alexis Conneau, Alexei Baevski, Ronan Collobert, Abdelrahman Mohamed, Michael Auli. 
diff --git a/src/transformers/models/esm/modeling_esm.py b/src/transformers/models/esm/modeling_esm.py index 7538a4bbaca898..715ed58439e8a7 100755 --- a/src/transformers/models/esm/modeling_esm.py +++ b/src/transformers/models/esm/modeling_esm.py @@ -811,7 +811,7 @@ class PreTrainedModel @add_start_docstrings_to_model_forward(ESM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( - tokenizer_class=_TOKENIZER_FOR_DOC, + processor_class=_TOKENIZER_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutputWithPoolingAndCrossAttentions, config_class=_CONFIG_FOR_DOC, @@ -1130,7 +1130,7 @@ def set_output_embeddings(self, new_embeddings): @add_start_docstrings_to_model_forward(ESM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( - tokenizer_class=_TOKENIZER_FOR_DOC, + processor_class=_TOKENIZER_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, @@ -1241,7 +1241,7 @@ def __init__(self, config): @add_start_docstrings_to_model_forward(ESM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( - tokenizer_class=_TOKENIZER_FOR_DOC, + processor_class=_TOKENIZER_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, @@ -1339,7 +1339,7 @@ def __init__(self, config): @add_start_docstrings_to_model_forward(ESM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( - tokenizer_class=_TOKENIZER_FOR_DOC, + processor_class=_TOKENIZER_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC,
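Note for reviewers: the only functional change in `modeling_esm.py` is the keyword rename inside the `add_code_sample_docstrings` decorator (`tokenizer_class` -> `processor_class`), following the upstream rename of that argument. The sketch below shows the decorated-forward pattern these hunks touch. It is a minimal, self-contained approximation rather than the actual file: the constant values (`EsmTokenizer`, `facebook/esm-1b`), the trimmed `ESM_INPUTS_DOCSTRING`, and the bare `EsmModel` stand-in are illustrative assumptions, and it presumes a transformers version in which `add_code_sample_docstrings` already accepts `processor_class`.

from transformers.file_utils import (
    add_code_sample_docstrings,
    add_start_docstrings_to_model_forward,
)
from transformers.modeling_outputs import BaseModelOutputWithPoolingAndCrossAttentions

# Illustrative stand-ins for the module-level constants in modeling_esm.py;
# the actual tokenizer/checkpoint names used in the file may differ.
_TOKENIZER_FOR_DOC = "EsmTokenizer"
_CHECKPOINT_FOR_DOC = "facebook/esm-1b"
_CONFIG_FOR_DOC = "EsmConfig"

# Heavily trimmed stand-in for the real ESM_INPUTS_DOCSTRING.
ESM_INPUTS_DOCSTRING = r"""
    Args:
        input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`):
            Indices of input sequence tokens in the vocabulary.
"""


class EsmModel:  # plain stand-in; the real class subclasses EsmPreTrainedModel
    @add_start_docstrings_to_model_forward(ESM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,  # renamed keyword; previously tokenizer_class
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=BaseModelOutputWithPoolingAndCrossAttentions,
        config_class=_CONFIG_FOR_DOC,
    )
    def forward(self, input_ids=None, attention_mask=None, **kwargs):
        # The decorators only rewrite forward.__doc__ to embed a usage example;
        # the method body is irrelevant for this sketch.
        ...


# The generated documentation can be inspected without instantiating the model:
# print(EsmModel.forward.__doc__)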