diff --git a/src/transformers/models/gpt2/configuration_gpt2.py b/src/transformers/models/gpt2/configuration_gpt2.py
index ef1c591a279607..d56935f9991ecb 100644
--- a/src/transformers/models/gpt2/configuration_gpt2.py
+++ b/src/transformers/models/gpt2/configuration_gpt2.py
@@ -58,7 +58,7 @@ class GPT2Config(PretrainedConfig):
             Number of hidden layers in the Transformer encoder.
         n_head (`int`, *optional*, defaults to 12):
             Number of attention heads for each attention layer in the Transformer encoder.
-        n_inner (`int`, *optional*, defaults to None):
+        n_inner (`int`, *optional*):
             Dimensionality of the inner feed-forward layers. `None` will set it to 4 times n_embd
         activation_function (`str`, *optional*, defaults to `"gelu_new"`):
             Activation function, to be selected in the list `["relu", "silu", "gelu", "tanh", "gelu_new"]`.
@@ -68,7 +68,7 @@ class GPT2Config(PretrainedConfig):
             The dropout ratio for the embeddings.
         attn_pdrop (`float`, *optional*, defaults to 0.1):
             The dropout ratio for the attention.
-        layer_norm_epsilon (`float`, *optional*, defaults to 1e-5):
+        layer_norm_epsilon (`float`, *optional*, defaults to 1e-05):
             The epsilon to use in the layer normalization layers.
         initializer_range (`float`, *optional*, defaults to 0.02):
             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
@@ -107,6 +107,10 @@ class GPT2Config(PretrainedConfig):
             Scale attention weights by dividing by sqrt(hidden_size)..
         use_cache (`bool`, *optional*, defaults to `True`):
             Whether or not the model should return the last key/values attentions (not used by all models).
+        bos_token_id (`int`, *optional*, defaults to 50256):
+            Beginning of stream token id
+        eos_token_id (`int`, *optional*, defaults to 50256):
+            End of stream token id
         scale_attn_by_inverse_layer_idx (`bool`, *optional*, defaults to `False`):
             Whether to additionally scale attention weights by `1 / layer_idx + 1`.
         reorder_and_upcast_attn (`bool`, *optional*, defaults to `False`):
diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py
index 650b519eaa5722..d7b2aacdb279ff 100644
--- a/utils/check_docstrings.py
+++ b/utils/check_docstrings.py
@@ -325,7 +325,6 @@
     "FlavaTextModel",
     "FocalNetModel",
     "FunnelTokenizerFast",
-    "GPT2Config",
     "GPT2Tokenizer",
     "GPT2TokenizerFast",
     "GPTBigCodeConfig",
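
A quick sanity check of the defaults documented above, on a freshly constructed `GPT2Config` (a minimal sketch; the expected values assume the stock GPT-2 defaults, i.e. `n_embd=768` and token id 50256 for both special tokens):

```python
from transformers import GPT2Config

config = GPT2Config()

# n_inner defaults to None; the docstring notes it is then treated as 4 * n_embd.
print(config.n_inner)                            # None
print(config.n_embd * 4)                         # 3072, inner feed-forward width when n_inner is None

# Defaults whose docstring entries are fixed or added in this diff.
print(config.layer_norm_epsilon)                 # 1e-05
print(config.bos_token_id, config.eos_token_id)  # 50256 50256
```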