Backbone kwargs in config (#28784)
* Enable instantiating model with pretrained backbone weights

* Clarify pretrained import

* Use load_backbone instead

* Add backbone_kwargs to config

* Pass kwargs to constructors

* Fix up

* Input verification

* Add tests

* Tidy up

* Update tests/utils/test_backbone_utils.py

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

---------

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
2 people authored and Ita Zaporozhets committed May 14, 2024
1 parent cc24ecc commit d06ffc0
Showing 16 changed files with 181 additions and 8 deletions.
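At a high level, every config touched here gains a `backbone_kwargs` argument that is validated, stored on the config, and forwarded when the backbone is constructed (per the "Pass kwargs to constructors" bullet above). A minimal usage sketch against the first file in the diff below — the `out_indices` value is the example from the new docstrings, and the exact forwarding behavior is an assumption from the commit description, not shown in this excerpt:

```python
from transformers import ConditionalDetrConfig, ConditionalDetrForObjectDetection

# Sketch: request specific feature maps from the timm backbone through the
# config, instead of building a full backbone config object.
config = ConditionalDetrConfig(
    backbone="resnet50",             # default timm backbone in the diff below
    use_timm_backbone=True,
    use_pretrained_backbone=True,
    backbone_kwargs={"out_indices": (0, 1, 2, 3)},  # docstring example value
)
model = ConditionalDetrForObjectDetection(config)
```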
@@ -98,6 +98,9 @@ class ConditionalDetrConfig(PretrainedConfig):
is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights.
use_pretrained_backbone (`bool`, *optional*, defaults to `True`):
Whether to use pretrained weights for the backbone.
backbone_kwargs (`dict`, *optional*):
Keyword arguments to be passed to AutoBackbone when loading from a checkpoint
e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set.
dilation (`bool`, *optional*, defaults to `False`):
Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when
`use_timm_backbone` = `True`.
@@ -168,6 +171,7 @@ def __init__(
position_embedding_type="sine",
backbone="resnet50",
use_pretrained_backbone=True,
backbone_kwargs=None,
dilation=False,
class_cost=2,
bbox_cost=5,
@@ -191,6 +195,9 @@ def __init__(
if backbone_config is not None and use_timm_backbone:
raise ValueError("You can't specify both `backbone_config` and `use_timm_backbone`.")

if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None:
raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.")

if not use_timm_backbone:
if backbone_config is None:
logger.info("`backbone_config` is `None`. Initializing the config with the default `ResNet` backbone.")
@@ -224,6 +231,7 @@ def __init__(
self.position_embedding_type = position_embedding_type
self.backbone = backbone
self.use_pretrained_backbone = use_pretrained_backbone
self.backbone_kwargs = backbone_kwargs
self.dilation = dilation
# Hungarian matcher
self.class_cost = class_cost
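The new check above mirrors the existing `backbone_config`/`use_timm_backbone` guard: ad-hoc backbone kwargs and a fully specified backbone config are mutually exclusive. An illustrative sketch of the failure mode (the values are made up; `ResNetConfig` stands in for any backbone config):

```python
from transformers import ConditionalDetrConfig, ResNetConfig

try:
    ConditionalDetrConfig(
        use_timm_backbone=False,   # required when passing a backbone_config
        backbone=None,             # avoid mixing a backbone name in as well
        backbone_config=ResNetConfig(),
        backbone_kwargs={"out_indices": (0, 1, 2, 3)},
    )
except ValueError as err:
    # "You can't specify both `backbone_kwargs` and `backbone_config`."
    print(err)
```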
@@ -90,6 +90,9 @@ class DeformableDetrConfig(PretrainedConfig):
is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights.
use_pretrained_backbone (`bool`, *optional*, defaults to `True`):
Whether to use pretrained weights for the backbone.
backbone_kwargs (`dict`, *optional*):
Keyword arguments to be passed to AutoBackbone when loading from a checkpoint
e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set.
dilation (`bool`, *optional*, defaults to `False`):
Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when
`use_timm_backbone` = `True`.
@@ -177,6 +180,7 @@ def __init__(
position_embedding_type="sine",
backbone="resnet50",
use_pretrained_backbone=True,
backbone_kwargs=None,
dilation=False,
num_feature_levels=4,
encoder_n_points=4,
@@ -207,6 +211,9 @@ def __init__(
if backbone_config is not None and use_timm_backbone:
raise ValueError("You can't specify both `backbone_config` and `use_timm_backbone`.")

if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None:
raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.")

if not use_timm_backbone:
if backbone_config is None:
logger.info("`backbone_config` is `None`. Initializing the config with the default `ResNet` backbone.")
@@ -238,6 +245,7 @@ def __init__(
self.position_embedding_type = position_embedding_type
self.backbone = backbone
self.use_pretrained_backbone = use_pretrained_backbone
self.backbone_kwargs = backbone_kwargs
self.dilation = dilation
# deformable attributes
self.num_feature_levels = num_feature_levels
8 changes: 8 additions & 0 deletions src/transformers/models/deta/configuration_deta.py
@@ -49,6 +49,9 @@ class DetaConfig(PretrainedConfig):
use_timm_backbone (`bool`, *optional*, defaults to `False`):
Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers
library.
backbone_kwargs (`dict`, *optional*):
Keyword arguments to be passed to AutoBackbone when loading from a checkpoint
e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set.
num_queries (`int`, *optional*, defaults to 900):
Number of object queries, i.e. detection slots. This is the maximal number of objects [`DetaModel`] can
detect in a single image. In case `two_stage` is set to `True`, we use `two_stage_num_proposals` instead.
@@ -150,6 +153,7 @@ def __init__(
backbone=None,
use_pretrained_backbone=False,
use_timm_backbone=False,
backbone_kwargs=None,
num_queries=900,
max_position_embeddings=2048,
encoder_layers=6,
@@ -204,10 +208,14 @@ def __init__(
config_class = CONFIG_MAPPING[backbone_model_type]
backbone_config = config_class.from_dict(backbone_config)

if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None:
raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.")

self.backbone_config = backbone_config
self.backbone = backbone
self.use_pretrained_backbone = use_pretrained_backbone
self.use_timm_backbone = use_timm_backbone
self.backbone_kwargs = backbone_kwargs
self.num_queries = num_queries
self.max_position_embeddings = max_position_embeddings
self.d_model = d_model
8 changes: 8 additions & 0 deletions src/transformers/models/detr/configuration_detr.py
@@ -98,6 +98,9 @@ class DetrConfig(PretrainedConfig):
is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights.
use_pretrained_backbone (`bool`, *optional*, defaults to `True`):
Whether to use pretrained weights for the backbone.
backbone_kwargs (`dict`, *optional*):
Keyword arguments to be passed to AutoBackbone when loading from a checkpoint
e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set.
dilation (`bool`, *optional*, defaults to `False`):
Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when
`use_timm_backbone` = `True`.
@@ -166,6 +169,7 @@ def __init__(
position_embedding_type="sine",
backbone="resnet50",
use_pretrained_backbone=True,
backbone_kwargs=None,
dilation=False,
class_cost=1,
bbox_cost=5,
@@ -188,6 +192,9 @@ def __init__(
if backbone_config is not None and use_timm_backbone:
raise ValueError("You can't specify both `backbone_config` and `use_timm_backbone`.")

if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None:
raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.")

if not use_timm_backbone:
if backbone_config is None:
logger.info("`backbone_config` is `None`. Initializing the config with the default `ResNet` backbone.")
@@ -223,6 +230,7 @@ def __init__(
self.position_embedding_type = position_embedding_type
self.backbone = backbone
self.use_pretrained_backbone = use_pretrained_backbone
self.backbone_kwargs = backbone_kwargs
self.dilation = dilation
# Hungarian matcher
self.class_cost = class_cost
8 changes: 8 additions & 0 deletions src/transformers/models/dpt/configuration_dpt.py
@@ -120,6 +120,9 @@ class DPTConfig(PretrainedConfig):
use_timm_backbone (`bool`, *optional*, defaults to `False`):
Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers
library.
backbone_kwargs (`dict`, *optional*):
Keyword arguments to be passed to AutoBackbone when loading from a checkpoint
e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set.
Example:
@@ -173,6 +176,7 @@ def __init__(
backbone=None,
use_pretrained_backbone=False,
use_timm_backbone=False,
backbone_kwargs=None,
**kwargs,
):
super().__init__(**kwargs)
@@ -230,9 +234,13 @@ def __init__(
if use_autobackbone and backbone_config is not None and backbone is not None:
raise ValueError("You can't specify both `backbone` and `backbone_config`.")

if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None:
raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.")

self.backbone = backbone
self.use_pretrained_backbone = use_pretrained_backbone
self.use_timm_backbone = use_timm_backbone
self.backbone_kwargs = backbone_kwargs
self.num_hidden_layers = None if use_autobackbone else num_hidden_layers
self.num_attention_heads = None if use_autobackbone else num_attention_heads
self.intermediate_size = None if use_autobackbone else intermediate_size
14 changes: 11 additions & 3 deletions src/transformers/models/mask2former/configuration_mask2former.py
@@ -56,6 +56,9 @@ class Mask2FormerConfig(PretrainedConfig):
use_timm_backbone (`bool`, *optional*, defaults to `False`):
Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers
library.
backbone_kwargs (`dict`, *optional*):
Keyword arguments to be passed to AutoBackbone when loading from a checkpoint
e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set.
feature_size (`int`, *optional*, defaults to 256):
The features (channels) of the resulting feature maps.
mask_feature_size (`int`, *optional*, defaults to 256):
@@ -163,9 +166,10 @@ def __init__(
use_auxiliary_loss: bool = True,
feature_strides: List[int] = [4, 8, 16, 32],
output_auxiliary_logits: bool = None,
backbone=None,
use_pretrained_backbone=False,
use_timm_backbone=False,
backbone: Optional[str] = None,
use_pretrained_backbone: bool = False,
use_timm_backbone: bool = False,
backbone_kwargs: Optional[Dict] = None,
**kwargs,
):
if use_pretrained_backbone:
@@ -189,6 +193,9 @@ def __init__(
out_features=["stage1", "stage2", "stage3", "stage4"],
)

if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None:
raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.")

if isinstance(backbone_config, dict):
backbone_model_type = backbone_config.pop("model_type")
config_class = CONFIG_MAPPING[backbone_model_type]
@@ -233,6 +240,7 @@ def __init__(
self.backbone = backbone
self.use_pretrained_backbone = use_pretrained_backbone
self.use_timm_backbone = use_timm_backbone
self.backbone_kwargs = backbone_kwargs

super().__init__(**kwargs)

@@ -66,6 +66,9 @@ class MaskFormerConfig(PretrainedConfig):
use_timm_backbone (`bool`, *optional*, defaults to `False`):
Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers
library.
backbone_kwargs (`dict`, *optional*):
Keyword arguments to be passed to AutoBackbone when loading from a checkpoint
e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set.
decoder_config (`Dict`, *optional*):
The configuration passed to the transformer decoder model, if unset the base config for `detr-resnet-50`
will be used.
@@ -126,6 +129,7 @@ def __init__(
backbone: Optional[str] = None,
use_pretrained_backbone: bool = False,
use_timm_backbone: bool = False,
backbone_kwargs: Optional[Dict] = None,
**kwargs,
):
if use_pretrained_backbone:
@@ -134,6 +138,9 @@ def __init__(
if backbone_config is not None and backbone is not None:
raise ValueError("You can't specify both `backbone` and `backbone_config`.")

if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None:
raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.")

if backbone_config is None and backbone is None:
# fall back to https://huggingface.co/microsoft/swin-base-patch4-window12-384-in22k
backbone_config = SwinConfig(
@@ -198,6 +205,7 @@ def __init__(
self.backbone = backbone
self.use_pretrained_backbone = use_pretrained_backbone
self.use_timm_backbone = use_timm_backbone
self.backbone_kwargs = backbone_kwargs
super().__init__(**kwargs)

@classmethod
8 changes: 8 additions & 0 deletions src/transformers/models/oneformer/configuration_oneformer.py
@@ -53,6 +53,9 @@ class OneFormerConfig(PretrainedConfig):
use_timm_backbone (`bool`, *optional*, defaults to `False`):
Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers
library.
backbone_kwargs (`dict`, *optional*):
Keyword arguments to be passed to AutoBackbone when loading from a checkpoint
e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set.
ignore_value (`int`, *optional*, defaults to 255):
Values to be ignored in GT label while calculating loss.
num_queries (`int`, *optional*, defaults to 150):
@@ -156,6 +159,7 @@ def __init__(
backbone: Optional[str] = None,
use_pretrained_backbone: bool = False,
use_timm_backbone: bool = False,
backbone_kwargs: Optional[Dict] = None,
ignore_value: int = 255,
num_queries: int = 150,
no_object_weight: int = 0.1,
@@ -223,10 +227,14 @@ def __init__(
config_class = CONFIG_MAPPING[backbone_model_type]
backbone_config = config_class.from_dict(backbone_config)

if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None:
raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.")

self.backbone_config = backbone_config
self.backbone = backbone
self.use_pretrained_backbone = use_pretrained_backbone
self.use_timm_backbone = use_timm_backbone
self.backbone_kwargs = backbone_kwargs
self.ignore_value = ignore_value
self.num_queries = num_queries
self.no_object_weight = no_object_weight
@@ -98,6 +98,9 @@ class TableTransformerConfig(PretrainedConfig):
is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights.
use_pretrained_backbone (`bool`, *optional*, defaults to `True`):
Whether to use pretrained weights for the backbone.
backbone_kwargs (`dict`, *optional*):
Keyword arguments to be passed to AutoBackbone when loading from a checkpoint
e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set.
dilation (`bool`, *optional*, defaults to `False`):
Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when
`use_timm_backbone` = `True`.
@@ -167,6 +170,7 @@ def __init__(
position_embedding_type="sine",
backbone="resnet50",
use_pretrained_backbone=True,
backbone_kwargs=None,
dilation=False,
class_cost=1,
bbox_cost=5,
@@ -189,6 +193,9 @@ def __init__(
if backbone_config is not None and use_timm_backbone:
raise ValueError("You can't specify both `backbone_config` and `use_timm_backbone`.")

if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None:
raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.")

if not use_timm_backbone:
if backbone_config is None:
logger.info("`backbone_config` is `None`. Initializing the config with the default `ResNet` backbone.")
@@ -224,6 +231,7 @@ def __init__(
self.position_embedding_type = position_embedding_type
self.backbone = backbone
self.use_pretrained_backbone = use_pretrained_backbone
self.backbone_kwargs = backbone_kwargs
self.dilation = dilation
# Hungarian matcher
self.class_cost = class_cost
8 changes: 8 additions & 0 deletions src/transformers/models/tvp/configuration_tvp.py
@@ -52,6 +52,9 @@ class TvpConfig(PretrainedConfig):
use_timm_backbone (`bool`, *optional*, defaults to `False`):
Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers
library.
backbone_kwargs (`dict`, *optional*):
Keyword arguments to be passed to AutoBackbone when loading from a checkpoint
e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set.
distance_loss_weight (`float`, *optional*, defaults to 1.0):
The weight of distance loss.
duration_loss_weight (`float`, *optional*, defaults to 0.1):
@@ -107,6 +110,7 @@ def __init__(
backbone=None,
use_pretrained_backbone=False,
use_timm_backbone=False,
backbone_kwargs=None,
distance_loss_weight=1.0,
duration_loss_weight=0.1,
visual_prompter_type="framepad",
@@ -144,10 +148,14 @@ def __init__(
config_class = CONFIG_MAPPING[backbone_model_type]
backbone_config = config_class.from_dict(backbone_config)

if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None:
raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.")

self.backbone_config = backbone_config
self.backbone = backbone
self.use_pretrained_backbone = use_pretrained_backbone
self.use_timm_backbone = use_timm_backbone
self.backbone_kwargs = backbone_kwargs
self.distance_loss_weight = distance_loss_weight
self.duration_loss_weight = duration_loss_weight
self.visual_prompter_type = visual_prompter_type
8 changes: 8 additions & 0 deletions src/transformers/models/upernet/configuration_upernet.py
@@ -45,6 +45,9 @@ class UperNetConfig(PretrainedConfig):
use_timm_backbone (`bool`, *optional*, defaults to `False`):
Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers
library.
backbone_kwargs (`dict`, *optional*):
Keyword arguments to be passed to AutoBackbone when loading from a checkpoint
e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set.
hidden_size (`int`, *optional*, defaults to 512):
The number of hidden units in the convolutional layers.
initializer_range (`float`, *optional*, defaults to 0.02):
@@ -87,6 +90,7 @@ def __init__(
backbone=None,
use_pretrained_backbone=False,
use_timm_backbone=False,
backbone_kwargs=None,
hidden_size=512,
initializer_range=0.02,
pool_scales=[1, 2, 3, 6],
@@ -114,10 +118,14 @@ def __init__(
config_class = CONFIG_MAPPING[backbone_model_type]
backbone_config = config_class.from_dict(backbone_config)

if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None:
raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.")

self.backbone_config = backbone_config
self.backbone = backbone
self.use_pretrained_backbone = use_pretrained_backbone
self.use_timm_backbone = use_timm_backbone
self.backbone_kwargs = backbone_kwargs
self.hidden_size = hidden_size
self.initializer_range = initializer_range
self.pool_scales = pool_scales
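Downstream, the commit's `load_backbone` changes (tested in `tests/utils/test_backbone_utils.py`; the utility itself is presumably among the files collapsed from this view) consume these fields at load time. A hedged sketch, assuming `load_backbone` reads the backbone fields off a model config and forwards `backbone_kwargs` when the backbone is created from a checkpoint name — `microsoft/resnet-50` is only an illustrative hub id:

```python
from transformers import UperNetConfig
from transformers.utils.backbone_utils import load_backbone

config = UperNetConfig(
    backbone="microsoft/resnet-50",                 # illustrative checkpoint
    use_pretrained_backbone=False,                  # config only, random weights
    backbone_kwargs={"out_indices": (0, 1, 2, 3)},  # forwarded to the backbone
)
backbone = load_backbone(config)  # e.g. a ResNetBackbone exposing those stages
```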