[Feature] Support EfficientDet in projects #9810

Merged · 14 commits · Mar 10, 2023
36 changes: 10 additions & 26 deletions projects/EfficientDet/REMEAD.md → projects/EfficientDet/README.md
@@ -22,45 +22,29 @@ In contrast to other feature pyramid networks, such as FPN, FPN + PAN, NAS-FPN, B

## Usage

### Model conversion
### Training command

First, download the EfficientDet [weights](https://github.com/google/automl/tree/master/efficientdet) and unzip them with the following command:
In MMDetection's root directory, run the following command for single-GPU training:

```bash
tar -xzvf {EFFICIENTDET_WEIGHT}
```

Then, install TensorFlow with the following command:

```bash
pip install tensorflow-gpu==2.6.0
```

Lastly, convert the weights from TensorFlow to PyTorch with the following command:

```bash
python projects/EfficientDet/convert_tf_to_pt.py --backbone {BACKBONE_NAME} --tensorflow_weight {TENSORFLOW_WEIGHT_PATH} --out_weight {OUT_PATH}
python tools/train.py projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py
```

### Testing commands

In MMDetection's root directory, run the following command to test the model:

```bash
python tools/test.py projects/EfficientDet/configs/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco.py ${CHECKPOINT_PATH}
python tools/test.py projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py ${CHECKPOINT_PATH}
```
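
If you would rather run inference from Python than through the test script, a sketch like the following should work with MMDetection 3.x's high-level API (the checkpoint path is a placeholder for your own converted or trained weights):

```python
from mmdet.apis import inference_detector, init_detector

# Run from MMDetection's root so the project package is importable; the
# import itself registers EfficientDet's custom modules.
import projects.EfficientDet.efficientdet  # noqa: F401

# Placeholder paths: substitute your own config and checkpoint.
config_file = 'projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py'
checkpoint_file = 'work_dirs/efficientdet_effb3/epoch_300.pth'

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'demo/demo.jpg')  # returns a DetDataSample
print(result.pred_instances.scores[:5])
print(result.pred_instances.bboxes[:5])
```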

## Results

Based on MMDetection, this project matches the test accuracy of the [official model](https://github.com/google/automl).
<br>
To reproduce the test results, convert the model weights first, then run the test command.
<br>
The training accuracy will also be aligned with the official implementation in the future.
Based on MMDetection, this project matches the accuracy of the [official model](https://github.com/google/automl).

| Method | Backbone | Pretrained Model | Training set | Test set | Epoch | Val Box AP | Official AP |
| :------------------------------------------------------------------------------: | :-------------: | :--------------: | :------------: | :----------: | :---: | :--------: | :---------: |
| [efficientdet-d0](./configs/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco.py) | efficientnet-b0 | ImageNet | COCO2017 Train | COCO2017 Val | 300 | 34.4 | 34.3 |
| Method | Backbone | Pretrained Model | Training set | Test set | Epoch | Val Box AP | Official AP | Download |
| :------------------------------------------------------------------------------: | :-------------: | :--------------: | :------------: | :----------: | :---: | :--------: | :---------: | :------: |
| [efficientdet-d3](./configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py) | efficientnet-b3 | ImageNet | COCO2017 Train | COCO2017 Val | 300 | 47.2 | 46.8 | model() |

## Citation

@@ -99,9 +83,9 @@ A project does not necessarily have to be finished in a single PR, but it's esse

<!-- As this template does. -->

- [ ] Milestone 2: Indicates a successful model implementation.
- [x] Milestone 2: Indicates a successful model implementation.

- [ ] Training-time correctness
- [x] Training-time correctness

<!-- If you are reproducing the result from a paper, checking this item means that you should have trained your model from scratch based on the original paper's specification and verified that the final result matches the report within a minor error range. -->

projects/EfficientDet/configs/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco.py
@@ -7,11 +7,11 @@
imports=['projects.EfficientDet.efficientdet'], allow_failed_imports=False)

image_size = 512
dataset_type = 'Coco90Dataset'
evalute_type = 'Coco90Metric'
batch_augments = [
dict(type='BatchFixedSizePad', size=(image_size, image_size))
]
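# Pads each batched image to a fixed 512x512 canvas so tensor shapes stay
# static (which is what lets cudnn_benchmark, enabled below, pay off).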
dataset_type = 'Coco90Dataset'
evalute_type = 'Coco90Metric'
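# The converted TF checkpoints predict the raw 90-category COCO label space,
# hence the 'Coco90' dataset/metric and num_classes=90 below.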
norm_cfg = dict(type='SyncBN', requires_grad=True, eps=1e-3, momentum=0.01)
checkpoint = 'https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b0_3rdparty_8xb32-aa-advprop_in1k_20220119-26434485.pth' # noqa
model = dict(
@@ -29,6 +29,7 @@
drop_path_rate=0.2,
out_indices=(3, 4, 5),
frozen_stages=0,
conv_cfg=dict(type='Conv2dSamePadding'),
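# TF-style 'SAME' padding keeps spatial shapes aligned with the official
# TensorFlow weights after conversion.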
norm_cfg=norm_cfg,
norm_eval=False,
init_cfg=dict(
@@ -41,35 +42,35 @@
start_level=0,
norm_cfg=norm_cfg),
bbox_head=dict(
type='EfficientDetSepBNHead',
type='EfficientDetSepBNHead_Huber',
num_classes=90,
num_ins=5,
in_channels=64,
feat_channels=64,
stacked_convs=3,
norm_cfg=norm_cfg,
anchor_generator=dict(
type='YXYXAnchorGenerator',
type='AnchorGenerator',
octave_base_scale=4,
scales_per_octave=3,
ratios=[1.0, 0.5, 2.0],
strides=[8, 16, 32, 64, 128],
center_offset=0.5),
bbox_coder=dict(
type='YXYXDeltaXYWHBBoxCoder',
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
gamma=1.5,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)),
loss_bbox=dict(type='HuberLoss', beta=0.1, loss_weight=50)),
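# gamma=1.5/alpha=0.25 and Huber loss (beta=0.1, weight 50) follow the
# official EfficientDet training settings.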
# training and testing settings
train_cfg=dict(
assigner=dict(
type='TransMaxIoUAssigner',
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0,
@@ -125,23 +126,25 @@
val_dataloader = dict(dataset=dict(type=dataset_type, pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type=evalute_type)
val_evaluator = dict(type='Coco90Metric')
test_evaluator = val_evaluator

optim_wrapper = dict(
optimizer=dict(lr=0.16),
paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True))
optimizer=dict(lr=0.16, weight_decay=4e-5),
paramwise_cfg=dict(
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True),
clip_grad=dict(max_norm=10, norm_type=2))

# learning policy
max_epochs = 300
param_scheduler = [
dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=917),
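# 917 warmup iters ≈ 1 epoch: 117266 filtered COCO train images / (8 GPUs x 16 imgs per GPU) ≈ 917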
dict(
type='CosineAnnealingLR',
eta_min=0.0016,
eta_min=0.0,
begin=1,
T_max=284,
end=285,
T_max=299,
end=300,
by_epoch=True,
convert_to_iter_based=True)
]
@@ -155,6 +158,14 @@
type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')

default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=15))
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
priority=49)
]
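# ExpMomentumEMA decays the per-update momentum toward 0.0002 as training
# progresses, i.e. roughly a 5000-update averaging window once warmed up.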
# cudnn_benchmark=True can accelerate fixed-size training
env_cfg = dict(cudnn_benchmark=True)

projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py (new file)
@@ -0,0 +1,175 @@
_base_ = [
'mmdet::_base_/datasets/coco_detection.py',
'mmdet::_base_/schedules/schedule_1x.py',
'mmdet::_base_/default_runtime.py'
]
custom_imports = dict(
imports=['projects.EfficientDet.efficientdet'], allow_failed_imports=False)

image_size = 896
batch_augments = [
dict(type='BatchFixedSizePad', size=(image_size, image_size))
]
dataset_type = 'Coco90Dataset'
evalute_type = 'Coco90Metric'
norm_cfg = dict(type='SyncBN', requires_grad=True, eps=1e-3, momentum=0.01)
checkpoint = 'https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b3_3rdparty_8xb32-aa-advprop_in1k_20220119-53b41118.pth' # noqa
model = dict(
type='EfficientDet',
data_preprocessor=dict(
type='DetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=image_size,
batch_augments=batch_augments),
backbone=dict(
type='EfficientNet',
arch='b3',
drop_path_rate=0.3,
out_indices=(3, 4, 5),
frozen_stages=0,
conv_cfg=dict(type='Conv2dSamePadding'),
norm_cfg=norm_cfg,
norm_eval=False,
init_cfg=dict(
type='Pretrained', prefix='backbone', checkpoint=checkpoint)),
neck=dict(
type='BiFPN',
num_stages=6,
in_channels=[48, 136, 384],
out_channels=160,
start_level=0,
norm_cfg=norm_cfg),
bbox_head=dict(
type='EfficientDetSepBNHead_Huber',
num_classes=90,
num_ins=5,
in_channels=160,
feat_channels=160,
stacked_convs=4,
norm_cfg=norm_cfg,
anchor_generator=dict(
type='AnchorGenerator',
octave_base_scale=4,
scales_per_octave=3,
ratios=[1.0, 0.5, 2.0],
strides=[8, 16, 32, 64, 128],
center_offset=0.5),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=1.5,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='HuberLoss', beta=0.1, loss_weight=50)),
# training and testing settings
train_cfg=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0,
ignore_iof_thr=-1),
sampler=dict(
type='PseudoSampler'), # Focal loss should use PseudoSampler
allowed_border=-1,
pos_weight=-1,
debug=False),
test_cfg=dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(
type='soft_nms',
iou_threshold=0.3,
sigma=0.5,
min_score=1e-3,
method='gaussian'),
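# Gaussian soft-NMS rescales overlapping scores by exp(-iou**2 / sigma)
# rather than discarding them; min_score drops boxes decayed below 1e-3.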
max_per_img=100))

# dataset settings
train_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args={{_base_.file_client_args}}),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='RandomResize',
scale=(image_size, image_size),
ratio_range=(0.1, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=(image_size, image_size)),
dict(type='RandomFlip', prob=0.5),
dict(type='PackDetInputs')
]
test_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args={{_base_.file_client_args}}),
dict(type='Resize', scale=(image_size, image_size), keep_ratio=True),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor'))
]

train_dataloader = dict(
batch_size=16,
num_workers=16,
dataset=dict(type=dataset_type, pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(type=dataset_type, pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type='Coco90Metric')
test_evaluator = val_evaluator

optim_wrapper = dict(
optimizer=dict(lr=0.16, weight_decay=4e-5),
paramwise_cfg=dict(
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True),
clip_grad=dict(max_norm=10, norm_type=2))

# learning policy
max_epochs = 300
param_scheduler = [
dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=917),
dict(
type='CosineAnnealingLR',
eta_min=0.0,
begin=1,
T_max=299,
end=300,
by_epoch=True,
convert_to_iter_based=True)
]
train_cfg = dict(max_epochs=max_epochs, val_interval=1)

vis_backends = [
dict(type='LocalVisBackend'),
dict(type='TensorboardVisBackend')
]
visualizer = dict(
type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')

default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=15))
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
priority=49)
]
# cudnn_benchmark=True can accelerate fixed-size training
env_cfg = dict(cudnn_benchmark=True)

# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (16 samples per GPU)
auto_scale_lr = dict(base_batch_size=128)
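
The rule itself is plain linear scaling against the total batch size. A quick sketch of the arithmetic (the 4-GPU setup is a made-up example; in practice the rule is applied by passing `--auto-scale-lr` to `tools/train.py`):

```python
# Linear LR scaling as applied when auto-scale-lr is enabled (sketch of the rule).
base_lr = 0.16
base_batch_size = 128        # 8 GPUs x 16 samples per GPU, as configured above
actual_batch_size = 4 * 16   # hypothetical run: 4 GPUs x 16 samples per GPU
scaled_lr = base_lr * actual_batch_size / base_batch_size
print(scaled_lr)             # 0.08
```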