open-mmlab · ZwwWayne · Nov 1, 2021 · Jul 22, 2021 · Jul 29, 2021 · Aug 5, 2021
diff --git a/.dev_scripts/gather_models.py b/.dev_scripts/gather_models.py
@@ -3,6 +3,16 @@
 
 Usage:
 python gather_models.py ${root_path} ${out_dir}
+
+Example:
+python gather_models.py \
+work_dirs/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d \
+work_dirs/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d
+
+Note that before running the above command, you need to rename the directory
+to the config name if you did not use the default directory name, create a
+corresponding directory 'pgd' under the above path, and put the used config
+into it.
 """
 
 import argparse
@@ -36,16 +46,18 @@
 RESULTS_LUT = {
     'coco': ['bbox_mAP', 'segm_mAP'],
     'nus': ['pts_bbox_NuScenes/NDS', 'NDS'],
-    'kitti-3d-3class': [
-        'KITTI/Overall_3D_moderate',
-        'Overall_3D_moderate',
-    ],
+    'kitti-3d-3class': ['KITTI/Overall_3D_moderate', 'Overall_3D_moderate'],
     'kitti-3d-car': ['KITTI/Car_3D_moderate_strict', 'Car_3D_moderate_strict'],
     'lyft': ['score'],
     'scannet_seg': ['miou'],
     's3dis_seg': ['miou'],
     'scannet': ['mAP_0.50'],
-    'sunrgbd': ['mAP_0.50']
+    'sunrgbd': ['mAP_0.50'],
+    'kitti-mono3d': [
+        'img_bbox/KITTI/Car_3D_AP40_moderate_strict',
+        'Car_3D_AP40_moderate_strict'
+    ],
+    'nus-mono3d': ['img_bbox_NuScenes/NDS', 'NDS']
 }
 
 
@@ -145,15 +157,13 @@ def main():
     # and parse the best performance
     model_infos = []
     for used_config in used_configs:
-        exp_dir = osp.join(models_root, used_config)
-
         # get logs
-        log_json_path = glob.glob(osp.join(exp_dir, '*.log.json'))[0]
-        log_txt_path = glob.glob(osp.join(exp_dir, '*.log'))[0]
+        log_json_path = glob.glob(osp.join(models_root, '*.log.json'))[0]
+        log_txt_path = glob.glob(osp.join(models_root, '*.log'))[0]
         model_performance = get_best_results(log_json_path)
         final_epoch = model_performance['epoch']
         final_model = 'epoch_{}.pth'.format(final_epoch)
-        model_path = osp.join(exp_dir, final_model)
+        model_path = osp.join(models_root, final_model)
 
         # skip if the model is still training
         if not osp.exists(model_path):
@@ -182,7 +192,7 @@ def main():
         model_name = model['config'].split('/')[-1].rstrip(
             '.py') + '_' + model['model_time']
         publish_model_path = osp.join(model_publish_dir, model_name)
-        trained_model_path = osp.join(models_root, model['config'],
+        trained_model_path = osp.join(models_root,
                                       'epoch_{}.pth'.format(model['epochs']))
 
         # convert model
@@ -191,11 +201,10 @@ def main():
 
         # copy log
         shutil.copy(
-            osp.join(models_root, model['config'], model['log_json_path']),
+            osp.join(models_root, model['log_json_path']),
             osp.join(model_publish_dir, f'{model_name}.log.json'))
         shutil.copy(
-            osp.join(models_root, model['config'],
-                     model['log_json_path'].rstrip('.json')),
+            osp.join(models_root, model['log_json_path'].rstrip('.json')),
             osp.join(model_publish_dir, f'{model_name}.log'))
 
         # copy config to guarantee reproducibility

diff --git a/configs/_base_/models/fcos3d.py b/configs/_base_/models/fcos3d.py
@@ -1,6 +1,5 @@
 model = dict(
     type='FCOSMono3D',
-    pretrained='open-mmlab://detectron2/resnet101_caffe',
     backbone=dict(
         type='ResNet',
         depth=101,
@@ -9,7 +8,10 @@
         frozen_stages=1,
         norm_cfg=dict(type='BN', requires_grad=False),
         norm_eval=True,
-        style='caffe'),
+        style='caffe',
+        init_cfg=dict(
+            type='Pretrained',
+            checkpoint='open-mmlab://detectron2/resnet101_caffe')),
     neck=dict(
         type='FPN',
         in_channels=[256, 512, 1024, 2048],

diff --git a/configs/_base_/models/pgd.py b/configs/_base_/models/pgd.py
@@ -0,0 +1,55 @@
+_base_ = './fcos3d.py'
+# model settings
+model = dict(
+    bbox_head=dict(
+        _delete_=True,
+        type='PGDHead',
+        num_classes=10,
+        in_channels=256,
+        stacked_convs=2,
+        feat_channels=256,
+        use_direction_classifier=True,
+        diff_rad_by_sin=True,
+        pred_attrs=True,
+        pred_velo=True,
+        pred_bbox2d=True,
+        pred_keypoints=False,
+        dir_offset=0.7854,  # pi/4
+        strides=[8, 16, 32, 64, 128],
+        group_reg_dims=(2, 1, 3, 1, 2),  # offset, depth, size, rot, velo
+        cls_branch=(256, ),
+        reg_branch=(
+            (256, ),  # offset
+            (256, ),  # depth
+            (256, ),  # size
+            (256, ),  # rot
+            ()  # velo
+        ),
+        dir_branch=(256, ),
+        attr_branch=(256, ),
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.0),
+        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+        loss_dir=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+        loss_attr=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+        loss_centerness=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+        norm_on_bbox=True,
+        centerness_on_reg=True,
+        center_sampling=True,
+        conv_bias=True,
+        dcn_on_last_conv=True,
+        use_depth_classifier=True,
+        depth_branch=(256, ),
+        depth_range=(0, 50),
+        depth_unit=10,
+        division='uniform',
+        depth_bins=6,
+        bbox_coder=dict(type='PGDBBoxCoder', code_size=9)),
+    test_cfg=dict(nms_pre=1000, nms_thr=0.8, score_thr=0.01, max_per_img=200))
diff --git a/configs/pgd/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d.py b/configs/pgd/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d.py
@@ -0,0 +1,127 @@
+_base_ = [
+    '../_base_/datasets/kitti-mono3d.py', '../_base_/models/pgd.py',
+    '../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'
+]
+# model settings
+model = dict(
+    backbone=dict(frozen_stages=0),
+    neck=dict(start_level=0, num_outs=4),
+    bbox_head=dict(
+        num_classes=3,
+        bbox_code_size=7,
+        pred_attrs=False,
+        pred_velo=False,
+        pred_bbox2d=True,
+        use_onlyreg_proj=True,
+        strides=(4, 8, 16, 32),
+        regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 1e8)),
+        group_reg_dims=(2, 1, 3, 1, 16,
+                        4),  # offset, depth, size, rot, kpts, bbox2d
+        reg_branch=(
+            (256, ),  # offset
+            (256, ),  # depth
+            (256, ),  # size
+            (256, ),  # rot
+            (256, ),  # kpts
+            (256, )  # bbox2d
+        ),
+        centerness_branch=(256, ),
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.0),
+        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+        loss_dir=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+        loss_centerness=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+        use_depth_classifier=True,
+        depth_branch=(256, ),
+        depth_range=(0, 70),
+        depth_unit=10,
+        division='uniform',
+        depth_bins=8,
+        pred_keypoints=True,
+        weight_dim=1,
+        loss_depth=dict(
+            type='UncertainSmoothL1Loss', alpha=1.0, beta=3.0,
+            loss_weight=1.0),
+        bbox_coder=dict(
+            type='PGDBBoxCoder',
+            base_depths=((28.01, 16.32), ),
+            base_dims=((0.8, 1.73, 0.6), (1.76, 1.73, 0.6), (3.9, 1.56, 1.6)),
+            code_size=7)),
+    # Set weight 1.0 for the base 7 dims (offset, depth, size, rot),
+    # 0.2 for 16-dim keypoint offsets and 1.0 for 4-dim 2D distance targets.
+    train_cfg=dict(code_weight=[
+        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
+        0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0
+    ]),
+    test_cfg=dict(nms_pre=100, nms_thr=0.05, score_thr=0.001, max_per_img=20))
+
+class_names = ['Pedestrian', 'Cyclist', 'Car']
+img_norm_cfg = dict(
+    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+train_pipeline = [
+    dict(type='LoadImageFromFileMono3D'),
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox=True,
+        with_label=True,
+        with_attr_label=False,
+        with_bbox_3d=True,
+        with_label_3d=True,
+        with_bbox_depth=True),
+    dict(type='Resize', img_scale=(1242, 375), keep_ratio=True),
+    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle3D', class_names=class_names),
+    dict(
+        type='Collect3D',
+        keys=[
+            'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d',
+            'centers2d', 'depths'
+        ]),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFileMono3D'),
+    dict(
+        type='MultiScaleFlipAug',
+        scale_factor=1.0,
+        flip=False,
+        transforms=[
+            dict(type='RandomFlip3D'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=3,
+    workers_per_gpu=3,
+    train=dict(pipeline=train_pipeline),
+    val=dict(pipeline=test_pipeline),
+    test=dict(pipeline=test_pipeline))
+# optimizer
+optimizer = dict(
+    lr=0.001, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))
+optimizer_config = dict(
+    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[32, 44])
+total_epochs = 48
+runner = dict(type='EpochBasedRunner', max_epochs=48)
+evaluation = dict(interval=2)
+checkpoint_config = dict(interval=8)
diff --git a/mmdet3d/core/bbox/coders/pgd_bbox_coder.py b/mmdet3d/core/bbox/coders/pgd_bbox_coder.py
@@ -1,4 +1,5 @@
 import numpy as np
+import torch
 from torch.nn import functional as F
 
 from mmdet.core.bbox.builder import BBOX_CODERS
@@ -45,8 +46,9 @@ def decode_2d(self,
             scale_kpts = scale[3]
             # 2 dimension of offsets x 8 corners of a 3D bbox
             bbox[:, self.bbox_code_size:self.bbox_code_size + 16] = \
-                scale_kpts(clone_bbox[
-                    :, self.bbox_code_size:self.bbox_code_size + 16]).float()
+                torch.tanh(scale_kpts(clone_bbox[
+                    :, self.bbox_code_size:self.bbox_code_size + 16]).float())
+
         if pred_bbox2d:
             scale_bbox2d = scale[-1]
             # The last four dimensions are offsets to four sides of a 2D bbox

diff --git a/mmdet3d/models/dense_heads/__init__.py b/mmdet3d/models/dense_heads/__init__.py
@@ -8,6 +8,7 @@
 from .free_anchor3d_head import FreeAnchor3DHead
 from .groupfree3d_head import GroupFree3DHead
 from .parta2_rpn_head import PartA2RPNHead
+from .pgd_head import PGDHead
 from .shape_aware_head import ShapeAwareHead
 from .smoke_mono3d_head import SMOKEMono3DHead
 from .ssd_3d_head import SSD3DHead
@@ -17,5 +18,5 @@
     'Anchor3DHead', 'FreeAnchor3DHead', 'PartA2RPNHead', 'VoteHead',
     'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
     'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
-    'GroupFree3DHead', 'SMOKEMono3DHead'
+    'GroupFree3DHead', 'SMOKEMono3DHead', 'PGDHead'
 ]
diff --git a/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py b/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py
@@ -176,13 +176,6 @@ def __init__(
             self.attr_branch = attr_branch
 
         self._init_layers()
-        if init_cfg is None:
-            self.init_cfg = dict(
-                type='Normal',
-                layer='Conv2d',
-                std=0.01,
-                override=dict(
-                    type='Normal', name='conv_cls', std=0.01, bias_prob=0.01))
 
     def _init_layers(self):
         """Initialize layers of the head."""
@@ -288,8 +281,34 @@ def _init_predictor(self):
             self.conv_attr = nn.Conv2d(self.attr_branch[-1], self.num_attrs, 1)
 
     def init_weights(self):
-        super().init_weights()
+        """Initialize weights of the head.
+
+        We currently still use a custom init_weights because the default
+        init of DCN triggered by the init_cfg will also initialize
+        conv_offset.weight, which adversely affects the training stability.
+        """
+        for modules in [self.cls_convs, self.reg_convs, self.conv_cls_prev]:
+            for m in modules:
+                if isinstance(m.conv, nn.Conv2d):
+                    normal_init(m.conv, std=0.01)
+        for conv_reg_prev in self.conv_reg_prevs:
+            if conv_reg_prev is None:
+                continue
+            for m in conv_reg_prev:
+                if isinstance(m.conv, nn.Conv2d):
+                    normal_init(m.conv, std=0.01)
+        if self.use_direction_classifier:
+            for m in self.conv_dir_cls_prev:
+                if isinstance(m.conv, nn.Conv2d):
+                    normal_init(m.conv, std=0.01)
+        if self.pred_attrs:
+            for m in self.conv_attr_prev:
+                if isinstance(m.conv, nn.Conv2d):
+                    normal_init(m.conv, std=0.01)
         bias_cls = bias_init_with_prob(0.01)
+        normal_init(self.conv_cls, std=0.01, bias=bias_cls)
+        for conv_reg in self.conv_regs:
+            normal_init(conv_reg, std=0.01)
         if self.use_direction_classifier:
             normal_init(self.conv_dir_cls, std=0.01, bias=bias_cls)
         if self.pred_attrs: