add mvfcos3d
sunjiahao1999 committed Dec 4, 2023
1 parent 651dbf1 commit bfb10b6
Showing 9 changed files with 240 additions and 71 deletions.
2 changes: 1 addition & 1 deletion configs/_base_/datasets/waymoD3-fov-mono3d-3class.py
@@ -35,7 +35,7 @@
# base shape (1248, 832), scale (0.95, 1.05)
dict(
type='RandomResize3D',
scale=(1284, 832),
scale=(1248, 832),
ratio_range=(0.95, 1.05),
# ratio_range=(1., 1.),
interpolation='nearest',
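The fix above corrects a transposed-digit typo: the base resize shape for these Waymo images is (1248, 832), as the comment says, not (1284, 832). As a quick illustration of how the base shape and ratio_range combine (assuming RandomResize3D, like mmdet's RandomResize, rescales the base shape by a ratio drawn from ratio_range), here is a standalone sketch, not the mmdet3d implementation:

```python
# Illustrative arithmetic only: how a ratio sampled from ratio_range rescales
# the corrected base shape.
import random

base_scale = (1248, 832)        # corrected base shape; was mistyped as (1284, 832)
ratio_range = (0.95, 1.05)

ratio = random.uniform(*ratio_range)
target = (round(base_scale[0] * ratio), round(base_scale[1] * ratio))
print(f"sampled ratio={ratio:.3f} -> target scale={target}")
# Extremes: (1186, 790) at ratio 0.95 and (1310, 874) at ratio 1.05.
```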
20 changes: 11 additions & 9 deletions configs/_base_/datasets/waymoD3-mv-mono3d-3class.py
@@ -2,7 +2,7 @@
# D3 in the config name means the whole dataset is divided into 3 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo_mini/kitti_format/'
data_root = 'data/waymo/kitti_format/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
input_modality = dict(use_lidar=False, use_camera=True)
@@ -35,11 +35,13 @@
# base shape (1248, 832), scale (0.95, 1.05)
dict(
type='RandomResize3D',
scale=(1284, 832),
scale=(1248, 832),
# ratio_range=(1., 1.),
ratio_range=(0.95, 1.05),
interpolation='nearest',
keep_ratio=True,
),
# dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='Pack3DDetInputs',
keys=[
@@ -83,9 +85,9 @@

train_dataloader = dict(
batch_size=3,
num_workers=0,
persistent_workers=False,
sampler=dict(type='DefaultSampler', shuffle=False),
num_workers=3,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
@@ -170,9 +172,9 @@

val_evaluator = dict(
type='WaymoMetric',
ann_file='./data/waymo_mini/kitti_format/waymo_infos_val.pkl',
waymo_bin_file='./data/waymo_mini/waymo_format/cam_gt_mini.bin',
pklfile_prefix='./waymo_mv_pred_fix_resize_2',
ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
pklfile_prefix='./pgd_mv',
metric='LET_mAP',
convert_kitti_format=False,
load_type='mv_image_based',
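Beyond moving the paths from the waymo_mini subset back to the full data/waymo layout and re-enabling RandomFlip3D, the dataloader edit replaces debug settings (single process, no shuffling) with real training settings. One detail worth noting: PyTorch only allows persistent_workers=True when num_workers > 0, which is why both values change together. A minimal PyTorch sketch with a toy dataset (not the Waymo dataset from this config):

```python
# Toy demonstration of the PyTorch constraint behind the dataloader change:
# persistent_workers requires num_workers > 0.
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.arange(12).float())

try:
    DataLoader(dataset, batch_size=3, num_workers=0, persistent_workers=True)
except ValueError as err:
    print("rejected:", err)   # "persistent_workers option needs num_workers > 0"

loader = DataLoader(dataset, batch_size=3, num_workers=3,
                    persistent_workers=True, shuffle=True)
print("ok:", len(loader), "batches per epoch")
```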
26 changes: 20 additions & 6 deletions configs/_base_/datasets/waymoD5-mv3d-3class.py
@@ -19,7 +19,7 @@
# }))
backend_args = None

class_names = ['Car', 'Pedestrian', 'Cyclist']
class_names = ['Pedestrian', 'Cyclist', 'Car']
input_modality = dict(use_lidar=False, use_camera=True)
point_cloud_range = [-35.0, -75.0, -2, 75.0, 75.0, 4]

@@ -29,8 +29,9 @@
type='RandomResize3D',
scale=(1248, 832),
ratio_range=(0.95, 1.05),
# ratio_range=(1., 1.),
keep_ratio=True),
dict(type='RandomCrop3D', crop_size=(720, 1080)),
dict(type='RandomCrop3D', crop_size=(1080, 720)),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5, flip_box3d=False),
]

@@ -48,6 +49,9 @@
with_label_3d=True,
with_bbox_depth=True),
dict(type='MultiViewWrapper', transforms=train_transforms),
# randomness_keys= [
# 'scale', 'scale_factor', 'crop_size', 'img_crop_offset', 'flip',
# 'flip_direction']),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(
@@ -70,7 +74,11 @@
to_float32=True,
backend_args=backend_args),
dict(type='MultiViewWrapper', transforms=test_transforms),
dict(type='Pack3DDetInputs', keys=['img'])
dict(type='Pack3DDetInputs', keys=['img'], meta_keys=[
'box_type_3d', 'img_shape', 'ori_cam2img', 'scale_factor',
'sample_idx', 'context_name', 'timestamp', 'lidar2cam',
'num_ref_frames', 'num_views'
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
@@ -80,7 +88,11 @@
to_float32=True,
backend_args=backend_args),
dict(type='MultiViewWrapper', transforms=test_transforms),
dict(type='Pack3DDetInputs', keys=['img'])
dict(type='Pack3DDetInputs', keys=['img'], meta_keys=[
'box_type_3d', 'img_shape', 'ori_cam2img', 'scale_factor',
'sample_idx', 'context_name', 'timestamp', 'lidar2cam',
'num_ref_frames', 'num_views'
])
]
metainfo = dict(classes=class_names)

@@ -103,6 +115,7 @@
pipeline=train_pipeline,
modality=input_modality,
test_mode=False,
cam_sync_instances=True,
metainfo=metainfo,
box_type_3d='Lidar',
load_interval=5,
@@ -149,7 +162,7 @@
CAM_FRONT_RIGHT='training/image_2',
CAM_SIDE_LEFT='training/image_3',
CAM_SIDE_RIGHT='training/image_4'),
pipeline=eval_pipeline,
pipeline=test_pipeline,
modality=input_modality,
test_mode=True,
metainfo=metainfo,
@@ -159,7 +172,8 @@
type='WaymoMetric',
ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
data_root='./data/waymo/waymo_format',
pklfile_prefix='./mmdet3d_mvfoc3d_pred',
convert_kitti_format=False,
metric='LET_mAP',
backend_args=backend_args)

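This dataset config also keeps the BEV filtering range at point_cloud_range = [-35.0, -75.0, -2, 75.0, 75.0, 4], which ObjectRangeFilter uses to drop ground-truth boxes outside the modeled area. A conceptual numpy sketch of that kind of center-in-range test (illustrative values, not the mmdet3d implementation):

```python
# Conceptual sketch of a center-in-range filter driven by point_cloud_range;
# the box centers below are made up for illustration.
import numpy as np

point_cloud_range = [-35.0, -75.0, -2.0, 75.0, 75.0, 4.0]  # x_min, y_min, z_min, x_max, y_max, z_max
centers = np.array([[10.0, -20.0, 0.5],    # inside the range
                    [-40.0, 0.0, 0.0],     # x < x_min -> dropped
                    [50.0, 80.0, 1.0]])    # y > y_max -> dropped

x_min, y_min, _, x_max, y_max, _ = point_cloud_range
keep = ((centers[:, 0] >= x_min) & (centers[:, 0] <= x_max) &
        (centers[:, 1] >= y_min) & (centers[:, 1] <= y_max))
print(keep)  # [ True False False]
```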
29 changes: 15 additions & 14 deletions configs/_base_/models/multiview_dfm.py
@@ -35,21 +35,21 @@
type='AlignedAnchor3DRangeGenerator',
ranges=[[-35.0, -75.0, -2, 75.0, 75.0, 4]],
rotations=[.0]),
bbox_head=dict(
bbox_head_3d=dict(
type='Anchor3DHead',
num_classes=3,
in_channels=256,
feat_channels=256,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-35.0, -75.0, -0.0345, 75.0, 75.0, -0.0345],
[-35.0, -75.0, 0, 75.0, 75.0, 0],
[-35.0, -75.0, -0.1188, 75.0, 75.0, -0.1188]],
ranges=[[-35.0, -75.0, 0, 75.0, 75.0, 0],
[-35.0, -75.0, -0.1188, 75.0, 75.0, -0.1188],
[-35.0, -75.0, -0.0345, 75.0, 75.0, -0.0345]],
sizes=[
[4.73, 2.08, 1.77], # car
[0.91, 0.84, 1.74], # pedestrian
[1.81, 0.84, 1.77], # cyclist
[4.73, 2.08, 1.77], # car
],
rotations=[0, 1.57],
reshape_out=False),
@@ -69,13 +69,6 @@
loss_weight=0.2)),
train_cfg=dict(
assigner=[
dict( # for Car
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
dict( # for Pedestrian
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
@@ -90,6 +83,14 @@
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Car
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),

],
allowed_border=0,
pos_weight=-1,
@@ -100,5 +101,5 @@
nms_thr=0.05,
score_thr=0.001,
min_bbox_size=0,
nms_pre=500,
max_num=100))
nms_pre=4096,
max_num=500))
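The reordering above is the model-side counterpart of switching class_names to ['Pedestrian', 'Cyclist', 'Car'] in the dataset config: the per-class anchor ranges, anchor sizes, and Max3DIoUAssigner entries appear to be matched to classes by position, so all of the lists move together. A small sanity-check sketch under that assumption, with values copied from the config:

```python
# Sanity-check sketch: per-class anchor sizes and assigner thresholds are
# paired with class_names by position (assumed), so the orders must match.
class_names = ['Pedestrian', 'Cyclist', 'Car']
anchor_sizes = [
    [0.91, 0.84, 1.74],  # pedestrian
    [1.81, 0.84, 1.77],  # cyclist
    [4.73, 2.08, 1.77],  # car
]
pos_iou_thrs = [0.35, 0.35, 0.6]  # pedestrian, cyclist, car

for name, size, thr in zip(class_names, anchor_sizes, pos_iou_thrs):
    print(f'{name:10s} anchor size={size} pos_iou_thr={thr}')
```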
@@ -44,6 +44,6 @@
)

log_level = 'INFO'
load_from = None
load_from = 'work_dirs/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d/epoch_24.pth'
resume = False
find_unused_parameters = True # only 1 of 4 FPN outputs is used
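With load_from now pointing at a trained PGD checkpoint, the multi-view model is initialized from those monocular weights instead of starting without a detector checkpoint. An optional inspection sketch for checking such a checkpoint before launching training (plain torch.load; the path is the one from the config, and the 'state_dict'/'meta' layout is the usual mmengine convention):

```python
# Optional: inspect the checkpoint referenced by load_from before training.
import torch

ckpt_path = ('work_dirs/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d/'
             'epoch_24.pth')
# weights_only=False (torch >= 1.13) because mmengine checkpoints store
# extra metadata objects alongside the tensors.
ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)

state_dict = ckpt.get('state_dict', ckpt)      # mmengine wraps weights in 'state_dict'
print('meta keys:', list(ckpt.get('meta', {}).keys()))
print('first parameter names:', list(state_dict)[:5])
```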
30 changes: 30 additions & 0 deletions configs/pgd/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d.py
@@ -79,3 +79,33 @@
0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0
]),
test_cfg=dict(nms_pre=100, nms_thr=0.05, score_thr=0.001, max_per_img=20))

# optimizer
optim_wrapper = dict(
optimizer=dict(
type='SGD',
lr=0.008,
),
paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
clip_grad=dict(max_norm=35, norm_type=2))

param_scheduler = [
dict(
type='LinearLR',
start_factor=1.0 / 3,
by_epoch=False,
begin=0,
end=500),
dict(
type='MultiStepLR',
begin=0,
end=24,
by_epoch=True,
milestones=[16, 22],
gamma=0.1)
]

train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=24)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
auto_scale_lr = dict(enable=False, base_batch_size=48)
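The added schedule warms up linearly from lr/3 over the first 500 iterations and then steps the base lr of 0.008 down by 10x at epochs 16 and 22 of the 24-epoch run (auto LR scaling stays disabled, with a reference base_batch_size of 48). A short sketch of the post-warmup learning rate this implies:

```python
# Post-warmup learning rate implied by the MultiStepLR settings above:
# base lr 0.008, gamma 0.1, milestones at epochs 16 and 22, 24 epochs total.
base_lr, gamma, milestones = 0.008, 0.1, (16, 22)

def lr_at_epoch(epoch: int) -> float:
    decays = sum(epoch >= m for m in milestones)
    return base_lr * gamma ** decays

for epoch in (0, 15, 16, 21, 22, 23):
    print(f'epoch {epoch:2d}: lr = {lr_at_epoch(epoch):.6f}')
# 0.008 until epoch 15, 0.0008 for epochs 16-21, 0.00008 for epochs 22-23.
```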
1 change: 1 addition & 0 deletions mmdet3d/datasets/transforms/transforms_3d.py
@@ -2071,6 +2071,7 @@ def _crop_data(self,
offset_w = np.random.randint(
self.rel_offset_w[0] * margin_w,
self.rel_offset_w[1] * margin_w + 1)
# offset_h, offset_w = 0, 0
else:
offset_w, offset_h = results['img_crop_offset']

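The only change in this hunk is a leftover commented-out debug line next to the crop-offset sampling. For context, the offsets are drawn uniformly from the crop margins scaled by rel_offset_h/rel_offset_w; a standalone numpy sketch of that sampling with made-up image, crop, and offset values:

```python
# Standalone sketch of the offset sampling in _crop_data; all numbers here are
# illustrative and not taken from the Waymo configs.
import numpy as np

img_h, img_w = 1280, 1920
crop_h, crop_w = 720, 1080
rel_offset_h, rel_offset_w = (0.0, 1.0), (0.0, 1.0)

margin_h = max(img_h - crop_h, 0)
margin_w = max(img_w - crop_w, 0)
offset_h = np.random.randint(int(rel_offset_h[0] * margin_h),
                             int(rel_offset_h[1] * margin_h) + 1)
offset_w = np.random.randint(int(rel_offset_w[0] * margin_w),
                             int(rel_offset_w[1] * margin_w) + 1)
print(offset_h, offset_w)  # any pair in [0, margin_h] x [0, margin_w]
```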
6 changes: 5 additions & 1 deletion mmdet3d/models/detectors/dfm.py
@@ -13,6 +13,9 @@ class DfM(BaseDetector):
<https://arxiv.org/abs/2207.12988>`_.
Args:
data_preprocessor (:obj:`ConfigDict` or dict): The pre-process
config of :class:`BaseDataPreprocessor`. It usually includes
``pad_size_divisor``, ``pad_value``, ``mean`` and ``std``.
backbone (:obj:`ConfigDict` or dict): The backbone config.
neck (:obj:`ConfigDict` or dict): The neck config.
backbone_stereo (:obj:`ConfigDict` or dict): The stereo backbone
@@ -39,6 +42,7 @@ class DfM(BaseDetector):
"""

def __init__(self,
data_preprocessor: ConfigType,
backbone: ConfigType,
neck: ConfigType,
backbone_stereo: ConfigType,
@@ -53,7 +57,7 @@ def __init__(self,
test_cfg=None,
pretrained=None,
init_cfg=None):
super().__init__(init_cfg=init_cfg)
super().__init__(data_preprocessor=data_preprocessor, init_cfg=init_cfg)
self.backbone = MODELS.build(backbone)
self.neck = MODELS.build(neck)
if backbone_stereo is not None:
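The constructor change threads data_preprocessor through to the base class so that batching, normalization, and padding happen before the detector's forward pass. A minimal sketch of that pattern with mmengine's BaseModel (a toy module for illustration, not the DfM implementation):

```python
# Toy module showing the data_preprocessor forwarding pattern; not DfM itself.
from typing import Optional, Union

import torch.nn as nn
from mmengine.model import BaseModel


class ToyDetector(BaseModel):

    def __init__(self,
                 data_preprocessor: Optional[Union[dict, nn.Module]] = None,
                 init_cfg: Optional[dict] = None):
        # Passing data_preprocessor here lets the base class build and own it.
        super().__init__(data_preprocessor=data_preprocessor,
                         init_cfg=init_cfg)
        self.backbone = nn.Conv2d(3, 16, 3, padding=1)

    def forward(self, inputs, data_samples=None, mode='tensor'):
        # `inputs` arrives already normalized/padded by self.data_preprocessor.
        return self.backbone(inputs)
```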