add mvfcos3d
sunjiahao1999 committed Dec 4, 2023
1 parent 651dbf1 commit bfb10b6
Showing 9 changed files with 240 additions and 71 deletions.
2 changes: 1 addition & 1 deletion configs/_base_/datasets/waymoD3-fov-mono3d-3class.py
@@ -35,7 +35,7 @@
# base shape (1248, 832), scale (0.95, 1.05)
dict(
type='RandomResize3D',
scale=(1284, 832),
scale=(1248, 832),
ratio_range=(0.95, 1.05),
# ratio_range=(1., 1.),
interpolation='nearest',
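The fix above corrects a transposed-digit typo: the base resize shape for these Waymo images is (1248, 832), as the comment says, not (1284, 832). As a quick illustration of how the base shape and ratio_range combine (assuming RandomResize3D, like mmdet's RandomResize, rescales the base shape by a ratio drawn from ratio_range), here is a standalone sketch, not the mmdet3d implementation:

```python
# Illustrative arithmetic only: how a ratio sampled from ratio_range rescales
# the corrected base shape.
import random

base_scale = (1248, 832)        # corrected base shape; was mistyped as (1284, 832)
ratio_range = (0.95, 1.05)

ratio = random.uniform(*ratio_range)
target = (round(base_scale[0] * ratio), round(base_scale[1] * ratio))
print(f"sampled ratio={ratio:.3f} -> target scale={target}")
# Extremes: (1186, 790) at ratio 0.95 and (1310, 874) at ratio 1.05.
```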
20 changes: 11 additions & 9 deletions configs/_base_/datasets/waymoD3-mv-mono3d-3class.py
@@ -2,7 +2,7 @@
# D3 in the config name means the whole dataset is divided into 3 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo_mini/kitti_format/'
data_root = 'data/waymo/kitti_format/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
input_modality = dict(use_lidar=False, use_camera=True)
@@ -35,11 +35,13 @@
# base shape (1248, 832), scale (0.95, 1.05)
dict(
type='RandomResize3D',
scale=(1284, 832),
scale=(1248, 832),
# ratio_range=(1., 1.),
ratio_range=(0.95, 1.05),
interpolation='nearest',
keep_ratio=True,
),
# dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='Pack3DDetInputs',
keys=[
@@ -83,9 +85,9 @@

train_dataloader = dict(
batch_size=3,
num_workers=0,
persistent_workers=False,
sampler=dict(type='DefaultSampler', shuffle=False),
num_workers=3,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
@@ -170,9 +172,9 @@

val_evaluator = dict(
type='WaymoMetric',
ann_file='./data/waymo_mini/kitti_format/waymo_infos_val.pkl',
waymo_bin_file='./data/waymo_mini/waymo_format/cam_gt_mini.bin',
pklfile_prefix='./waymo_mv_pred_fix_resize_2',
ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
pklfile_prefix='./pgd_mv',
metric='LET_mAP',
convert_kitti_format=False,
load_type='mv_image_based',
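Beyond moving the paths from the waymo_mini subset back to the full data/waymo layout and re-enabling RandomFlip3D, the dataloader edit replaces debug settings (single process, no shuffling) with real training settings. One detail worth noting: PyTorch only allows persistent_workers=True when num_workers > 0, which is why both values change together. A minimal PyTorch sketch with a toy dataset (not the Waymo dataset from this config):

```python
# Toy demonstration of the PyTorch constraint behind the dataloader change:
# persistent_workers requires num_workers > 0.
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.arange(12).float())

try:
    DataLoader(dataset, batch_size=3, num_workers=0, persistent_workers=True)
except ValueError as err:
    print("rejected:", err)   # "persistent_workers option needs num_workers > 0"

loader = DataLoader(dataset, batch_size=3, num_workers=3,
                    persistent_workers=True, shuffle=True)
print("ok:", len(loader), "batches per epoch")
```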
26 changes: 20 additions & 6 deletions configs/_base_/datasets/waymoD5-mv3d-3class.py
@@ -19,7 +19,7 @@
# }))
backend_args = None

class_names = ['Car', 'Pedestrian', 'Cyclist']
class_names = ['Pedestrian', 'Cyclist', 'Car']
input_modality = dict(use_lidar=False, use_camera=True)
point_cloud_range = [-35.0, -75.0, -2, 75.0, 75.0, 4]

@@ -29,8 +29,9 @@
type='RandomResize3D',
scale=(1248, 832),
ratio_range=(0.95, 1.05),
# ratio_range=(1., 1.),
keep_ratio=True),
dict(type='RandomCrop3D', crop_size=(720, 1080)),
dict(type='RandomCrop3D', crop_size=(1080, 720)),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5, flip_box3d=False),
]

@@ -48,6 +49,9 @@
with_label_3d=True,
with_bbox_depth=True),
dict(type='MultiViewWrapper', transforms=train_transforms),
# randomness_keys= [
# 'scale', 'scale_factor', 'crop_size', 'img_crop_offset', 'flip',
# 'flip_direction']),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(
@@ -70,7 +74,11 @@
to_float32=True,
backend_args=backend_args),
dict(type='MultiViewWrapper', transforms=test_transforms),
dict(type='Pack3DDetInputs', keys=['img'])
dict(type='Pack3DDetInputs', keys=['img'], meta_keys=[
'box_type_3d', 'img_shape', 'ori_cam2img', 'scale_factor',
'sample_idx', 'context_name', 'timestamp', 'lidar2cam',
'num_ref_frames', 'num_views'
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
@@ -80,7 +88,11 @@
to_float32=True,
backend_args=backend_args),
dict(type='MultiViewWrapper', transforms=test_transforms),
dict(type='Pack3DDetInputs', keys=['img'])
dict(type='Pack3DDetInputs', keys=['img'], meta_keys=[
'box_type_3d', 'img_shape', 'ori_cam2img', 'scale_factor',
'sample_idx', 'context_name', 'timestamp', 'lidar2cam',
'num_ref_frames', 'num_views'
])
]
metainfo = dict(classes=class_names)

@@ -103,6 +115,7 @@
pipeline=train_pipeline,
modality=input_modality,
test_mode=False,
cam_sync_instances=True,
metainfo=metainfo,
box_type_3d='Lidar',
load_interval=5,
@@ -149,7 +162,7 @@
CAM_FRONT_RIGHT='training/image_2',
CAM_SIDE_LEFT='training/image_3',
CAM_SIDE_RIGHT='training/image_4'),
pipeline=eval_pipeline,
pipeline=test_pipeline,
modality=input_modality,
test_mode=True,
metainfo=metainfo,
@@ -159,7 +172,8 @@
type='WaymoMetric',
ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
data_root='./data/waymo/waymo_format',
pklfile_prefix='./mmdet3d_mvfoc3d_pred',
convert_kitti_format=False,
metric='LET_mAP',
backend_args=backend_args)

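This dataset config also keeps the BEV filtering range at point_cloud_range = [-35.0, -75.0, -2, 75.0, 75.0, 4], which ObjectRangeFilter uses to drop ground-truth boxes outside the modeled area. A conceptual numpy sketch of that kind of center-in-range test (illustrative values, not the mmdet3d implementation):

```python
# Conceptual sketch of a center-in-range filter driven by point_cloud_range;
# the box centers below are made up for illustration.
import numpy as np

point_cloud_range = [-35.0, -75.0, -2.0, 75.0, 75.0, 4.0]  # x_min, y_min, z_min, x_max, y_max, z_max
centers = np.array([[10.0, -20.0, 0.5],    # inside the range
                    [-40.0, 0.0, 0.0],     # x < x_min -> dropped
                    [50.0, 80.0, 1.0]])    # y > y_max -> dropped

x_min, y_min, _, x_max, y_max, _ = point_cloud_range
keep = ((centers[:, 0] >= x_min) & (centers[:, 0] <= x_max) &
        (centers[:, 1] >= y_min) & (centers[:, 1] <= y_max))
print(keep)  # [ True False False]
```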
29 changes: 15 additions & 14 deletions configs/_base_/models/multiview_dfm.py
@@ -35,21 +35,21 @@
type='AlignedAnchor3DRangeGenerator',
ranges=[[-35.0, -75.0, -2, 75.0, 75.0, 4]],
rotations=[.0]),
bbox_head=dict(
bbox_head_3d=dict(
type='Anchor3DHead',
num_classes=3,
in_channels=256,
feat_channels=256,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-35.0, -75.0, -0.0345, 75.0, 75.0, -0.0345],
[-35.0, -75.0, 0, 75.0, 75.0, 0],
[-35.0, -75.0, -0.1188, 75.0, 75.0, -0.1188]],
ranges=[[-35.0, -75.0, 0, 75.0, 75.0, 0],
[-35.0, -75.0, -0.1188, 75.0, 75.0, -0.1188],
[-35.0, -75.0, -0.0345, 75.0, 75.0, -0.0345]],
sizes=[
[4.73, 2.08, 1.77], # car
[0.91, 0.84, 1.74], # pedestrian
[1.81, 0.84, 1.77], # cyclist
[4.73, 2.08, 1.77], # car
],
rotations=[0, 1.57],
reshape_out=False),
@@ -69,13 +69,6 @@
loss_weight=0.2)),
train_cfg=dict(
assigner=[
dict( # for Car
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
dict( # for Pedestrian
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
@@ -90,6 +83,14 @@
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Car
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),

],
allowed_border=0,
pos_weight=-1,
@@ -100,5 +101,5 @@
nms_thr=0.05,
score_thr=0.001,
min_bbox_size=0,
nms_pre=500,
max_num=100))
nms_pre=4096,
max_num=500))
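The reordering above is the model-side counterpart of switching class_names to ['Pedestrian', 'Cyclist', 'Car'] in the dataset config: the per-class anchor ranges, anchor sizes, and Max3DIoUAssigner entries appear to be matched to classes by position, so all of the lists move together. A small sanity-check sketch under that assumption, with values copied from the config:

```python
# Sanity-check sketch: per-class anchor sizes and assigner thresholds are
# paired with class_names by position (assumed), so the orders must match.
class_names = ['Pedestrian', 'Cyclist', 'Car']
anchor_sizes = [
    [0.91, 0.84, 1.74],  # pedestrian
    [1.81, 0.84, 1.77],  # cyclist
    [4.73, 2.08, 1.77],  # car
]
pos_iou_thrs = [0.35, 0.35, 0.6]  # pedestrian, cyclist, car

for name, size, thr in zip(class_names, anchor_sizes, pos_iou_thrs):
    print(f'{name:10s} anchor size={size} pos_iou_thr={thr}')
```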
@@ -44,6 +44,6 @@
)

log_level = 'INFO'
load_from = None
load_from = 'work_dirs/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d/epoch_24.pth'
resume = False
find_unused_parameters = True # only 1 of 4 FPN outputs is used
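With load_from now pointing at a trained PGD checkpoint, the multi-view model is initialized from those monocular weights instead of starting without a detector checkpoint. An optional inspection sketch for checking such a checkpoint before launching training (plain torch.load; the path is the one from the config, and the 'state_dict'/'meta' layout is the usual mmengine convention):

```python
# Optional: inspect the checkpoint referenced by load_from before training.
import torch

ckpt_path = ('work_dirs/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d/'
             'epoch_24.pth')
# weights_only=False (torch >= 1.13) because mmengine checkpoints store
# extra metadata objects alongside the tensors.
ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)

state_dict = ckpt.get('state_dict', ckpt)      # mmengine wraps weights in 'state_dict'
print('meta keys:', list(ckpt.get('meta', {}).keys()))
print('first parameter names:', list(state_dict)[:5])
```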
30 changes: 30 additions & 0 deletions configs/pgd/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d.py
@@ -79,3 +79,33 @@
0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0
]),
test_cfg=dict(nms_pre=100, nms_thr=0.05, score_thr=0.001, max_per_img=20))

# optimizer
optim_wrapper = dict(
optimizer=dict(
type='SGD',
lr=0.008,
),
paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
clip_grad=dict(max_norm=35, norm_type=2))

param_scheduler = [
dict(
type='LinearLR',
start_factor=1.0 / 3,
by_epoch=False,
begin=0,
end=500),
dict(
type='MultiStepLR',
begin=0,
end=24,
by_epoch=True,
milestones=[16, 22],
gamma=0.1)
]

train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=24)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
auto_scale_lr = dict(enable=False, base_batch_size=48)
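The added schedule warms up linearly from lr/3 over the first 500 iterations and then steps the base lr of 0.008 down by 10x at epochs 16 and 22 of the 24-epoch run (auto LR scaling stays disabled, with a reference base_batch_size of 48). A short sketch of the post-warmup learning rate this implies:

```python
# Post-warmup learning rate implied by the MultiStepLR settings above:
# base lr 0.008, gamma 0.1, milestones at epochs 16 and 22, 24 epochs total.
base_lr, gamma, milestones = 0.008, 0.1, (16, 22)

def lr_at_epoch(epoch: int) -> float:
    decays = sum(epoch >= m for m in milestones)
    return base_lr * gamma ** decays

for epoch in (0, 15, 16, 21, 22, 23):
    print(f'epoch {epoch:2d}: lr = {lr_at_epoch(epoch):.6f}')
# 0.008 until epoch 15, 0.0008 for epochs 16-21, 0.00008 for epochs 22-23.
```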
1 change: 1 addition & 0 deletions mmdet3d/datasets/transforms/transforms_3d.py
@@ -2071,6 +2071,7 @@ def _crop_data(self,
offset_w = np.random.randint(
self.rel_offset_w[0] * margin_w,
self.rel_offset_w[1] * margin_w + 1)
# offset_h, offset_w = 0, 0
else:
offset_w, offset_h = results['img_crop_offset']

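The only change in this hunk is a leftover commented-out debug line next to the crop-offset sampling. For context, the offsets are drawn uniformly from the crop margins scaled by rel_offset_h/rel_offset_w; a standalone numpy sketch of that sampling with made-up image, crop, and offset values:

```python
# Standalone sketch of the offset sampling in _crop_data; all numbers here are
# illustrative and not taken from the Waymo configs.
import numpy as np

img_h, img_w = 1280, 1920
crop_h, crop_w = 720, 1080
rel_offset_h, rel_offset_w = (0.0, 1.0), (0.0, 1.0)

margin_h = max(img_h - crop_h, 0)
margin_w = max(img_w - crop_w, 0)
offset_h = np.random.randint(int(rel_offset_h[0] * margin_h),
                             int(rel_offset_h[1] * margin_h) + 1)
offset_w = np.random.randint(int(rel_offset_w[0] * margin_w),
                             int(rel_offset_w[1] * margin_w) + 1)
print(offset_h, offset_w)  # any pair in [0, margin_h] x [0, margin_w]
```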
6 changes: 5 additions & 1 deletion mmdet3d/models/detectors/dfm.py
@@ -13,6 +13,9 @@ class DfM(BaseDetector):
<https://arxiv.org/abs/2207.12988>`_.
Args:
data_preprocessor (:obj:`ConfigDict` or dict): The pre-process
config of :class:`BaseDataPreprocessor`. It usually includes
``pad_size_divisor``, ``pad_value``, ``mean`` and ``std``.
backbone (:obj:`ConfigDict` or dict): The backbone config.
neck (:obj:`ConfigDict` or dict): The neck config.
backbone_stereo (:obj:`ConfigDict` or dict): The stereo backbone
@@ -39,6 +42,7 @@ class DfM(BaseDetector):
"""

def __init__(self,
data_preprocessor: ConfigType,
backbone: ConfigType,
neck: ConfigType,
backbone_stereo: ConfigType,
@@ -53,7 +57,7 @@ def __init__(self,
test_cfg=None,
pretrained=None,
init_cfg=None):
super().__init__(init_cfg=init_cfg)
super().__init__(data_preprocessor=data_preprocessor, init_cfg=init_cfg)
self.backbone = MODELS.build(backbone)
self.neck = MODELS.build(neck)
if backbone_stereo is not None:
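The constructor change threads data_preprocessor through to the base class so that batching, normalization, and padding happen before the detector's forward pass. A minimal sketch of that pattern with mmengine's BaseModel (a toy module for illustration, not the DfM implementation):

```python
# Toy module showing the data_preprocessor forwarding pattern; not DfM itself.
from typing import Optional, Union

import torch.nn as nn
from mmengine.model import BaseModel


class ToyDetector(BaseModel):

    def __init__(self,
                 data_preprocessor: Optional[Union[dict, nn.Module]] = None,
                 init_cfg: Optional[dict] = None):
        # Passing data_preprocessor here lets the base class build and own it.
        super().__init__(data_preprocessor=data_preprocessor,
                         init_cfg=init_cfg)
        self.backbone = nn.Conv2d(3, 16, 3, padding=1)

    def forward(self, inputs, data_samples=None, mode='tensor'):
        # `inputs` arrives already normalized/padded by self.data_preprocessor.
        return self.backbone(inputs)
```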