diff --git a/configs/fast_mask_rcnn_r50_fpn_1x.py b/configs/fast_mask_rcnn_r50_fpn_1x.py new file mode 100644 index 00000000000..4281c161813 --- /dev/null +++ b/configs/fast_mask_rcnn_r50_fpn_1x.py @@ -0,0 +1,132 @@ +# model settings +model = dict( + type='FastRCNN', + pretrained='modelzoo://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='SharedFCRoIHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False), + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=81)) +# model training and testing settings +train_cfg = dict( + rcnn=dict( + mask_size=28, + pos_iou_thr=0.5, + neg_iou_thr=0.5, + crowd_thr=1.1, + roi_batch_size=512, + add_gt_as_proposals=True, + pos_fraction=0.25, + pos_balance_sampling=False, + neg_pos_ub=512, + neg_balance_thr=0, + min_pos_iou=0.5, + pos_weight=-1, + debug=False)) +test_cfg = dict( + rcnn=dict( + score_thr=0.05, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5)) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + proposal_file=data_root + 'proposals/train2017_r50_fpn_rpn_1x.pkl', + flip_ratio=0.5, + with_mask=True, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + proposal_file=data_root + 'proposals/val2017_r50_fpn_rpn_1x.pkl', + size_divisor=32, + flip_ratio=0, + with_mask=True, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + proposal_file=data_root + 'proposals/val2017_r50_fpn_rpn_1x.pkl', + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/fast_mask_rcnn_r50_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/fast_rcnn_r50_fpn_1x.py b/configs/fast_rcnn_r50_fpn_1x.py new file mode 100644 index 00000000000..47ec415cf0a --- /dev/null +++ b/configs/fast_rcnn_r50_fpn_1x.py @@ -0,0 +1,118 @@ +# model settings +model = dict( + type='FastRCNN', + pretrained='modelzoo://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='SharedFCRoIHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False)) +# model training and testing settings +train_cfg = dict( + rcnn=dict( + pos_iou_thr=0.5, + neg_iou_thr=0.5, + crowd_thr=1.1, + roi_batch_size=512, + add_gt_as_proposals=True, + pos_fraction=0.25, + pos_balance_sampling=False, + neg_pos_ub=512, + neg_balance_thr=0, + min_pos_iou=0.5, + pos_weight=-1, + debug=False)) +test_cfg = dict(rcnn=dict(score_thr=0.05, max_per_img=100, nms_thr=0.5)) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + proposal_file=data_root + 'proposals/train2017_r50_fpn_rpn_1x.pkl', + flip_ratio=0.5, + with_mask=False, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + proposal_file=data_root + 'proposals/val2017_r50_fpn_rpn_1x.pkl', + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + proposal_file=data_root + 'proposals/val2017_r50_fpn_rpn_1x.pkl', + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/fast_rcnn_r50_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/faster_rcnn_r50_fpn_1x.py b/configs/faster_rcnn_r50_fpn_1x.py index b15405e0997..1c06c4cca7e 100644 --- a/configs/faster_rcnn_r50_fpn_1x.py +++ b/configs/faster_rcnn_r50_fpn_1x.py @@ -65,7 +65,7 @@ pos_balance_sampling=False, neg_pos_ub=512, neg_balance_thr=0, - min_pos_iou=1.1, + min_pos_iou=0.5, pos_weight=-1, debug=False)) test_cfg = dict( @@ -139,7 +139,6 @@ # yapf:enable # runtime settings total_epochs = 12 -device_ids = range(8) dist_params = dict(backend='nccl') log_level = 'INFO' work_dir = './work_dirs/faster_rcnn_r50_fpn_1x' diff --git a/configs/mask_rcnn_r50_fpn_1x.py b/configs/mask_rcnn_r50_fpn_1x.py index e2d47217cc4..8868cf6ebd9 100644 --- a/configs/mask_rcnn_r50_fpn_1x.py +++ b/configs/mask_rcnn_r50_fpn_1x.py @@ -77,7 +77,7 @@ pos_balance_sampling=False, neg_pos_ub=512, neg_balance_thr=0, - min_pos_iou=1.1, + min_pos_iou=0.5, pos_weight=-1, debug=False)) test_cfg = dict( @@ -152,7 +152,6 @@ # yapf:enable # runtime settings total_epochs = 12 -device_ids = range(8) dist_params = dict(backend='nccl') log_level = 'INFO' work_dir = './work_dirs/mask_rcnn_r50_fpn_1x'