diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 178508f565..69503dd96a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -91,8 +91,8 @@ jobs: - name: Install mmdet3d dependencies run: | pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu101/${{matrix.torch_version}}/index.html - pip install mmdet==2.11.0 - pip install mmsegmentation==0.14.0 + pip install mmdet==2.14.0 + pip install mmsegmentation==0.14.1 pip install -r requirements.txt - name: Build and install run: | diff --git a/configs/_base_/models/fcos3d.py b/configs/_base_/models/fcos3d.py index b0928e455e..84596c05e1 100644 --- a/configs/_base_/models/fcos3d.py +++ b/configs/_base_/models/fcos3d.py @@ -16,7 +16,6 @@ out_channels=256, start_level=1, add_extra_convs=True, - extra_convs_on_inputs=False, # use P5 num_outs=5, relu_before_extra_convs=True), bbox_head=dict( diff --git a/configs/_base_/models/imvotenet_image.py b/configs/_base_/models/imvotenet_image.py index 8ddfa8c877..c5067f2c5b 100644 --- a/configs/_base_/models/imvotenet_image.py +++ b/configs/_base_/models/imvotenet_image.py @@ -99,8 +99,8 @@ nms_across_levels=False, nms_pre=1000, nms_post=1000, - max_num=1000, - nms_thr=0.7, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), min_bbox_size=0), img_rcnn=dict( score_thr=0.05, diff --git a/configs/imvoxelnet/imvoxelnet_kitti-3d-car.py b/configs/imvoxelnet/imvoxelnet_kitti-3d-car.py index e47fb9c752..47932d7f6d 100644 --- a/configs/imvoxelnet/imvoxelnet_kitti-3d-car.py +++ b/configs/imvoxelnet/imvoxelnet_kitti-3d-car.py @@ -1,6 +1,5 @@ model = dict( type='ImVoxelNet', - pretrained='torchvision://resnet50', backbone=dict( type='ResNet', depth=50, @@ -9,6 +8,7 @@ frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=False), norm_eval=True, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), style='pytorch'), neck=dict( type='FPN', diff --git a/docs/compatibility.md b/docs/compatibility.md index 471a770c49..26e0c74d87 100644 --- a/docs/compatibility.md +++ b/docs/compatibility.md @@ -4,6 +4,15 @@ This document provides detailed descriptions of the BC-breaking changes in MMDet ## MMDetection3D 0.15.0 +### Unified parameter initialization + +To unify the parameter initialization in OpenMMLab projects, MMCV supports `BaseModule`, which accepts `init_cfg` to allow the modules' parameters to be initialized in a flexible and unified manner. Users now need to explicitly call `model.init_weights()` in the training script to initialize the model (as in [here](https://github.com/open-mmlab/mmdetection3d/blob/master/tools/train.py#L183)); previously this was handled by the detector. Please refer to PR #622 for details. + +### BackgroundPointsFilter + +We modified the dataset augmentation function `BackgroundPointsFilter` (see [here](https://github.com/open-mmlab/mmdetection3d/blob/mmdet3d/datasets/pipelines/transforms_3d.py#L1101)). In previous versions of MMDetection3D, `BackgroundPointsFilter` changed the bottom center of `gt_bboxes_3d` to the gravity center. In MMDetection3D 0.15.0, +`BackgroundPointsFilter` no longer modifies it. Please refer to PR #609 for details. + ### Enhance `IndoorPatchPointSample` transform We enhance the pipeline function `IndoorPatchPointSample` used in point cloud segmentation task by adding more choices for patch selection. Also, we plan to remove the unused parameter `sample_rate` in the future. Please modify the code as well as the config files accordingly if you use this transform.
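A minimal sketch of the init_cfg flow described in the "Unified parameter initialization" note above. `ToyBackbone` is a hypothetical module, not part of this diff; only the `BaseModule`/`init_cfg`/`init_weights()` pattern mirrors the actual change.

```python
from mmcv.runner import BaseModule
from torch import nn as nn


class ToyBackbone(BaseModule):  # illustrative stand-in, not in the diff

    def __init__(self, init_cfg=None):
        super(ToyBackbone, self).__init__(init_cfg=init_cfg)
        self.conv = nn.Conv2d(3, 16, 3)
        if init_cfg is None:
            # declarative default instead of a hand-written init_weights()
            self.init_cfg = dict(type='Kaiming', layer='Conv2d')


model = ToyBackbone()
# Since MMDetection3D 0.15.0 the training script has to trigger the
# initialization explicitly, as tools/train.py now does:
model.init_weights()
```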
diff --git a/docs/getting_started.md b/docs/getting_started.md index ae4e9aa59d..0afe9c3df2 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -12,8 +12,8 @@ The required versions of MMCV, MMDetection and MMSegmentation for different vers | MMDetection3D version | MMDetection version | MMSegmentation version | MMCV version | |:-------------------:|:-------------------:|:-------------------:|:-------------------:| -| master | mmdet>=2.10.0, <=2.11.0| mmseg==0.14.0 | mmcv-full>=1.3.1, <=1.4| -| 0.14.0 | mmdet>=2.10.0, <=2.11.0| mmseg==0.14.0 | mmcv-full>=1.3.1, <=1.4| +| master | mmdet>=2.12.0 | mmseg>=0.14.1 | mmcv-full>=1.3.2, <=1.4| +| 0.14.0 | mmdet>=2.10.0, <=2.11.0| mmseg>=0.13.0 | mmcv-full>=1.3.1, <=1.4| | 0.13.0 | mmdet>=2.10.0, <=2.11.0| Not required | mmcv-full>=1.2.4, <=1.4| | 0.12.0 | mmdet>=2.5.0, <=2.11.0 | Not required | mmcv-full>=1.2.4, <=1.4| | 0.11.0 | mmdet>=2.5.0, <=2.11.0 | Not required | mmcv-full>=1.2.4, <=1.4| diff --git a/docs/tutorials/customize_models.md b/docs/tutorials/customize_models.md index 8028e3961d..a87b286b2f 100644 --- a/docs/tutorials/customize_models.md +++ b/docs/tutorials/customize_models.md @@ -33,9 +33,6 @@ class HardVFE(nn.Module): def forward(self, x): # should return a tuple pass - - def init_weights(self, pretrained=None): - pass ``` #### 2. Import the module @@ -83,16 +80,13 @@ from ..builder import BACKBONES @BACKBONES.register_module() -class SECOND(nn.Module): +class SECOND(BaseModule): def __init__(self, arg1, arg2): pass def forward(self, x): # should return a tuple pass - - def init_weights(self, pretrained=None): - pass ``` #### 2. Import the module @@ -135,7 +129,7 @@ Create a new file `mmdet3d/models/necks/second_fpn.py`. from ..builder import NECKS @NECKS.register -class SECONDFPN(nn.Module): +class SECONDFPN(BaseModule): def __init__(self, in_channels=[128, 128, 256], @@ -144,7 +138,8 @@ class SECONDFPN(nn.Module): norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), upsample_cfg=dict(type='deconv', bias=False), conv_cfg=dict(type='Conv2d', bias=False), - use_conv_for_no_stride=False): + use_conv_for_no_stride=False, + init_cfg=None): pass def forward(self, X): @@ -198,7 +193,7 @@ from mmdet.models.builder import HEADS from .bbox_head import BBoxHead @HEADS.register_module() -class PartA2BboxHead(nn.Module): +class PartA2BboxHead(BaseModule): """PartA2 RoI head.""" def __init__(self, @@ -224,11 +219,9 @@ class PartA2BboxHead(nn.Module): type='CrossEntropyLoss', use_sigmoid=True, reduction='none', - loss_weight=1.0)): - super(PartA2BboxHead, self).__init__() - - def init_weights(self): - # conv layers are already initialized by ConvModule + loss_weight=1.0), + init_cfg=None): + super(PartA2BboxHead, self).__init__(init_cfg=init_cfg) def forward(self, seg_feats, part_feats): @@ -242,7 +235,7 @@ from torch import nn as nn @HEADS.register_module() -class Base3DRoIHead(nn.Module, metaclass=ABCMeta): +class Base3DRoIHead(BaseModule, metaclass=ABCMeta): """Base class for 3d RoIHeads.""" def __init__(self, @@ -250,7 +243,8 @@ class Base3DRoIHead(nn.Module, metaclass=ABCMeta): mask_roi_extractor=None, mask_head=None, train_cfg=None, - test_cfg=None): + test_cfg=None, + init_cfg=None): @property def with_bbox(self): @@ -333,9 +327,13 @@ class PartAggregationROIHead(Base3DRoIHead): part_roi_extractor=None, bbox_head=None, train_cfg=None, - test_cfg=None): + test_cfg=None, + init_cfg=None): super(PartAggregationROIHead, self).__init__( - bbox_head=bbox_head, train_cfg=train_cfg, test_cfg=test_cfg) + bbox_head=bbox_head, + 
train_cfg=train_cfg, + test_cfg=test_cfg, + init_cfg=init_cfg) self.num_classes = num_classes assert semantic_head is not None self.semantic_head = build_head(semantic_head) diff --git a/mmdet3d/__init__.py b/mmdet3d/__init__.py index 09bff83f2a..839cded3ba 100644 --- a/mmdet3d/__init__.py +++ b/mmdet3d/__init__.py @@ -17,7 +17,7 @@ def digit_version(version_str): return digit_version -mmcv_minimum_version = '1.3.1' +mmcv_minimum_version = '1.3.8' mmcv_maximum_version = '1.4.0' mmcv_version = digit_version(mmcv.__version__) @@ -27,8 +27,8 @@ def digit_version(version_str): f'MMCV=={mmcv.__version__} is used but incompatible. ' \ f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.' -mmdet_minimum_version = '2.10.0' -mmdet_maximum_version = '2.11.0' +mmdet_minimum_version = '2.14.0' +mmdet_maximum_version = '3.0.0' mmdet_version = digit_version(mmdet.__version__) assert (mmdet_version >= digit_version(mmdet_minimum_version) and mmdet_version <= digit_version(mmdet_maximum_version)), \ @@ -36,8 +36,8 @@ def digit_version(version_str): f'Please install mmdet>={mmdet_minimum_version}, ' \ f'<={mmdet_maximum_version}.' -mmseg_minimum_version = '0.14.0' -mmseg_maximum_version = '0.14.0' +mmseg_minimum_version = '0.14.1' +mmseg_maximum_version = '1.0.0' mmseg_version = digit_version(mmseg.__version__) assert (mmseg_version >= digit_version(mmseg_minimum_version) and mmseg_version <= digit_version(mmseg_maximum_version)), \ diff --git a/mmdet3d/models/backbones/base_pointnet.py b/mmdet3d/models/backbones/base_pointnet.py index 5330c91543..8ba5deecf0 100644 --- a/mmdet3d/models/backbones/base_pointnet.py +++ b/mmdet3d/models/backbones/base_pointnet.py @@ -1,23 +1,20 @@ +import warnings from abc import ABCMeta -from mmcv.runner import load_checkpoint -from torch import nn as nn +from mmcv.runner import BaseModule -class BasePointNet(nn.Module, metaclass=ABCMeta): +class BasePointNet(BaseModule, metaclass=ABCMeta): """Base class for PointNet.""" - def __init__(self): - super(BasePointNet, self).__init__() + def __init__(self, init_cfg=None, pretrained=None): + super(BasePointNet, self).__init__(init_cfg) self.fp16_enabled = False - - def init_weights(self, pretrained=None): - """Initialize the weights of PointNet backbone.""" - # Do not initialize the conv layers - # to follow the original implementation + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be setting at the same time' if isinstance(pretrained, str): - from mmdet3d.utils import get_root_logger - logger = get_root_logger() - load_checkpoint(self, pretrained, strict=False, logger=logger) + warnings.warn('DeprecationWarning: pretrained is a deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) @staticmethod def _split_point_feats(points): diff --git a/mmdet3d/models/backbones/multi_backbone.py b/mmdet3d/models/backbones/multi_backbone.py index 77180fbfd7..33374e85e3 100644 --- a/mmdet3d/models/backbones/multi_backbone.py +++ b/mmdet3d/models/backbones/multi_backbone.py @@ -1,14 +1,15 @@ import copy import torch +import warnings from mmcv.cnn import ConvModule -from mmcv.runner import auto_fp16, load_checkpoint +from mmcv.runner import BaseModule, auto_fp16 from torch import nn as nn from mmdet.models import BACKBONES, build_backbone @BACKBONES.register_module() -class MultiBackbone(nn.Module): +class MultiBackbone(BaseModule): """MultiBackbone with different configs. 
Args: @@ -31,8 +32,10 @@ def __init__(self, norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01), act_cfg=dict(type='ReLU'), suffixes=('net0', 'net1'), + init_cfg=None, + pretrained=None, **kwargs): - super().__init__() + super().__init__(init_cfg=init_cfg) assert isinstance(backbones, dict) or isinstance(backbones, list) if isinstance(backbones, dict): backbones_list = [] @@ -77,14 +80,12 @@ def __init__(self, bias=True, inplace=True)) - def init_weights(self, pretrained=None): - """Initialize the weights of PointNet++ backbone.""" - # Do not initialize the conv layers - # to follow the original implementation + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be setting at the same time' if isinstance(pretrained, str): - from mmdet3d.utils import get_root_logger - logger = get_root_logger() - load_checkpoint(self, pretrained, strict=False, logger=logger) + warnings.warn('DeprecationWarning: pretrained is a deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) @auto_fp16() def forward(self, points): diff --git a/mmdet3d/models/backbones/nostem_regnet.py b/mmdet3d/models/backbones/nostem_regnet.py index 72a2216a48..75b4a5176d 100644 --- a/mmdet3d/models/backbones/nostem_regnet.py +++ b/mmdet3d/models/backbones/nostem_regnet.py @@ -57,8 +57,8 @@ class NoStemRegNet(RegNet): (1, 1008, 1, 1) """ - def __init__(self, arch, **kwargs): - super(NoStemRegNet, self).__init__(arch, **kwargs) + def __init__(self, arch, init_cfg=None, **kwargs): + super(NoStemRegNet, self).__init__(arch, init_cfg=init_cfg, **kwargs) def _make_stem_layer(self, in_channels, base_channels): """Override the original function that do not initialize a stem layer diff --git a/mmdet3d/models/backbones/pointnet2_sa_msg.py b/mmdet3d/models/backbones/pointnet2_sa_msg.py index 6048da8501..8e1f083ab8 100644 --- a/mmdet3d/models/backbones/pointnet2_sa_msg.py +++ b/mmdet3d/models/backbones/pointnet2_sa_msg.py @@ -56,8 +56,9 @@ def __init__(self, type='PointSAModuleMSG', pool_mod='max', use_xyz=True, - normalize_xyz=False)): - super().__init__() + normalize_xyz=False), + init_cfg=None): + super().__init__(init_cfg=init_cfg) self.num_sa = len(sa_channels) self.out_indices = out_indices assert max(out_indices) < self.num_sa diff --git a/mmdet3d/models/backbones/pointnet2_sa_ssg.py b/mmdet3d/models/backbones/pointnet2_sa_ssg.py index 93cae65f43..fea5e3d403 100644 --- a/mmdet3d/models/backbones/pointnet2_sa_ssg.py +++ b/mmdet3d/models/backbones/pointnet2_sa_ssg.py @@ -43,8 +43,9 @@ def __init__(self, type='PointSAModule', pool_mod='max', use_xyz=True, - normalize_xyz=True)): - super().__init__() + normalize_xyz=True), + init_cfg=None): + super().__init__(init_cfg=init_cfg) self.num_sa = len(sa_channels) self.num_fp = len(fp_channels) diff --git a/mmdet3d/models/backbones/second.py b/mmdet3d/models/backbones/second.py index 7dd7aca922..01e853248a 100644 --- a/mmdet3d/models/backbones/second.py +++ b/mmdet3d/models/backbones/second.py @@ -1,12 +1,13 @@ +import warnings from mmcv.cnn import build_conv_layer, build_norm_layer -from mmcv.runner import load_checkpoint +from mmcv.runner import BaseModule from torch import nn as nn from mmdet.models import BACKBONES @BACKBONES.register_module() -class SECOND(nn.Module): +class SECOND(BaseModule): """Backbone network for SECOND/PointPillars/PartA2/MVXNet. 
Args: @@ -24,8 +25,10 @@ def __init__(self, layer_nums=[3, 5, 5], layer_strides=[2, 2, 2], norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), - conv_cfg=dict(type='Conv2d', bias=False)): - super(SECOND, self).__init__() + conv_cfg=dict(type='Conv2d', bias=False), + init_cfg=None, + pretrained=None): + super(SECOND, self).__init__(init_cfg=init_cfg) assert len(layer_strides) == len(layer_nums) assert len(out_channels) == len(layer_nums) @@ -61,14 +64,14 @@ def __init__(self, self.blocks = nn.ModuleList(blocks) - def init_weights(self, pretrained=None): - """Initialize weights of the 2D backbone.""" - # Do not initialize the conv layers - # to follow the original implementation + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be setting at the same time' if isinstance(pretrained, str): - from mmdet3d.utils import get_root_logger - logger = get_root_logger() - load_checkpoint(self, pretrained, strict=False, logger=logger) + warnings.warn('DeprecationWarning: pretrained is a deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + else: + self.init_cfg = dict(type='Kaiming', layer='Conv2d') def forward(self, x): """Forward function. diff --git a/mmdet3d/models/builder.py b/mmdet3d/models/builder.py index 6d1b448ee8..af5839ae50 100644 --- a/mmdet3d/models/builder.py +++ b/mmdet3d/models/builder.py @@ -1,43 +1,46 @@ import warnings +from mmcv.cnn import MODELS as MMCV_MODELS from mmcv.utils import Registry from mmdet.models.builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, - ROI_EXTRACTORS, SHARED_HEADS, build) + ROI_EXTRACTORS, SHARED_HEADS) from mmseg.models.builder import SEGMENTORS -VOXEL_ENCODERS = Registry('voxel_encoder') -MIDDLE_ENCODERS = Registry('middle_encoder') -FUSION_LAYERS = Registry('fusion_layer') +MODELS = Registry('models', parent=MMCV_MODELS) + +VOXEL_ENCODERS = MODELS +MIDDLE_ENCODERS = MODELS +FUSION_LAYERS = MODELS def build_backbone(cfg): """Build backbone.""" - return build(cfg, BACKBONES) + return BACKBONES.build(cfg) def build_neck(cfg): """Build neck.""" - return build(cfg, NECKS) + return NECKS.build(cfg) def build_roi_extractor(cfg): """Build RoI feature extractor.""" - return build(cfg, ROI_EXTRACTORS) + return ROI_EXTRACTORS.build(cfg) def build_shared_head(cfg): """Build shared head of detector.""" - return build(cfg, SHARED_HEADS) + return SHARED_HEADS.build(cfg) def build_head(cfg): """Build head.""" - return build(cfg, HEADS) + return HEADS.build(cfg) def build_loss(cfg): """Build loss function.""" - return build(cfg, LOSSES) + return LOSSES.build(cfg) def build_detector(cfg, train_cfg=None, test_cfg=None): @@ -50,7 +53,8 @@ def build_detector(cfg, train_cfg=None, test_cfg=None): 'train_cfg specified in both outer field and model field ' assert cfg.get('test_cfg') is None or test_cfg is None, \ 'test_cfg specified in both outer field and model field ' - return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) + return DETECTORS.build( + cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) def build_segmentor(cfg, train_cfg=None, test_cfg=None): @@ -63,7 +67,8 @@ def build_segmentor(cfg, train_cfg=None, test_cfg=None): 'train_cfg specified in both outer field and model field ' assert cfg.get('test_cfg') is None or test_cfg is None, \ 'test_cfg specified in both outer field and model field ' - return build(cfg, SEGMENTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) + return SEGMENTORS.build( + cfg, 
default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) def build_model(cfg, train_cfg=None, test_cfg=None): @@ -80,14 +85,14 @@ def build_model(cfg, train_cfg=None, test_cfg=None): def build_voxel_encoder(cfg): """Build voxel encoder.""" - return build(cfg, VOXEL_ENCODERS) + return VOXEL_ENCODERS.build(cfg) def build_middle_encoder(cfg): """Build middle level encoder.""" - return build(cfg, MIDDLE_ENCODERS) + return MIDDLE_ENCODERS.build(cfg) def build_fusion_layer(cfg): """Build fusion layer.""" - return build(cfg, FUSION_LAYERS) + return FUSION_LAYERS.build(cfg) diff --git a/mmdet3d/models/decode_heads/decode_head.py b/mmdet3d/models/decode_heads/decode_head.py index b44d0c50c6..f339f574c3 100644 --- a/mmdet3d/models/decode_heads/decode_head.py +++ b/mmdet3d/models/decode_heads/decode_head.py @@ -1,12 +1,12 @@ from abc import ABCMeta, abstractmethod from mmcv.cnn import normal_init -from mmcv.runner import auto_fp16, force_fp32 +from mmcv.runner import BaseModule, auto_fp16, force_fp32 from torch import nn as nn from mmseg.models.builder import build_loss -class Base3DDecodeHead(nn.Module, metaclass=ABCMeta): +class Base3DDecodeHead(BaseModule, metaclass=ABCMeta): """Base class for BaseDecodeHead. Args: @@ -37,8 +37,9 @@ def __init__(self, use_sigmoid=False, class_weight=None, loss_weight=1.0), - ignore_index=255): - super(Base3DDecodeHead, self).__init__() + ignore_index=255, + init_cfg=None): + super(Base3DDecodeHead, self).__init__(init_cfg=init_cfg) self.channels = channels self.num_classes = num_classes self.dropout_ratio = dropout_ratio @@ -57,6 +58,7 @@ def __init__(self, def init_weights(self): """Initialize weights of classification layer.""" + super().init_weights() normal_init(self.conv_seg, mean=0, std=0.01) @auto_fp16() diff --git a/mmdet3d/models/dense_heads/anchor3d_head.py b/mmdet3d/models/dense_heads/anchor3d_head.py index d55b871664..59c79129a2 100644 --- a/mmdet3d/models/dense_heads/anchor3d_head.py +++ b/mmdet3d/models/dense_heads/anchor3d_head.py @@ -1,7 +1,6 @@ import numpy as np import torch -from mmcv.cnn import bias_init_with_prob, normal_init -from mmcv.runner import force_fp32 +from mmcv.runner import BaseModule, force_fp32 from torch import nn as nn from mmdet3d.core import (PseudoSampler, box3d_multiclass_nms, limit_period, @@ -14,7 +13,7 @@ @HEADS.register_module() -class Anchor3DHead(nn.Module, AnchorTrainMixin): +class Anchor3DHead(BaseModule, AnchorTrainMixin): """Anchor head for SECOND/PointPillars/MVXNet/PartA2. 
Args: @@ -67,8 +66,9 @@ def __init__(self, loss_weight=1.0), loss_bbox=dict( type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), - loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2)): - super().__init__() + loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2), + init_cfg=None): + super().__init__(init_cfg=init_cfg) self.in_channels = in_channels self.num_classes = num_classes self.feat_channels = feat_channels @@ -103,6 +103,14 @@ def __init__(self, self._init_layers() self._init_assigner_sampler() + if init_cfg is None: + self.init_cfg = dict( + type='Normal', + layer='Conv2d', + std=0.01, + override=dict( + type='Normal', name='conv_cls', std=0.01, bias_prob=0.01)) + def _init_assigner_sampler(self): """Initialize the target assigner and sampler of the head.""" if self.train_cfg is None: @@ -129,12 +137,6 @@ def _init_layers(self): self.conv_dir_cls = nn.Conv2d(self.feat_channels, self.num_anchors * 2, 1) - def init_weights(self): - """Initialize the weights of head.""" - bias_cls = bias_init_with_prob(0.01) - normal_init(self.conv_cls, std=0.01, bias=bias_cls) - normal_init(self.conv_reg, std=0.01) - def forward_single(self, x): """Forward function on a single-scale feature map. diff --git a/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py b/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py index 7dea2e3f07..b16ae50a61 100644 --- a/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py +++ b/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py @@ -109,8 +109,9 @@ def __init__( conv_cfg=None, norm_cfg=None, train_cfg=None, - test_cfg=None): - super(AnchorFreeMono3DHead, self).__init__() + test_cfg=None, + init_cfg=None): + super(AnchorFreeMono3DHead, self).__init__(init_cfg=init_cfg) self.num_classes = num_classes self.cls_out_channels = num_classes self.in_channels = in_channels diff --git a/mmdet3d/models/dense_heads/base_conv_bbox_head.py b/mmdet3d/models/dense_heads/base_conv_bbox_head.py index ea81288fc6..9958c93d32 100644 --- a/mmdet3d/models/dense_heads/base_conv_bbox_head.py +++ b/mmdet3d/models/dense_heads/base_conv_bbox_head.py @@ -1,12 +1,13 @@ from mmcv.cnn import ConvModule from mmcv.cnn.bricks import build_conv_layer +from mmcv.runner import BaseModule from torch import nn as nn from mmdet.models.builder import HEADS @HEADS.register_module() -class BaseConvBboxHead(nn.Module): +class BaseConvBboxHead(BaseModule): r"""More general bbox head, with shared conv layers and two optional separated branches. @@ -28,9 +29,11 @@ def __init__(self, norm_cfg=dict(type='BN1d'), act_cfg=dict(type='ReLU'), bias='auto', + init_cfg=None, *args, **kwargs): - super(BaseConvBboxHead, self).__init__(*args, **kwargs) + super(BaseConvBboxHead, self).__init__( + init_cfg=init_cfg, *args, **kwargs) assert in_channels > 0 assert num_cls_out_channels > 0 assert num_reg_out_channels > 0 @@ -98,10 +101,6 @@ def _add_conv_branch(self, in_channels, conv_channels): inplace=True)) return conv_layers - def init_weights(self): - # conv layers are already initialized by ConvModule - pass - def forward(self, feats): """Forward. 
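The `Anchor3DHead` hunk above replaces the removed `bias_init_with_prob`/`normal_init` calls with a default `init_cfg`. Below is a sketch of how that `override` entry behaves on a hypothetical two-branch head; `ToyAnchorHead` and the channel numbers are made up, while `conv_cls`/`conv_reg` mirror the names in the diff.

```python
from mmcv.runner import BaseModule
from torch import nn as nn


class ToyAnchorHead(BaseModule):  # hypothetical, for illustration only

    def __init__(self, init_cfg=None):
        if init_cfg is None:
            # all Conv2d layers get Normal(std=0.01); the override also sets
            # the classification bias from bias_prob, as the removed
            # bias_init_with_prob(0.01) call used to do
            init_cfg = dict(
                type='Normal',
                layer='Conv2d',
                std=0.01,
                override=dict(
                    type='Normal', name='conv_cls', std=0.01, bias_prob=0.01))
        super(ToyAnchorHead, self).__init__(init_cfg=init_cfg)
        self.conv_cls = nn.Conv2d(64, 2, 1)   # classification branch
        self.conv_reg = nn.Conv2d(64, 7, 1)   # regression branch


head = ToyAnchorHead()
head.init_weights()
# conv_cls.bias is now about log(0.01 / 0.99), roughly -4.6; conv_reg gets
# Normal(std=0.01) weights and a zero bias.
```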
diff --git a/mmdet3d/models/dense_heads/base_mono3d_dense_head.py b/mmdet3d/models/dense_heads/base_mono3d_dense_head.py index bff464def9..05815acad4 100644 --- a/mmdet3d/models/dense_heads/base_mono3d_dense_head.py +++ b/mmdet3d/models/dense_heads/base_mono3d_dense_head.py @@ -1,12 +1,12 @@ from abc import ABCMeta, abstractmethod -from torch import nn as nn +from mmcv.runner import BaseModule -class BaseMono3DDenseHead(nn.Module, metaclass=ABCMeta): +class BaseMono3DDenseHead(BaseModule, metaclass=ABCMeta): """Base class for Monocular 3D DenseHeads.""" - def __init__(self): - super(BaseMono3DDenseHead, self).__init__() + def __init__(self, init_cfg=None): + super(BaseMono3DDenseHead, self).__init__(init_cfg=init_cfg) @abstractmethod def loss(self, **kwargs): diff --git a/mmdet3d/models/dense_heads/centerpoint_head.py b/mmdet3d/models/dense_heads/centerpoint_head.py index adf92df26e..98ab23a588 100644 --- a/mmdet3d/models/dense_heads/centerpoint_head.py +++ b/mmdet3d/models/dense_heads/centerpoint_head.py @@ -1,8 +1,8 @@ import copy import numpy as np import torch -from mmcv.cnn import ConvModule, build_conv_layer, kaiming_init -from mmcv.runner import force_fp32 +from mmcv.cnn import ConvModule, build_conv_layer +from mmcv.runner import BaseModule, force_fp32 from torch import nn from mmdet3d.core import (circle_nms, draw_heatmap_gaussian, gaussian_radius, @@ -15,7 +15,7 @@ @HEADS.register_module() -class SeparateHead(nn.Module): +class SeparateHead(BaseModule): """SeparateHead for CenterHead. Args: @@ -42,9 +42,11 @@ def __init__(self, conv_cfg=dict(type='Conv2d'), norm_cfg=dict(type='BN2d'), bias='auto', + init_cfg=None, **kwargs): - super(SeparateHead, self).__init__() - + assert init_cfg is None, 'To prevent abnormal initialization ' \ + 'behavior, init_cfg is not allowed to be set' + super(SeparateHead, self).__init__(init_cfg=init_cfg) self.heads = heads self.init_bias = init_bias for head in self.heads: @@ -78,15 +80,15 @@ def __init__(self, self.__setattr__(head, conv_layers) + if init_cfg is None: + self.init_cfg = dict(type='Kaiming', layer='Conv2d') + def init_weights(self): """Initialize weights.""" + super().init_weights() for head in self.heads: if head == 'heatmap': self.__getattr__(head)[-1].bias.data.fill_(self.init_bias) - else: - for m in self.__getattr__(head).modules(): - if isinstance(m, nn.Conv2d): - kaiming_init(m) def forward(self, x): """Forward function for SepHead. @@ -119,7 +121,7 @@ def forward(self, x): @HEADS.register_module() -class DCNSeparateHead(nn.Module): +class DCNSeparateHead(BaseModule): r"""DCNSeparateHead for CenterHead. .. code-block:: none @@ -154,8 +156,11 @@ def __init__(self, conv_cfg=dict(type='Conv2d'), norm_cfg=dict(type='BN2d'), bias='auto', + init_cfg=None, **kwargs): - super(DCNSeparateHead, self).__init__() + assert init_cfg is None, 'To prevent abnormal initialization ' \ + 'behavior, init_cfg is not allowed to be set' + super(DCNSeparateHead, self).__init__(init_cfg=init_cfg) if 'heatmap' in heads: heads.pop('heatmap') # feature adaptation with dcn @@ -192,11 +197,13 @@ def __init__(self, head_conv=head_conv, final_kernel=final_kernel, bias=bias) + if init_cfg is None: + self.init_cfg = dict(type='Kaiming', layer='Conv2d') def init_weights(self): """Initialize weights.""" + super().init_weights() self.cls_head[-1].bias.data.fill_(self.init_bias) - self.task_head.init_weights() def forward(self, x): """Forward function for DCNSepHead. 
@@ -232,7 +239,7 @@ def forward(self, x): @HEADS.register_module() -class CenterHead(nn.Module): +class CenterHead(BaseModule): """CenterHead for CenterPoint. Args: @@ -280,8 +287,11 @@ def __init__(self, conv_cfg=dict(type='Conv2d'), norm_cfg=dict(type='BN2d'), bias='auto', - norm_bbox=True): - super(CenterHead, self).__init__() + norm_bbox=True, + init_cfg=None): + assert init_cfg is None, 'To prevent abnormal initialization ' \ + 'behavior, init_cfg is not allowed to be set' + super(CenterHead, self).__init__(init_cfg=init_cfg) num_classes = [len(t['class_names']) for t in tasks] self.class_names = [t['class_names'] for t in tasks] @@ -316,11 +326,6 @@ def __init__(self, in_channels=share_conv_channel, heads=heads, num_cls=num_cls) self.task_heads.append(builder.build_head(separate_head)) - def init_weights(self): - """Initialize weights.""" - for task_head in self.task_heads: - task_head.init_weights() - def forward_single(self, x): """Forward function for CenterPoint. diff --git a/mmdet3d/models/dense_heads/fcos_mono3d_head.py b/mmdet3d/models/dense_heads/fcos_mono3d_head.py index ad2c19c761..6d99e3b6f9 100644 --- a/mmdet3d/models/dense_heads/fcos_mono3d_head.py +++ b/mmdet3d/models/dense_heads/fcos_mono3d_head.py @@ -74,6 +74,7 @@ def __init__(self, loss_weight=1.0), norm_cfg=dict(type='GN', num_groups=32, requires_grad=True), centerness_branch=(64, ), + init_cfg=None, **kwargs): self.regress_ranges = regress_ranges self.center_sampling = center_sampling @@ -90,6 +91,7 @@ def __init__(self, loss_dir=loss_dir, loss_attr=loss_attr, norm_cfg=norm_cfg, + init_cfg=init_cfg, **kwargs) self.loss_centerness = build_loss(loss_centerness) diff --git a/mmdet3d/models/dense_heads/free_anchor3d_head.py b/mmdet3d/models/dense_heads/free_anchor3d_head.py index 633c635901..90bcd00e5f 100644 --- a/mmdet3d/models/dense_heads/free_anchor3d_head.py +++ b/mmdet3d/models/dense_heads/free_anchor3d_head.py @@ -32,8 +32,9 @@ def __init__(self, bbox_thr=0.6, gamma=2.0, alpha=0.5, + init_cfg=None, **kwargs): - super().__init__(**kwargs) + super().__init__(init_cfg=init_cfg, **kwargs) self.pre_anchor_topk = pre_anchor_topk self.bbox_thr = bbox_thr self.gamma = gamma diff --git a/mmdet3d/models/dense_heads/parta2_rpn_head.py b/mmdet3d/models/dense_heads/parta2_rpn_head.py index d45d9a5508..24492aec60 100644 --- a/mmdet3d/models/dense_heads/parta2_rpn_head.py +++ b/mmdet3d/models/dense_heads/parta2_rpn_head.py @@ -75,12 +75,13 @@ def __init__(self, loss_weight=1.0), loss_bbox=dict( type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), - loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2)): + loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2), + init_cfg=None): super().__init__(num_classes, in_channels, train_cfg, test_cfg, feat_channels, use_direction_classifier, anchor_generator, assigner_per_size, assign_per_class, diff_rad_by_sin, dir_offset, dir_limit_offset, - bbox_coder, loss_cls, loss_bbox, loss_dir) + bbox_coder, loss_cls, loss_bbox, loss_dir, init_cfg) @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds')) def loss(self, diff --git a/mmdet3d/models/dense_heads/shape_aware_head.py b/mmdet3d/models/dense_heads/shape_aware_head.py index 771f011b26..082f46ab6c 100644 --- a/mmdet3d/models/dense_heads/shape_aware_head.py +++ b/mmdet3d/models/dense_heads/shape_aware_head.py @@ -1,6 +1,8 @@ import numpy as np import torch -from mmcv.cnn import ConvModule, bias_init_with_prob, normal_init +import warnings +from mmcv.cnn import ConvModule +from mmcv.runner import BaseModule from torch import 
nn as nn from mmdet3d.core import box3d_multiclass_nms, limit_period, xywhr2xyxyr @@ -11,7 +13,7 @@ @HEADS.register_module() -class BaseShapeHead(nn.Module): +class BaseShapeHead(BaseModule): """Base Shape-aware Head in Shape Signature Network. Note: @@ -48,8 +50,9 @@ def __init__(self, use_direction_classifier=True, conv_cfg=dict(type='Conv2d'), norm_cfg=dict(type='BN2d'), - bias=False): - super().__init__() + bias=False, + init_cfg=None): + super().__init__(init_cfg=init_cfg) self.num_cls = num_cls self.num_base_anchors = num_base_anchors self.use_direction_classifier = use_direction_classifier @@ -84,15 +87,36 @@ def __init__(self, if use_direction_classifier: self.conv_dir_cls = nn.Conv2d(out_channels, num_base_anchors * 2, 1) - - def init_weights(self): - """Initialize weights.""" - bias_cls = bias_init_with_prob(0.01) - # shared conv layers have already been initialized by ConvModule - normal_init(self.conv_cls, std=0.01, bias=bias_cls) - normal_init(self.conv_reg, std=0.01) - if self.use_direction_classifier: - normal_init(self.conv_dir_cls, std=0.01, bias=bias_cls) + if init_cfg is None: + if use_direction_classifier: + self.init_cfg = dict( + type='Kaiming', + layer='Conv2d', + override=[ + dict(type='Normal', name='conv_reg', std=0.01), + dict( + type='Normal', + name='conv_cls', + std=0.01, + bias_prob=0.01), + dict( + type='Normal', + name='conv_dir_cls', + std=0.01, + bias_prob=0.01) + ]) + else: + self.init_cfg = dict( + type='Kaiming', + layer='Conv2d', + override=[ + dict(type='Normal', name='conv_reg', std=0.01), + dict( + type='Normal', + name='conv_cls', + std=0.01, + bias_prob=0.01) + ]) def forward(self, x): """Forward function for SmallHead. @@ -149,10 +173,21 @@ class ShapeAwareHead(Anchor3DHead): :class:`Anchor3DHead`. """ - def __init__(self, tasks, assign_per_class=True, **kwargs): + def __init__(self, tasks, assign_per_class=True, init_cfg=None, **kwargs): self.tasks = tasks self.featmap_sizes = [] - super().__init__(assign_per_class=assign_per_class, **kwargs) + super().__init__( + assign_per_class=assign_per_class, init_cfg=init_cfg, **kwargs) + + def init_weights(self): + if not self._is_init: + for m in self.heads: + if hasattr(m, 'init_weights'): + m.init_weights() + self._is_init = True + else: + warnings.warn(f'init_weights of {self.__class__.__name__} has ' + f'been called more than once.') def _init_layers(self): """Initialize neural network layers of the head.""" @@ -175,11 +210,6 @@ def _init_layers(self): self.heads.append(build_head(branch)) cls_ptr += task['num_class'] - def init_weights(self): - """Initialize the weights of head.""" - for head in self.heads: - head.init_weights() - def forward_single(self, x): """Forward function on a single-scale feature map. 
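`SeparateHead` and `DCNSeparateHead` above keep a hand-written `init_weights()`, but it now calls `super().init_weights()` first so the declarative `init_cfg` is applied before the special heatmap bias is filled. A minimal sketch of that pattern; `ToyHeatmapHead` and the `-2.19` bias value are illustrative, not taken from the diff.

```python
from mmcv.runner import BaseModule
from torch import nn as nn


class ToyHeatmapHead(BaseModule):  # made-up module for illustration

    def __init__(self, init_bias=-2.19, init_cfg=None):
        super(ToyHeatmapHead, self).__init__(init_cfg=init_cfg)
        self.init_bias = init_bias
        self.heatmap = nn.Conv2d(64, 1, 1)
        if init_cfg is None:
            self.init_cfg = dict(type='Kaiming', layer='Conv2d')

    def init_weights(self):
        # apply init_cfg (Kaiming on Conv2d) first ...
        super().init_weights()
        # ... then keep the special heatmap bias, as the removed
        # kaiming_init loop plus bias fill used to do
        self.heatmap.bias.data.fill_(self.init_bias)


ToyHeatmapHead().init_weights()
```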
diff --git a/mmdet3d/models/dense_heads/ssd_3d_head.py b/mmdet3d/models/dense_heads/ssd_3d_head.py index 20651794a5..aea2879391 100644 --- a/mmdet3d/models/dense_heads/ssd_3d_head.py +++ b/mmdet3d/models/dense_heads/ssd_3d_head.py @@ -58,7 +58,8 @@ def __init__(self, dir_res_loss=None, size_res_loss=None, corner_loss=None, - vote_loss=None): + vote_loss=None, + init_cfg=None): super(SSD3DHead, self).__init__( num_classes, bbox_coder, @@ -75,7 +76,8 @@ def __init__(self, dir_res_loss=dir_res_loss, size_class_loss=None, size_res_loss=size_res_loss, - semantic_loss=None) + semantic_loss=None, + init_cfg=init_cfg) self.corner_loss = build_loss(corner_loss) self.vote_loss = build_loss(vote_loss) diff --git a/mmdet3d/models/dense_heads/vote_head.py b/mmdet3d/models/dense_heads/vote_head.py index 2bb2262d4c..e03fe44413 100644 --- a/mmdet3d/models/dense_heads/vote_head.py +++ b/mmdet3d/models/dense_heads/vote_head.py @@ -1,7 +1,6 @@ import numpy as np import torch -from mmcv.runner import force_fp32 -from torch import nn as nn +from mmcv.runner import BaseModule, force_fp32 from torch.nn import functional as F from mmdet3d.core.post_processing import aligned_3d_nms @@ -15,7 +14,7 @@ @HEADS.register_module() -class VoteHead(nn.Module): +class VoteHead(BaseModule): r"""Bbox head of `Votenet `_. Args: @@ -56,8 +55,9 @@ def __init__(self, size_class_loss=None, size_res_loss=None, semantic_loss=None, - iou_loss=None): - super(VoteHead, self).__init__() + iou_loss=None, + init_cfg=None): + super(VoteHead, self).__init__(init_cfg=init_cfg) self.num_classes = num_classes self.train_cfg = train_cfg self.test_cfg = test_cfg @@ -92,10 +92,6 @@ def __init__(self, num_cls_out_channels=self._get_cls_out_channels(), num_reg_out_channels=self._get_reg_out_channels()) - def init_weights(self): - """Initialize weights of VoteHead.""" - pass - def _get_cls_out_channels(self): """Return the channel number of classification outputs.""" # Class numbers (k) + objectness (2) diff --git a/mmdet3d/models/detectors/centerpoint.py b/mmdet3d/models/detectors/centerpoint.py index 7705ce1a94..a7f0796f27 100644 --- a/mmdet3d/models/detectors/centerpoint.py +++ b/mmdet3d/models/detectors/centerpoint.py @@ -23,13 +23,14 @@ def __init__(self, img_rpn_head=None, train_cfg=None, test_cfg=None, - pretrained=None): + pretrained=None, + init_cfg=None): super(CenterPoint, self).__init__(pts_voxel_layer, pts_voxel_encoder, pts_middle_encoder, pts_fusion_layer, img_backbone, pts_backbone, img_neck, pts_neck, pts_bbox_head, img_roi_head, img_rpn_head, - train_cfg, test_cfg, pretrained) + train_cfg, test_cfg, pretrained, init_cfg) def extract_pts_feat(self, pts, img_feats, img_metas): """Extract features of points.""" diff --git a/mmdet3d/models/detectors/dynamic_voxelnet.py b/mmdet3d/models/detectors/dynamic_voxelnet.py index a7241ac43e..c7defdff14 100644 --- a/mmdet3d/models/detectors/dynamic_voxelnet.py +++ b/mmdet3d/models/detectors/dynamic_voxelnet.py @@ -20,7 +20,8 @@ def __init__(self, bbox_head=None, train_cfg=None, test_cfg=None, - pretrained=None): + pretrained=None, + init_cfg=None): super(DynamicVoxelNet, self).__init__( voxel_layer=voxel_layer, voxel_encoder=voxel_encoder, @@ -31,7 +32,7 @@ def __init__(self, train_cfg=train_cfg, test_cfg=test_cfg, pretrained=pretrained, - ) + init_cfg=init_cfg) def extract_feat(self, points, img_metas): """Extract features from points.""" diff --git a/mmdet3d/models/detectors/h3dnet.py b/mmdet3d/models/detectors/h3dnet.py index 11d3077ec7..36fcb3bc45 100644 --- 
a/mmdet3d/models/detectors/h3dnet.py +++ b/mmdet3d/models/detectors/h3dnet.py @@ -19,7 +19,8 @@ def __init__(self, roi_head=None, train_cfg=None, test_cfg=None, - pretrained=None): + pretrained=None, + init_cfg=None): super(H3DNet, self).__init__( backbone=backbone, neck=neck, @@ -27,7 +28,8 @@ def __init__(self, roi_head=roi_head, train_cfg=train_cfg, test_cfg=test_cfg, - pretrained=pretrained) + pretrained=pretrained, + init_cfg=init_cfg) def forward_train(self, points, diff --git a/mmdet3d/models/detectors/imvotenet.py b/mmdet3d/models/detectors/imvotenet.py index 53b980d429..950e067560 100644 --- a/mmdet3d/models/detectors/imvotenet.py +++ b/mmdet3d/models/detectors/imvotenet.py @@ -1,6 +1,6 @@ import numpy as np import torch -from torch import nn as nn +import warnings from mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d from mmdet3d.models.utils import MLP @@ -69,9 +69,10 @@ def __init__(self, num_sampled_seed=None, train_cfg=None, test_cfg=None, - pretrained=None): + pretrained=None, + init_cfg=None): - super(ImVoteNet, self).__init__() + super(ImVoteNet, self).__init__(init_cfg=init_cfg) # point branch if pts_backbone is not None: @@ -134,11 +135,7 @@ def __init__(self, self.train_cfg = train_cfg self.test_cfg = test_cfg - self.init_weights(pretrained=pretrained) - def init_weights(self, pretrained=None): - """Initialize model weights.""" - super(ImVoteNet, self).init_weights(pretrained) if pretrained is None: img_pretrained = None pts_pretrained = None @@ -148,29 +145,26 @@ def init_weights(self, pretrained=None): else: raise ValueError( f'pretrained should be a dict, got {type(pretrained)}') - if self.with_img_backbone: - self.img_backbone.init_weights(pretrained=img_pretrained) - if self.with_img_neck: - if isinstance(self.img_neck, nn.Sequential): - for m in self.img_neck: - m.init_weights() - else: - self.img_neck.init_weights() + if self.with_img_backbone: + if img_pretrained is not None: + warnings.warn('DeprecationWarning: pretrained is a deprecated \ + key, please consider using init_cfg') + self.img_backbone.init_cfg = dict( + type='Pretrained', checkpoint=img_pretrained) if self.with_img_roi_head: - self.img_roi_head.init_weights(img_pretrained) - if self.with_img_rpn: - self.img_rpn_head.init_weights() + if img_pretrained is not None: + warnings.warn('DeprecationWarning: pretrained is a deprecated \ + key, please consider using init_cfg') + self.img_roi_head.init_cfg = dict( + type='Pretrained', checkpoint=img_pretrained) + if self.with_pts_backbone: - self.pts_backbone.init_weights(pretrained=pts_pretrained) - if self.with_pts_bbox: - self.pts_bbox_head.init_weights() - if self.with_pts_neck: - if isinstance(self.pts_neck, nn.Sequential): - for m in self.pts_neck: - m.init_weights() - else: - self.pts_neck.init_weights() + if img_pretrained is not None: + warnings.warn('DeprecationWarning: pretrained is a deprecated \ + key, please consider using init_cfg') + self.pts_backbone.init_cfg = dict( + type='Pretrained', checkpoint=pts_pretrained) def freeze_img_branch_params(self): """Freeze all image branch parameters.""" diff --git a/mmdet3d/models/detectors/imvoxelnet.py b/mmdet3d/models/detectors/imvoxelnet.py index aac2882b25..2b70be7a54 100644 --- a/mmdet3d/models/detectors/imvoxelnet.py +++ b/mmdet3d/models/detectors/imvoxelnet.py @@ -19,8 +19,9 @@ def __init__(self, anchor_generator, train_cfg=None, test_cfg=None, - pretrained=None): - super().__init__() + pretrained=None, + init_cfg=None): + super().__init__(init_cfg=init_cfg) self.backbone = 
build_backbone(backbone) self.neck = build_neck(neck) self.neck_3d = build_neck(neck_3d) @@ -31,20 +32,6 @@ def __init__(self, self.anchor_generator = build_anchor_generator(anchor_generator) self.train_cfg = train_cfg self.test_cfg = test_cfg - self.init_weights(pretrained=pretrained) - - def init_weights(self, pretrained=None): - """Initialize the weights in detector. - - Args: - pretrained (str, optional): Path to pre-trained weights. - Defaults to None. - """ - super().init_weights(pretrained) - self.backbone.init_weights(pretrained=pretrained) - self.neck.init_weights() - self.neck_3d.init_weights() - self.bbox_head.init_weights() def extract_feat(self, img, img_metas): """Extract 3d features from the backbone -> fpn -> 3d projection. diff --git a/mmdet3d/models/detectors/mvx_two_stage.py b/mmdet3d/models/detectors/mvx_two_stage.py index 8297f2e746..fae6d673a5 100644 --- a/mmdet3d/models/detectors/mvx_two_stage.py +++ b/mmdet3d/models/detectors/mvx_two_stage.py @@ -1,9 +1,9 @@ import mmcv import torch +import warnings from mmcv.parallel import DataContainer as DC from mmcv.runner import force_fp32 from os import path as osp -from torch import nn as nn from torch.nn import functional as F from mmdet3d.core import (Box3DMode, Coord3DMode, bbox3d2result, @@ -33,8 +33,9 @@ def __init__(self, img_rpn_head=None, train_cfg=None, test_cfg=None, - pretrained=None): - super(MVXTwoStageDetector, self).__init__() + pretrained=None, + init_cfg=None): + super(MVXTwoStageDetector, self).__init__(init_cfg=init_cfg) if pts_voxel_layer: self.pts_voxel_layer = Voxelization(**pts_voxel_layer) @@ -69,11 +70,7 @@ def __init__(self, self.train_cfg = train_cfg self.test_cfg = test_cfg - self.init_weights(pretrained=pretrained) - def init_weights(self, pretrained=None): - """Initialize model weights.""" - super(MVXTwoStageDetector, self).init_weights(pretrained) if pretrained is None: img_pretrained = None pts_pretrained = None @@ -83,23 +80,26 @@ def init_weights(self, pretrained=None): else: raise ValueError( f'pretrained should be a dict, got {type(pretrained)}') - if self.with_img_backbone: - self.img_backbone.init_weights(pretrained=img_pretrained) - if self.with_pts_backbone: - self.pts_backbone.init_weights(pretrained=pts_pretrained) - if self.with_img_neck: - if isinstance(self.img_neck, nn.Sequential): - for m in self.img_neck: - m.init_weights() - else: - self.img_neck.init_weights() + if self.with_img_backbone: + if img_pretrained is not None: + warnings.warn('DeprecationWarning: pretrained is a deprecated \ + key, please consider using init_cfg') + self.img_backbone.init_cfg = dict( + type='Pretrained', checkpoint=img_pretrained) if self.with_img_roi_head: - self.img_roi_head.init_weights(img_pretrained) - if self.with_img_rpn: - self.img_rpn_head.init_weights() - if self.with_pts_bbox: - self.pts_bbox_head.init_weights() + if img_pretrained is not None: + warnings.warn('DeprecationWarning: pretrained is a deprecated \ + key, please consider using init_cfg') + self.img_roi_head.init_cfg = dict( + type='Pretrained', checkpoint=img_pretrained) + + if self.with_pts_backbone: + if img_pretrained is not None: + warnings.warn('DeprecationWarning: pretrained is a deprecated \ + key, please consider using init_cfg') + self.pts_backbone.init_cfg = dict( + type='Pretrained', checkpoint=pts_pretrained) @property def with_img_shared_head(self): diff --git a/mmdet3d/models/detectors/parta2.py b/mmdet3d/models/detectors/parta2.py index da3e0d1697..8c907d2653 100644 --- a/mmdet3d/models/detectors/parta2.py +++ 
b/mmdet3d/models/detectors/parta2.py @@ -24,7 +24,8 @@ def __init__(self, roi_head=None, train_cfg=None, test_cfg=None, - pretrained=None): + pretrained=None, + init_cfg=None): super(PartA2, self).__init__( backbone=backbone, neck=neck, @@ -33,7 +34,7 @@ def __init__(self, train_cfg=train_cfg, test_cfg=test_cfg, pretrained=pretrained, - ) + init_cfg=init_cfg) self.voxel_layer = Voxelization(**voxel_layer) self.voxel_encoder = builder.build_voxel_encoder(voxel_encoder) self.middle_encoder = builder.build_middle_encoder(middle_encoder) diff --git a/mmdet3d/models/detectors/single_stage.py b/mmdet3d/models/detectors/single_stage.py index e77f4280cd..abd8a665d0 100644 --- a/mmdet3d/models/detectors/single_stage.py +++ b/mmdet3d/models/detectors/single_stage.py @@ -1,5 +1,3 @@ -from torch import nn as nn - from mmdet.models import DETECTORS, build_backbone, build_head, build_neck from .base import Base3DDetector @@ -28,8 +26,9 @@ def __init__(self, bbox_head=None, train_cfg=None, test_cfg=None, + init_cfg=None, pretrained=None): - super(SingleStage3DDetector, self).__init__() + super(SingleStage3DDetector, self).__init__(init_cfg) self.backbone = build_backbone(backbone) if neck is not None: self.neck = build_neck(neck) @@ -38,19 +37,6 @@ def __init__(self, self.bbox_head = build_head(bbox_head) self.train_cfg = train_cfg self.test_cfg = test_cfg - self.init_weights(pretrained=pretrained) - - def init_weights(self, pretrained=None): - """Initialize weights of detector.""" - super(SingleStage3DDetector, self).init_weights(pretrained) - self.backbone.init_weights(pretrained=pretrained) - if self.with_neck: - if isinstance(self.neck, nn.Sequential): - for m in self.neck: - m.init_weights() - else: - self.neck.init_weights() - self.bbox_head.init_weights() def extract_feat(self, points, img_metas=None): """Directly extract features from the backbone+neck. 
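Several modules in this diff (`SECOND`, `BasePointNet`, `MultiBackbone`, `PartAggregationROIHead`) keep accepting the old `pretrained` argument only to translate it into `init_cfg`. A self-contained sketch of that deprecation shim, using a hypothetical `ToyModule`:

```python
import warnings

from mmcv.runner import BaseModule


class ToyModule(BaseModule):  # hypothetical; only the shim mirrors the diff

    def __init__(self, init_cfg=None, pretrained=None):
        super(ToyModule, self).__init__(init_cfg)
        assert not (init_cfg and pretrained), \
            'init_cfg and pretrained cannot be set at the same time'
        if isinstance(pretrained, str):
            warnings.warn('DeprecationWarning: "pretrained" is deprecated, '
                          'please use "init_cfg" instead')
            # the legacy argument is rewritten into the declarative form
            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
```

The checkpoint is then loaded when `init_weights()` runs, rather than inside the constructor as in the removed code.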
diff --git a/mmdet3d/models/detectors/ssd3dnet.py b/mmdet3d/models/detectors/ssd3dnet.py index 733514a579..93d42d41e9 100644 --- a/mmdet3d/models/detectors/ssd3dnet.py +++ b/mmdet3d/models/detectors/ssd3dnet.py @@ -14,10 +14,12 @@ def __init__(self, bbox_head=None, train_cfg=None, test_cfg=None, + init_cfg=None, pretrained=None): super(SSD3DNet, self).__init__( backbone=backbone, bbox_head=bbox_head, train_cfg=train_cfg, test_cfg=test_cfg, + init_cfg=init_cfg, pretrained=pretrained) diff --git a/mmdet3d/models/detectors/votenet.py b/mmdet3d/models/detectors/votenet.py index 6c389d5bc8..7b3af721cc 100644 --- a/mmdet3d/models/detectors/votenet.py +++ b/mmdet3d/models/detectors/votenet.py @@ -14,12 +14,14 @@ def __init__(self, bbox_head=None, train_cfg=None, test_cfg=None, + init_cfg=None, pretrained=None): super(VoteNet, self).__init__( backbone=backbone, bbox_head=bbox_head, train_cfg=train_cfg, test_cfg=test_cfg, + init_cfg=None, pretrained=pretrained) def forward_train(self, diff --git a/mmdet3d/models/detectors/voxelnet.py b/mmdet3d/models/detectors/voxelnet.py index 1b4841582c..b3ed47ed0a 100644 --- a/mmdet3d/models/detectors/voxelnet.py +++ b/mmdet3d/models/detectors/voxelnet.py @@ -22,6 +22,7 @@ def __init__(self, bbox_head=None, train_cfg=None, test_cfg=None, + init_cfg=None, pretrained=None): super(VoxelNet, self).__init__( backbone=backbone, @@ -29,8 +30,8 @@ def __init__(self, bbox_head=bbox_head, train_cfg=train_cfg, test_cfg=test_cfg, - pretrained=pretrained, - ) + init_cfg=init_cfg, + pretrained=pretrained) self.voxel_layer = Voxelization(**voxel_layer) self.voxel_encoder = builder.build_voxel_encoder(voxel_encoder) self.middle_encoder = builder.build_middle_encoder(middle_encoder) diff --git a/mmdet3d/models/fusion_layers/point_fusion.py b/mmdet3d/models/fusion_layers/point_fusion.py index 88d2af0aa6..0739b6585d 100644 --- a/mmdet3d/models/fusion_layers/point_fusion.py +++ b/mmdet3d/models/fusion_layers/point_fusion.py @@ -1,5 +1,6 @@ import torch -from mmcv.cnn import ConvModule, xavier_init +from mmcv.cnn import ConvModule +from mmcv.runner import BaseModule from torch import nn as nn from torch.nn import functional as F @@ -96,7 +97,7 @@ def point_sample( @FUSION_LAYERS.register_module() -class PointFusion(nn.Module): +class PointFusion(BaseModule): """Fuse image features from multi-scale features. Args: @@ -138,6 +139,7 @@ def __init__(self, conv_cfg=None, norm_cfg=None, act_cfg=None, + init_cfg=None, activate_out=True, fuse_out=False, dropout_ratio=0, @@ -145,7 +147,7 @@ def __init__(self, align_corners=True, padding_mode='zeros', lateral_conv=True): - super(PointFusion, self).__init__() + super(PointFusion, self).__init__(init_cfg=init_cfg) if isinstance(img_levels, int): img_levels = [img_levels] if isinstance(img_channels, int): @@ -200,14 +202,11 @@ def __init__(self, nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01), nn.ReLU(inplace=False)) - self.init_weights() - - # default init_weights for conv(msra) and norm in ConvModule - def init_weights(self): - """Initialize the weights of modules.""" - for m in self.modules(): - if isinstance(m, (nn.Conv2d, nn.Linear)): - xavier_init(m, distribution='uniform') + if init_cfg is None: + self.init_cfg = [ + dict(type='Xavier', layer='Conv2d', distribution='uniform'), + dict(type='Xavier', layer='Linear', distribution='uniform') + ] def forward(self, img_feats, pts, pts_feats, img_metas): """Forward function. 
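The `PointFusion` hunk that ends just above swaps a manual `xavier_init` loop over `Conv2d`/`Linear` modules for a list-style `init_cfg`. A minimal sketch under the same assumptions; `ToyFusion` and its layer sizes are invented.

```python
from mmcv.runner import BaseModule
from torch import nn as nn


class ToyFusion(BaseModule):  # illustrative stand-in for a fusion layer

    def __init__(self, init_cfg=None):
        super(ToyFusion, self).__init__(init_cfg=init_cfg)
        self.lateral_conv = nn.Conv2d(256, 128, 1)
        self.fuse_fc = nn.Linear(128, 128)
        if init_cfg is None:
            # one entry per layer type, applied in order by BaseModule
            self.init_cfg = [
                dict(type='Xavier', layer='Conv2d', distribution='uniform'),
                dict(type='Xavier', layer='Linear', distribution='uniform')
            ]


ToyFusion().init_weights()  # both Xavier rules are applied declaratively
```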
diff --git a/mmdet3d/models/middle_encoders/sparse_unet.py b/mmdet3d/models/middle_encoders/sparse_unet.py index e4a4206b9b..34e1b2b6d5 100644 --- a/mmdet3d/models/middle_encoders/sparse_unet.py +++ b/mmdet3d/models/middle_encoders/sparse_unet.py @@ -1,6 +1,5 @@ import torch -from mmcv.runner import auto_fp16 -from torch import nn as nn +from mmcv.runner import BaseModule, auto_fp16 from mmdet3d.ops import SparseBasicBlock, make_sparse_convmodule from mmdet3d.ops import spconv as spconv @@ -8,7 +7,7 @@ @MIDDLE_ENCODERS.register_module() -class SparseUNet(nn.Module): +class SparseUNet(BaseModule): r"""SparseUNet for PartA^2. See the `paper `_ for more details. @@ -40,8 +39,9 @@ def __init__(self, 1)), decoder_channels=((64, 64, 64), (64, 64, 32), (32, 32, 16), (16, 16, 16)), - decoder_paddings=((1, 0), (1, 0), (0, 0), (0, 1))): - super().__init__() + decoder_paddings=((1, 0), (1, 0), (0, 0), (0, 1)), + init_cfg=None): + super().__init__(init_cfg=init_cfg) self.sparse_shape = sparse_shape self.in_channels = in_channels self.order = order diff --git a/mmdet3d/models/necks/second_fpn.py b/mmdet3d/models/necks/second_fpn.py index 4acd318aa7..d3f30e2a06 100644 --- a/mmdet3d/models/necks/second_fpn.py +++ b/mmdet3d/models/necks/second_fpn.py @@ -1,15 +1,14 @@ import numpy as np import torch -from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer, - constant_init, is_norm, kaiming_init) -from mmcv.runner import auto_fp16 +from mmcv.cnn import build_conv_layer, build_norm_layer, build_upsample_layer +from mmcv.runner import BaseModule, auto_fp16 from torch import nn as nn from mmdet.models import NECKS @NECKS.register_module() -class SECONDFPN(nn.Module): +class SECONDFPN(BaseModule): """FPN used in SECOND/PointPillars/PartA2/MVXNet. 
Args: @@ -30,10 +29,11 @@ def __init__(self, norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), upsample_cfg=dict(type='deconv', bias=False), conv_cfg=dict(type='Conv2d', bias=False), - use_conv_for_no_stride=False): + use_conv_for_no_stride=False, + init_cfg=None): # if for GroupNorm, # cfg is dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True) - super(SECONDFPN, self).__init__() + super(SECONDFPN, self).__init__(init_cfg=init_cfg) assert len(out_channels) == len(upsample_strides) == len(in_channels) self.in_channels = in_channels self.out_channels = out_channels @@ -64,13 +64,11 @@ def __init__(self, deblocks.append(deblock) self.deblocks = nn.ModuleList(deblocks) - def init_weights(self): - """Initialize weights of FPN.""" - for m in self.modules(): - if isinstance(m, nn.Conv2d): - kaiming_init(m) - elif is_norm(m): - constant_init(m, 1) + if init_cfg is None: + self.init_cfg = [ + dict(type='Kaiming', layer='ConvTranspose2d'), + dict(type='Constant', layer='NaiveSyncBatchNorm2d', val=1.0) + ] @auto_fp16() def forward(self, x): diff --git a/mmdet3d/models/roi_heads/base_3droi_head.py b/mmdet3d/models/roi_heads/base_3droi_head.py index 21809a7f32..3ab0645cce 100644 --- a/mmdet3d/models/roi_heads/base_3droi_head.py +++ b/mmdet3d/models/roi_heads/base_3droi_head.py @@ -1,8 +1,8 @@ from abc import ABCMeta, abstractmethod -from torch import nn as nn +from mmcv.runner import BaseModule -class Base3DRoIHead(nn.Module, metaclass=ABCMeta): +class Base3DRoIHead(BaseModule, metaclass=ABCMeta): """Base class for 3d RoIHeads.""" def __init__(self, @@ -10,8 +10,10 @@ def __init__(self, mask_roi_extractor=None, mask_head=None, train_cfg=None, - test_cfg=None): - super(Base3DRoIHead, self).__init__() + test_cfg=None, + pretrained=None, + init_cfg=None): + super(Base3DRoIHead, self).__init__(init_cfg=init_cfg) self.train_cfg = train_cfg self.test_cfg = test_cfg @@ -33,11 +35,6 @@ def with_mask(self): """bool: whether the RoIHead has mask head""" return hasattr(self, 'mask_head') and self.mask_head is not None - @abstractmethod - def init_weights(self, pretrained): - """Initialize the module with pre-trained weights.""" - pass - @abstractmethod def init_bbox_head(self): """Initialize the box head.""" diff --git a/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py b/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py index 2af212acc4..fc4bfc3b01 100644 --- a/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py +++ b/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py @@ -1,5 +1,6 @@ import torch from mmcv.cnn import ConvModule +from mmcv.runner import BaseModule from torch import nn as nn from torch.nn import functional as F @@ -13,7 +14,7 @@ @HEADS.register_module() -class H3DBboxHead(nn.Module): +class H3DBboxHead(BaseModule): r"""Bbox head of `H3DNet `_. Args: @@ -80,8 +81,9 @@ def __init__(self, cues_objectness_loss=None, cues_semantic_loss=None, proposal_objectness_loss=None, - primitive_center_loss=None): - super(H3DBboxHead, self).__init__() + primitive_center_loss=None, + init_cfg=None): + super(H3DBboxHead, self).__init__(init_cfg=init_cfg) self.num_classes = num_classes self.train_cfg = train_cfg self.test_cfg = test_cfg @@ -198,15 +200,6 @@ def __init__(self, bbox_coder['num_sizes'] * 4 + self.num_classes) self.bbox_pred.append(nn.Conv1d(prev_channel, conv_out_channel, 1)) - def init_weights(self, pretrained=None): - """Initialize the weights in detector. - - Args: - pretrained (str, optional): Path to pre-trained weights. - Defaults to None. 
- """ - pass - def forward(self, feats_dict, sample_mod): """Forward pass. diff --git a/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py b/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py index 27457bcb23..79d9cce95c 100644 --- a/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py +++ b/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py @@ -1,6 +1,7 @@ import numpy as np import torch -from mmcv.cnn import ConvModule, normal_init, xavier_init +from mmcv.cnn import ConvModule, normal_init +from mmcv.runner import BaseModule from torch import nn as nn from mmdet3d.core.bbox.structures import (LiDARInstance3DBoxes, @@ -14,7 +15,7 @@ @HEADS.register_module() -class PartA2BboxHead(nn.Module): +class PartA2BboxHead(BaseModule): """PartA2 RoI head. Args: @@ -67,8 +68,9 @@ def __init__(self, type='CrossEntropyLoss', use_sigmoid=True, reduction='none', - loss_weight=1.0)): - super(PartA2BboxHead, self).__init__() + loss_weight=1.0), + init_cfg=None): + super(PartA2BboxHead, self).__init__(init_cfg=init_cfg) self.num_classes = num_classes self.with_corner_loss = with_corner_loss self.bbox_coder = build_bbox_coder(bbox_coder) @@ -220,14 +222,14 @@ def __init__(self, self.conv_reg = nn.Sequential(*reg_layers) - self.init_weights() + if init_cfg is None: + self.init_cfg = dict( + type='Xavier', + layer=['Conv2d', 'Conv1d'], + distribution='uniform') def init_weights(self): - """Initialize weights of the bbox head.""" - for m in self.modules(): - if isinstance(m, (nn.Conv2d, nn.Conv1d)): - xavier_init(m, distribution='uniform') - + super().init_weights() normal_init(self.conv_reg[-1].conv, mean=0, std=0.001) def forward(self, seg_feats, part_feats): diff --git a/mmdet3d/models/roi_heads/h3d_roi_head.py b/mmdet3d/models/roi_heads/h3d_roi_head.py index b96a50248a..4792083857 100644 --- a/mmdet3d/models/roi_heads/h3d_roi_head.py +++ b/mmdet3d/models/roi_heads/h3d_roi_head.py @@ -19,20 +19,21 @@ def __init__(self, primitive_list, bbox_head=None, train_cfg=None, - test_cfg=None): + test_cfg=None, + pretrained=None, + init_cfg=None): super(H3DRoIHead, self).__init__( - bbox_head=bbox_head, train_cfg=train_cfg, test_cfg=test_cfg) + bbox_head=bbox_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + pretrained=pretrained, + init_cfg=init_cfg) # Primitive module assert len(primitive_list) == 3 self.primitive_z = build_head(primitive_list[0]) self.primitive_xy = build_head(primitive_list[1]) self.primitive_line = build_head(primitive_list[2]) - def init_weights(self, pretrained): - """Initialize weights, skip since ``H3DROIHead`` does not need to - initialize weights.""" - pass - def init_mask_head(self): """Initialize mask head, skip since ``H3DROIHead`` does not have one.""" diff --git a/mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py b/mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py index 8ed6271960..6f7d114bde 100644 --- a/mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py +++ b/mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py @@ -1,4 +1,5 @@ import torch +from mmcv.runner import BaseModule from torch import nn as nn from torch.nn import functional as F @@ -9,7 +10,7 @@ @HEADS.register_module() -class PointwiseSemanticHead(nn.Module): +class PointwiseSemanticHead(BaseModule): """Semantic segmentation head for point-wise segmentation. Predict point-wise segmentation and part regression results for PartA2. 
@@ -28,6 +29,7 @@ def __init__(self,
                  num_classes=3,
                  extra_width=0.2,
                  seg_score_thr=0.3,
+                 init_cfg=None,
                  loss_seg=dict(
                      type='FocalLoss',
                      use_sigmoid=True,
@@ -39,7 +41,7 @@ def __init__(self,
                      type='CrossEntropyLoss',
                      use_sigmoid=True,
                      loss_weight=1.0)):
-        super(PointwiseSemanticHead, self).__init__()
+        super(PointwiseSemanticHead, self).__init__(init_cfg=init_cfg)
         self.extra_width = extra_width
         self.num_classes = num_classes
         self.seg_score_thr = seg_score_thr
diff --git a/mmdet3d/models/roi_heads/mask_heads/primitive_head.py b/mmdet3d/models/roi_heads/mask_heads/primitive_head.py
index 0cac77e2c8..147219a28d 100644
--- a/mmdet3d/models/roi_heads/mask_heads/primitive_head.py
+++ b/mmdet3d/models/roi_heads/mask_heads/primitive_head.py
@@ -1,5 +1,6 @@
 import torch
 from mmcv.cnn import ConvModule
+from mmcv.runner import BaseModule
 from torch import nn as nn
 from torch.nn import functional as F
 
@@ -11,7 +12,7 @@
 
 @HEADS.register_module()
-class PrimitiveHead(nn.Module):
+class PrimitiveHead(BaseModule):
     r"""Primitive head of `H3DNet `_.
 
     Args:
@@ -52,8 +53,9 @@ def __init__(self,
                  objectness_loss=None,
                  center_loss=None,
                  semantic_reg_loss=None,
-                 semantic_cls_loss=None):
-        super(PrimitiveHead, self).__init__()
+                 semantic_cls_loss=None,
+                 init_cfg=None):
+        super(PrimitiveHead, self).__init__(init_cfg=init_cfg)
         assert primitive_mode in ['z', 'xy', 'line']
         # The dimension of primitive semantic information.
         self.num_dims = num_dims
@@ -110,10 +112,6 @@ def __init__(self,
         self.conv_pred.add_module('conv_out',
                                   nn.Conv1d(prev_channel, conv_out_channel, 1))
 
-    def init_weights(self):
-        """Initialize weights of VoteHead."""
-        pass
-
     def forward(self, feats_dict, sample_mod):
         """Forward pass.
diff --git a/mmdet3d/models/roi_heads/part_aggregation_roi_head.py b/mmdet3d/models/roi_heads/part_aggregation_roi_head.py
index 2d83199227..2cc3e33687 100644
--- a/mmdet3d/models/roi_heads/part_aggregation_roi_head.py
+++ b/mmdet3d/models/roi_heads/part_aggregation_roi_head.py
@@ -1,3 +1,4 @@
+import warnings
 from torch.nn import functional as F
 
 from mmdet3d.core import AssignResult
@@ -29,9 +30,14 @@ def __init__(self,
                  part_roi_extractor=None,
                  bbox_head=None,
                  train_cfg=None,
-                 test_cfg=None):
+                 test_cfg=None,
+                 pretrained=None,
+                 init_cfg=None):
         super(PartAggregationROIHead, self).__init__(
-            bbox_head=bbox_head, train_cfg=train_cfg, test_cfg=test_cfg)
+            bbox_head=bbox_head,
+            train_cfg=train_cfg,
+            test_cfg=test_cfg,
+            init_cfg=init_cfg)
         self.num_classes = num_classes
         assert semantic_head is not None
         self.semantic_head = build_head(semantic_head)
@@ -43,10 +49,12 @@ def __init__(self,
 
         self.init_assigner_sampler()
 
-    def init_weights(self, pretrained):
-        """Initialize weights, skip since ``PartAggregationROIHead`` does not
-        need to initialize weights."""
-        pass
+        assert not (init_cfg and pretrained), \
+            'init_cfg and pretrained cannot be setting at the same time'
+        if isinstance(pretrained, str):
+            warnings.warn('DeprecationWarning: pretrained is a deprecated, '
+                          'please use "init_cfg" instead')
+            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
 
     def init_mask_head(self):
         """Initialize mask head, skip since ``PartAggregationROIHead`` does not
diff --git a/mmdet3d/models/roi_heads/roi_extractors/single_roiaware_extractor.py b/mmdet3d/models/roi_heads/roi_extractors/single_roiaware_extractor.py
index 01ef222266..a859e8b30f 100644
--- a/mmdet3d/models/roi_heads/roi_extractors/single_roiaware_extractor.py
+++ b/mmdet3d/models/roi_heads/roi_extractors/single_roiaware_extractor.py
@@ -1,12 +1,12 @@
 import torch
-from torch import nn as nn
+from mmcv.runner import BaseModule
 
 from mmdet3d import ops
 from mmdet.models.builder import ROI_EXTRACTORS
 
 
 @ROI_EXTRACTORS.register_module()
-class Single3DRoIAwareExtractor(nn.Module):
+class Single3DRoIAwareExtractor(BaseModule):
     """Point-wise roi-aware Extractor.
 
     Extract Point-wise roi features.
@@ -15,8 +15,8 @@ class Single3DRoIAwareExtractor(nn.Module):
         roi_layer (dict): The config of roi layer.
     """
 
-    def __init__(self, roi_layer=None):
-        super(Single3DRoIAwareExtractor, self).__init__()
+    def __init__(self, roi_layer=None, init_cfg=None):
+        super(Single3DRoIAwareExtractor, self).__init__(init_cfg=init_cfg)
         self.roi_layer = self.build_roi_layers(roi_layer)
 
     def build_roi_layers(self, layer_cfg):
diff --git a/mmdet3d/models/segmentors/encoder_decoder.py b/mmdet3d/models/segmentors/encoder_decoder.py
index b17dfc61c1..4841b4ba17 100644
--- a/mmdet3d/models/segmentors/encoder_decoder.py
+++ b/mmdet3d/models/segmentors/encoder_decoder.py
@@ -25,8 +25,9 @@ def __init__(self,
                  auxiliary_head=None,
                  train_cfg=None,
                  test_cfg=None,
-                 pretrained=None):
-        super(EncoderDecoder3D, self).__init__()
+                 pretrained=None,
+                 init_cfg=None):
+        super(EncoderDecoder3D, self).__init__(init_cfg=init_cfg)
         self.backbone = build_backbone(backbone)
         if neck is not None:
             self.neck = build_neck(neck)
@@ -35,9 +36,6 @@ def __init__(self,
 
         self.train_cfg = train_cfg
         self.test_cfg = test_cfg
-
-        self.init_weights(pretrained=pretrained)
-
         assert self.with_decode_head, \
             '3D EncoderDecoder Segmentor should have a decode_head'
 
@@ -56,24 +54,6 @@ def _init_auxiliary_head(self, auxiliary_head):
         else:
             self.auxiliary_head = build_head(auxiliary_head)
 
-    def init_weights(self, pretrained=None):
-        """Initialize the weights in backbone and heads.
-
-        Args:
-            pretrained (str, optional): Path to pre-trained weights.
-                Defaults to None.
-        """
-
-        super(EncoderDecoder3D, self).init_weights(pretrained)
-        self.backbone.init_weights(pretrained=pretrained)
-        self.decode_head.init_weights()
-        if self.with_auxiliary_head:
-            if isinstance(self.auxiliary_head, nn.ModuleList):
-                for aux_head in self.auxiliary_head:
-                    aux_head.init_weights()
-            else:
-                self.auxiliary_head.init_weights()
-
     def extract_feat(self, points):
         """Extract features from points."""
         x = self.backbone(points)
diff --git a/mmdet3d/models/utils/mlp.py b/mmdet3d/models/utils/mlp.py
index 4bb91d8b33..ca3bf1b67f 100644
--- a/mmdet3d/models/utils/mlp.py
+++ b/mmdet3d/models/utils/mlp.py
@@ -1,8 +1,9 @@
 from mmcv.cnn import ConvModule
+from mmcv.runner import BaseModule
 from torch import nn as nn
 
 
-class MLP(nn.Module):
+class MLP(BaseModule):
     """A simple MLP module.
 
     Pass features (B, C, N) through an MLP.
@@ -25,8 +26,9 @@ def __init__(self,
                  conv_channels=(256, 256),
                  conv_cfg=dict(type='Conv1d'),
                  norm_cfg=dict(type='BN1d'),
-                 act_cfg=dict(type='ReLU')):
-        super().__init__()
+                 act_cfg=dict(type='ReLU'),
+                 init_cfg=None):
+        super().__init__(init_cfg=init_cfg)
         self.mlp = nn.Sequential()
         prev_channels = in_channel
         for i, conv_channel in enumerate(conv_channels):
diff --git a/mmdet3d/ops/pointnet_modules/point_fp_module.py b/mmdet3d/ops/pointnet_modules/point_fp_module.py
index eb9414d1c0..212705baf7 100644
--- a/mmdet3d/ops/pointnet_modules/point_fp_module.py
+++ b/mmdet3d/ops/pointnet_modules/point_fp_module.py
@@ -1,13 +1,13 @@
 import torch
 from mmcv.cnn import ConvModule
-from mmcv.runner import force_fp32
+from mmcv.runner import BaseModule, force_fp32
 from torch import nn as nn
 from typing import List
 
 from mmdet3d.ops import three_interpolate, three_nn
 
 
-class PointFPModule(nn.Module):
+class PointFPModule(BaseModule):
     """Point feature propagation module used in PointNets.
 
     Propagate the features from one set to another.
@@ -20,8 +20,9 @@ class PointFPModule(nn.Module):
 
     def __init__(self,
                  mlp_channels: List[int],
-                 norm_cfg: dict = dict(type='BN2d')):
-        super().__init__()
+                 norm_cfg: dict = dict(type='BN2d'),
+                 init_cfg=None):
+        super().__init__(init_cfg=init_cfg)
         self.fp16_enabled = False
         self.mlps = nn.Sequential()
         for i in range(len(mlp_channels) - 1):
diff --git a/tests/test_runtime/test_apis.py b/tests/test_runtime/test_apis.py
index 1a4c0a53aa..4513f30cf4 100644
--- a/tests/test_runtime/test_apis.py
+++ b/tests/test_runtime/test_apis.py
@@ -244,10 +244,13 @@ def test_show_result_meshlab():
 
 
 def test_inference_detector():
+    if not torch.cuda.is_available():
+        pytest.skip('test requires GPU and torch+cuda')
+
     pcd = 'tests/data/kitti/training/velodyne_reduced/000000.bin'
     detector_cfg = 'configs/pointpillars/hv_pointpillars_secfpn_' \
                    '6x8_160e_kitti-3d-3class.py'
-    detector = init_model(detector_cfg, device='cpu')
+    detector = init_model(detector_cfg, device='cuda:0')
     results = inference_detector(detector, pcd)
     bboxes_3d = results[0][0]['boxes_3d']
     scores_3d = results[0][0]['scores_3d']
diff --git a/tools/train.py b/tools/train.py
index c901575b53..622dbf6781 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -180,6 +180,7 @@ def main():
         cfg.model,
         train_cfg=cfg.get('train_cfg'),
         test_cfg=cfg.get('test_cfg'))
+    model.init_weights()
     logger.info(f'Model:\n{model}')
 
     datasets = [build_dataset(cfg.data.train)]
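
Note: the following is a minimal, self-contained sketch (not part of the diff above) of the `init_cfg` convention these changes adopt. `ToyNeck` is a hypothetical module used only for illustration; the pattern mirrors the `SECONDFPN` and `tools/train.py` hunks: modules inherit from `BaseModule`, declare a default `init_cfg` instead of overriding `init_weights()`, and the caller triggers initialization explicitly.

```python
import torch
from torch import nn
from mmcv.runner import BaseModule


class ToyNeck(BaseModule):
    """Hypothetical neck used only to illustrate the init_cfg workflow."""

    def __init__(self, in_channels=64, out_channels=128, init_cfg=None):
        super(ToyNeck, self).__init__(init_cfg=init_cfg)
        self.deconv = nn.ConvTranspose2d(
            in_channels, out_channels, kernel_size=2, stride=2)
        self.bn = nn.BatchNorm2d(out_channels)
        # As in the SECONDFPN change: declare a default init_cfg instead of
        # hand-writing an init_weights() override.
        if init_cfg is None:
            self.init_cfg = [
                dict(type='Kaiming', layer='ConvTranspose2d'),
                dict(type='Constant', layer='BatchNorm2d', val=1.0)
            ]

    def forward(self, x):
        return self.bn(self.deconv(x))


neck = ToyNeck()
# As in the tools/train.py change, initialization is now triggered explicitly
# by the caller rather than inside the constructor.
neck.init_weights()
out = neck(torch.rand(2, 64, 16, 16))
```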