diff --git a/docs/source/models.rst b/docs/source/models.rst
index 66ebf0e211d..9443565bda0 100644
--- a/docs/source/models.rst
+++ b/docs/source/models.rst
@@ -22,7 +22,8 @@ architectures for image classification:
 - `Inception`_ v3
 - `GoogLeNet`_
 - `ShuffleNet`_ v2
-- `MobileNet`_ v2
+- `MobileNetV2`_
+- `MobileNetV3`_
 - `ResNeXt`_
 - `Wide ResNet`_
 - `MNASNet`_
@@ -40,7 +41,9 @@ You can construct a model with random weights by calling its constructor:
     inception = models.inception_v3()
     googlenet = models.googlenet()
     shufflenet = models.shufflenet_v2_x1_0()
-    mobilenet = models.mobilenet_v2()
+    mobilenet_v2 = models.mobilenet_v2()
+    mobilenet_v3_large = models.mobilenet_v3_large()
+    mobilenet_v3_small = models.mobilenet_v3_small()
     resnext50_32x4d = models.resnext50_32x4d()
     wide_resnet50_2 = models.wide_resnet50_2()
     mnasnet = models.mnasnet1_0()
@@ -59,7 +62,8 @@ These can be constructed by passing ``pretrained=True``:
     inception = models.inception_v3(pretrained=True)
     googlenet = models.googlenet(pretrained=True)
     shufflenet = models.shufflenet_v2_x1_0(pretrained=True)
-    mobilenet = models.mobilenet_v2(pretrained=True)
+    mobilenet_v2 = models.mobilenet_v2(pretrained=True)
+    mobilenet_v3_large = models.mobilenet_v3_large(pretrained=True)
     resnext50_32x4d = models.resnext50_32x4d(pretrained=True)
     wide_resnet50_2 = models.wide_resnet50_2(pretrained=True)
     mnasnet = models.mnasnet1_0(pretrained=True)
@@ -137,6 +141,7 @@ Inception v3 22.55 6.44
 GoogleNet                         30.22          10.47
 ShuffleNet V2                     30.64          11.68
 MobileNet V2                      28.12          9.71
+MobileNet V3 Large                25.96          8.66
 ResNeXt-50-32x4d                  22.38          6.30
 ResNeXt-101-32x8d                 20.69          5.47
 Wide ResNet-50-2                  21.49          5.91
@@ -153,7 +158,8 @@ MNASNet 1.0 26.49 8.456
 .. _Inception: https://arxiv.org/abs/1512.00567
 .. _GoogLeNet: https://arxiv.org/abs/1409.4842
 .. _ShuffleNet: https://arxiv.org/abs/1807.11164
-.. _MobileNet: https://arxiv.org/abs/1801.04381
+.. _MobileNetV2: https://arxiv.org/abs/1801.04381
+.. _MobileNetV3: https://arxiv.org/abs/1905.02244
 .. _ResNeXt: https://arxiv.org/abs/1611.05431
 .. _MNASNet: https://arxiv.org/abs/1807.11626
 
@@ -231,6 +237,12 @@ MobileNet v2
 
 .. autofunction:: mobilenet_v2
 
+MobileNet v3
+-------------
+
+.. autofunction:: mobilenet_v3_large
+.. autofunction:: mobilenet_v3_small
+
 ResNext
 -------
 
@@ -351,6 +363,7 @@ Network box AP mask AP keypoint AP
 ================================ ======= ======== ===========
 Faster R-CNN ResNet-50 FPN       37.0    -        -
 RetinaNet ResNet-50 FPN          36.4    -        -
+RetinaNet MobileNetV3-Large FPN  25.6    -        -
 Mask R-CNN ResNet-50 FPN         37.9    34.6     -
 ================================ ======= ======== ===========
 
@@ -407,6 +420,7 @@ Network train time (s / it) test time (s / it) memory
 ============================== =================== ================== ===========
 Faster R-CNN ResNet-50 FPN      0.2288              0.0590             5.2
 RetinaNet ResNet-50 FPN         0.2514              0.0939             4.1
+RetinaNet MobileNetV3-Large FPN 0.0928              0.0547             1.4
 Mask R-CNN ResNet-50 FPN        0.2728              0.0903             5.4
 Keypoint R-CNN ResNet-50 FPN    0.3789              0.1242             6.8
 ============================== =================== ================== ===========
@@ -422,6 +436,7 @@ RetinaNet
 ------------
 
 .. autofunction:: torchvision.models.detection.retinanet_resnet50_fpn
+.. autofunction:: torchvision.models.detection.retinanet_mobilenet_v3_large_fpn
 
 
 Mask R-CNN
diff --git a/references/classification/README.md b/references/classification/README.md
index bd00f2c7dd8..d18ab17bf73 100644
--- a/references/classification/README.md
+++ b/references/classification/README.md
@@ -53,6 +53,16 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
     --lr-step-size 1 --lr-gamma 0.98
 ```
 
+
+### MobileNetV3 Large
+```
+python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
+    --model mobilenet_v3_large --epochs 600 --opt rmsprop --batch-size 128 --lr 0.064\
+    --wd 0.00001 --lr-step-size 2 --lr-gamma 0.973 --auto-augment imagenet --random-erase 0.2
+```
+
+Then we averaged the parameters of the last 3 checkpoints that improved the Acc@1. See [#3182](https://github.com/pytorch/vision/pull/3182) for details.
+
 ## Mixed precision training
 Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [NVIDIA Apex extension](https://github.com/NVIDIA/apex).
 
diff --git a/references/detection/README.md b/references/detection/README.md
index f89e8149a71..495a775df19 100644
--- a/references/detection/README.md
+++ b/references/detection/README.md
@@ -27,7 +27,8 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
     --lr-steps 16 22 --aspect-ratio-group-factor 3
 ```
 
-### RetinaNet
+
+### RetinaNet with ResNet50 FPN
 ```
 python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
     --dataset coco --model retinanet_resnet50_fpn --epochs 26\
@@ -35,6 +36,16 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
 ```
 
+### RetinaNet with MobileNetV3 Large FPN
+```
+python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
+    --dataset coco --model retinanet_mobilenet_v3_large_fpn --epochs 26 --lr-steps 16 22\
+    --aspect-ratio-group-factor 3 --lr 0.01
+```
+
+Then we averaged the parameters of the last 2 checkpoints that improved the AP. See [#3223](https://github.com/pytorch/vision/pull/3223) for details.
+
+
 ### Mask R-CNN
 ```
 python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
diff --git a/torchvision/models/detection/retinanet.py b/torchvision/models/detection/retinanet.py
index 26be5c7bfa4..8c33b74c29c 100644
--- a/torchvision/models/detection/retinanet.py
+++ b/torchvision/models/detection/retinanet.py
@@ -560,7 +560,7 @@ def forward(self, images, targets=None):
 # TODO: replace with pytorch links
 model_urls = {
     'retinanet_mobilenet_v3_large_fpn_coco':
-        'https://github.com/datumbox/torchvision-models/raw/main/retinanet_mobilenet_v3_large_fpn-41c847a4.pth',
+        'https://download.pytorch.org/models/retinanet_mobilenet_v3_large_fpn-41c847a4.pth',
     'retinanet_resnet50_fpn_coco':
         'https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth',
 }
diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py
index 27b9f7e10b8..671acbc4a57 100644
--- a/torchvision/models/mobilenetv3.py
+++ b/torchvision/models/mobilenetv3.py
@@ -14,7 +14,7 @@
 
 # TODO: add pretrained
 model_urls = {
-    "mobilenet_v3_large": "https://github.com/datumbox/torchvision-models/raw/main/mobilenet_v3_large-8738ca79.pth",
+    "mobilenet_v3_large": "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth",
     "mobilenet_v3_small": None,
 }
 
@@ -197,7 +197,7 @@ def _mobilenet_v3(
     **kwargs: Any
 ):
     model = MobileNetV3(inverted_residual_setting, last_channel, **kwargs)
-    if pretrained:
+    if pretrained and model_urls[arch] is not None:
         state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
         model.load_state_dict(state_dict)
     return model
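
Usage sketch (not part of the patch): the snippet below exercises the constructors this diff documents (`mobilenet_v3_large`, `mobilenet_v3_small`, and `torchvision.models.detection.retinanet_mobilenet_v3_large_fpn`) at this revision, assuming they follow the existing torchvision conventions shown above, where `pretrained=True` downloads the weights listed in `model_urls`. Since `model_urls["mobilenet_v3_small"]` is still `None`, that variant is only built with random weights here.

```python
import torch
import torchvision.models as models
from torchvision.models.detection import retinanet_mobilenet_v3_large_fpn

# Classification: MobileNetV3 Large with the ImageNet weights referenced in model_urls.
classifier = models.mobilenet_v3_large(pretrained=True)
classifier.eval()

# MobileNetV3 Small has no published checkpoint in this diff (its model_urls entry is None),
# so only random initialization is available for now.
classifier_small = models.mobilenet_v3_small()

# Detection: RetinaNet with a MobileNetV3-Large FPN backbone and COCO weights.
detector = retinanet_mobilenet_v3_large_fpn(pretrained=True)
detector.eval()

with torch.no_grad():
    logits = classifier(torch.rand(1, 3, 224, 224))    # ImageNet logits, shape [1, 1000]
    detections = detector([torch.rand(3, 320, 320)])   # list of dicts: boxes, labels, scores

print(logits.shape, detections[0]["boxes"].shape)
```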
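Both READMEs mention averaging the parameters of the last few checkpoints that improved the validation metric, but the patch does not show that step. The sketch below is a minimal, hypothetical illustration of such post-training weight averaging; the checkpoint filenames and the `"model"` key are assumptions, and the exact procedure used for the released weights is described in #3182 and #3223.

```python
import torch

# Hypothetical paths: the last few checkpoints that improved the validation metric.
checkpoint_paths = ["model_598.pth", "model_599.pth", "model_600.pth"]

# Assumption: each checkpoint is a dict storing the network weights under "model";
# adjust the key (or drop the indexing) to match how your checkpoints were saved.
state_dicts = [torch.load(path, map_location="cpu")["model"] for path in checkpoint_paths]

averaged = {}
for key in state_dicts[0]:
    values = [sd[key] for sd in state_dicts]
    if values[0].is_floating_point():
        # Average floating-point parameters and buffers element-wise.
        averaged[key] = torch.stack([v.float() for v in values]).mean(dim=0).to(values[0].dtype)
    else:
        # Integer buffers (e.g. num_batches_tracked) cannot be averaged meaningfully; keep the last.
        averaged[key] = values[-1]

torch.save({"model": averaged}, "model_averaged.pth")
```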