diff --git a/docs/source/models.rst b/docs/source/models.rst
index 66ebf0e211d..9443565bda0 100644
--- a/docs/source/models.rst
+++ b/docs/source/models.rst
@@ -22,7 +22,8 @@ architectures for image classification:
 - `Inception`_ v3
 - `GoogLeNet`_
 - `ShuffleNet`_ v2
-- `MobileNet`_ v2
+- `MobileNetV2`_
+- `MobileNetV3`_
 - `ResNeXt`_
 - `Wide ResNet`_
 - `MNASNet`_
@@ -40,7 +41,9 @@ You can construct a model with random weights by calling its constructor:
     inception = models.inception_v3()
     googlenet = models.googlenet()
     shufflenet = models.shufflenet_v2_x1_0()
-    mobilenet = models.mobilenet_v2()
+    mobilenet_v2 = models.mobilenet_v2()
+    mobilenet_v3_large = models.mobilenet_v3_large()
+    mobilenet_v3_small = models.mobilenet_v3_small()
     resnext50_32x4d = models.resnext50_32x4d()
     wide_resnet50_2 = models.wide_resnet50_2()
     mnasnet = models.mnasnet1_0()
@@ -59,7 +62,8 @@ These can be constructed by passing ``pretrained=True``:
     inception = models.inception_v3(pretrained=True)
     googlenet = models.googlenet(pretrained=True)
     shufflenet = models.shufflenet_v2_x1_0(pretrained=True)
-    mobilenet = models.mobilenet_v2(pretrained=True)
+    mobilenet_v2 = models.mobilenet_v2(pretrained=True)
+    mobilenet_v3_large = models.mobilenet_v3_large(pretrained=True)
     resnext50_32x4d = models.resnext50_32x4d(pretrained=True)
     wide_resnet50_2 = models.wide_resnet50_2(pretrained=True)
     mnasnet = models.mnasnet1_0(pretrained=True)
@@ -137,6 +141,7 @@ Inception v3 22.55 6.44
 GoogleNet                         30.22          10.47
 ShuffleNet V2                     30.64          11.68
 MobileNet V2                      28.12          9.71
+MobileNet V3 Large                25.96          8.66
 ResNeXt-50-32x4d                  22.38          6.30
 ResNeXt-101-32x8d                 20.69          5.47
 Wide ResNet-50-2                  21.49          5.91
@@ -153,7 +158,8 @@ MNASNet 1.0 26.49 8.456
 .. _Inception: https://arxiv.org/abs/1512.00567
 .. _GoogLeNet: https://arxiv.org/abs/1409.4842
 .. _ShuffleNet: https://arxiv.org/abs/1807.11164
-.. _MobileNet: https://arxiv.org/abs/1801.04381
+.. _MobileNetV2: https://arxiv.org/abs/1801.04381
+.. _MobileNetV3: https://arxiv.org/abs/1905.02244
 .. _ResNeXt: https://arxiv.org/abs/1611.05431
 .. _MNASNet: https://arxiv.org/abs/1807.11626
 
@@ -231,6 +237,12 @@ MobileNet v2
 
 .. autofunction:: mobilenet_v2
 
+MobileNet v3
+-------------
+
+.. autofunction:: mobilenet_v3_large
+.. autofunction:: mobilenet_v3_small
+
 ResNext
 -------
 
@@ -351,6 +363,7 @@ Network box AP mask AP keypoint AP
 ================================ ======= ======== ===========
 Faster R-CNN ResNet-50 FPN       37.0    -        -
 RetinaNet ResNet-50 FPN          36.4    -        -
+RetinaNet MobileNetV3-Large FPN  25.6    -        -
 Mask R-CNN ResNet-50 FPN         37.9    34.6     -
 ================================ ======= ======== ===========
 
@@ -407,6 +420,7 @@ Network train time (s / it) test time (s / it) memory
 ============================== =================== ================== ===========
 Faster R-CNN ResNet-50 FPN      0.2288              0.0590             5.2
 RetinaNet ResNet-50 FPN         0.2514              0.0939             4.1
+RetinaNet MobileNetV3-Large FPN 0.0928              0.0547             1.4
 Mask R-CNN ResNet-50 FPN        0.2728              0.0903             5.4
 Keypoint R-CNN ResNet-50 FPN    0.3789              0.1242             6.8
 ============================== =================== ================== ===========
@@ -422,6 +436,7 @@ RetinaNet
 ------------
 
 .. autofunction:: torchvision.models.detection.retinanet_resnet50_fpn
+.. autofunction:: torchvision.models.detection.retinanet_mobilenet_v3_large_fpn
 
 
 Mask R-CNN
diff --git a/references/classification/README.md b/references/classification/README.md
index bd00f2c7dd8..d18ab17bf73 100644
--- a/references/classification/README.md
+++ b/references/classification/README.md
@@ -53,6 +53,16 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
     --lr-step-size 1 --lr-gamma 0.98
 ```
 
+
+### MobileNetV3 Large
+```
+python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
+    --model mobilenet_v3_large --epochs 600 --opt rmsprop --batch-size 128 --lr 0.064\
+    --wd 0.00001 --lr-step-size 2 --lr-gamma 0.973 --auto-augment imagenet --random-erase 0.2
+```
+
+Then we averaged the parameters of the last 3 checkpoints that improved the Acc@1. See [#3182](https://github.com/pytorch/vision/pull/3182) for details.
+
 ## Mixed precision training
 Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [NVIDIA Apex extension](https://github.com/NVIDIA/apex).
 
diff --git a/references/detection/README.md b/references/detection/README.md
index f89e8149a71..495a775df19 100644
--- a/references/detection/README.md
+++ b/references/detection/README.md
@@ -27,7 +27,8 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
     --lr-steps 16 22 --aspect-ratio-group-factor 3
 ```
 
-### RetinaNet
+
+### RetinaNet with ResNet50 FPN
 ```
 python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
     --dataset coco --model retinanet_resnet50_fpn --epochs 26\
@@ -35,6 +36,16 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
 ```
 
+### RetinaNet with MobileNetV3 Large FPN
+```
+python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
+    --dataset coco --model retinanet_mobilenet_v3_large_fpn --epochs 26 --lr-steps 16 22\
+    --aspect-ratio-group-factor 3 --lr 0.01
+```
+
+Then we averaged the parameters of the last 2 checkpoints that improved the AP. See [#3223](https://github.com/pytorch/vision/pull/3223) for details.
+
+
 ### Mask R-CNN
 ```
 python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
diff --git a/torchvision/models/detection/retinanet.py b/torchvision/models/detection/retinanet.py
index 26be5c7bfa4..8c33b74c29c 100644
--- a/torchvision/models/detection/retinanet.py
+++ b/torchvision/models/detection/retinanet.py
@@ -560,7 +560,7 @@ def forward(self, images, targets=None):
 # TODO: replace with pytorch links
 model_urls = {
     'retinanet_mobilenet_v3_large_fpn_coco':
-        'https://github.com/datumbox/torchvision-models/raw/main/retinanet_mobilenet_v3_large_fpn-41c847a4.pth',
+        'https://download.pytorch.org/models/retinanet_mobilenet_v3_large_fpn-41c847a4.pth',
     'retinanet_resnet50_fpn_coco':
         'https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth',
 }
diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py
index 27b9f7e10b8..671acbc4a57 100644
--- a/torchvision/models/mobilenetv3.py
+++ b/torchvision/models/mobilenetv3.py
@@ -14,7 +14,7 @@
 
 # TODO: add pretrained
 model_urls = {
-    "mobilenet_v3_large": "https://github.com/datumbox/torchvision-models/raw/main/mobilenet_v3_large-8738ca79.pth",
+    "mobilenet_v3_large": "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth",
     "mobilenet_v3_small": None,
 }
 
@@ -197,7 +197,7 @@ def _mobilenet_v3(
     **kwargs: Any
 ):
     model = MobileNetV3(inverted_residual_setting, last_channel, **kwargs)
-    if pretrained:
+    if pretrained and model_urls[arch] is not None:
         state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
         model.load_state_dict(state_dict)
     return model
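
Usage sketch (not part of the patch): the snippet below exercises the constructors this diff documents (`mobilenet_v3_large`, `mobilenet_v3_small`, and `torchvision.models.detection.retinanet_mobilenet_v3_large_fpn`) at this revision, assuming they follow the existing torchvision conventions shown above, where `pretrained=True` downloads the weights listed in `model_urls`. Since `model_urls["mobilenet_v3_small"]` is still `None`, that variant is only built with random weights here.

```python
import torch
import torchvision.models as models
from torchvision.models.detection import retinanet_mobilenet_v3_large_fpn

# Classification: MobileNetV3 Large with the ImageNet weights referenced in model_urls.
classifier = models.mobilenet_v3_large(pretrained=True)
classifier.eval()

# MobileNetV3 Small has no published checkpoint in this diff (its model_urls entry is None),
# so only random initialization is available for now.
classifier_small = models.mobilenet_v3_small()

# Detection: RetinaNet with a MobileNetV3-Large FPN backbone and COCO weights.
detector = retinanet_mobilenet_v3_large_fpn(pretrained=True)
detector.eval()

with torch.no_grad():
    logits = classifier(torch.rand(1, 3, 224, 224))    # ImageNet logits, shape [1, 1000]
    detections = detector([torch.rand(3, 320, 320)])   # list of dicts: boxes, labels, scores

print(logits.shape, detections[0]["boxes"].shape)
```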
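Both READMEs mention averaging the parameters of the last few checkpoints that improved the validation metric, but the patch does not show that step. The sketch below is a minimal, hypothetical illustration of such post-training weight averaging; the checkpoint filenames and the `"model"` key are assumptions, and the exact procedure used for the released weights is described in #3182 and #3223.

```python
import torch

# Hypothetical paths: the last few checkpoints that improved the validation metric.
checkpoint_paths = ["model_598.pth", "model_599.pth", "model_600.pth"]

# Assumption: each checkpoint is a dict storing the network weights under "model";
# adjust the key (or drop the indexing) to match how your checkpoints were saved.
state_dicts = [torch.load(path, map_location="cpu")["model"] for path in checkpoint_paths]

averaged = {}
for key in state_dicts[0]:
    values = [sd[key] for sd in state_dicts]
    if values[0].is_floating_point():
        # Average floating-point parameters and buffers element-wise.
        averaged[key] = torch.stack([v.float() for v in values]).mean(dim=0).to(values[0].dtype)
    else:
        # Integer buffers (e.g. num_batches_tracked) cannot be averaged meaningfully; keep the last.
        averaged[key] = values[-1]

torch.save({"model": averaged}, "model_averaged.pth")
```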