[Feature] Support CUB dataset. (#703)

* support cub dataset * support cub dataset * fix train lint error * add docs * fix class label Co-authored-by: Ezra-Yu <1105212286@qq.com> * del debug code * skip docformatter problem * add unit tests * add CUB baseline configs and chpts * fix some typos * fix name style * update flops Co-authored-by: Ezra-Yu <1105212286@qq.com>
open-mmlab · Mar 16, 2022 · aa522f4 · aa522f4
1 parent c1534f9
commit aa522f4
Show file tree

Hide file tree

Showing 12 changed files with 441 additions and 18 deletions.
diff --git a/configs/_base_/datasets/cub_bs8_384.py b/configs/_base_/datasets/cub_bs8_384.py
@@ -0,0 +1,54 @@
+# dataset settings for CUB-200-2011 (384x384 crops, 8 samples per GPU)
+dataset_type = 'CUB'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [  # resize, then random crop + random horizontal flip
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', size=510),
+    dict(type='RandomCrop', size=384),
+    dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='ToTensor', keys=['gt_label']),
+    dict(type='Collect', keys=['img', 'gt_label'])
+]
+test_pipeline = [  # resize, then center crop; no flip, no gt_label
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', size=510),
+    dict(type='CenterCrop', crop_size=384),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='Collect', keys=['img'])
+]
+
+data_root = 'data/CUB_200_2011/'  # prefix of every dataset path below
+data = dict(
+    samples_per_gpu=8,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'images.txt',
+        image_class_labels_file=data_root + 'image_class_labels.txt',
+        train_test_split_file=data_root + 'train_test_split.txt',
+        data_prefix=data_root + 'images',
+        pipeline=train_pipeline),
+    val=dict(  # same settings as `test` below
+        type=dataset_type,
+        ann_file=data_root + 'images.txt',
+        image_class_labels_file=data_root + 'image_class_labels.txt',
+        train_test_split_file=data_root + 'train_test_split.txt',
+        data_prefix=data_root + 'images',
+        test_mode=True,
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'images.txt',
+        image_class_labels_file=data_root + 'image_class_labels.txt',
+        train_test_split_file=data_root + 'train_test_split.txt',
+        data_prefix=data_root + 'images',
+        test_mode=True,
+        pipeline=test_pipeline))
+
+evaluation = dict(
+    interval=1, metric='accuracy',
+    save_best='auto')  # save the checkpoint with highest accuracy
diff --git a/configs/_base_/datasets/cub_bs8_448.py b/configs/_base_/datasets/cub_bs8_448.py
@@ -0,0 +1,54 @@
+# dataset settings for CUB-200-2011 (448x448 crops, 8 samples per GPU)
+dataset_type = 'CUB'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [  # resize, then random crop + random horizontal flip
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', size=600),
+    dict(type='RandomCrop', size=448),
+    dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='ToTensor', keys=['gt_label']),
+    dict(type='Collect', keys=['img', 'gt_label'])
+]
+test_pipeline = [  # resize, then center crop; no flip, no gt_label
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', size=600),
+    dict(type='CenterCrop', crop_size=448),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='Collect', keys=['img'])
+]
+
+data_root = 'data/CUB_200_2011/'  # prefix of every dataset path below
+data = dict(
+    samples_per_gpu=8,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'images.txt',
+        image_class_labels_file=data_root + 'image_class_labels.txt',
+        train_test_split_file=data_root + 'train_test_split.txt',
+        data_prefix=data_root + 'images',
+        pipeline=train_pipeline),
+    val=dict(  # same settings as `test` below
+        type=dataset_type,
+        ann_file=data_root + 'images.txt',
+        image_class_labels_file=data_root + 'image_class_labels.txt',
+        train_test_split_file=data_root + 'train_test_split.txt',
+        data_prefix=data_root + 'images',
+        test_mode=True,
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'images.txt',
+        image_class_labels_file=data_root + 'image_class_labels.txt',
+        train_test_split_file=data_root + 'train_test_split.txt',
+        data_prefix=data_root + 'images',
+        test_mode=True,
+        pipeline=test_pipeline))
+
+evaluation = dict(
+    interval=1, metric='accuracy',
+    save_best='auto')  # save the checkpoint with highest accuracy
diff --git a/configs/_base_/schedules/cub_bs64.py b/configs/_base_/schedules/cub_bs64.py
@@ -0,0 +1,13 @@
+# optimizer; `bs64` presumably means 8 GPUs x 8 samples per GPU - confirm
+optimizer = dict(
+    type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005, nesterov=True)
+optimizer_config = dict(grad_clip=None)
+# learning policy: linear warmup followed by cosine annealing down to min_lr
+lr_config = dict(
+    policy='CosineAnnealing',
+    min_lr=0,
+    warmup='linear',
+    warmup_iters=5,  # counted in epochs because warmup_by_epoch=True
+    warmup_ratio=0.01,
+    warmup_by_epoch=True)
+runner = dict(type='EpochBasedRunner', max_epochs=100)
diff --git a/configs/resnet/README.md b/configs/resnet/README.md
@@ -15,21 +15,29 @@ The depth of representations is of central importance for many visual recognitio
 
 ## Results and models
 
+The pre-trained models on ImageNet-21k are used to fine-tune, and therefore don't have evaluation results.
+
+|   Model         | resolution  | Params(M) |  Flops(G) | Download |
+|:---------------:|:-----------:|:---------:|:---------:|:--------:|
+| ResNet-50-mill  |   224x224   |   86.74   |   15.14   | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_mill_3rdparty_in21k_20220307-bdb3a68b.pth)|
+
+*The "mill" means using the multi-label pre-trained weights from [ImageNet-21K Pretraining for the Masses](https://github.com/Alibaba-MIIL/ImageNet21K).*
+
 ### Cifar10
 
 |         Model         | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
 |:---------------------:|:---------:|:--------:|:---------:|:---------:|:---------:|:--------:|
-| ResNet-18-b16x8 | 11.17 | 0.56 | 94.82 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet18_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.log.json) |
-| ResNet-34-b16x8 | 21.28 | 1.16 | 95.34 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet34_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_b16x8_cifar10_20210528-a8aa36a6.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_b16x8_cifar10_20210528-a8aa36a6.log.json) |
-| ResNet-50-b16x8 | 23.52 | 1.31 | 95.55 | 99.91 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar10_20210528-f54bfad9.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar10_20210528-f54bfad9.log.json) |
-| ResNet-101-b16x8 | 42.51 | 2.52 | 95.58 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet101_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_b16x8_cifar10_20210528-2d29e936.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_b16x8_cifar10_20210528-2d29e936.log.json) |
-| ResNet-152-b16x8 | 58.16 | 3.74 | 95.76 | 99.89 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet152_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_b16x8_cifar10_20210528-3e8e9178.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_b16x8_cifar10_20210528-3e8e9178.log.json) |
+| ResNet-18 | 11.17 | 0.56 | 94.82 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet18_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.log.json) |
+| ResNet-34 | 21.28 | 1.16 | 95.34 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet34_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_b16x8_cifar10_20210528-a8aa36a6.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_b16x8_cifar10_20210528-a8aa36a6.log.json) |
+| ResNet-50 | 23.52 | 1.31 | 95.55 | 99.91 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar10_20210528-f54bfad9.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar10_20210528-f54bfad9.log.json) |
+| ResNet-101 | 42.51 | 2.52 | 95.58 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet101_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_b16x8_cifar10_20210528-2d29e936.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_b16x8_cifar10_20210528-2d29e936.log.json) |
+| ResNet-152 | 58.16 | 3.74 | 95.76 | 99.89 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet152_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_b16x8_cifar10_20210528-3e8e9178.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_b16x8_cifar10_20210528-3e8e9178.log.json) |
 
 ### Cifar100
 
 |         Model         | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
 |:---------------------:|:---------:|:--------:|:---------:|:---------:|:---------:|:--------:|
-| ResNet-50-b16x8 | 23.71 | 1.31 | 79.90 | 95.19 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb16_cifar100.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar100_20210528-67b58a1b.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar100_20210528-67b58a1b.log.json) |
+| ResNet-50 | 23.71 | 1.31 | 79.90 | 95.19 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb16_cifar100.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar100_20210528-67b58a1b.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar100_20210528-67b58a1b.log.json) |
 
 ### ImageNet-1k
 
@@ -57,6 +65,13 @@ The depth of representations is of central importance for many visual recognitio
 
 *Models with \* are converted from the [official repo](https://github.com/pytorch/vision). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*
 
+### CUB-200-2011
+
+|         Model         |   Pretrain   |  resolution  |  Params(M) | Flops(G) | Top-1 (%) |  Config |  Download  |
+|:---------------------:|:------------:|:---------:|:---------:|:--------:|:---------:|:---------:|:---------:|
+|    ResNet-50          | [ImageNet-21k-mill](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_mill_3rdparty_in21k_20220307-bdb3a68b.pth)  | 448x448 |   23.92   |   16.48   |   88.45   |  [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb8_cub.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb8_cub_20220307-57840e60.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb8_cub_20220307-57840e60.log.json) |
+
+
 ## Citation
 
 ```

diff --git a/configs/resnet/metafile.yml b/configs/resnet/metafile.yml
@@ -375,3 +375,16 @@ Models:
         Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1c152_8xb32_in1k_20220214-c013291f.pth
     Config: configs/resnet/resnetv1c152_8xb32_in1k.py
+  - Name: resnet50_8xb8_cub
+    Metadata:
+      FLOPs: 16480000000
+      Parameters: 23920000
+    In Collection: ResNet
+    Results:
+      - Dataset: CUB-200-2011
+        Metrics:
+          Top 1 Accuracy: 88.45
+        Task: Image Classification
+    Pretrain: https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_mill_3rdparty_in21k_20220307-bdb3a68b.pth
+    Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb8_cub_20220307-57840e60.pth
+    Config: configs/resnet/resnet50_8xb8_cub.py
diff --git a/configs/resnet/resnet50_8xb8_cub.py b/configs/resnet/resnet50_8xb8_cub.py
@@ -0,0 +1,19 @@
+_base_ = [
+    '../_base_/models/resnet50.py', '../_base_/datasets/cub_bs8_448.py',
+    '../_base_/schedules/cub_bs64.py', '../_base_/default_runtime.py'
+]
+
+# use pre-trained weights converted from https://github.com/Alibaba-MIIL/ImageNet21K # noqa
+checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_mill_3rdparty_in21k_20220307-bdb3a68b.pth'  # noqa
+
+model = dict(
+    type='ImageClassifier',
+    backbone=dict(
+        init_cfg=dict(
+            type='Pretrained', checkpoint=checkpoint, prefix='backbone')),
+    head=dict(num_classes=200, ))
+
+log_config = dict(interval=20)  # log every 20 iterations
+
+checkpoint_config = dict(
+    interval=1, max_keep_ckpts=3)  # keep only the last three checkpoints
diff --git a/configs/swin_transformer/README.md b/configs/swin_transformer/README.md
@@ -41,6 +41,13 @@ The pre-trained models on ImageNet-21k are used to fine-tune, and therefore don'
 
 *Models with \* are converted from the [official repo](https://github.com/microsoft/Swin-Transformer#main-results-on-imagenet-with-pretrained-models). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*
 
+### CUB-200-2011
+
+|       Model      |  Pretrain   | resolution  | Params(M) | Flops(G) | Top-1 (%) |  Config | Download |
+|:----------------:|:------------:|:---------:|:---------:|:--------:|:---------:|:---------:|:---------:|
+|      Swin-L      |  [ImageNet-21k](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin-large_3rdparty_in21k-384px.pth) |   384x384   |  195.51   |   100.04   |   91.87   |  [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin-large_8xb8_cub_384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin-large_8xb8_cub_384px_20220307-1bbaee6a.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin-large_8xb8_cub_384px_20220307-1bbaee6a.log.json) |
+
+
 ## Citation
 
 ```

diff --git a/configs/swin_transformer/metafile.yml b/configs/swin_transformer/metafile.yml
@@ -186,3 +186,16 @@ Models:
       Weights: https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22kto1k.pth
       Code: https://github.com/microsoft/Swin-Transformer/blob/777f6c66604bb5579086c4447efe3620344d95a9/models/swin_transformer.py#L458
     Config: configs/swin_transformer/swin-large_16xb64_in1k-384px.py
+  - Name: swin-large_8xb8_cub_384px
+    Metadata:
+      FLOPs: 100040000000
+      Parameters: 195510000
+    In Collection: Swin-Transformer
+    Results:
+      - Dataset: CUB-200-2011
+        Metrics:
+          Top 1 Accuracy: 91.87
+        Task: Image Classification
+    Pretrain: https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin-large_3rdparty_in21k-384px.pth
+    Weights: https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin-large_8xb8_cub_384px_20220307-1bbaee6a.pth
+    Config: configs/swin_transformer/swin-large_8xb8_cub_384px.py
diff --git a/configs/swin_transformer/swin-large_8xb8_cub_384px.py b/configs/swin_transformer/swin-large_8xb8_cub_384px.py
@@ -0,0 +1,37 @@
+_base_ = [
+    '../_base_/models/swin_transformer/large_384.py',
+    '../_base_/datasets/cub_bs8_384.py', '../_base_/schedules/cub_bs64.py',
+    '../_base_/default_runtime.py'
+]
+
+# model settings: fine-tune the backbone from an ImageNet-21k checkpoint
+checkpoint = 'https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin-large_3rdparty_in21k-384px.pth'  # noqa
+model = dict(
+    type='ImageClassifier',
+    backbone=dict(
+        init_cfg=dict(
+            type='Pretrained', checkpoint=checkpoint, prefix='backbone')),
+    head=dict(num_classes=200, ))
+
+paramwise_cfg = dict(  # multipliers of 0 disable weight decay
+    norm_decay_mult=0.0,
+    bias_decay_mult=0.0,
+    custom_keys={
+        '.absolute_pos_embed': dict(decay_mult=0.0),
+        '.relative_position_bias_table': dict(decay_mult=0.0)
+    })
+
+optimizer = dict(
+    _delete_=True,  # drop the inherited SGD settings instead of merging
+    type='AdamW',
+    lr=5e-6,
+    weight_decay=0.0005,
+    eps=1e-8,
+    betas=(0.9, 0.999),
+    paramwise_cfg=paramwise_cfg)
+optimizer_config = dict(grad_clip=dict(max_norm=5.0), _delete_=True)
+
+log_config = dict(interval=20)  # log every 20 iterations
+
+checkpoint_config = dict(
+    interval=1, max_keep_ckpts=3)  # keep only the last three checkpoints
diff --git a/mmcls/datasets/__init__.py b/mmcls/datasets/__init__.py
@@ -3,6 +3,7 @@
 from .builder import (DATASETS, PIPELINES, SAMPLERS, build_dataloader,
                       build_dataset, build_sampler)
 from .cifar import CIFAR10, CIFAR100
+from .cub import CUB
 from .dataset_wrappers import (ClassBalancedDataset, ConcatDataset,
                                KFoldDataset, RepeatDataset)
 from .imagenet import ImageNet
@@ -17,5 +18,5 @@
     'VOC', 'MultiLabelDataset', 'build_dataloader', 'build_dataset',
     'DistributedSampler', 'ConcatDataset', 'RepeatDataset',
     'ClassBalancedDataset', 'DATASETS', 'PIPELINES', 'ImageNet21k', 'SAMPLERS',
-    'build_sampler', 'RepeatAugSampler', 'KFoldDataset'
+    'build_sampler', 'RepeatAugSampler', 'KFoldDataset', 'CUB'
 ]