Skip to content

Commit

Permalink
[Feature] Support CUB dataset. (#703)
Browse files Browse the repository at this point in the history
* support cub dataset

* support cub dataset

* fix train lint error

* add docs

* fix class label

Co-authored-by: Ezra-Yu <1105212286@qq.com>

* del debug code

* skip docformatter problem

* add unit tests

* add CUB baseline configs and chpts

* fix some typos

* fix name style

* update flops

Co-authored-by: Ezra-Yu <1105212286@qq.com>
  • Loading branch information
okotaku and Ezra-Yu authored Mar 16, 2022
1 parent c1534f9 commit aa522f4
Show file tree
Hide file tree
Showing 12 changed files with 441 additions and 18 deletions.
54 changes: 54 additions & 0 deletions configs/_base_/datasets/cub_bs8_384.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Dataset settings for CUB-200-2011: resize to 510, then crop to 384.
dataset_type = 'CUB'

# Normalisation statistics shared by the train and test pipelines.
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}

# Training: random crop plus random horizontal flip as augmentation.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'Resize', 'size': 510},
    {'type': 'RandomCrop', 'size': 384},
    {'type': 'RandomFlip', 'flip_prob': 0.5, 'direction': 'horizontal'},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'ImageToTensor', 'keys': ['img']},
    {'type': 'ToTensor', 'keys': ['gt_label']},
    {'type': 'Collect', 'keys': ['img', 'gt_label']},
]

# Evaluation: deterministic centre crop, no label collected.
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'Resize', 'size': 510},
    {'type': 'CenterCrop', 'crop_size': 384},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'ImageToTensor', 'keys': ['img']},
    {'type': 'Collect', 'keys': ['img']},
]

# All three splits read the same annotation files under `data_root`;
# `val` and `test` differ from `train` only by `test_mode=True` and the
# evaluation pipeline.
data_root = 'data/CUB_200_2011/'
data = {
    'samples_per_gpu': 8,
    'workers_per_gpu': 2,
    'train': {
        'type': dataset_type,
        'ann_file': f'{data_root}images.txt',
        'image_class_labels_file': f'{data_root}image_class_labels.txt',
        'train_test_split_file': f'{data_root}train_test_split.txt',
        'data_prefix': f'{data_root}images',
        'pipeline': train_pipeline,
    },
    'val': {
        'type': dataset_type,
        'ann_file': f'{data_root}images.txt',
        'image_class_labels_file': f'{data_root}image_class_labels.txt',
        'train_test_split_file': f'{data_root}train_test_split.txt',
        'data_prefix': f'{data_root}images',
        'test_mode': True,
        'pipeline': test_pipeline,
    },
    'test': {
        'type': dataset_type,
        'ann_file': f'{data_root}images.txt',
        'image_class_labels_file': f'{data_root}image_class_labels.txt',
        'train_test_split_file': f'{data_root}train_test_split.txt',
        'data_prefix': f'{data_root}images',
        'test_mode': True,
        'pipeline': test_pipeline,
    },
}

# Evaluate every epoch and save the checkpoint with the highest accuracy.
evaluation = {'interval': 1, 'metric': 'accuracy', 'save_best': 'auto'}
54 changes: 54 additions & 0 deletions configs/_base_/datasets/cub_bs8_448.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Dataset settings for CUB-200-2011: resize to 600, then crop to 448.
dataset_type = 'CUB'

# Normalisation statistics shared by the train and test pipelines.
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}

# Training: random crop plus random horizontal flip as augmentation.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'Resize', 'size': 600},
    {'type': 'RandomCrop', 'size': 448},
    {'type': 'RandomFlip', 'flip_prob': 0.5, 'direction': 'horizontal'},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'ImageToTensor', 'keys': ['img']},
    {'type': 'ToTensor', 'keys': ['gt_label']},
    {'type': 'Collect', 'keys': ['img', 'gt_label']},
]

# Evaluation: deterministic centre crop, no label collected.
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'Resize', 'size': 600},
    {'type': 'CenterCrop', 'crop_size': 448},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'ImageToTensor', 'keys': ['img']},
    {'type': 'Collect', 'keys': ['img']},
]

# All three splits read the same annotation files under `data_root`;
# `val` and `test` differ from `train` only by `test_mode=True` and the
# evaluation pipeline.
data_root = 'data/CUB_200_2011/'
data = {
    'samples_per_gpu': 8,
    'workers_per_gpu': 2,
    'train': {
        'type': dataset_type,
        'ann_file': f'{data_root}images.txt',
        'image_class_labels_file': f'{data_root}image_class_labels.txt',
        'train_test_split_file': f'{data_root}train_test_split.txt',
        'data_prefix': f'{data_root}images',
        'pipeline': train_pipeline,
    },
    'val': {
        'type': dataset_type,
        'ann_file': f'{data_root}images.txt',
        'image_class_labels_file': f'{data_root}image_class_labels.txt',
        'train_test_split_file': f'{data_root}train_test_split.txt',
        'data_prefix': f'{data_root}images',
        'test_mode': True,
        'pipeline': test_pipeline,
    },
    'test': {
        'type': dataset_type,
        'ann_file': f'{data_root}images.txt',
        'image_class_labels_file': f'{data_root}image_class_labels.txt',
        'train_test_split_file': f'{data_root}train_test_split.txt',
        'data_prefix': f'{data_root}images',
        'test_mode': True,
        'pipeline': test_pipeline,
    },
}

# Evaluate every epoch and save the checkpoint with the highest accuracy.
evaluation = {'interval': 1, 'metric': 'accuracy', 'save_best': 'auto'}
13 changes: 13 additions & 0 deletions configs/_base_/schedules/cub_bs64.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Optimizer: SGD with Nesterov momentum; gradient clipping is disabled.
optimizer = {
    'type': 'SGD',
    'lr': 0.01,
    'momentum': 0.9,
    'weight_decay': 0.0005,
    'nesterov': True,
}
optimizer_config = {'grad_clip': None}

# Learning-rate policy: cosine annealing down to 0 after a linear warm-up.
# Because `warmup_by_epoch` is True, `warmup_iters` is counted in epochs.
lr_config = {
    'policy': 'CosineAnnealing',
    'min_lr': 0,
    'warmup': 'linear',
    'warmup_iters': 5,
    'warmup_ratio': 0.01,
    'warmup_by_epoch': True,
}

# Train for 100 epochs.
runner = {'type': 'EpochBasedRunner', 'max_epochs': 100}
27 changes: 21 additions & 6 deletions configs/resnet/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,29 @@ The depth of representations is of central importance for many visual recognitio

## Results and models

The models pre-trained on ImageNet-21k are intended for fine-tuning only, and therefore don't have evaluation results.

| Model | resolution | Params(M) | Flops(G) | Download |
|:---------------:|:-----------:|:---------:|:---------:|:--------:|
| ResNet-50-mill | 224x224 | 86.74 | 15.14 | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_mill_3rdparty_in21k_20220307-bdb3a68b.pth)|

*The "mill" means using the multi-label pre-trained weights from [ImageNet-21K Pretraining for the Masses](https://github.com/Alibaba-MIIL/ImageNet21K).*

### Cifar10

| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
|:---------------------:|:---------:|:--------:|:---------:|:---------:|:---------:|:--------:|
| ResNet-18-b16x8 | 11.17 | 0.56 | 94.82 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet18_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.log.json) |
| ResNet-34-b16x8 | 21.28 | 1.16 | 95.34 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet34_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_b16x8_cifar10_20210528-a8aa36a6.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_b16x8_cifar10_20210528-a8aa36a6.log.json) |
| ResNet-50-b16x8 | 23.52 | 1.31 | 95.55 | 99.91 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar10_20210528-f54bfad9.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar10_20210528-f54bfad9.log.json) |
| ResNet-101-b16x8 | 42.51 | 2.52 | 95.58 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet101_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_b16x8_cifar10_20210528-2d29e936.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_b16x8_cifar10_20210528-2d29e936.log.json) |
| ResNet-152-b16x8 | 58.16 | 3.74 | 95.76 | 99.89 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet152_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_b16x8_cifar10_20210528-3e8e9178.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_b16x8_cifar10_20210528-3e8e9178.log.json) |
| ResNet-18 | 11.17 | 0.56 | 94.82 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet18_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.log.json) |
| ResNet-34 | 21.28 | 1.16 | 95.34 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet34_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_b16x8_cifar10_20210528-a8aa36a6.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_b16x8_cifar10_20210528-a8aa36a6.log.json) |
| ResNet-50 | 23.52 | 1.31 | 95.55 | 99.91 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar10_20210528-f54bfad9.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar10_20210528-f54bfad9.log.json) |
| ResNet-101 | 42.51 | 2.52 | 95.58 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet101_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_b16x8_cifar10_20210528-2d29e936.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_b16x8_cifar10_20210528-2d29e936.log.json) |
| ResNet-152 | 58.16 | 3.74 | 95.76 | 99.89 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet152_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_b16x8_cifar10_20210528-3e8e9178.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_b16x8_cifar10_20210528-3e8e9178.log.json) |

### Cifar100

| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
|:---------------------:|:---------:|:--------:|:---------:|:---------:|:---------:|:--------:|
| ResNet-50-b16x8 | 23.71 | 1.31 | 79.90 | 95.19 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb16_cifar100.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar100_20210528-67b58a1b.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar100_20210528-67b58a1b.log.json) |
| ResNet-50 | 23.71 | 1.31 | 79.90 | 95.19 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb16_cifar100.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar100_20210528-67b58a1b.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar100_20210528-67b58a1b.log.json) |

### ImageNet-1k

Expand Down Expand Up @@ -57,6 +65,13 @@ The depth of representations is of central importance for many visual recognitio

*Models with \* are converted from the [official repo](https://github.com/pytorch/vision). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*

### CUB-200-2011

| Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Config | Download |
|:---------------------:|:------------:|:---------:|:---------:|:--------:|:---------:|:---------:|:---------:|
| ResNet-50 | [ImageNet-21k-mill](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_mill_3rdparty_in21k_20220307-bdb3a68b.pth) | 448x448 | 23.92 | 16.48 | 88.45 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb8_cub.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb8_cub_20220307-57840e60.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb8_cub_20220307-57840e60.log.json) |


## Citation

```
Expand Down
13 changes: 13 additions & 0 deletions configs/resnet/metafile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -375,3 +375,16 @@ Models:
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1c152_8xb32_in1k_20220214-c013291f.pth
Config: configs/resnet/resnetv1c152_8xb32_in1k.py
- Name: resnet50_8xb8_cub
Metadata:
FLOPs: 16480000000
Parameters: 23920000
In Collection: ResNet
Results:
- Dataset: CUB-200-2011
Metrics:
Top 1 Accuracy: 88.45
Task: Image Classification
Pretrain: https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_mill_3rdparty_in21k_20220307-bdb3a68b.pth
Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb8_cub_20220307-57840e60.pth
Config: configs/resnet/resnet50_8xb8_cub.py
19 changes: 19 additions & 0 deletions configs/resnet/resnet50_8xb8_cub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# ResNet-50 fine-tuned on CUB-200-2011 at 448x448 resolution.
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/cub_bs8_448.py',
    '../_base_/schedules/cub_bs64.py',
    '../_base_/default_runtime.py',
]

# use pre-train weight converted from https://github.com/Alibaba-MIIL/ImageNet21K # noqa
checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_mill_3rdparty_in21k_20220307-bdb3a68b.pth'  # noqa

# Initialise only the backbone from the checkpoint (keys under the
# 'backbone' prefix) and set the head to 200 classes.
model = {
    'type': 'ImageClassifier',
    'backbone': {
        'init_cfg': {
            'type': 'Pretrained',
            'checkpoint': checkpoint,
            'prefix': 'backbone',
        },
    },
    'head': {'num_classes': 200},
}

# log every 20 iterations
log_config = {'interval': 20}

# save a checkpoint every epoch and keep only the last three
checkpoint_config = {'interval': 1, 'max_keep_ckpts': 3}
7 changes: 7 additions & 0 deletions configs/swin_transformer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ The pre-trained models on ImageNet-21k are used to fine-tune, and therefore don'

*Models with \* are converted from the [official repo](https://github.com/microsoft/Swin-Transformer#main-results-on-imagenet-with-pretrained-models). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*

### CUB-200-2011

| Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Config | Download |
|:----------------:|:------------:|:---------:|:---------:|:--------:|:---------:|:---------:|:---------:|
| Swin-L | [ImageNet-21k](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin-large_3rdparty_in21k-384px.pth) | 384x384 | 195.51 | 100.04 | 91.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin-large_8xb8_cub_384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin-large_8xb8_cub_384px_20220307-1bbaee6a.pth) &#124; [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin-large_8xb8_cub_384px_20220307-1bbaee6a.log.json) |


## Citation

```
Expand Down
13 changes: 13 additions & 0 deletions configs/swin_transformer/metafile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -186,3 +186,16 @@ Models:
Weights: https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22kto1k.pth
Code: https://github.com/microsoft/Swin-Transformer/blob/777f6c66604bb5579086c4447efe3620344d95a9/models/swin_transformer.py#L458
Config: configs/swin_transformer/swin-large_16xb64_in1k-384px.py
- Name: swin-large_8xb8_cub_384px
Metadata:
FLOPs: 100040000000
Parameters: 195510000
In Collection: Swin-Transformer
Results:
- Dataset: CUB-200-2011
Metrics:
Top 1 Accuracy: 91.87
Task: Image Classification
Pretrain: https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin-large_3rdparty_in21k-384px.pth
Weights: https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin-large_8xb8_cub_384px_20220307-1bbaee6a.pth
Config: configs/swin_transformer/swin-large_8xb8_cub_384px.py
37 changes: 37 additions & 0 deletions configs/swin_transformer/swin-large_8xb8_cub_384px.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Swin-Large fine-tuned on CUB-200-2011 at 384x384 resolution.
_base_ = [
    '../_base_/models/swin_transformer/large_384.py',
    '../_base_/datasets/cub_bs8_384.py',
    '../_base_/schedules/cub_bs64.py',
    '../_base_/default_runtime.py',
]

# model settings
checkpoint = 'https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin-large_3rdparty_in21k-384px.pth'  # noqa

# Initialise only the backbone from the checkpoint (keys under the
# 'backbone' prefix) and set the head to 200 classes.
model = {
    'type': 'ImageClassifier',
    'backbone': {
        'init_cfg': {
            'type': 'Pretrained',
            'checkpoint': checkpoint,
            'prefix': 'backbone',
        },
    },
    'head': {'num_classes': 200},
}

# Weight-decay multiplier of 0 for norm layers, biases and the two
# position-embedding parameter groups matched by `custom_keys`.
paramwise_cfg = {
    'norm_decay_mult': 0.0,
    'bias_decay_mult': 0.0,
    'custom_keys': {
        '.absolute_pos_embed': {'decay_mult': 0.0},
        '.relative_position_bias_table': {'decay_mult': 0.0},
    },
}

# `_delete_=True` replaces the SGD optimizer inherited from the base
# schedule instead of merging these keys into it.
optimizer = {
    '_delete_': True,
    'type': 'AdamW',
    'lr': 5e-6,
    'weight_decay': 0.0005,
    'eps': 1e-8,
    'betas': (0.9, 0.999),
    'paramwise_cfg': paramwise_cfg,
}

# Clip gradients to a max norm of 5.0, replacing the inherited setting.
optimizer_config = {'grad_clip': {'max_norm': 5.0}, '_delete_': True}

# log every 20 iterations
log_config = {'interval': 20}

# save a checkpoint every epoch and keep only the last three
checkpoint_config = {'interval': 1, 'max_keep_ckpts': 3}
3 changes: 2 additions & 1 deletion mmcls/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .builder import (DATASETS, PIPELINES, SAMPLERS, build_dataloader,
build_dataset, build_sampler)
from .cifar import CIFAR10, CIFAR100
from .cub import CUB
from .dataset_wrappers import (ClassBalancedDataset, ConcatDataset,
KFoldDataset, RepeatDataset)
from .imagenet import ImageNet
Expand All @@ -17,5 +18,5 @@
'VOC', 'MultiLabelDataset', 'build_dataloader', 'build_dataset',
'DistributedSampler', 'ConcatDataset', 'RepeatDataset',
'ClassBalancedDataset', 'DATASETS', 'PIPELINES', 'ImageNet21k', 'SAMPLERS',
'build_sampler', 'RepeatAugSampler', 'KFoldDataset'
'build_sampler', 'RepeatAugSampler', 'KFoldDataset', 'CUB'
]
Loading

0 comments on commit aa522f4

Please sign in to comment.