Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Enhancement] Update VAN #1017

Merged
merged 9 commits into from
Sep 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions configs/_base_/models/van/van_b0.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='VAN', arch='b0', drop_path_rate=0.1),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=256,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
],
train_cfg=dict(augments=[
dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5),
dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5)
]))
21 changes: 21 additions & 0 deletions configs/_base_/models/van/van_b1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='VAN', arch='b1', drop_path_rate=0.1),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=512,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
],
train_cfg=dict(augments=[
dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5),
dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5)
]))
13 changes: 13 additions & 0 deletions configs/_base_/models/van/van_b2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='VAN', arch='b2', drop_path_rate=0.1),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=512,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False))
13 changes: 13 additions & 0 deletions configs/_base_/models/van/van_b3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='VAN', arch='b3', drop_path_rate=0.2),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=512,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False))
13 changes: 13 additions & 0 deletions configs/_base_/models/van/van_b4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='VAN', arch='b4', drop_path_rate=0.2),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=512,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False))
13 changes: 13 additions & 0 deletions configs/_base_/models/van/van_b5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='VAN', arch='b5', drop_path_rate=0.2),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=768,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False))
13 changes: 13 additions & 0 deletions configs/_base_/models/van/van_b6.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='VAN', arch='b6', drop_path_rate=0.3),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=768,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False))
14 changes: 1 addition & 13 deletions configs/_base_/models/van/van_base.py
Original file line number Diff line number Diff line change
@@ -1,13 +1 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='VAN', arch='base', drop_path_rate=0.1),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=512,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False))
_base_ = ['./van-b2.py']
14 changes: 1 addition & 13 deletions configs/_base_/models/van/van_large.py
Original file line number Diff line number Diff line change
@@ -1,13 +1 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='VAN', arch='large', drop_path_rate=0.2),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=512,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False))
_base_ = ['./van-b3.py']
22 changes: 1 addition & 21 deletions configs/_base_/models/van/van_small.py
Original file line number Diff line number Diff line change
@@ -1,21 +1 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='VAN', arch='small', drop_path_rate=0.1),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=512,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
],
train_cfg=dict(augments=[
dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5),
dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5)
]))
_base_ = ['./van-b1.py']
22 changes: 1 addition & 21 deletions configs/_base_/models/van/van_tiny.py
Original file line number Diff line number Diff line change
@@ -1,21 +1 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='VAN', arch='tiny', drop_path_rate=0.1),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=256,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
],
train_cfg=dict(augments=[
dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5),
dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5)
]))
_base_ = ['./van-b0.py']
25 changes: 19 additions & 6 deletions configs/van/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,28 @@ While originally designed for natural language processing (NLP) tasks, the self-

### ImageNet-1k

| Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-----: | :----------: | :--------: | :-------: | :------: | :-------: | :-------: | :-----------------------------------------------------------------: | :-------------------------------------------------------------------: |
| VAN-T\* | From scratch | 224x224 | 4.11 | 0.88 | 75.41 | 93.02 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-tiny_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-tiny_8xb128_in1k_20220501-385941af.pth) |
| VAN-S\* | From scratch | 224x224 | 13.86 | 2.52 | 81.01 | 95.63 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-small_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-small_8xb128_in1k_20220501-17bc91aa.pth) |
| VAN-B\* | From scratch | 224x224 | 26.58 | 5.03 | 82.80 | 96.21 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-base_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth) |
| VAN-L\* | From scratch | 224x224 | 44.77 | 8.99 | 83.86 | 96.73 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-large_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth) |
| Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------: | :----------: | :--------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------------------------------: | :-------------------------------------------------------------------: |
| VAN-B0\* | From scratch | 224x224 | 4.11 | 0.88 | 75.41 | 93.02 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b0_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-tiny_8xb128_in1k_20220501-385941af.pth) |
| VAN-B1\* | From scratch | 224x224 | 13.86 | 2.52 | 81.01 | 95.63 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b1_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-small_8xb128_in1k_20220501-17bc91aa.pth) |
| VAN-B2\* | From scratch | 224x224 | 26.58 | 5.03 | 82.80 | 96.21 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b2_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth) |
| VAN-B3\* | From scratch | 224x224 | 44.77 | 8.99 | 83.86 | 96.73 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b3_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth) |
| VAN-B4\* | From scratch | 224x224 | 60.28 | 12.22 | 84.13 | 96.86 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b4_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-b4_3rdparty_in1k_20220909-f4665b92.pth) |

\*Models with * are converted from [the official repo](https://github.com/Visual-Attention-Network/VAN-Classification). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.

### Pre-trained Models

The pre-trained models on ImageNet-21k are used to fine-tune on the downstream tasks.

| Model | Pretrain | resolution | Params(M) | Flops(G) | Download |
| :------: | :----------: | :--------: | :-------: | :------: | :---------------------------------------------------------------------------------------------------------: |
| VAN-B4\* | ImageNet-21k | 224x224 | 60.28 | 12.22 | [model](https://download.openmmlab.com/mmclassification/v0/van/van-b4_3rdparty_in21k_20220909-db926b18.pth) |
| VAN-B5\* | ImageNet-21k | 224x224 | 89.97 | 17.21 | [model](https://download.openmmlab.com/mmclassification/v0/van/van-b5_3rdparty_in21k_20220909-18e904e3.pth) |
| VAN-B6\* | ImageNet-21k | 224x224 | 283.9 | 55.28 | [model](https://download.openmmlab.com/mmclassification/v0/van/van-b6_3rdparty_in21k_20220909-96c2cb3a.pth) |

\*Models with * are converted from [the official repo](https://github.com/Visual-Attention-Network/VAN-Classification).

## Citation

```
Expand Down
46 changes: 30 additions & 16 deletions configs/van/metafile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Collections:
- Weight Decay
Architecture:
- Visual Attention Network
- LKA
Paper:
URL: https://arxiv.org/pdf/2202.09741v2.pdf
Title: "Visual Attention Network"
Expand All @@ -16,10 +17,10 @@ Collections:
Version: v0.23.0

Models:
- Name: van-tiny_8xb128_in1k
- Name: van-b0_3rdparty_in1k
Metadata:
FLOPs: 4110000 # 4.11M
Parameters: 880000000 # 0.88G
FLOPs: 880000000 # 0.88G
Parameters: 4110000 # 4.11M
In Collection: Visual-Attention-Network
Results:
- Dataset: ImageNet-1k
Expand All @@ -28,11 +29,11 @@ Models:
Top 5 Accuracy: 93.02
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/van/van-tiny_8xb128_in1k_20220501-385941af.pth
Config: configs/van/van-tiny_8xb128_in1k.py
- Name: van-small_8xb128_in1k
Config: configs/van/van-b0_8xb128_in1k.py
- Name: van-b1_3rdparty_in1k
Metadata:
FLOPs: 13860000 # 13.86M
Parameters: 2520000000 # 2.52G
FLOPs: 2520000000 # 2.52G
Parameters: 13860000 # 13.86M
In Collection: Visual-Attention-Network
Results:
- Dataset: ImageNet-1k
Expand All @@ -41,11 +42,11 @@ Models:
Top 5 Accuracy: 95.63
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/van/van-small_8xb128_in1k_20220501-17bc91aa.pth
Config: configs/van/van-small_8xb128_in1k.py
- Name: van-base_8xb128_in1k
Config: configs/van/van-b1_8xb128_in1k.py
- Name: van-b2_3rdparty_in1k
Metadata:
FLOPs: 26580000 # 26.58M
Parameters: 5030000000 # 5.03G
FLOPs: 5030000000 # 5.03G
Parameters: 26580000 # 26.58M
In Collection: Visual-Attention-Network
Results:
- Dataset: ImageNet-1k
Expand All @@ -54,11 +55,11 @@ Models:
Top 5 Accuracy: 96.21
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth
Config: configs/van/van-base_8xb128_in1k.py
- Name: van-large_8xb128_in1k
Config: configs/van/van-b2_8xb128_in1k.py
- Name: van-b3_3rdparty_in1k
Metadata:
FLOPs: 44770000 # 44.77 M
Parameters: 8990000000 # 8.99G
FLOPs: 8990000000 # 8.99G
Parameters: 44770000 # 44.77M
In Collection: Visual-Attention-Network
Results:
- Dataset: ImageNet-1k
Expand All @@ -67,4 +68,17 @@ Models:
Top 5 Accuracy: 96.73
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth
Config: configs/van/van-large_8xb128_in1k.py
Config: configs/van/van-b3_8xb128_in1k.py
- Name: van-b4_3rdparty_in1k
Metadata:
FLOPs: 12220000000 # 12.22G
Parameters: 60280000 # 60.28M
In Collection: Visual-Attention-Network
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 84.13
Top 5 Accuracy: 96.86
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/van/van-b4_3rdparty_in1k_20220909-f4665b92.pth
Config: configs/van/van-b4_8xb128_in1k.py
61 changes: 61 additions & 0 deletions configs/van/van-b0_8xb128_in1k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
_base_ = [
'../_base_/models/van/van_b0.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py'
]

# Note that the mean and variance used here are different from other configs
img_norm_cfg = dict(
mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
size=224,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
dict(
type='RandAugment',
policies={{_base_.rand_increasing_policies}},
num_policies=2,
total_level=10,
magnitude_level=9,
magnitude_std=0.5,
hparams=dict(
pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]],
interpolation='bicubic')),
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
dict(
type='RandomErasing',
erase_prob=0.25,
mode='rand',
min_area_ratio=0.02,
max_area_ratio=1 / 3,
fill_color=img_norm_cfg['mean'][::-1],
fill_std=img_norm_cfg['std'][::-1]),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='ToTensor', keys=['gt_label']),
dict(type='Collect', keys=['img', 'gt_label'])
]

test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='Resize',
size=(248, -1),
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=224),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
]

data = dict(
samples_per_gpu=128,
train=dict(pipeline=train_pipeline),
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))
Loading