From b0714262d825758c88566f5a4df81a0d43d46e0b Mon Sep 17 00:00:00 2001 From: Rist115 Date: Tue, 30 Aug 2022 12:26:31 +0900 Subject: [PATCH 1/9] update van --- .../models/van/{van_tiny.py => van_b0.py} | 2 +- .../models/van/{van_small.py => van_b1.py} | 2 +- .../models/van/{van_base.py => van_b2.py} | 2 +- .../models/van/{van_large.py => van_b3.py} | 2 +- configs/_base_/models/van/van_b4.py | 13 ++++ configs/_base_/models/van/van_b5.py | 13 ++++ configs/_base_/models/van/van_b6.py | 13 ++++ configs/van/README.md | 15 ++-- configs/van/metafile.yml | 16 ++-- ...e_8xb128_in1k.py => van-b0_8xb128_in1k.py} | 2 +- ...y_8xb128_in1k.py => van-b1_8xb128_in1k.py} | 2 +- ...e_8xb128_in1k.py => van-b2_8xb128_in1k.py} | 2 +- ...l_8xb128_in1k.py => van-b3_8xb128_in1k.py} | 2 +- configs/van/van-b4_8xb128_in1k.py | 61 +++++++++++++++ configs/van/van-b5_8xb128_in1k.py | 61 +++++++++++++++ configs/van/van-b6_8xb128_in1k.py | 61 +++++++++++++++ mmcls/models/backbones/van.py | 78 ++++++++++++++++--- 17 files changed, 314 insertions(+), 33 deletions(-) rename configs/_base_/models/van/{van_tiny.py => van_b0.py} (91%) rename configs/_base_/models/van/{van_small.py => van_b1.py} (91%) rename configs/_base_/models/van/{van_base.py => van_b2.py} (85%) rename configs/_base_/models/van/{van_large.py => van_b3.py} (85%) create mode 100644 configs/_base_/models/van/van_b4.py create mode 100644 configs/_base_/models/van/van_b5.py create mode 100644 configs/_base_/models/van/van_b6.py rename configs/van/{van-base_8xb128_in1k.py => van-b0_8xb128_in1k.py} (97%) rename configs/van/{van-tiny_8xb128_in1k.py => van-b1_8xb128_in1k.py} (97%) rename configs/van/{van-large_8xb128_in1k.py => van-b2_8xb128_in1k.py} (97%) rename configs/van/{van-small_8xb128_in1k.py => van-b3_8xb128_in1k.py} (97%) create mode 100644 configs/van/van-b4_8xb128_in1k.py create mode 100644 configs/van/van-b5_8xb128_in1k.py create mode 100644 configs/van/van-b6_8xb128_in1k.py diff --git a/configs/_base_/models/van/van_tiny.py 
b/configs/_base_/models/van/van_b0.py similarity index 91% rename from configs/_base_/models/van/van_tiny.py rename to configs/_base_/models/van/van_b0.py index 42791ac3beb..5fa977e7b2f 100644 --- a/configs/_base_/models/van/van_tiny.py +++ b/configs/_base_/models/van/van_b0.py @@ -1,7 +1,7 @@ # model settings model = dict( type='ImageClassifier', - backbone=dict(type='VAN', arch='tiny', drop_path_rate=0.1), + backbone=dict(type='VAN', arch='b0', drop_path_rate=0.1), neck=dict(type='GlobalAveragePooling'), head=dict( type='LinearClsHead', diff --git a/configs/_base_/models/van/van_small.py b/configs/_base_/models/van/van_b1.py similarity index 91% rename from configs/_base_/models/van/van_small.py rename to configs/_base_/models/van/van_b1.py index 320e90afdc8..a27a50b11b8 100644 --- a/configs/_base_/models/van/van_small.py +++ b/configs/_base_/models/van/van_b1.py @@ -1,7 +1,7 @@ # model settings model = dict( type='ImageClassifier', - backbone=dict(type='VAN', arch='small', drop_path_rate=0.1), + backbone=dict(type='VAN', arch='b1', drop_path_rate=0.1), neck=dict(type='GlobalAveragePooling'), head=dict( type='LinearClsHead', diff --git a/configs/_base_/models/van/van_base.py b/configs/_base_/models/van/van_b2.py similarity index 85% rename from configs/_base_/models/van/van_base.py rename to configs/_base_/models/van/van_b2.py index 006459255f8..41b0484f44f 100644 --- a/configs/_base_/models/van/van_base.py +++ b/configs/_base_/models/van/van_b2.py @@ -1,7 +1,7 @@ # model settings model = dict( type='ImageClassifier', - backbone=dict(type='VAN', arch='base', drop_path_rate=0.1), + backbone=dict(type='VAN', arch='b2', drop_path_rate=0.1), neck=dict(type='GlobalAveragePooling'), head=dict( type='LinearClsHead', diff --git a/configs/_base_/models/van/van_large.py b/configs/_base_/models/van/van_b3.py similarity index 85% rename from configs/_base_/models/van/van_large.py rename to configs/_base_/models/van/van_b3.py index 4ebafabdaaf..d32b12cc1ee 100644 --- 
a/configs/_base_/models/van/van_large.py +++ b/configs/_base_/models/van/van_b3.py @@ -1,7 +1,7 @@ # model settings model = dict( type='ImageClassifier', - backbone=dict(type='VAN', arch='large', drop_path_rate=0.2), + backbone=dict(type='VAN', arch='b3', drop_path_rate=0.2), neck=dict(type='GlobalAveragePooling'), head=dict( type='LinearClsHead', diff --git a/configs/_base_/models/van/van_b4.py b/configs/_base_/models/van/van_b4.py new file mode 100644 index 00000000000..d32b12cc1ee --- /dev/null +++ b/configs/_base_/models/van/van_b4.py @@ -0,0 +1,13 @@ +# model settings +model = dict( + type='ImageClassifier', + backbone=dict(type='VAN', arch='b3', drop_path_rate=0.2), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=512, + init_cfg=None, # suppress the default init_cfg of LinearClsHead. + loss=dict( + type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), + cal_acc=False)) diff --git a/configs/_base_/models/van/van_b5.py b/configs/_base_/models/van/van_b5.py new file mode 100644 index 00000000000..28f55d18979 --- /dev/null +++ b/configs/_base_/models/van/van_b5.py @@ -0,0 +1,13 @@ +# model settings +model = dict( + type='ImageClassifier', + backbone=dict(type='VAN', arch='b3', drop_path_rate=0.2), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=768, + init_cfg=None, # suppress the default init_cfg of LinearClsHead. 
+ loss=dict( + type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), + cal_acc=False)) diff --git a/configs/_base_/models/van/van_b6.py b/configs/_base_/models/van/van_b6.py new file mode 100644 index 00000000000..28f55d18979 --- /dev/null +++ b/configs/_base_/models/van/van_b6.py @@ -0,0 +1,13 @@ +# model settings +model = dict( + type='ImageClassifier', + backbone=dict(type='VAN', arch='b3', drop_path_rate=0.2), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=768, + init_cfg=None, # suppress the default init_cfg of LinearClsHead. + loss=dict( + type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), + cal_acc=False)) diff --git a/configs/van/README.md b/configs/van/README.md index e39dfc445a1..ffb90fe9949 100644 --- a/configs/van/README.md +++ b/configs/van/README.md @@ -16,12 +16,15 @@ While originally designed for natural language processing (NLP) tasks, the self- ### ImageNet-1k -| Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download | -| :-----: | :----------: | :--------: | :-------: | :------: | :-------: | :-------: | :-----------------------------------------------------------------: | :-------------------------------------------------------------------: | -| VAN-T\* | From scratch | 224x224 | 4.11 | 0.88 | 75.41 | 93.02 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-tiny_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-tiny_8xb128_in1k_20220501-385941af.pth) | -| VAN-S\* | From scratch | 224x224 | 13.86 | 2.52 | 81.01 | 95.63 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-small_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-small_8xb128_in1k_20220501-17bc91aa.pth) | -| VAN-B\* | From scratch | 224x224 | 26.58 | 5.03 | 82.80 | 96.21 | 
[config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-base_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth) | -| VAN-L\* | From scratch | 224x224 | 44.77 | 8.99 | 83.86 | 96.73 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-large_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth) | +| Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download | +| :------: | :----------: | :--------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------------------------------: | :-------------------------------------------------------------------: | +| VAN-B0\* | From scratch | 224x224 | 4.11 | 0.88 | 75.41 | 93.02 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b0_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-tiny_8xb128_in1k_20220501-385941af.pth) | +| VAN-B1\* | From scratch | 224x224 | 13.86 | 2.52 | 81.01 | 95.63 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b1_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-small_8xb128_in1k_20220501-17bc91aa.pth) | +| VAN-B2\* | From scratch | 224x224 | 26.58 | 5.03 | 82.80 | 96.21 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b2_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth) | +| VAN-B3\* | From scratch | 224x224 | 44.77 | 8.99 | 83.86 | 96.73 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b3_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth) | +| VAN-B4\* | From scratch 
| 224x224 | | | | | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b4_8xb128_in1k.py) | [model](<>) | +| VAN-B5\* | From scratch | 224x224 | | | | | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b5_8xb128_in1k.py) | [model](<>) | +| VAN-B6\* | From scratch | 224x224 | | | | | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b6_8xb128_in1k.py) | [model](<>) | \*Models with * are converted from [the official repo](https://github.com/Visual-Attention-Network/VAN-Classification). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results. diff --git a/configs/van/metafile.yml b/configs/van/metafile.yml index 13e28c16ec8..5782f302376 100644 --- a/configs/van/metafile.yml +++ b/configs/van/metafile.yml @@ -16,7 +16,7 @@ Collections: Version: v0.23.0 Models: - - Name: van-tiny_8xb128_in1k + - Name: van-b0_8xb128_in1k Metadata: FLOPs: 4110000 # 4.11M Parameters: 880000000 # 0.88G @@ -28,8 +28,8 @@ Models: Top 5 Accuracy: 93.02 Task: Image Classification Weights: https://download.openmmlab.com/mmclassification/v0/van/van-tiny_8xb128_in1k_20220501-385941af.pth - Config: configs/van/van-tiny_8xb128_in1k.py - - Name: van-small_8xb128_in1k + Config: configs/van/van-b0_8xb128_in1k.py + - Name: van-b1_8xb128_in1k Metadata: FLOPs: 13860000 # 13.86M Parameters: 2520000000 # 2.52G @@ -41,8 +41,8 @@ Models: Top 5 Accuracy: 95.63 Task: Image Classification Weights: https://download.openmmlab.com/mmclassification/v0/van/van-small_8xb128_in1k_20220501-17bc91aa.pth - Config: configs/van/van-small_8xb128_in1k.py - - Name: van-base_8xb128_in1k + Config: configs/van/van-b1_8xb128_in1k.py + - Name: van-b2_8xb128_in1k Metadata: FLOPs: 26580000 # 26.58M Parameters: 5030000000 # 5.03G @@ -54,8 +54,8 @@ Models: Top 5 Accuracy: 96.21 Task: Image Classification Weights: 
https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth - Config: configs/van/van-base_8xb128_in1k.py - - Name: van-large_8xb128_in1k + Config: configs/van/van-b2_8xb128_in1k.py + - Name: van-b3_8xb128_in1k Metadata: FLOPs: 44770000 # 44.77 M Parameters: 8990000000 # 8.99G @@ -67,4 +67,4 @@ Models: Top 5 Accuracy: 96.73 Task: Image Classification Weights: https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth - Config: configs/van/van-large_8xb128_in1k.py + Config: configs/van/van-b3_8xb128_in1k.py diff --git a/configs/van/van-base_8xb128_in1k.py b/configs/van/van-b0_8xb128_in1k.py similarity index 97% rename from configs/van/van-base_8xb128_in1k.py rename to configs/van/van-b0_8xb128_in1k.py index 704f111bf51..1acb7af38eb 100644 --- a/configs/van/van-base_8xb128_in1k.py +++ b/configs/van/van-b0_8xb128_in1k.py @@ -1,5 +1,5 @@ _base_ = [ - '../_base_/models/van/van_base.py', + '../_base_/models/van/van_b0.py', '../_base_/datasets/imagenet_bs64_swin_224.py', '../_base_/schedules/imagenet_bs1024_adamw_swin.py', '../_base_/default_runtime.py' diff --git a/configs/van/van-tiny_8xb128_in1k.py b/configs/van/van-b1_8xb128_in1k.py similarity index 97% rename from configs/van/van-tiny_8xb128_in1k.py rename to configs/van/van-b1_8xb128_in1k.py index 1e001c1c329..64483db867d 100644 --- a/configs/van/van-tiny_8xb128_in1k.py +++ b/configs/van/van-b1_8xb128_in1k.py @@ -1,5 +1,5 @@ _base_ = [ - '../_base_/models/van/van_tiny.py', + '../_base_/models/van/van_b1.py', '../_base_/datasets/imagenet_bs64_swin_224.py', '../_base_/schedules/imagenet_bs1024_adamw_swin.py', '../_base_/default_runtime.py' diff --git a/configs/van/van-large_8xb128_in1k.py b/configs/van/van-b2_8xb128_in1k.py similarity index 97% rename from configs/van/van-large_8xb128_in1k.py rename to configs/van/van-b2_8xb128_in1k.py index b55aff165ef..88493dc2e0f 100644 --- a/configs/van/van-large_8xb128_in1k.py +++ 
b/configs/van/van-b2_8xb128_in1k.py @@ -1,5 +1,5 @@ _base_ = [ - '../_base_/models/van/van_large.py', + '../_base_/models/van/van_b2.py', '../_base_/datasets/imagenet_bs64_swin_224.py', '../_base_/schedules/imagenet_bs1024_adamw_swin.py', '../_base_/default_runtime.py' diff --git a/configs/van/van-small_8xb128_in1k.py b/configs/van/van-b3_8xb128_in1k.py similarity index 97% rename from configs/van/van-small_8xb128_in1k.py rename to configs/van/van-b3_8xb128_in1k.py index 3b83e25ab8c..6b415f656fb 100644 --- a/configs/van/van-small_8xb128_in1k.py +++ b/configs/van/van-b3_8xb128_in1k.py @@ -1,5 +1,5 @@ _base_ = [ - '../_base_/models/van/van_small.py', + '../_base_/models/van/van_b3.py', '../_base_/datasets/imagenet_bs64_swin_224.py', '../_base_/schedules/imagenet_bs1024_adamw_swin.py', '../_base_/default_runtime.py' diff --git a/configs/van/van-b4_8xb128_in1k.py b/configs/van/van-b4_8xb128_in1k.py new file mode 100644 index 00000000000..ba8914f8209 --- /dev/null +++ b/configs/van/van-b4_8xb128_in1k.py @@ -0,0 +1,61 @@ +_base_ = [ + '../_base_/models/van/van_b4.py', + '../_base_/datasets/imagenet_bs64_swin_224.py', + '../_base_/schedules/imagenet_bs1024_adamw_swin.py', + '../_base_/default_runtime.py' +] + +# Note that the mean and variance used here are different from other configs +img_norm_cfg = dict( + mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='RandomResizedCrop', + size=224, + backend='pillow', + interpolation='bicubic'), + dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), + dict( + type='RandAugment', + policies={{_base_.rand_increasing_policies}}, + num_policies=2, + total_level=10, + magnitude_level=9, + magnitude_std=0.5, + hparams=dict( + pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], + interpolation='bicubic')), + dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), + dict( + type='RandomErasing', + erase_prob=0.25, + 
mode='rand', + min_area_ratio=0.02, + max_area_ratio=1 / 3, + fill_color=img_norm_cfg['mean'][::-1], + fill_std=img_norm_cfg['std'][::-1]), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='ToTensor', keys=['gt_label']), + dict(type='Collect', keys=['img', 'gt_label']) +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='Resize', + size=(248, -1), + backend='pillow', + interpolation='bicubic'), + dict(type='CenterCrop', crop_size=224), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) +] + +data = dict( + samples_per_gpu=128, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/van/van-b5_8xb128_in1k.py b/configs/van/van-b5_8xb128_in1k.py new file mode 100644 index 00000000000..f07158fb3e7 --- /dev/null +++ b/configs/van/van-b5_8xb128_in1k.py @@ -0,0 +1,61 @@ +_base_ = [ + '../_base_/models/van/van_b5.py', + '../_base_/datasets/imagenet_bs64_swin_224.py', + '../_base_/schedules/imagenet_bs1024_adamw_swin.py', + '../_base_/default_runtime.py' +] + +# Note that the mean and variance used here are different from other configs +img_norm_cfg = dict( + mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='RandomResizedCrop', + size=224, + backend='pillow', + interpolation='bicubic'), + dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), + dict( + type='RandAugment', + policies={{_base_.rand_increasing_policies}}, + num_policies=2, + total_level=10, + magnitude_level=9, + magnitude_std=0.5, + hparams=dict( + pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], + interpolation='bicubic')), + dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), + dict( + type='RandomErasing', + erase_prob=0.25, + mode='rand', + min_area_ratio=0.02, + 
max_area_ratio=1 / 3, + fill_color=img_norm_cfg['mean'][::-1], + fill_std=img_norm_cfg['std'][::-1]), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='ToTensor', keys=['gt_label']), + dict(type='Collect', keys=['img', 'gt_label']) +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='Resize', + size=(248, -1), + backend='pillow', + interpolation='bicubic'), + dict(type='CenterCrop', crop_size=224), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) +] + +data = dict( + samples_per_gpu=128, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/van/van-b6_8xb128_in1k.py b/configs/van/van-b6_8xb128_in1k.py new file mode 100644 index 00000000000..e7d65f437aa --- /dev/null +++ b/configs/van/van-b6_8xb128_in1k.py @@ -0,0 +1,61 @@ +_base_ = [ + '../_base_/models/van/van_b6.py', + '../_base_/datasets/imagenet_bs64_swin_224.py', + '../_base_/schedules/imagenet_bs1024_adamw_swin.py', + '../_base_/default_runtime.py' +] + +# Note that the mean and variance used here are different from other configs +img_norm_cfg = dict( + mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='RandomResizedCrop', + size=224, + backend='pillow', + interpolation='bicubic'), + dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), + dict( + type='RandAugment', + policies={{_base_.rand_increasing_policies}}, + num_policies=2, + total_level=10, + magnitude_level=9, + magnitude_std=0.5, + hparams=dict( + pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], + interpolation='bicubic')), + dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), + dict( + type='RandomErasing', + erase_prob=0.25, + mode='rand', + min_area_ratio=0.02, + max_area_ratio=1 / 3, + 
fill_color=img_norm_cfg['mean'][::-1], + fill_std=img_norm_cfg['std'][::-1]), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='ToTensor', keys=['gt_label']), + dict(type='Collect', keys=['img', 'gt_label']) +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='Resize', + size=(248, -1), + backend='pillow', + interpolation='bicubic'), + dict(type='CenterCrop', crop_size=224), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) +] + +data = dict( + samples_per_gpu=128, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/mmcls/models/backbones/van.py b/mmcls/models/backbones/van.py index 1be52b68716..7d85dd767b8 100644 --- a/mmcls/models/backbones/van.py +++ b/mmcls/models/backbones/van.py @@ -1,10 +1,14 @@ # Copyright (c) OpenMMLab. All rights reserved. +from collections import OrderedDict + import torch import torch.nn as nn from mmcv.cnn import Conv2d, build_activation_layer, build_norm_layer from mmcv.cnn.bricks import DropPath from mmcv.cnn.bricks.transformer import PatchEmbed -from mmcv.runner import BaseModule, ModuleList +from mmcv.runner import (BaseModule, ModuleList, _load_checkpoint, + load_state_dict) +from mmcv.utils import get_logger from mmcv.utils.parrots_wrapper import _BatchNorm from ..builder import BACKBONES @@ -271,8 +275,10 @@ class VAN(BaseBackbone): - **depths** (List[int]): The number of blocks in each stage. - **ffn_ratios** (List[int]): The number of expansion ratio of feedforward network hidden layer channels. + - **convert_weights (bool) : Whether to convert pretrained weight + keys to use original weights. - Defaults to 'tiny'. + Defaults to 'b0'. patch_sizes (List[int | tuple]): The patch size in patch embeddings. Defaults to [7, 3, 3, 3]. in_channels (int): The num of input channels. Defaults to 3. 
@@ -304,26 +310,45 @@ class VAN(BaseBackbone): (1, 256, 7, 7) """ arch_zoo = { - **dict.fromkeys(['t', 'tiny'], + **dict.fromkeys(['b0'], {'embed_dims': [32, 64, 160, 256], 'depths': [3, 3, 5, 2], - 'ffn_ratios': [8, 8, 4, 4]}), - **dict.fromkeys(['s', 'small'], + 'ffn_ratios': [8, 8, 4, 4], + 'convert_weights': False}), + **dict.fromkeys(['b1'], {'embed_dims': [64, 128, 320, 512], 'depths': [2, 2, 4, 2], - 'ffn_ratios': [8, 8, 4, 4]}), - **dict.fromkeys(['b', 'base'], + 'ffn_ratios': [8, 8, 4, 4], + 'convert_weights': False}), + **dict.fromkeys(['b2'], {'embed_dims': [64, 128, 320, 512], 'depths': [3, 3, 12, 3], - 'ffn_ratios': [8, 8, 4, 4]}), - **dict.fromkeys(['l', 'large'], + 'ffn_ratios': [8, 8, 4, 4], + 'convert_weights': False}), + **dict.fromkeys(['b3'], {'embed_dims': [64, 128, 320, 512], 'depths': [3, 5, 27, 3], - 'ffn_ratios': [8, 8, 4, 4]}), + 'ffn_ratios': [8, 8, 4, 4], + 'convert_weights': False}), + **dict.fromkeys(['b4'], + {'embed_dims': [64, 128, 320, 512], + 'depths': [3, 6, 40, 3], + 'ffn_ratios': [8, 8, 4, 4], + 'convert_weights': True}), + **dict.fromkeys(['b5'], + {'embed_dims': [96, 192, 480, 768], + 'depths': [3, 3, 24, 3], + 'ffn_ratios': [8, 8, 4, 4], + 'convert_weights': True}), + **dict.fromkeys(['b6'], + {'embed_dims': [96, 192, 480, 768], + 'depths': [6, 6, 90, 6], + 'ffn_ratios': [8, 8, 4, 4], + 'convert_weights': True}), } # yapf: disable def __init__(self, - arch='tiny', + arch='b0', patch_sizes=[7, 3, 3, 3], in_channels=3, drop_rate=0., @@ -350,6 +375,7 @@ def __init__(self, self.embed_dims = self.arch_settings['embed_dims'] self.depths = self.arch_settings['depths'] self.ffn_ratios = self.arch_settings['ffn_ratios'] + self.convert_weights = self.arch_settings['convert_weights'] self.num_stages = len(self.depths) self.out_indices = out_indices self.frozen_stages = frozen_stages @@ -386,6 +412,36 @@ def __init__(self, self.add_module(f'blocks{i + 1}', blocks) self.add_module(f'norm{i + 1}', norm) + def init_weights(self): + 
checkpoint_in_init_cfg = ( + self.init_cfg is not None and 'checkpoint' in self.init_cfg) + if self.convert_weights and checkpoint_in_init_cfg: + logger = get_logger(name='mmcls') + ckpt = _load_checkpoint( + self.init_cfg['checkpoint'], logger=logger, map_location='cpu') + if 'state_dict' in ckpt: + _state_dict = ckpt['state_dict'] + elif 'model' in ckpt: + _state_dict = ckpt['model'] + else: + _state_dict = ckpt + + state_dict = OrderedDict() + for k, v in _state_dict.items(): + k = k.replace('block', 'blocks') + k = k.replace('spatial_gating_unit.conv0', + 'spatial_gating_unit.DW_conv') + k = k.replace('spatial_gating_unit.conv_spatial', + 'spatial_gating_unit.DW_D_conv') + k = k.replace('mlp.dwconv.dwconv', 'mlp.dwconv') + k = k.replace('proj.', 'projection.') + state_dict[k] = v + + # load state_dict + load_state_dict(self, state_dict, strict=False, logger=logger) + else: + super().init_weights() + def train(self, mode=True): super(VAN, self).train(mode) self._freeze_stages() From bb73227ac962f134301cbb01393c3ef6454ee47e Mon Sep 17 00:00:00 2001 From: Rist115 Date: Tue, 30 Aug 2022 13:03:38 +0900 Subject: [PATCH 2/9] fix init --- configs/_base_/models/van/van_b4.py | 2 +- configs/_base_/models/van/van_b5.py | 2 +- configs/_base_/models/van/van_b6.py | 2 +- mmcls/models/backbones/van.py | 60 ++++------------------------- 4 files changed, 11 insertions(+), 55 deletions(-) diff --git a/configs/_base_/models/van/van_b4.py b/configs/_base_/models/van/van_b4.py index d32b12cc1ee..417835c9f5a 100644 --- a/configs/_base_/models/van/van_b4.py +++ b/configs/_base_/models/van/van_b4.py @@ -1,7 +1,7 @@ # model settings model = dict( type='ImageClassifier', - backbone=dict(type='VAN', arch='b3', drop_path_rate=0.2), + backbone=dict(type='VAN', arch='b4', drop_path_rate=0.2), neck=dict(type='GlobalAveragePooling'), head=dict( type='LinearClsHead', diff --git a/configs/_base_/models/van/van_b5.py b/configs/_base_/models/van/van_b5.py index 28f55d18979..fe8b9236066 100644 --- 
a/configs/_base_/models/van/van_b5.py +++ b/configs/_base_/models/van/van_b5.py @@ -1,7 +1,7 @@ # model settings model = dict( type='ImageClassifier', - backbone=dict(type='VAN', arch='b3', drop_path_rate=0.2), + backbone=dict(type='VAN', arch='b5', drop_path_rate=0.2), neck=dict(type='GlobalAveragePooling'), head=dict( type='LinearClsHead', diff --git a/configs/_base_/models/van/van_b6.py b/configs/_base_/models/van/van_b6.py index 28f55d18979..89d140096ec 100644 --- a/configs/_base_/models/van/van_b6.py +++ b/configs/_base_/models/van/van_b6.py @@ -1,7 +1,7 @@ # model settings model = dict( type='ImageClassifier', - backbone=dict(type='VAN', arch='b3', drop_path_rate=0.2), + backbone=dict(type='VAN', arch='b6', drop_path_rate=0.2), neck=dict(type='GlobalAveragePooling'), head=dict( type='LinearClsHead', diff --git a/mmcls/models/backbones/van.py b/mmcls/models/backbones/van.py index 7d85dd767b8..da15332eeb6 100644 --- a/mmcls/models/backbones/van.py +++ b/mmcls/models/backbones/van.py @@ -1,14 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. -from collections import OrderedDict - import torch import torch.nn as nn from mmcv.cnn import Conv2d, build_activation_layer, build_norm_layer from mmcv.cnn.bricks import DropPath from mmcv.cnn.bricks.transformer import PatchEmbed -from mmcv.runner import (BaseModule, ModuleList, _load_checkpoint, - load_state_dict) -from mmcv.utils import get_logger +from mmcv.runner import BaseModule, ModuleList from mmcv.utils.parrots_wrapper import _BatchNorm from ..builder import BACKBONES @@ -275,8 +271,6 @@ class VAN(BaseBackbone): - **depths** (List[int]): The number of blocks in each stage. - **ffn_ratios** (List[int]): The number of expansion ratio of feedforward network hidden layer channels. - - **convert_weights (bool) : Whether to convert pretrained weight - keys to use original weights. Defaults to 'b0'. patch_sizes (List[int | tuple]): The patch size in patch embeddings. 
@@ -313,38 +307,31 @@ class VAN(BaseBackbone): **dict.fromkeys(['b0'], {'embed_dims': [32, 64, 160, 256], 'depths': [3, 3, 5, 2], - 'ffn_ratios': [8, 8, 4, 4], - 'convert_weights': False}), + 'ffn_ratios': [8, 8, 4, 4]}), **dict.fromkeys(['b1'], {'embed_dims': [64, 128, 320, 512], 'depths': [2, 2, 4, 2], - 'ffn_ratios': [8, 8, 4, 4], - 'convert_weights': False}), + 'ffn_ratios': [8, 8, 4, 4]}), **dict.fromkeys(['b2'], {'embed_dims': [64, 128, 320, 512], 'depths': [3, 3, 12, 3], - 'ffn_ratios': [8, 8, 4, 4], - 'convert_weights': False}), + 'ffn_ratios': [8, 8, 4, 4]}), **dict.fromkeys(['b3'], {'embed_dims': [64, 128, 320, 512], 'depths': [3, 5, 27, 3], - 'ffn_ratios': [8, 8, 4, 4], - 'convert_weights': False}), + 'ffn_ratios': [8, 8, 4, 4]}), **dict.fromkeys(['b4'], {'embed_dims': [64, 128, 320, 512], 'depths': [3, 6, 40, 3], - 'ffn_ratios': [8, 8, 4, 4], - 'convert_weights': True}), + 'ffn_ratios': [8, 8, 4, 4]}), **dict.fromkeys(['b5'], {'embed_dims': [96, 192, 480, 768], 'depths': [3, 3, 24, 3], - 'ffn_ratios': [8, 8, 4, 4], - 'convert_weights': True}), + 'ffn_ratios': [8, 8, 4, 4]}), **dict.fromkeys(['b6'], {'embed_dims': [96, 192, 480, 768], 'depths': [6, 6, 90, 6], - 'ffn_ratios': [8, 8, 4, 4], - 'convert_weights': True}), + 'ffn_ratios': [8, 8, 4, 4]}), } # yapf: disable def __init__(self, @@ -375,7 +362,6 @@ def __init__(self, self.embed_dims = self.arch_settings['embed_dims'] self.depths = self.arch_settings['depths'] self.ffn_ratios = self.arch_settings['ffn_ratios'] - self.convert_weights = self.arch_settings['convert_weights'] self.num_stages = len(self.depths) self.out_indices = out_indices self.frozen_stages = frozen_stages @@ -412,36 +398,6 @@ def __init__(self, self.add_module(f'blocks{i + 1}', blocks) self.add_module(f'norm{i + 1}', norm) - def init_weights(self): - checkpoint_in_init_cfg = ( - self.init_cfg is not None and 'checkpoint' in self.init_cfg) - if self.convert_weights and checkpoint_in_init_cfg: - logger = get_logger(name='mmcls') - ckpt 
= _load_checkpoint( - self.init_cfg['checkpoint'], logger=logger, map_location='cpu') - if 'state_dict' in ckpt: - _state_dict = ckpt['state_dict'] - elif 'model' in ckpt: - _state_dict = ckpt['model'] - else: - _state_dict = ckpt - - state_dict = OrderedDict() - for k, v in _state_dict.items(): - k = k.replace('block', 'blocks') - k = k.replace('spatial_gating_unit.conv0', - 'spatial_gating_unit.DW_conv') - k = k.replace('spatial_gating_unit.conv_spatial', - 'spatial_gating_unit.DW_D_conv') - k = k.replace('mlp.dwconv.dwconv', 'mlp.dwconv') - k = k.replace('proj.', 'projection.') - state_dict[k] = v - - # load state_dict - load_state_dict(self, state_dict, strict=False, logger=logger) - else: - super().init_weights() - def train(self, mode=True): super(VAN, self).train(mode) self._freeze_stages() From d4be6629c58c7a74846d17a99cd0912e87689cd4 Mon Sep 17 00:00:00 2001 From: Rist115 Date: Tue, 30 Aug 2022 13:59:27 +0900 Subject: [PATCH 3/9] b4 result --- configs/van/README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/configs/van/README.md b/configs/van/README.md index ffb90fe9949..88c7c46c0f4 100644 --- a/configs/van/README.md +++ b/configs/van/README.md @@ -22,9 +22,10 @@ While originally designed for natural language processing (NLP) tasks, the self- | VAN-B1\* | From scratch | 224x224 | 13.86 | 2.52 | 81.01 | 95.63 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b1_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-small_8xb128_in1k_20220501-17bc91aa.pth) | | VAN-B2\* | From scratch | 224x224 | 26.58 | 5.03 | 82.80 | 96.21 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b2_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth) | | VAN-B3\* | From scratch | 224x224 | 44.77 | 8.99 | 83.86 | 96.73 | 
[config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b3_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth) | -| VAN-B4\* | From scratch | 224x224 | | | | | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b4_8xb128_in1k.py) | [model](<>) | -| VAN-B5\* | From scratch | 224x224 | | | | | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b5_8xb128_in1k.py) | [model](<>) | -| VAN-B6\* | From scratch | 224x224 | | | | | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b6_8xb128_in1k.py) | [model](<>) | +| VAN-B4\* | From scratch | 224x224 | 60.28 | 12.22 | 84.13 | 96.86 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b4_8xb128_in1k.py) | [model](<>) | +| VAN-B4\* | ImageNet-21k | 224x224 | | | | | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b4_8xb128_in1k.py) | [model](<>) | +| VAN-B5\* | ImageNet-21k | 224x224 | | | | | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b5_8xb128_in1k.py) | [model](<>) | +| VAN-B6\* | ImageNet-21k | 224x224 | | | | | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b6_8xb128_in1k.py) | [model](<>) | \*Models with * are converted from [the official repo](https://github.com/Visual-Attention-Network/VAN-Classification). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results. 
From 5c4c96877142f35ae955af1cbbc0b3ff146cbdf7 Mon Sep 17 00:00:00 2001 From: Rist115 Date: Wed, 7 Sep 2022 15:04:16 +0900 Subject: [PATCH 4/9] update van --- configs/_base_/models/van/van_b6.py | 2 +- configs/van/README.md | 15 ++++- configs/van/metafile.yml | 13 +++++ configs/van/van-b5_8xb128_in1k.py | 61 -------------------- configs/van/van-b6_8xb128_in1k.py | 61 -------------------- mmcls/models/backbones/van.py | 2 +- tests/test_models/test_backbones/test_van.py | 2 +- 7 files changed, 28 insertions(+), 128 deletions(-) delete mode 100644 configs/van/van-b5_8xb128_in1k.py delete mode 100644 configs/van/van-b6_8xb128_in1k.py diff --git a/configs/_base_/models/van/van_b6.py b/configs/_base_/models/van/van_b6.py index 89d140096ec..a0dfb3c7c6d 100644 --- a/configs/_base_/models/van/van_b6.py +++ b/configs/_base_/models/van/van_b6.py @@ -1,7 +1,7 @@ # model settings model = dict( type='ImageClassifier', - backbone=dict(type='VAN', arch='b6', drop_path_rate=0.2), + backbone=dict(type='VAN', arch='b6', drop_path_rate=0.3), neck=dict(type='GlobalAveragePooling'), head=dict( type='LinearClsHead', diff --git a/configs/van/README.md b/configs/van/README.md index 88c7c46c0f4..e1f0284e519 100644 --- a/configs/van/README.md +++ b/configs/van/README.md @@ -23,12 +23,21 @@ While originally designed for natural language processing (NLP) tasks, the self- | VAN-B2\* | From scratch | 224x224 | 26.58 | 5.03 | 82.80 | 96.21 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b2_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth) | | VAN-B3\* | From scratch | 224x224 | 44.77 | 8.99 | 83.86 | 96.73 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b3_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth) | | VAN-B4\* | From scratch | 224x224 | 60.28 | 12.22 | 84.13 | 96.86 
| [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b4_8xb128_in1k.py) | [model](<>) | -| VAN-B4\* | ImageNet-21k | 224x224 | | | | | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b4_8xb128_in1k.py) | [model](<>) | -| VAN-B5\* | ImageNet-21k | 224x224 | | | | | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b5_8xb128_in1k.py) | [model](<>) | -| VAN-B6\* | ImageNet-21k | 224x224 | | | | | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b6_8xb128_in1k.py) | [model](<>) | \*Models with * are converted from [the official repo](https://github.com/Visual-Attention-Network/VAN-Classification). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results. +### Pre-trained Models + +The pre-trained models on ImageNet-21k are used to fine-tune on the downstream tasks. + +| Model | Pretrain | resolution | Params(M) | Flops(G) | Download | +| :------: | :----------: | :--------: | :-------: | :------: | :---------: | +| VAN-B4\* | ImageNet-21k | 224x224 | 60.28 | 12.22 | [model](<>) | +| VAN-B5\* | ImageNet-21k | 224x224 | 89.97 | 17.21 | [model](<>) | +| VAN-B6\* | ImageNet-21k | 224x224 | 283.9 | 55.28 | [model](<>) | + +\*Models with * are converted from [the official repo](https://github.com/Visual-Attention-Network/VAN-Classification). 
+ ## Citation ``` diff --git a/configs/van/metafile.yml b/configs/van/metafile.yml index 5782f302376..f7a582de28c 100644 --- a/configs/van/metafile.yml +++ b/configs/van/metafile.yml @@ -68,3 +68,16 @@ Models: Task: Image Classification Weights: https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth Config: configs/van/van-b3_8xb128_in1k.py + - Name: van-b4_8xb128_in1k + Metadata: + FLOPs: 60280000 # 60.28 M + Parameters: 12220000000 # 12.22G + In Collection: Visual-Attention-Network + Results: + - Dataset: ImageNet-1k + Metrics: + Top 1 Accuracy: 84.13 + Top 5 Accuracy: 96.86 + Task: Image Classification + Weights: + Config: configs/van/van-b4_8xb128_in1k.py diff --git a/configs/van/van-b5_8xb128_in1k.py b/configs/van/van-b5_8xb128_in1k.py deleted file mode 100644 index f07158fb3e7..00000000000 --- a/configs/van/van-b5_8xb128_in1k.py +++ /dev/null @@ -1,61 +0,0 @@ -_base_ = [ - '../_base_/models/van/van_b5.py', - '../_base_/datasets/imagenet_bs64_swin_224.py', - '../_base_/schedules/imagenet_bs1024_adamw_swin.py', - '../_base_/default_runtime.py' -] - -# Note that the mean and variance used here are different from other configs -img_norm_cfg = dict( - mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='RandomResizedCrop', - size=224, - backend='pillow', - interpolation='bicubic'), - dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), - dict( - type='RandAugment', - policies={{_base_.rand_increasing_policies}}, - num_policies=2, - total_level=10, - magnitude_level=9, - magnitude_std=0.5, - hparams=dict( - pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], - interpolation='bicubic')), - dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), - dict( - type='RandomErasing', - erase_prob=0.25, - mode='rand', - min_area_ratio=0.02, - max_area_ratio=1 / 3, - fill_color=img_norm_cfg['mean'][::-1], - 
fill_std=img_norm_cfg['std'][::-1]), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='ToTensor', keys=['gt_label']), - dict(type='Collect', keys=['img', 'gt_label']) -] - -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='Resize', - size=(248, -1), - backend='pillow', - interpolation='bicubic'), - dict(type='CenterCrop', crop_size=224), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']) -] - -data = dict( - samples_per_gpu=128, - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) diff --git a/configs/van/van-b6_8xb128_in1k.py b/configs/van/van-b6_8xb128_in1k.py deleted file mode 100644 index e7d65f437aa..00000000000 --- a/configs/van/van-b6_8xb128_in1k.py +++ /dev/null @@ -1,61 +0,0 @@ -_base_ = [ - '../_base_/models/van/van_b6.py', - '../_base_/datasets/imagenet_bs64_swin_224.py', - '../_base_/schedules/imagenet_bs1024_adamw_swin.py', - '../_base_/default_runtime.py' -] - -# Note that the mean and variance used here are different from other configs -img_norm_cfg = dict( - mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='RandomResizedCrop', - size=224, - backend='pillow', - interpolation='bicubic'), - dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), - dict( - type='RandAugment', - policies={{_base_.rand_increasing_policies}}, - num_policies=2, - total_level=10, - magnitude_level=9, - magnitude_std=0.5, - hparams=dict( - pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], - interpolation='bicubic')), - dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), - dict( - type='RandomErasing', - erase_prob=0.25, - mode='rand', - min_area_ratio=0.02, - max_area_ratio=1 / 3, - fill_color=img_norm_cfg['mean'][::-1], - fill_std=img_norm_cfg['std'][::-1]), - 
dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='ToTensor', keys=['gt_label']), - dict(type='Collect', keys=['img', 'gt_label']) -] - -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='Resize', - size=(248, -1), - backend='pillow', - interpolation='bicubic'), - dict(type='CenterCrop', crop_size=224), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']) -] - -data = dict( - samples_per_gpu=128, - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) diff --git a/mmcls/models/backbones/van.py b/mmcls/models/backbones/van.py index da15332eeb6..12a4118797c 100644 --- a/mmcls/models/backbones/van.py +++ b/mmcls/models/backbones/van.py @@ -329,7 +329,7 @@ class VAN(BaseBackbone): 'depths': [3, 3, 24, 3], 'ffn_ratios': [8, 8, 4, 4]}), **dict.fromkeys(['b6'], - {'embed_dims': [96, 192, 480, 768], + {'embed_dims': [96, 192, 384, 768], 'depths': [6, 6, 90, 6], 'ffn_ratios': [8, 8, 4, 4]}), } # yapf: disable diff --git a/tests/test_models/test_backbones/test_van.py b/tests/test_models/test_backbones/test_van.py index 136ce973737..349c101cfad 100644 --- a/tests/test_models/test_backbones/test_van.py +++ b/tests/test_models/test_backbones/test_van.py @@ -23,7 +23,7 @@ def check_norm_state(modules, train_state): class TestVAN(TestCase): def setUp(self): - self.cfg = dict(arch='t', drop_path_rate=0.1) + self.cfg = dict(arch='b0', drop_path_rate=0.1) def test_arch(self): # Test invalid default arch From 555b79dd9cc4eb5e85aba665c42fa75f5d44141e Mon Sep 17 00:00:00 2001 From: Rist115 Date: Thu, 8 Sep 2022 18:40:17 +0900 Subject: [PATCH 5/9] keep old config --- configs/_base_/models/van/van_base.py | 13 +++++ configs/_base_/models/van/van_large.py | 13 +++++ configs/_base_/models/van/van_small.py | 21 +++++++ configs/_base_/models/van/van_tiny.py | 21 +++++++ configs/van/README.md | 4 ++ 
configs/van/metafile.yml | 52 +++++++++++++++++ configs/van/van-base_8xb128_in1k.py | 61 ++++++++++++++++++++ configs/van/van-large_8xb128_in1k.py | 61 ++++++++++++++++++++ configs/van/van-small_8xb128_in1k.py | 61 ++++++++++++++++++++ configs/van/van-tiny_8xb128_in1k.py | 61 ++++++++++++++++++++ mmcls/models/backbones/van.py | 16 +++++ tests/test_models/test_backbones/test_van.py | 2 +- 12 files changed, 385 insertions(+), 1 deletion(-) create mode 100644 configs/_base_/models/van/van_base.py create mode 100644 configs/_base_/models/van/van_large.py create mode 100644 configs/_base_/models/van/van_small.py create mode 100644 configs/_base_/models/van/van_tiny.py create mode 100644 configs/van/van-base_8xb128_in1k.py create mode 100644 configs/van/van-large_8xb128_in1k.py create mode 100644 configs/van/van-small_8xb128_in1k.py create mode 100644 configs/van/van-tiny_8xb128_in1k.py diff --git a/configs/_base_/models/van/van_base.py b/configs/_base_/models/van/van_base.py new file mode 100644 index 00000000000..006459255f8 --- /dev/null +++ b/configs/_base_/models/van/van_base.py @@ -0,0 +1,13 @@ +# model settings +model = dict( + type='ImageClassifier', + backbone=dict(type='VAN', arch='base', drop_path_rate=0.1), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=512, + init_cfg=None, # suppress the default init_cfg of LinearClsHead. 
+ loss=dict( + type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), + cal_acc=False)) diff --git a/configs/_base_/models/van/van_large.py b/configs/_base_/models/van/van_large.py new file mode 100644 index 00000000000..4ebafabdaaf --- /dev/null +++ b/configs/_base_/models/van/van_large.py @@ -0,0 +1,13 @@ +# model settings +model = dict( + type='ImageClassifier', + backbone=dict(type='VAN', arch='large', drop_path_rate=0.2), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=512, + init_cfg=None, # suppress the default init_cfg of LinearClsHead. + loss=dict( + type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), + cal_acc=False)) diff --git a/configs/_base_/models/van/van_small.py b/configs/_base_/models/van/van_small.py new file mode 100644 index 00000000000..320e90afdc8 --- /dev/null +++ b/configs/_base_/models/van/van_small.py @@ -0,0 +1,21 @@ +# model settings +model = dict( + type='ImageClassifier', + backbone=dict(type='VAN', arch='small', drop_path_rate=0.1), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=512, + init_cfg=None, # suppress the default init_cfg of LinearClsHead. + loss=dict( + type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), + cal_acc=False), + init_cfg=[ + dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), + dict(type='Constant', layer='LayerNorm', val=1., bias=0.) 
+ ], + train_cfg=dict(augments=[ + dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), + dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) + ])) diff --git a/configs/_base_/models/van/van_tiny.py b/configs/_base_/models/van/van_tiny.py new file mode 100644 index 00000000000..42791ac3beb --- /dev/null +++ b/configs/_base_/models/van/van_tiny.py @@ -0,0 +1,21 @@ +# model settings +model = dict( + type='ImageClassifier', + backbone=dict(type='VAN', arch='tiny', drop_path_rate=0.1), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=256, + init_cfg=None, # suppress the default init_cfg of LinearClsHead. + loss=dict( + type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), + cal_acc=False), + init_cfg=[ + dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), + dict(type='Constant', layer='LayerNorm', val=1., bias=0.) + ], + train_cfg=dict(augments=[ + dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), + dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) + ])) diff --git a/configs/van/README.md b/configs/van/README.md index e1f0284e519..27480500180 100644 --- a/configs/van/README.md +++ b/configs/van/README.md @@ -18,6 +18,10 @@ While originally designed for natural language processing (NLP) tasks, the self- | Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download | | :------: | :----------: | :--------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------------------------------: | :-------------------------------------------------------------------: | +| VAN-T\* | From scratch | 224x224 | 4.11 | 0.88 | 75.41 | 93.02 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-tiny_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-tiny_8xb128_in1k_20220501-385941af.pth) | +| VAN-S\* | From scratch | 
224x224 | 13.86 | 2.52 | 81.01 | 95.63 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-small_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-small_8xb128_in1k_20220501-17bc91aa.pth) | +| VAN-B\* | From scratch | 224x224 | 26.58 | 5.03 | 82.80 | 96.21 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-base_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth) | +| VAN-L\* | From scratch | 224x224 | 44.77 | 8.99 | 83.86 | 96.73 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-large_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth) | | VAN-B0\* | From scratch | 224x224 | 4.11 | 0.88 | 75.41 | 93.02 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b0_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-tiny_8xb128_in1k_20220501-385941af.pth) | | VAN-B1\* | From scratch | 224x224 | 13.86 | 2.52 | 81.01 | 95.63 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b1_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-small_8xb128_in1k_20220501-17bc91aa.pth) | | VAN-B2\* | From scratch | 224x224 | 26.58 | 5.03 | 82.80 | 96.21 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b2_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth) | diff --git a/configs/van/metafile.yml b/configs/van/metafile.yml index f7a582de28c..69bd9697473 100644 --- a/configs/van/metafile.yml +++ b/configs/van/metafile.yml @@ -16,6 +16,58 @@ Collections: Version: v0.23.0 Models: + - Name: van-tiny_8xb128_in1k + Metadata: + FLOPs: 4110000 # 4.11M + Parameters: 880000000 # 
0.88G + In Collection: Visual-Attention-Network + Results: + - Dataset: ImageNet-1k + Metrics: + Top 1 Accuracy: 75.41 + Top 5 Accuracy: 93.02 + Task: Image Classification + Weights: https://download.openmmlab.com/mmclassification/v0/van/van-tiny_8xb128_in1k_20220501-385941af.pth + Config: configs/van/van-tiny_8xb128_in1k.py + - Name: van-small_8xb128_in1k + Metadata: + FLOPs: 13860000 # 13.86M + Parameters: 2520000000 # 2.52G + In Collection: Visual-Attention-Network + Results: + - Dataset: ImageNet-1k + Metrics: + Top 1 Accuracy: 81.01 + Top 5 Accuracy: 95.63 + Task: Image Classification + Weights: https://download.openmmlab.com/mmclassification/v0/van/van-small_8xb128_in1k_20220501-17bc91aa.pth + Config: configs/van/van-small_8xb128_in1k.py + - Name: van-base_8xb128_in1k + Metadata: + FLOPs: 26580000 # 26.58M + Parameters: 5030000000 # 5.03G + In Collection: Visual-Attention-Network + Results: + - Dataset: ImageNet-1k + Metrics: + Top 1 Accuracy: 82.80 + Top 5 Accuracy: 96.21 + Task: Image Classification + Weights: https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth + Config: configs/van/van-base_8xb128_in1k.py + - Name: van-large_8xb128_in1k + Metadata: + FLOPs: 44770000 # 44.77 M + Parameters: 8990000000 # 8.99G + In Collection: Visual-Attention-Network + Results: + - Dataset: ImageNet-1k + Metrics: + Top 1 Accuracy: 83.86 + Top 5 Accuracy: 96.73 + Task: Image Classification + Weights: https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth + Config: configs/van/van-large_8xb128_in1k.py - Name: van-b0_8xb128_in1k Metadata: FLOPs: 4110000 # 4.11M diff --git a/configs/van/van-base_8xb128_in1k.py b/configs/van/van-base_8xb128_in1k.py new file mode 100644 index 00000000000..704f111bf51 --- /dev/null +++ b/configs/van/van-base_8xb128_in1k.py @@ -0,0 +1,61 @@ +_base_ = [ + '../_base_/models/van/van_base.py', + '../_base_/datasets/imagenet_bs64_swin_224.py', + 
'../_base_/schedules/imagenet_bs1024_adamw_swin.py', + '../_base_/default_runtime.py' +] + +# Note that the mean and variance used here are different from other configs +img_norm_cfg = dict( + mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='RandomResizedCrop', + size=224, + backend='pillow', + interpolation='bicubic'), + dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), + dict( + type='RandAugment', + policies={{_base_.rand_increasing_policies}}, + num_policies=2, + total_level=10, + magnitude_level=9, + magnitude_std=0.5, + hparams=dict( + pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], + interpolation='bicubic')), + dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), + dict( + type='RandomErasing', + erase_prob=0.25, + mode='rand', + min_area_ratio=0.02, + max_area_ratio=1 / 3, + fill_color=img_norm_cfg['mean'][::-1], + fill_std=img_norm_cfg['std'][::-1]), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='ToTensor', keys=['gt_label']), + dict(type='Collect', keys=['img', 'gt_label']) +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='Resize', + size=(248, -1), + backend='pillow', + interpolation='bicubic'), + dict(type='CenterCrop', crop_size=224), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) +] + +data = dict( + samples_per_gpu=128, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/van/van-large_8xb128_in1k.py b/configs/van/van-large_8xb128_in1k.py new file mode 100644 index 00000000000..b55aff165ef --- /dev/null +++ b/configs/van/van-large_8xb128_in1k.py @@ -0,0 +1,61 @@ +_base_ = [ + '../_base_/models/van/van_large.py', + '../_base_/datasets/imagenet_bs64_swin_224.py', + 
'../_base_/schedules/imagenet_bs1024_adamw_swin.py', + '../_base_/default_runtime.py' +] + +# Note that the mean and variance used here are different from other configs +img_norm_cfg = dict( + mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='RandomResizedCrop', + size=224, + backend='pillow', + interpolation='bicubic'), + dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), + dict( + type='RandAugment', + policies={{_base_.rand_increasing_policies}}, + num_policies=2, + total_level=10, + magnitude_level=9, + magnitude_std=0.5, + hparams=dict( + pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], + interpolation='bicubic')), + dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), + dict( + type='RandomErasing', + erase_prob=0.25, + mode='rand', + min_area_ratio=0.02, + max_area_ratio=1 / 3, + fill_color=img_norm_cfg['mean'][::-1], + fill_std=img_norm_cfg['std'][::-1]), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='ToTensor', keys=['gt_label']), + dict(type='Collect', keys=['img', 'gt_label']) +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='Resize', + size=(248, -1), + backend='pillow', + interpolation='bicubic'), + dict(type='CenterCrop', crop_size=224), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) +] + +data = dict( + samples_per_gpu=128, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/van/van-small_8xb128_in1k.py b/configs/van/van-small_8xb128_in1k.py new file mode 100644 index 00000000000..3b83e25ab8c --- /dev/null +++ b/configs/van/van-small_8xb128_in1k.py @@ -0,0 +1,61 @@ +_base_ = [ + '../_base_/models/van/van_small.py', + '../_base_/datasets/imagenet_bs64_swin_224.py', + 
'../_base_/schedules/imagenet_bs1024_adamw_swin.py', + '../_base_/default_runtime.py' +] + +# Note that the mean and variance used here are different from other configs +img_norm_cfg = dict( + mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='RandomResizedCrop', + size=224, + backend='pillow', + interpolation='bicubic'), + dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), + dict( + type='RandAugment', + policies={{_base_.rand_increasing_policies}}, + num_policies=2, + total_level=10, + magnitude_level=9, + magnitude_std=0.5, + hparams=dict( + pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], + interpolation='bicubic')), + dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), + dict( + type='RandomErasing', + erase_prob=0.25, + mode='rand', + min_area_ratio=0.02, + max_area_ratio=1 / 3, + fill_color=img_norm_cfg['mean'][::-1], + fill_std=img_norm_cfg['std'][::-1]), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='ToTensor', keys=['gt_label']), + dict(type='Collect', keys=['img', 'gt_label']) +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='Resize', + size=(248, -1), + backend='pillow', + interpolation='bicubic'), + dict(type='CenterCrop', crop_size=224), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) +] + +data = dict( + samples_per_gpu=128, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/van/van-tiny_8xb128_in1k.py b/configs/van/van-tiny_8xb128_in1k.py new file mode 100644 index 00000000000..1e001c1c329 --- /dev/null +++ b/configs/van/van-tiny_8xb128_in1k.py @@ -0,0 +1,61 @@ +_base_ = [ + '../_base_/models/van/van_tiny.py', + '../_base_/datasets/imagenet_bs64_swin_224.py', + 
'../_base_/schedules/imagenet_bs1024_adamw_swin.py', + '../_base_/default_runtime.py' +] + +# Note that the mean and variance used here are different from other configs +img_norm_cfg = dict( + mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='RandomResizedCrop', + size=224, + backend='pillow', + interpolation='bicubic'), + dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), + dict( + type='RandAugment', + policies={{_base_.rand_increasing_policies}}, + num_policies=2, + total_level=10, + magnitude_level=9, + magnitude_std=0.5, + hparams=dict( + pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], + interpolation='bicubic')), + dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), + dict( + type='RandomErasing', + erase_prob=0.25, + mode='rand', + min_area_ratio=0.02, + max_area_ratio=1 / 3, + fill_color=img_norm_cfg['mean'][::-1], + fill_std=img_norm_cfg['std'][::-1]), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='ToTensor', keys=['gt_label']), + dict(type='Collect', keys=['img', 'gt_label']) +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='Resize', + size=(248, -1), + backend='pillow', + interpolation='bicubic'), + dict(type='CenterCrop', crop_size=224), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) +] + +data = dict( + samples_per_gpu=128, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/mmcls/models/backbones/van.py b/mmcls/models/backbones/van.py index 12a4118797c..9f43edd0cc8 100644 --- a/mmcls/models/backbones/van.py +++ b/mmcls/models/backbones/van.py @@ -304,6 +304,22 @@ class VAN(BaseBackbone): (1, 256, 7, 7) """ arch_zoo = { + **dict.fromkeys(['t', 'tiny'], + {'embed_dims': [32, 64, 160, 256], + 'depths': [3, 
3, 5, 2], + 'ffn_ratios': [8, 8, 4, 4]}), + **dict.fromkeys(['s', 'small'], + {'embed_dims': [64, 128, 320, 512], + 'depths': [2, 2, 4, 2], + 'ffn_ratios': [8, 8, 4, 4]}), + **dict.fromkeys(['b', 'base'], + {'embed_dims': [64, 128, 320, 512], + 'depths': [3, 3, 12, 3], + 'ffn_ratios': [8, 8, 4, 4]}), + **dict.fromkeys(['l', 'large'], + {'embed_dims': [64, 128, 320, 512], + 'depths': [3, 5, 27, 3], + 'ffn_ratios': [8, 8, 4, 4]}), **dict.fromkeys(['b0'], {'embed_dims': [32, 64, 160, 256], 'depths': [3, 3, 5, 2], diff --git a/tests/test_models/test_backbones/test_van.py b/tests/test_models/test_backbones/test_van.py index 349c101cfad..136ce973737 100644 --- a/tests/test_models/test_backbones/test_van.py +++ b/tests/test_models/test_backbones/test_van.py @@ -23,7 +23,7 @@ def check_norm_state(modules, train_state): class TestVAN(TestCase): def setUp(self): - self.cfg = dict(arch='b0', drop_path_rate=0.1) + self.cfg = dict(arch='t', drop_path_rate=0.1) def test_arch(self): # Test invalid default arch From ff4e542548b7c8af06b9cf3295d2b91e05acf5c7 Mon Sep 17 00:00:00 2001 From: Rist115 Date: Thu, 8 Sep 2022 18:41:31 +0900 Subject: [PATCH 6/9] keep old config --- mmcls/models/backbones/van.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mmcls/models/backbones/van.py b/mmcls/models/backbones/van.py index 9f43edd0cc8..cb7f98cb731 100644 --- a/mmcls/models/backbones/van.py +++ b/mmcls/models/backbones/van.py @@ -272,7 +272,7 @@ class VAN(BaseBackbone): - **ffn_ratios** (List[int]): The number of expansion ratio of feedforward network hidden layer channels. - Defaults to 'b0'. + Defaults to 'tiny'. patch_sizes (List[int | tuple]): The patch size in patch embeddings. Defaults to [7, 3, 3, 3]. in_channels (int): The num of input channels. Defaults to 3. 
@@ -351,7 +351,7 @@ class VAN(BaseBackbone): } # yapf: disable def __init__(self, - arch='b0', + arch='tiny', patch_sizes=[7, 3, 3, 3], in_channels=3, drop_rate=0., From 7b5e6d484f1df443c4999d91a973659c588d82f3 Mon Sep 17 00:00:00 2001 From: Rist115 Date: Thu, 8 Sep 2022 20:55:46 +0900 Subject: [PATCH 7/9] fix metafile --- configs/van/metafile.yml | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/configs/van/metafile.yml b/configs/van/metafile.yml index 69bd9697473..99f16bd69c3 100644 --- a/configs/van/metafile.yml +++ b/configs/van/metafile.yml @@ -18,8 +18,8 @@ Collections: Models: - Name: van-tiny_8xb128_in1k Metadata: - FLOPs: 4110000 # 4.11M - Parameters: 880000000 # 0.88G + FLOPs: 880000000 # 0.88G + Parameters: 4110000 # 4.11M In Collection: Visual-Attention-Network Results: - Dataset: ImageNet-1k @@ -31,8 +31,8 @@ Models: Config: configs/van/van-tiny_8xb128_in1k.py - Name: van-small_8xb128_in1k Metadata: - FLOPs: 13860000 # 13.86M - Parameters: 2520000000 # 2.52G + FLOPs: 2520000000 # 2.52G + Parameters: 13860000 # 13.86M In Collection: Visual-Attention-Network Results: - Dataset: ImageNet-1k @@ -44,8 +44,8 @@ Models: Config: configs/van/van-small_8xb128_in1k.py - Name: van-base_8xb128_in1k Metadata: - FLOPs: 26580000 # 26.58M - Parameters: 5030000000 # 5.03G + FLOPs: 5030000000 # 5.03G + Parameters: 26580000 # 26.58M In Collection: Visual-Attention-Network Results: - Dataset: ImageNet-1k @@ -57,8 +57,8 @@ Models: Config: configs/van/van-base_8xb128_in1k.py - Name: van-large_8xb128_in1k Metadata: - FLOPs: 44770000 # 44.77 M - Parameters: 8990000000 # 8.99G + FLOPs: 8990000000 # 8.99G + Parameters: 44770000 # 44.77 M In Collection: Visual-Attention-Network Results: - Dataset: ImageNet-1k @@ -70,8 +70,8 @@ Models: Config: configs/van/van-large_8xb128_in1k.py - Name: van-b0_8xb128_in1k Metadata: - FLOPs: 4110000 # 4.11M - Parameters: 880000000 # 0.88G + FLOPs: 880000000 # 0.88G + Parameters: 4110000 # 
4.11M In Collection: Visual-Attention-Network Results: - Dataset: ImageNet-1k @@ -83,8 +83,8 @@ Models: Config: configs/van/van-b0_8xb128_in1k.py - Name: van-b1_8xb128_in1k Metadata: - FLOPs: 13860000 # 13.86M - Parameters: 2520000000 # 2.52G + FLOPs: 2520000000 # 2.52G + Parameters: 13860000 # 13.86M In Collection: Visual-Attention-Network Results: - Dataset: ImageNet-1k @@ -96,8 +96,8 @@ Models: Config: configs/van/van-b1_8xb128_in1k.py - Name: van-b2_8xb128_in1k Metadata: - FLOPs: 26580000 # 26.58M - Parameters: 5030000000 # 5.03G + FLOPs: 5030000000 # 5.03G + Parameters: 26580000 # 26.58M In Collection: Visual-Attention-Network Results: - Dataset: ImageNet-1k @@ -109,8 +109,8 @@ Models: Config: configs/van/van-b2_8xb128_in1k.py - Name: van-b3_8xb128_in1k Metadata: - FLOPs: 44770000 # 44.77 M - Parameters: 8990000000 # 8.99G + FLOPs: 8990000000 # 8.99G + Parameters: 44770000 # 44.77 M In Collection: Visual-Attention-Network Results: - Dataset: ImageNet-1k @@ -122,8 +122,8 @@ Models: Config: configs/van/van-b3_8xb128_in1k.py - Name: van-b4_8xb128_in1k Metadata: - FLOPs: 60280000 # 60.28 M - Parameters: 12220000000 # 12.22G + FLOPs: 12220000000 # 12.22G + Parameters: 60280000 # 60.28 M In Collection: Visual-Attention-Network Results: - Dataset: ImageNet-1k From 398392358734e6e2b3dbefe3bf125b73fdff9dca Mon Sep 17 00:00:00 2001 From: Ezra-Yu <18586273+Ezra-Yu@users.noreply.github.com> Date: Fri, 9 Sep 2022 17:51:24 +0800 Subject: [PATCH 8/9] update VAN configs --- configs/_base_/models/van/van_base.py | 14 +----- configs/_base_/models/van/van_large.py | 14 +----- configs/_base_/models/van/van_small.py | 22 +------- configs/_base_/models/van/van_tiny.py | 22 +------- configs/van/README.md | 16 +++--- configs/van/metafile.yml | 69 ++++---------------------- configs/van/van-base_8xb128_in1k.py | 65 ++---------------------- configs/van/van-large_8xb128_in1k.py | 65 ++---------------------- configs/van/van-small_8xb128_in1k.py | 65 ++---------------------- 
configs/van/van-tiny_8xb128_in1k.py | 65 ++---------------------- mmcls/models/backbones/van.py | 28 +++-------- 11 files changed, 45 insertions(+), 400 deletions(-) diff --git a/configs/_base_/models/van/van_base.py b/configs/_base_/models/van/van_base.py index 006459255f8..5c2bcf0edd7 100644 --- a/configs/_base_/models/van/van_base.py +++ b/configs/_base_/models/van/van_base.py @@ -1,13 +1 @@ -# model settings -model = dict( - type='ImageClassifier', - backbone=dict(type='VAN', arch='base', drop_path_rate=0.1), - neck=dict(type='GlobalAveragePooling'), - head=dict( - type='LinearClsHead', - num_classes=1000, - in_channels=512, - init_cfg=None, # suppress the default init_cfg of LinearClsHead. - loss=dict( - type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), - cal_acc=False)) +_base_ = ['./van_b2.py'] diff --git a/configs/_base_/models/van/van_large.py b/configs/_base_/models/van/van_large.py index 4ebafabdaaf..bc9536c6410 100644 --- a/configs/_base_/models/van/van_large.py +++ b/configs/_base_/models/van/van_large.py @@ -1,13 +1 @@ -# model settings -model = dict( - type='ImageClassifier', - backbone=dict(type='VAN', arch='large', drop_path_rate=0.2), - neck=dict(type='GlobalAveragePooling'), - head=dict( - type='LinearClsHead', - num_classes=1000, - in_channels=512, - init_cfg=None, # suppress the default init_cfg of LinearClsHead.
- loss=dict( - type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), - cal_acc=False)) +_base_ = ['./van_b3.py'] diff --git a/configs/_base_/models/van/van_small.py b/configs/_base_/models/van/van_small.py index 320e90afdc8..3973c22a11f 100644 --- a/configs/_base_/models/van/van_small.py +++ b/configs/_base_/models/van/van_small.py @@ -1,21 +1 @@ -# model settings -model = dict( - type='ImageClassifier', - backbone=dict(type='VAN', arch='small', drop_path_rate=0.1), - neck=dict(type='GlobalAveragePooling'), - head=dict( - type='LinearClsHead', - num_classes=1000, - in_channels=512, - init_cfg=None, # suppress the default init_cfg of LinearClsHead. - loss=dict( - type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), - cal_acc=False), - init_cfg=[ - dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), - dict(type='Constant', layer='LayerNorm', val=1., bias=0.) - ], - train_cfg=dict(augments=[ - dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), - dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) - ])) +_base_ = ['./van_b1.py'] diff --git a/configs/_base_/models/van/van_tiny.py b/configs/_base_/models/van/van_tiny.py index 42791ac3beb..ace9ebbb172 100644 --- a/configs/_base_/models/van/van_tiny.py +++ b/configs/_base_/models/van/van_tiny.py @@ -1,21 +1 @@ -# model settings -model = dict( - type='ImageClassifier', - backbone=dict(type='VAN', arch='tiny', drop_path_rate=0.1), - neck=dict(type='GlobalAveragePooling'), - head=dict( - type='LinearClsHead', - num_classes=1000, - in_channels=256, - init_cfg=None, # suppress the default init_cfg of LinearClsHead. - loss=dict( - type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), - cal_acc=False), - init_cfg=[ - dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), - dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
- ], - train_cfg=dict(augments=[ - dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), - dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) - ])) +_base_ = ['./van_b0.py'] diff --git a/configs/van/README.md b/configs/van/README.md index 27480500180..a84cf329932 100644 --- a/configs/van/README.md +++ b/configs/van/README.md @@ -18,15 +18,11 @@ While originally designed for natural language processing (NLP) tasks, the self- | Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download | | :------: | :----------: | :--------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------------------------------: | :-------------------------------------------------------------------: | -| VAN-T\* | From scratch | 224x224 | 4.11 | 0.88 | 75.41 | 93.02 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-tiny_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-tiny_8xb128_in1k_20220501-385941af.pth) | -| VAN-S\* | From scratch | 224x224 | 13.86 | 2.52 | 81.01 | 95.63 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-small_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-small_8xb128_in1k_20220501-17bc91aa.pth) | -| VAN-B\* | From scratch | 224x224 | 26.58 | 5.03 | 82.80 | 96.21 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-base_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth) | -| VAN-L\* | From scratch | 224x224 | 44.77 | 8.99 | 83.86 | 96.73 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-large_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth) | | VAN-B0\* | From scratch | 224x224 | 4.11 | 0.88 | 75.41 | 93.02 |
[config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b0_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-tiny_8xb128_in1k_20220501-385941af.pth) | | VAN-B1\* | From scratch | 224x224 | 13.86 | 2.52 | 81.01 | 95.63 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b1_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-small_8xb128_in1k_20220501-17bc91aa.pth) | | VAN-B2\* | From scratch | 224x224 | 26.58 | 5.03 | 82.80 | 96.21 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b2_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth) | | VAN-B3\* | From scratch | 224x224 | 44.77 | 8.99 | 83.86 | 96.73 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b3_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth) | -| VAN-B4\* | From scratch | 224x224 | 60.28 | 12.22 | 84.13 | 96.86 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b4_8xb128_in1k.py) | [model](<>) | +| VAN-B4\* | From scratch | 224x224 | 60.28 | 12.22 | 84.13 | 96.86 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/van/van-b4_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/van/van-b4_3rdparty_in1k_20220909-f4665b92.pth) | \*Models with * are converted from [the official repo](https://github.com/Visual-Attention-Network/VAN-Classification). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results. 
@@ -34,11 +30,11 @@ While originally designed for natural language processing (NLP) tasks, the self- The pre-trained models on ImageNet-21k are used to fine-tune on the downstream tasks. -| Model | Pretrain | resolution | Params(M) | Flops(G) | Download | -| :------: | :----------: | :--------: | :-------: | :------: | :---------: | -| VAN-B4\* | ImageNet-21k | 224x224 | 60.28 | 12.22 | [model](<>) | -| VAN-B5\* | ImageNet-21k | 224x224 | 89.97 | 17.21 | [model](<>) | -| VAN-B6\* | ImageNet-21k | 224x224 | 283.9 | 55.28 | [model](<>) | +| Model | Pretrain | resolution | Params(M) | Flops(G) | Download | +| :------: | :----------: | :--------: | :-------: | :------: | :---------------------------------------------------------------------------------------------------------: | +| VAN-B4\* | ImageNet-21k | 224x224 | 60.28 | 12.22 | [model](https://download.openmmlab.com/mmclassification/v0/van/van-b4_3rdparty_in21k_20220909-db926b18.pth) | +| VAN-B5\* | ImageNet-21k | 224x224 | 89.97 | 17.21 | [model](https://download.openmmlab.com/mmclassification/v0/van/van-b5_3rdparty_in21k_20220909-18e904e3.pth) | +| VAN-B6\* | ImageNet-21k | 224x224 | 283.9 | 55.28 | [model](https://download.openmmlab.com/mmclassification/v0/van/van-b6_3rdparty_in21k_20220909-96c2cb3a.pth) | \*Models with * are converted from [the official repo](https://github.com/Visual-Attention-Network/VAN-Classification). 
diff --git a/configs/van/metafile.yml b/configs/van/metafile.yml index 99f16bd69c3..c32df84abfd 100644 --- a/configs/van/metafile.yml +++ b/configs/van/metafile.yml @@ -7,6 +7,7 @@ Collections: - Weight Decay Architecture: - Visual Attention Network + - LKA Paper: URL: https://arxiv.org/pdf/2202.09741v2.pdf Title: "Visual Attention Network" @@ -16,59 +17,7 @@ Collections: Version: v0.23.0 Models: - - Name: van-tiny_8xb128_in1k - Metadata: - FLOPs: 880000000 # 0.88G - Parameters: 4110000 # 4.11M - In Collection: Visual-Attention-Network - Results: - - Dataset: ImageNet-1k - Metrics: - Top 1 Accuracy: 75.41 - Top 5 Accuracy: 93.02 - Task: Image Classification - Weights: https://download.openmmlab.com/mmclassification/v0/van/van-tiny_8xb128_in1k_20220501-385941af.pth - Config: configs/van/van-tiny_8xb128_in1k.py - - Name: van-small_8xb128_in1k - Metadata: - FLOPs: 2520000000 # 2.52G - Parameters: 13860000 # 13.86M - In Collection: Visual-Attention-Network - Results: - - Dataset: ImageNet-1k - Metrics: - Top 1 Accuracy: 81.01 - Top 5 Accuracy: 95.63 - Task: Image Classification - Weights: https://download.openmmlab.com/mmclassification/v0/van/van-small_8xb128_in1k_20220501-17bc91aa.pth - Config: configs/van/van-small_8xb128_in1k.py - - Name: van-base_8xb128_in1k - Metadata: - FLOPs: 5030000000 # 5.03G - Parameters: 26580000 # 26.58M - In Collection: Visual-Attention-Network - Results: - - Dataset: ImageNet-1k - Metrics: - Top 1 Accuracy: 82.80 - Top 5 Accuracy: 96.21 - Task: Image Classification - Weights: https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth - Config: configs/van/van-base_8xb128_in1k.py - - Name: van-large_8xb128_in1k - Metadata: - FLOPs: 8990000000 # 8.99G - Parameters: 44770000 # 44.77 M - In Collection: Visual-Attention-Network - Results: - - Dataset: ImageNet-1k - Metrics: - Top 1 Accuracy: 83.86 - Top 5 Accuracy: 96.73 - Task: Image Classification - Weights: 
https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth - Config: configs/van/van-large_8xb128_in1k.py - - Name: van-b0_8xb128_in1k + - Name: van-b0_3rdparty_in1k Metadata: FLOPs: 880000000 # 0.88G Parameters: 4110000 # 4.11M @@ -81,7 +30,7 @@ Models: Task: Image Classification Weights: https://download.openmmlab.com/mmclassification/v0/van/van-tiny_8xb128_in1k_20220501-385941af.pth Config: configs/van/van-b0_8xb128_in1k.py - - Name: van-b1_8xb128_in1k + - Name: van-b1_3rdparty_in1k Metadata: FLOPs: 2520000000 # 2.52G Parameters: 13860000 # 13.86M @@ -94,7 +43,7 @@ Models: Task: Image Classification Weights: https://download.openmmlab.com/mmclassification/v0/van/van-small_8xb128_in1k_20220501-17bc91aa.pth Config: configs/van/van-b1_8xb128_in1k.py - - Name: van-b2_8xb128_in1k + - Name: van-b2_3rdparty_in1k Metadata: FLOPs: 5030000000 # 5.03G Parameters: 26580000 # 26.58M @@ -107,10 +56,10 @@ Models: Task: Image Classification Weights: https://download.openmmlab.com/mmclassification/v0/van/van-base_8xb128_in1k_20220501-6a4cc31b.pth Config: configs/van/van-b2_8xb128_in1k.py - - Name: van-b3_8xb128_in1k + - Name: van-b3_3rdparty_in1k Metadata: FLOPs: 8990000000 # 8.99G - Parameters: 44770000 # 44.77 M + Parameters: 44770000 # 44.77M In Collection: Visual-Attention-Network Results: - Dataset: ImageNet-1k @@ -120,10 +69,10 @@ Models: Task: Image Classification Weights: https://download.openmmlab.com/mmclassification/v0/van/van-large_8xb128_in1k_20220501-f212ba21.pth Config: configs/van/van-b3_8xb128_in1k.py - - Name: van-b4_8xb128_in1k + - Name: van-b4_3rdparty_in1k Metadata: FLOPs: 12220000000 # 12.22G - Parameters: 60280000 # 60.28 M + Parameters: 60280000 # 60.28M In Collection: Visual-Attention-Network Results: - Dataset: ImageNet-1k @@ -131,5 +80,5 @@ Models: Top 1 Accuracy: 84.13 Top 5 Accuracy: 96.86 Task: Image Classification - Weights: + Weights: 
https://download.openmmlab.com/mmclassification/v0/van/van-b4_3rdparty_in1k_20220909-f4665b92.pth Config: configs/van/van-b4_8xb128_in1k.py diff --git a/configs/van/van-base_8xb128_in1k.py b/configs/van/van-base_8xb128_in1k.py index 704f111bf51..e331980db2d 100644 --- a/configs/van/van-base_8xb128_in1k.py +++ b/configs/van/van-base_8xb128_in1k.py @@ -1,61 +1,6 @@ -_base_ = [ - '../_base_/models/van/van_base.py', - '../_base_/datasets/imagenet_bs64_swin_224.py', - '../_base_/schedules/imagenet_bs1024_adamw_swin.py', - '../_base_/default_runtime.py' -] +_base_ = ['./van-b2_8xb128_in1k.py'] -# Note that the mean and variance used here are different from other configs -img_norm_cfg = dict( - mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='RandomResizedCrop', - size=224, - backend='pillow', - interpolation='bicubic'), - dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), - dict( - type='RandAugment', - policies={{_base_.rand_increasing_policies}}, - num_policies=2, - total_level=10, - magnitude_level=9, - magnitude_std=0.5, - hparams=dict( - pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], - interpolation='bicubic')), - dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), - dict( - type='RandomErasing', - erase_prob=0.25, - mode='rand', - min_area_ratio=0.02, - max_area_ratio=1 / 3, - fill_color=img_norm_cfg['mean'][::-1], - fill_std=img_norm_cfg['std'][::-1]), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='ToTensor', keys=['gt_label']), - dict(type='Collect', keys=['img', 'gt_label']) -] - -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='Resize', - size=(248, -1), - backend='pillow', - interpolation='bicubic'), - dict(type='CenterCrop', crop_size=224), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']) 
-] - -data = dict( - samples_per_gpu=128, - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) +_deprecation_ = dict( + expected='van-b2_8xb128_in1k.py', + reference='https://github.com/open-mmlab/mmclassification/pull/1017', +) diff --git a/configs/van/van-large_8xb128_in1k.py b/configs/van/van-large_8xb128_in1k.py index b55aff165ef..84f8c7eddd0 100644 --- a/configs/van/van-large_8xb128_in1k.py +++ b/configs/van/van-large_8xb128_in1k.py @@ -1,61 +1,6 @@ -_base_ = [ - '../_base_/models/van/van_large.py', - '../_base_/datasets/imagenet_bs64_swin_224.py', - '../_base_/schedules/imagenet_bs1024_adamw_swin.py', - '../_base_/default_runtime.py' -] +_base_ = ['./van-b3_8xb128_in1k.py'] -# Note that the mean and variance used here are different from other configs -img_norm_cfg = dict( - mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='RandomResizedCrop', - size=224, - backend='pillow', - interpolation='bicubic'), - dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), - dict( - type='RandAugment', - policies={{_base_.rand_increasing_policies}}, - num_policies=2, - total_level=10, - magnitude_level=9, - magnitude_std=0.5, - hparams=dict( - pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], - interpolation='bicubic')), - dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), - dict( - type='RandomErasing', - erase_prob=0.25, - mode='rand', - min_area_ratio=0.02, - max_area_ratio=1 / 3, - fill_color=img_norm_cfg['mean'][::-1], - fill_std=img_norm_cfg['std'][::-1]), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='ToTensor', keys=['gt_label']), - dict(type='Collect', keys=['img', 'gt_label']) -] - -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='Resize', - size=(248, -1), - backend='pillow', - interpolation='bicubic'), -
dict(type='CenterCrop', crop_size=224), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']) -] - -data = dict( - samples_per_gpu=128, - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) +_deprecation_ = dict( + expected='van-b3_8xb128_in1k.py', + reference='https://github.com/open-mmlab/mmclassification/pull/1017', +) diff --git a/configs/van/van-small_8xb128_in1k.py b/configs/van/van-small_8xb128_in1k.py index 3b83e25ab8c..75d3220b47c 100644 --- a/configs/van/van-small_8xb128_in1k.py +++ b/configs/van/van-small_8xb128_in1k.py @@ -1,61 +1,6 @@ -_base_ = [ - '../_base_/models/van/van_small.py', - '../_base_/datasets/imagenet_bs64_swin_224.py', - '../_base_/schedules/imagenet_bs1024_adamw_swin.py', - '../_base_/default_runtime.py' -] +_base_ = ['./van-b1_8xb128_in1k.py'] -# Note that the mean and variance used here are different from other configs -img_norm_cfg = dict( - mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='RandomResizedCrop', - size=224, - backend='pillow', - interpolation='bicubic'), - dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), - dict( - type='RandAugment', - policies={{_base_.rand_increasing_policies}}, - num_policies=2, - total_level=10, - magnitude_level=9, - magnitude_std=0.5, - hparams=dict( - pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], - interpolation='bicubic')), - dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), - dict( - type='RandomErasing', - erase_prob=0.25, - mode='rand', - min_area_ratio=0.02, - max_area_ratio=1 / 3, - fill_color=img_norm_cfg['mean'][::-1], - fill_std=img_norm_cfg['std'][::-1]), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='ToTensor', keys=['gt_label']), - dict(type='Collect', keys=['img',
'gt_label']) -] - -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='Resize', - size=(248, -1), - backend='pillow', - interpolation='bicubic'), - dict(type='CenterCrop', crop_size=224), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']) -] - -data = dict( - samples_per_gpu=128, - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) +_deprecation_ = dict( + expected='van-b1_8xb128_in1k.py', + reference='https://github.com/open-mmlab/mmclassification/pull/1017', +) diff --git a/configs/van/van-tiny_8xb128_in1k.py b/configs/van/van-tiny_8xb128_in1k.py index 1e001c1c329..9f83e77c6ba 100644 --- a/configs/van/van-tiny_8xb128_in1k.py +++ b/configs/van/van-tiny_8xb128_in1k.py @@ -1,61 +1,6 @@ -_base_ = [ - '../_base_/models/van/van_tiny.py', - '../_base_/datasets/imagenet_bs64_swin_224.py', - '../_base_/schedules/imagenet_bs1024_adamw_swin.py', - '../_base_/default_runtime.py' -] +_base_ = ['./van-b0_8xb128_in1k.py'] -# Note that the mean and variance used here are different from other configs -img_norm_cfg = dict( - mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='RandomResizedCrop', - size=224, - backend='pillow', - interpolation='bicubic'), - dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), - dict( - type='RandAugment', - policies={{_base_.rand_increasing_policies}}, - num_policies=2, - total_level=10, - magnitude_level=9, - magnitude_std=0.5, - hparams=dict( - pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], - interpolation='bicubic')), - dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), - dict( - type='RandomErasing', - erase_prob=0.25, - mode='rand', - min_area_ratio=0.02, - max_area_ratio=1 / 3, - fill_color=img_norm_cfg['mean'][::-1], - fill_std=img_norm_cfg['std'][::-1]), - 
dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='ToTensor', keys=['gt_label']), - dict(type='Collect', keys=['img', 'gt_label']) -] - -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='Resize', - size=(248, -1), - backend='pillow', - interpolation='bicubic'), - dict(type='CenterCrop', crop_size=224), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']) -] - -data = dict( - samples_per_gpu=128, - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) +_deprecation_ = dict( + expected='van-b0_8xb128_in1k.py', + reference='https://github.com/open-mmlab/mmclassification/pull/1017', +) diff --git a/mmcls/models/backbones/van.py b/mmcls/models/backbones/van.py index cb7f98cb731..127e07dceb6 100644 --- a/mmcls/models/backbones/van.py +++ b/mmcls/models/backbones/van.py @@ -264,8 +264,8 @@ class VAN(BaseBackbone): Args: arch (str | dict): Visual Attention Network architecture. - If use string, choose from 'tiny', 'small', 'base' and 'large'. - If use dict, it should have below keys: + If use string, choose from 'b0', 'b1', 'b2', 'b3' and etc., + if use dict, it should have below keys: - **embed_dims** (List[int]): The dimensions of embedding. - **depths** (List[int]): The number of blocks in each stage.
@@ -304,35 +304,19 @@ class VAN(BaseBackbone): (1, 256, 7, 7) """ arch_zoo = { - **dict.fromkeys(['t', 'tiny'], + **dict.fromkeys(['b0', 't', 'tiny'], {'embed_dims': [32, 64, 160, 256], 'depths': [3, 3, 5, 2], 'ffn_ratios': [8, 8, 4, 4]}), - **dict.fromkeys(['s', 'small'], + **dict.fromkeys(['b1', 's', 'small'], {'embed_dims': [64, 128, 320, 512], 'depths': [2, 2, 4, 2], 'ffn_ratios': [8, 8, 4, 4]}), - **dict.fromkeys(['b', 'base'], + **dict.fromkeys(['b2', 'b', 'base'], {'embed_dims': [64, 128, 320, 512], 'depths': [3, 3, 12, 3], 'ffn_ratios': [8, 8, 4, 4]}), - **dict.fromkeys(['l', 'large'], - {'embed_dims': [64, 128, 320, 512], - 'depths': [3, 5, 27, 3], - 'ffn_ratios': [8, 8, 4, 4]}), - **dict.fromkeys(['b0'], - {'embed_dims': [32, 64, 160, 256], - 'depths': [3, 3, 5, 2], - 'ffn_ratios': [8, 8, 4, 4]}), - **dict.fromkeys(['b1'], - {'embed_dims': [64, 128, 320, 512], - 'depths': [2, 2, 4, 2], - 'ffn_ratios': [8, 8, 4, 4]}), - **dict.fromkeys(['b2'], - {'embed_dims': [64, 128, 320, 512], - 'depths': [3, 3, 12, 3], - 'ffn_ratios': [8, 8, 4, 4]}), - **dict.fromkeys(['b3'], + **dict.fromkeys(['b3', 'l', 'large'], {'embed_dims': [64, 128, 320, 512], 'depths': [3, 5, 27, 3], 'ffn_ratios': [8, 8, 4, 4]}), From 9b17accc7e7a9ce09d5a0e63aa8beabd2f253a16 Mon Sep 17 00:00:00 2001 From: Ezra-Yu <18586273+Ezra-Yu@users.noreply.github.com> Date: Fri, 9 Sep 2022 17:57:20 +0800 Subject: [PATCH 9/9] update example --- mmcls/models/backbones/van.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mmcls/models/backbones/van.py b/mmcls/models/backbones/van.py index 127e07dceb6..925240ed80d 100644 --- a/mmcls/models/backbones/van.py +++ b/mmcls/models/backbones/van.py @@ -295,8 +295,7 @@ class VAN(BaseBackbone): Examples: >>> from mmcls.models import VAN >>> import torch - >>> cfg = dict(arch='tiny') - >>> model = VAN(**cfg) + >>> model = VAN(arch='b0') >>> inputs = torch.rand(1, 3, 224, 224) >>> outputs = model(inputs) >>> for out in outputs: