-
Notifications
You must be signed in to change notification settings - Fork 11
/
japan_PP-OCRv3_rec.yml
130 lines (122 loc) · 2.55 KB
/
japan_PP-OCRv3_rec.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# Global: run-wide settings — device, schedule, logging cadence,
# checkpoint/export locations and the character set.
Global:
  debug: false
  use_gpu: true
  # use_amp: true
  epoch_num: 150
  log_smooth_window: 20
  print_batch_step: 100
  save_model_dir: ./output/ja_JP/model/epoch
  save_epoch_step: 3
  # NOTE(review): presumably [start_iteration, interval_in_iterations] as in
  # other PaddleOCR configs — confirm against the trainer before tuning.
  eval_batch_step: [1000, 2000]
  cal_metric_during_train: true
  pretrained_model: ./pretrained_model/japan_PP-OCRv3_rec_train/best_accuracy
  # Uncomment to resume from the latest saved checkpoint instead.
  # checkpoints: ./output/ja_JP/model/epoch/latest
  save_inference_dir: ./output/ja_JP/model/inference
  use_visualdl: false
  character_dict_path: ./output/ja_JP/keys.txt
  # Anchored so the SARHead entry under Architecture can alias the same value.
  max_text_length: &max_text_length 25
  infer_mode: false
  use_space_char: false
  distributed: true
# Optimizer: Adam with a Cosine learning-rate schedule and L2 regularization.
# Each `name` key below belongs to its own nested mapping; they must not sit
# at the same level or they become duplicate keys.
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Cosine
    learning_rate: 0.0002
    warmup_epoch: 5
  regularizer:
    name: L2
    factor: 3.0e-05
# Architecture: SVTR recognition model with a MobileNetV1Enhance backbone and
# a MultiHead that lists a CTC head and a SAR head.
Architecture:
  model_type: rec
  algorithm: SVTR
  # No transform stage is configured.
  Transform: null
  Backbone:
    name: MobileNetV1Enhance
    scale: 0.5
    last_conv_stride: [1, 2]
    last_pool_type: avg
  Head:
    name: MultiHead
    head_list:
      # CTC head: carries its own neck settings and head settings.
      - CTCHead:
          Neck:
            name: svtr
            dims: 64
            depth: 2
            hidden_dims: 120
            use_guide: true
          Head:
            fc_decay: 0.00001
      # SAR head: reuses the max_text_length value anchored under Global.
      - SARHead:
          enc_dim: 512
          max_text_length: *max_text_length
# Loss: MultiLoss combining one loss entry per head listed in the
# architecture's head_list (CTC and SAR). Neither entry sets parameters,
# so each value is an explicit null.
Loss:
  name: MultiLoss
  loss_config_list:
    - CTCLoss: null
    - SARLoss: null
# PostProcess: decoder applied to model output (CTC label decoding).
PostProcess:
  name: CTCLabelDecode
# Metric: recognition metric; `acc` is the indicator used as the main score.
Metric:
  name: RecMetric
  main_indicator: acc
  ignore_space: false
# Train: training dataset, its per-sample transform pipeline, and the loader.
Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./
    # NOTE(review): presumably the index in `transforms` at which ops that need
    # extra samples (RecConAug is at index 1) are applied — confirm.
    ext_op_transform_idx: 1
    label_file_list:
      - ./output/ja_JP/rec_gt_train.txt
    # Ops are listed in the order they appear in the pipeline; entries with a
    # null value take no parameters from this file.
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - RecConAug:
          prob: 0.5
          ext_data_num: 2
          # Axis order here differs from RecResizeImg below
          # (presumably HWC vs. CHW — confirm).
          image_shape: [48, 320, 3]
      - RecAug: null
      - MultiLabelEncode: null
      - RecResizeImg:
          image_shape: [3, 48, 320]
      - KeepKeys:
          keep_keys:
            - image
            - label_ctc
            - label_sar
            - length
            - valid_ratio
  loader:
    shuffle: true
    batch_size_per_card: 96
    drop_last: true
    num_workers: 16
# Eval: evaluation dataset and loader. The pipeline matches Train minus the
# augmentation ops (no RecConAug / RecAug), with shuffling and drop_last off.
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./
    label_file_list:
      - ./output/ja_JP/rec_gt_test.txt
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - MultiLabelEncode: null
      - RecResizeImg:
          image_shape: [3, 48, 320]
      - KeepKeys:
          keep_keys:
            - image
            - label_ctc
            - label_sar
            - length
            - valid_ratio
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 96
    num_workers: 16