-
Notifications
You must be signed in to change notification settings - Fork 64
/
rte-adan.yaml
59 lines (49 loc) · 990 Bytes
/
rte-adan.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# @package _group_
# Run-wide settings: half-precision (fp16) options and logging output.
common:
  fp16: true
  # The next three keys accompany fp16; by their names they tune loss
  # scaling — confirm exact semantics against the consuming trainer.
  fp16_init_scale: 4
  threshold_loss_scale: 1
  fp16_scale_window: 128
  log_format: json
  log_interval: 200
# Task selection and its inputs.
# `???` is the OmegaConf marker for a mandatory value with no default in this
# file; it has to be supplied from outside (e.g. a command-line override).
task:
  _name: sentence_prediction
  data: ???
  init_token: 0
  separator_token: 2
  num_classes: 2
  max_positions: 512
# Checkpoint loading and saving.
# `restore_file: ???` is a mandatory value (OmegaConf marker) that has no
# default in this file and must be provided by the caller.
checkpoint:
  restore_file: ???
  # Optimizer, dataloader and meter state are all flagged for reset.
  reset_optimizer: true
  reset_dataloader: true
  reset_meters: true
  # Best checkpoint is chosen by `accuracy`, where larger is better.
  best_checkpoint_metric: accuracy
  maximize_best_checkpoint_metric: true
  no_epoch_checkpoints: true
# Distributed-training settings; world size is 1 in this config.
distributed_training:
  find_unused_parameters: true
  distributed_world_size: 1
# Loss/criterion selection; uses the same name as the task's `_name`.
criterion:
  _name: sentence_prediction
# Batching limits: a per-batch sentence count and a per-batch token cap.
dataset:
  batch_size: 16
  required_batch_size_multiple: 1
  max_tokens: 4400
# Optimizer selection and its hyper-parameters.
optimizer:
  _name: adan
  weight_decay: 0.01
  # Plain scalar: YAML loads this as the string "(0.98,0.99,0.99)", not as a
  # sequence. Presumably the optimizer parses the tuple itself — verify.
  adan_betas: (0.98,0.99,0.99)
  # Written with an explicit mantissa decimal point so that YAML 1.1 loaders
  # (which require the dot) also read it as a float rather than a string.
  adan_eps: 1.0e-08
# Learning-rate schedule selection and its warmup length (in updates).
lr_scheduler:
  _name: polynomial_decay
  warmup_updates: 122
# Training-loop limits and the learning rate.
optimization:
  clip_norm: 0.0
  # `lr` is a one-element list. The value carries an explicit decimal point so
  # that YAML 1.1 loaders also parse it as a float rather than a string.
  lr: [2.0e-05]
  # Both an update cap and an epoch cap are set.
  max_update: 2036
  max_epoch: 10
# Model architecture selection and dropout rates.
model:
  _name: roberta
  dropout: 0.1
  attention_dropout: 0.1