################################
# IndoNLU
################################
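# Fine-tune NusaBERT base and large on the IndoNLU token classification tasks:
# POS tagging (POSP, BaPOS), term/keyphrase extraction (TermA, KEPS), and NER (NERGrit, NERP).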
for size in base large
do
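  # POSP: part-of-speech tagging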
  python scripts/run_token_classification.py \
    --model-checkpoint LazarusNLP/NusaBERT-$size \
    --dataset-name indonlp/indonlu \
    --dataset-config posp \
    --input-column-name tokens \
    --target-column-name pos_tags \
    --output-dir outputs/nusabert-$size-posp \
    --num-train-epochs 10 \
    --optim adamw_torch_fused \
    --learning-rate 2e-5 \
    --weight-decay 0.01 \
    --per-device-train-batch-size 16 \
    --per-device-eval-batch-size 64 \
    --hub-model-id LazarusNLP/NusaBERT-$size-POSP
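
  # BaPOS: part-of-speech tagging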
  python scripts/run_token_classification.py \
    --model-checkpoint LazarusNLP/NusaBERT-$size \
    --dataset-name indonlp/indonlu \
    --dataset-config bapos \
    --input-column-name tokens \
    --target-column-name pos_tags \
    --output-dir outputs/nusabert-$size-bapos \
    --num-train-epochs 10 \
    --optim adamw_torch_fused \
    --learning-rate 2e-5 \
    --weight-decay 0.01 \
    --per-device-train-batch-size 16 \
    --per-device-eval-batch-size 64 \
    --hub-model-id LazarusNLP/NusaBERT-$size-BaPOS
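
  # TermA: aspect and opinion term extraction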
  python scripts/run_token_classification.py \
    --model-checkpoint LazarusNLP/NusaBERT-$size \
    --dataset-name indonlp/indonlu \
    --dataset-config terma \
    --input-column-name tokens \
    --target-column-name seq_label \
    --output-dir outputs/nusabert-$size-terma \
    --num-train-epochs 10 \
    --optim adamw_torch_fused \
    --learning-rate 2e-5 \
    --weight-decay 0.01 \
    --per-device-train-batch-size 16 \
    --per-device-eval-batch-size 64 \
    --hub-model-id LazarusNLP/NusaBERT-$size-TermA
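
  # KEPS: keyphrase extraction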
  python scripts/run_token_classification.py \
    --model-checkpoint LazarusNLP/NusaBERT-$size \
    --dataset-name indonlp/indonlu \
    --dataset-config keps \
    --input-column-name tokens \
    --target-column-name seq_label \
    --output-dir outputs/nusabert-$size-keps \
    --num-train-epochs 10 \
    --optim adamw_torch_fused \
    --learning-rate 2e-5 \
    --weight-decay 0.01 \
    --per-device-train-batch-size 16 \
    --per-device-eval-batch-size 64 \
    --hub-model-id LazarusNLP/NusaBERT-$size-KEPS
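
  # NERGrit: named entity recognition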
  python scripts/run_token_classification.py \
    --model-checkpoint LazarusNLP/NusaBERT-$size \
    --dataset-name indonlp/indonlu \
    --dataset-config nergrit \
    --input-column-name tokens \
    --target-column-name ner_tags \
    --output-dir outputs/nusabert-$size-nergrit \
    --num-train-epochs 10 \
    --optim adamw_torch_fused \
    --learning-rate 2e-5 \
    --weight-decay 0.01 \
    --per-device-train-batch-size 16 \
    --per-device-eval-batch-size 64 \
    --hub-model-id LazarusNLP/NusaBERT-$size-NERGrit
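
  # NERP: named entity recognition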
  python scripts/run_token_classification.py \
    --model-checkpoint LazarusNLP/NusaBERT-$size \
    --dataset-name indonlp/indonlu \
    --dataset-config nerp \
    --input-column-name tokens \
    --target-column-name ner_tags \
    --output-dir outputs/nusabert-$size-nerp \
    --num-train-epochs 10 \
    --optim adamw_torch_fused \
    --learning-rate 2e-5 \
    --weight-decay 0.01 \
    --per-device-train-batch-size 16 \
    --per-device-eval-batch-size 64 \
    --hub-model-id LazarusNLP/NusaBERT-$size-NERP
done
################################
# WikiANN
################################
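# Fine-tune NusaBERT base and large on WikiANN NER, one model per language:
# Acehnese (ace), Indonesian (id), Banyumasan (map-bms), Minangkabau (min),
# Malay (ms), Javanese (jv), and Sundanese (su).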
for size in base large
do
  for lang in ace id map-bms min ms jv su
  do
    python scripts/run_token_classification.py \
      --model-checkpoint LazarusNLP/NusaBERT-$size \
      --dataset-name wikiann \
      --dataset-config $lang \
      --input-column-name tokens \
      --target-column-name ner_tags \
      --output-dir outputs/nusabert-$size-wikiann-$lang \
      --num-train-epochs 100 \
      --optim adamw_torch_fused \
      --learning-rate 2e-5 \
      --weight-decay 0.01 \
      --per-device-train-batch-size 8 \
      --per-device-eval-batch-size 64 \
      --hub-model-id LazarusNLP/NusaBERT-$size-WikiANN-$lang
  done
done