-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.yml
76 lines (71 loc) · 2.06 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Classifier section: data files, field names, output paths and log directory.
classifier_data:
  # Data files (CSV paths): the dataset plus the train / valid / test files.
  dataset: resources/classifier/dataset.csv
  train: resources/classifier/train.csv
  valid: resources/classifier/valid.csv
  test: resources/classifier/test.csv

  # Key fields: names of the text and label fields
  # (presumably CSV column names -- confirm against the loader).
  text_field: text
  label_field: category

  # Output paths (test-set scores and predictions, judging by the key names).
  test_pred_scores: output/classifier/scores.txt
  test_pred: output/classifier/test_pred.txt

  # Directory for the best model.
  log_dir: logdir/classifier/
# Extractor section: data files, field names, output paths and log directory.
extractor_data:
  # Data files (CSV paths): "positive" dataset/train/valid/test files,
  # IOB2 train/valid/test files, and a master file.
  positive_dataset: resources/extractor/positive_dataset.csv
  positive_train: resources/extractor/positive_train.csv
  positive_valid: resources/extractor/positive_valid.csv
  positive_test: resources/extractor/positive_test.csv
  iob2_train: resources/extractor/iob2_train.csv
  iob2_valid: resources/extractor/iob2_valid.csv
  iob2_test: resources/extractor/iob2_test.csv
  master: resources/extractor/master.csv

  # Key fields: names of the text and label fields
  # (presumably CSV column names -- confirm against the loader).
  text_field: token
  label_field: tag

  # Output paths (test-set scores and predictions, judging by the key names).
  test_pred_scores: output/extractor/scores.txt
  test_pred: output/extractor/test_pred.txt

  # Directory for the best model.
  log_dir: logdir/extractor/
# Normalizer section: data files, field names, output paths, similarity model
# and log directory.
normalizer_data:
  # Data files: dataset/train/valid/test CSVs, a master CSV, and the
  # MedDRA file (MedAscii/llt.asc).
  dataset: resources/normalizer/dataset.csv
  train: resources/normalizer/train.csv
  valid: resources/normalizer/valid.csv
  test: resources/normalizer/test.csv
  master: resources/normalizer/master.csv
  meddra: resources/normalizer/MedAscii/llt.asc

  # Key fields: names of the text, label and MedDRA fields
  # (presumably column names -- confirm against the loader).
  text_field: keywords
  label_field: idmeddra
  meddra_field: meddra

  # Output paths. The *_sim entries are the similarity-based counterparts of
  # the plain ones, judging by the key names.
  # NOTE(review): "cm" presumably stands for confusion matrix -- confirm.
  test_pred_scores: output/normalizer/scores.txt
  test_pred: output/normalizer/test_pred.txt
  test_pred_sim: output/normalizer/test_pred_sim.txt
  html: output/normalizer/cm
  html_sim: output/normalizer/cm_sim
  cm_output: output/normalizer/
  test_pred_scores_sim: output/normalizer/scores_sim.csv

  # Name of the similarity model.
  sim_model: paraphrase-MiniLM-L6-v2

  # Directory for the best model.
  log_dir: logdir/normalizer/
# Model settings.
model:
  # Name of the base model.
  model_name: distilbert-base-uncased
  # Maximum sequence length.
  max_seq_length: 128

  # Class counts: one *_classes entry each for the classifier, extractor
  # and normalizer.
  classifier_classes: 2
  extractor_classes: 3
  normalizer_classes: 32
# Training settings.
training:
  # Written with an explicit decimal point on purpose: YAML 1.1 loaders such
  # as PyYAML only resolve exponent notation to a float when a "." is present,
  # so a bare 3e-5 is loaded as the string "3e-5" there. 3.0e-5 is a float
  # under both YAML 1.1 and YAML 1.2.
  learn_rate: 3.0e-5
  num_epochs: 3
  accum_steps: 4
  batch_size: 8
  log_dir: logdir
  # NOTE(review): YAML has no `None` literal; this value loads as the string
  # "None", not as a null. Left unchanged because the consuming code is not
  # visible here -- if a real null is intended, write `null` instead.
  fp16_params: None
# General settings.
general:
  # Seed value (presumably for random-number generators -- confirm against
  # the consumer).
  seed: 17