# ################################
# Model: LSTM (encoder) + GRU (decoder) (tokenized)
# Authors:
# Loren Lugosch & Mirco Ravanelli 2020
# Artem Ploujnikov 2021
# ################################
# Seed needs to be set at top of yaml, before objects with parameters are made
seed: 1234
__set_seed: !apply:torch.manual_seed [!ref <seed>]
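# A minimal loading sketch (illustrative, assuming the standard hyperpyyaml
# entry point; the file name and override values are examples only):
#
#     from hyperpyyaml import load_hyperpyyaml
#
#     with open("hparams_g2p_rnn.yaml") as f:
#         # data_folder is a !PLACEHOLDER below and must be overridden
#         hparams = load_hyperpyyaml(f, {"data_folder": "/localscratch/librig2p"})
#     model = hparams["model"]  # fully constructed AttentionSeq2Seq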
# Hyperparameter optimization (disabled by default)
hpopt: False
hpopt_mode: generic
trial_id: null
# Data paths
output_folder: !ref results/RNN/<seed>
data_folder: !PLACEHOLDER # e.g. /localscratch/librig2p
save_folder: !ref <output_folder>/save
train_log: !ref <output_folder>/train_log.txt
use_tensorboard: True
tensorboard_logs: !ref <output_folder>/logs/
enable_interim_reports: True
grapheme_tokenizer_output_folder: !ref <save_folder>/grapheme_tokenizer
phoneme_tokenizer_output_folder: !ref <save_folder>/phoneme_tokenizer
ckpt_frequency: 1
ckpt_enable: True
enable_metrics: True
pretrained_path: !ref <save_folder>/pretrained_models
dataset: flexthink/librig2p-nostress-space
lexicon_train_data: lexicon_train
lexicon_valid_data: lexicon_valid
lexicon_test_data: lexicon_test
lexicon_sample: null
lexicon_sample_random: False
sentence_train_data: sentence_train
sentence_valid_data: sentence_valid
sentence_test_data: sentence_test
sentence_sample: null
sentence_sample_random: False
homograph_train_data: homograph_train
homograph_valid_data: homograph_valid
homograph_test_data: homograph_test
homograph_sample: null
homograph_sample_random: False
homograph_balance: True
# Tokenizers
char_tokenize: False
char_token_type: unigram # ["unigram", "bpe", "char"]
char_token_output: 512
char_token_wordwise: True
phn_tokenize: False
phn_token_type: unigram # ["unigram", "bpe", "char"]
phn_token_output: 512 # index(blank/eos/bos/unk) = 0
phn_token_wordwise: True
character_coverage: 1.0
tokenizer_train_data: !ref <save_folder>/tokenizer_annotation_train.json
tokenizer_valid_data: !ref <save_folder>/tokenizer_annotation_valid.json
skip_prep: False
sorting: random # or: ascending
origins: "*"
# skip_test can be a Boolean value or a list of steps for which
# the test stage can be skipped
skip_test: null
phonemes_count: 43
graphemes_count: 31
phonemes_enable_space: True
# Training Parameters
lexicon_epochs: 50
lexicon_ctc_epochs: 10
lexicon_limit_to_stop: !ref <lexicon_epochs> # No stopping by default, can override
lexicon_limit_warmup: !ref <lexicon_epochs> # No stopping by default, can override
sentence_epochs: 20
sentence_ctc_epochs: 10
sentence_limit_to_stop: 3
sentence_limit_warmup: 3
homograph_epochs: 20
homograph_ctc_epochs: 10
homograph_limit_to_stop: 5
homograph_limit_warmup: 10
lexicon_batch_size: 1024
sentence_batch_size: 32
homograph_batch_size: 32
ctc_weight: 0.5
homograph_loss_weight: 2.0
lr: 0.002
save_for_pretrained: True
####################### Model Parameters #######################################
output_neurons: !apply:speechbrain.utils.hparams.choice
value: !ref <phn_tokenize>
choices:
True: !ref <phn_token_output> + 1
False: !ref <phonemes_count>
enc_num_embeddings: !apply:speechbrain.utils.hparams.choice
value: !ref <char_tokenize>
choices:
True: !ref <char_token_output> + 1
False: !ref <graphemes_count>
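# hparams.choice selects the entry of `choices` whose key equals `value`.
# With the defaults above (char_tokenize/phn_tokenize: False) this resolves
# to output_neurons = phonemes_count = 43 and enc_num_embeddings =
# graphemes_count = 31; with tokenization enabled it would instead be the
# tokenizer vocabulary size plus one extra index (e.g. 512 + 1 = 513).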
enc_dropout: 0.5
enc_neurons: 512
enc_num_layers: 4
dec_dropout: 0.5
dec_neurons: 512
dec_att_neurons: 256
dec_num_layers: 4
embedding_dim: 512
# Determines whether to use BOS (beginning-of-sequence) or EOS (end-of-sequence) tokens
# Available modes:
# raw: no BOS/EOS tokens are added
# bos: a beginning-of-sequence token is added
# eos: an end-of-sequence token is added
grapheme_sequence_mode: bos
phoneme_sequence_mode: bos
# Special Token information
bos_index: 0
eos_index: 1
blank_index: 2
unk_index: 2
token_space_index: !ref <phn_token_output>
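# Note: blank and unk share index 2. The inventories below list 28 grapheme
# and 40 phoneme symbols; adding the three special indices (bos=0, eos=1,
# blank/unk=2) accounts for graphemes_count=31 and phonemes_count=43.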
# Language Model
lm_emb_dim: 256 # dimension of the embeddings
lm_rnn_size: 512 # dimension of hidden layers
lm_layers: 2 # number of hidden layers
lm_output_neurons: !ref <phonemes_count>
# Beam Searcher
use_language_model: False
beam_search_min_decode_ratio: 0
beam_search_max_decode_ratio: 1.0
beam_search_beam_size: 16
beam_search_beam_size_valid: 16
beam_search_eos_threshold: 10.0
beam_search_using_max_attn_shift: False
beam_search_max_attn_shift: 10
beam_search_coverage_penalty: 5.0
beam_search_lm_weight: 0.5
beam_search_ctc_weight_decode: 0.4
beam_search_temperature: 1.25
beam_search_temperature_lm: 1.0
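# The decode ratios bound hypothesis length relative to the encoder output:
# with min 0 and max 1.0, the beam search may stop immediately and runs for
# at most as many steps as there are input graphemes.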
# Word embeddings
use_word_emb: False
word_emb_model: bert-base-uncased
word_emb_dim: 768
word_emb_enc_dim: 256
word_emb_norm_type: batch
# Evaluation parameters
eval_dataset: train
eval_train_step: sentence
eval_ckpt_step: !ref <eval_train_step>
eval_batch_size: 256
eval_batch_count: 1
eval_reporting: raw
eval_prediction_sample_size: 10
eval_mode: sentence
eval_output_wer_file: False
eval_wer_file: !ref <save_folder>/wer_eval.txt
select_n_sentences: null
graphemes:
- A
- B
- C
- D
- E
- F
- G
- H
- I
- J
- K
- L
- M
- N
- O
- P
- Q
- R
- S
- T
- U
- V
- W
- X
- Y
- Z
- "'"
- " "
phonemes:
- AA
- AE
- AH
- AO
- AW
- AY
- B
- CH
- D
- DH
- EH
- ER
- EY
- F
- G
- HH
- IH
- IY
- JH
- K
- L
- M
- N
- NG
- OW
- OY
- P
- R
- S
- SH
- T
- TH
- UH
- UW
- V
- W
- Y
- Z
- ZH
- " "
enc_input_dim: !apply:speechbrain.lobes.models.g2p.model.input_dim
use_word_emb: !ref <use_word_emb>
word_emb_enc_dim: !ref <word_emb_enc_dim>
embedding_dim: !ref <embedding_dim>
phn_char_map: !apply:speechbrain.lobes.models.g2p.dataio.build_token_char_map
tokens: !ref <phonemes>
char_phn_map: !apply:speechbrain.lobes.models.g2p.dataio.flip_map
map_dict: !ref <phn_char_map>
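# enc_input_dim is derived from the settings above: the grapheme embedding
# size, presumably widened by word_emb_enc_dim when use_word_emb is True
# (512 with the defaults here). build_token_char_map and flip_map build a
# reversible phoneme <-> single-character mapping so that multi-character
# phoneme tokens such as "AA" can be round-tripped through SentencePiece.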
# Models
enc: !new:speechbrain.nnet.RNN.LSTM
input_shape: [null, null, !ref <enc_input_dim>]
bidirectional: True
hidden_size: !ref <enc_neurons>
num_layers: !ref <enc_num_layers>
dropout: !ref <enc_dropout>
lin: !new:speechbrain.nnet.linear.Linear
input_size: !ref <dec_neurons>
n_neurons: !ref <output_neurons>
bias: False
ctc_lin: !new:speechbrain.nnet.linear.Linear
input_size: !ref 2 * <enc_neurons>
n_neurons: !ref <output_neurons>
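# The CTC head reads the raw encoder states; its input is 2 * enc_neurons
# because the LSTM encoder above is bidirectional.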
encoder_emb: !new:speechbrain.nnet.embedding.Embedding
num_embeddings: !ref <enc_num_embeddings>
embedding_dim: !ref <embedding_dim>
emb: !new:speechbrain.nnet.embedding.Embedding
num_embeddings: !ref <output_neurons>
embedding_dim: !ref <embedding_dim>
dec: !new:speechbrain.nnet.RNN.AttentionalRNNDecoder
enc_dim: !ref <enc_neurons> * 2
input_size: !ref <embedding_dim>
rnn_type: gru
attn_type: content
dropout: !ref <dec_dropout>
hidden_size: !ref <dec_neurons>
attn_dim: !ref <dec_att_neurons>
num_layers: !ref <dec_num_layers>
word_emb_enc: !new:speechbrain.lobes.models.g2p.model.WordEmbeddingEncoder
word_emb_dim: !ref <word_emb_dim>
word_emb_enc_dim: !ref <word_emb_enc_dim>
norm_type: !ref <word_emb_norm_type>
word_emb: !apply:speechbrain.lobes.models.g2p.dataio.lazy_init
init: !name:speechbrain.wordemb.transformer.TransformerWordEmbeddings
model: !ref <word_emb_model>
log_softmax: !new:speechbrain.nnet.activations.Softmax
apply_log: True
modules:
model: !ref <model>
enc: !ref <enc>
encoder_emb: !ref <encoder_emb>
emb: !ref <emb>
dec: !ref <dec>
lin: !ref <lin>
ctc_lin: !ref <ctc_lin>
out: !ref <log_softmax>
word_emb: null
word_emb_enc: !ref <word_emb_enc>
model: !new:speechbrain.lobes.models.g2p.model.AttentionSeq2Seq
enc: !ref <enc>
encoder_emb: !ref <encoder_emb>
emb: !ref <emb>
dec: !ref <dec>
lin: !ref <lin>
out: !ref <log_softmax>
use_word_emb: !ref <use_word_emb>
word_emb_enc: !ref <word_emb_enc>
lm_model: !new:speechbrain.lobes.models.RNNLM.RNNLM
embedding_dim: !ref <lm_emb_dim>
rnn_layers: !ref <lm_layers>
rnn_neurons: !ref <lm_rnn_size>
output_neurons: !ref <lm_output_neurons>
return_hidden: True
opt_class: !name:torch.optim.Adam
lr: !ref <lr>
# Scorer
ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
eos_index: !ref <eos_index>
blank_index: !ref <blank_index>
ctc_fc: !ref <ctc_lin>
rnnlm_scorer: !new:speechbrain.decoders.scorer.RNNLMScorer
    language_model: !ref <lm_model>
    temperature: !ref <beam_search_temperature_lm>
coverage_scorer: !new:speechbrain.decoders.scorer.CoverageScorer
vocab_size: !ref <output_neurons>
scorer_lm: !new:speechbrain.decoders.scorer.ScorerBuilder
    full_scorers: [!ref <rnnlm_scorer>, !ref <ctc_scorer>, !ref <coverage_scorer>]
    weights:
        ctc: !ref <beam_search_ctc_weight_decode>
        rnnlm: !ref <beam_search_lm_weight>
        coverage: !ref <beam_search_coverage_penalty>
scorer: !new:speechbrain.decoders.scorer.ScorerBuilder
full_scorers: [!ref <ctc_scorer>, !ref <coverage_scorer>]
weights:
ctc: !ref <beam_search_ctc_weight_decode>
coverage: !ref <beam_search_coverage_penalty>
beam_searcher: !new:speechbrain.decoders.S2SRNNBeamSearcher
embedding: !ref <emb>
decoder: !ref <dec>
linear: !ref <lin>
bos_index: !ref <bos_index>
eos_index: !ref <eos_index>
min_decode_ratio: !ref <beam_search_min_decode_ratio>
max_decode_ratio: !ref <beam_search_max_decode_ratio>
beam_size: !ref <beam_search_beam_size>
eos_threshold: !ref <beam_search_eos_threshold>
using_max_attn_shift: !ref <beam_search_using_max_attn_shift>
max_attn_shift: !ref <beam_search_max_attn_shift>
temperature: !ref <beam_search_temperature>
scorer: !ref <scorer>
beam_searcher_valid: !new:speechbrain.decoders.S2SRNNBeamSearcher
embedding: !ref <emb>
decoder: !ref <dec>
linear: !ref <lin>
bos_index: !ref <bos_index>
eos_index: !ref <eos_index>
min_decode_ratio: !ref <beam_search_min_decode_ratio>
max_decode_ratio: !ref <beam_search_max_decode_ratio>
beam_size: !ref <beam_search_beam_size_valid>
eos_threshold: !ref <beam_search_eos_threshold>
using_max_attn_shift: !ref <beam_search_using_max_attn_shift>
max_attn_shift: !ref <beam_search_max_attn_shift>
temperature: !ref <beam_search_temperature>
scorer: !ref <scorer>
beam_searcher_lm: !new:speechbrain.decoders.S2SRNNBeamSearcher
embedding: !ref <emb>
decoder: !ref <dec>
linear: !ref <lin>
bos_index: !ref <bos_index>
eos_index: !ref <eos_index>
min_decode_ratio: !ref <beam_search_min_decode_ratio>
max_decode_ratio: !ref <beam_search_max_decode_ratio>
beam_size: !ref <beam_search_beam_size>
eos_threshold: !ref <beam_search_eos_threshold>
using_max_attn_shift: !ref <beam_search_using_max_attn_shift>
max_attn_shift: !ref <beam_search_max_attn_shift>
temperature: !ref <beam_search_temperature>
scorer: !ref <scorer_lm>
lr_annealing: !new:speechbrain.nnet.schedulers.NewBobScheduler
initial_value: !ref <lr>
improvement_threshold: 0.0
annealing_factor: 0.8
patient: 0
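# NewBob annealing: when the tracked metric fails to improve by more than
# improvement_threshold (0.0 here, with no patience), the learning rate is
# multiplied by annealing_factor, e.g. 0.002 -> 0.0016 -> 0.00128 after two
# non-improving epochs.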
homograph_extractor: !new:speechbrain.lobes.models.g2p.homograph.SubsequenceExtractor
seq_cost: !name:speechbrain.nnet.losses.nll_loss
label_smoothing: 0.1
ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
blank_index: !ref <blank_index>
seq_cost_metric: !name:speechbrain.nnet.losses.nll_loss
label_smoothing: 0.1
reduction: batch
homograph_cost: !new:speechbrain.lobes.models.g2p.homograph.SubsequenceLoss
seq_cost: !ref <seq_cost>
seq_stats: !name:speechbrain.utils.metric_stats.MetricStats
metric: !ref <seq_cost_metric>
seq_stats_homograph: !name:speechbrain.utils.metric_stats.MetricStats
metric: !ref <seq_cost_metric>
classification_stats_homograph: !name:speechbrain.utils.metric_stats.ClassificationStats
per_stats: !name:speechbrain.utils.metric_stats.ErrorRateStats
per_stats_homograph: !name:speechbrain.utils.metric_stats.ErrorRateStats
lexicon_epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounterWithStopper
limit: !ref <lexicon_epochs>
limit_to_stop: !ref <lexicon_limit_to_stop>
limit_warmup: !ref <lexicon_limit_warmup>
direction: min
sentence_epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounterWithStopper
limit: !ref <sentence_epochs>
limit_to_stop: !ref <sentence_limit_to_stop>
limit_warmup: !ref <sentence_limit_warmup>
direction: min
homograph_epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounterWithStopper
limit: !ref <homograph_epochs>
limit_to_stop: !ref <homograph_limit_to_stop>
limit_warmup: !ref <homograph_limit_warmup>
direction: min
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
save_file: !ref <train_log>
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
checkpoints_dir: !ref <save_folder>
recoverables:
model: !ref <model>
ctc_lin: !ref <ctc_lin>
scheduler: !ref <lr_annealing>
lexicon_counter: !ref <lexicon_epoch_counter>
sentence_counter: !ref <sentence_epoch_counter>
homograph_counter: !ref <homograph_epoch_counter>
model_output_keys:
- p_seq
- char_lens
- encoder_out
grapheme_encoder: !new:speechbrain.dataio.encoder.TextEncoder
phoneme_encoder: !new:speechbrain.dataio.encoder.TextEncoder
grapheme_tokenizer: !apply:speechbrain.lobes.models.g2p.dataio.lazy_init
init: !name:speechbrain.tokenizers.SentencePiece.SentencePiece
model_dir: !ref <grapheme_tokenizer_output_folder>
bos_id: !ref <bos_index>
eos_id: !ref <eos_index>
unk_id: !ref <unk_index>
vocab_size: !ref <char_token_output>
annotation_train: !ref <tokenizer_train_data>
annotation_read: char
model_type: !ref <char_token_type> # ["unigram", "bpe", "char"]
character_coverage: !ref <character_coverage>
annotation_format: json
text_file: !ref <save_folder>/grapheme_annotations.txt
phoneme_tokenizer: !apply:speechbrain.lobes.models.g2p.dataio.lazy_init
init: !name:speechbrain.tokenizers.SentencePiece.SentencePiece
model_dir: !ref <phoneme_tokenizer_output_folder>
bos_id: !ref <bos_index>
eos_id: !ref <eos_index>
unk_id: !ref <unk_index>
vocab_size: !ref <phn_token_output>
annotation_train: !ref <tokenizer_train_data>
annotation_read: phn
model_type: !ref <phn_token_type> # ["unigram", "bpe", "char"]
character_coverage: !ref <character_coverage>
annotation_list_to_check: [!ref <tokenizer_valid_data>]
annotation_format: json
text_file: !ref <save_folder>/phoneme_annotations.txt
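# lazy_init wraps construction so the SentencePiece tokenizers (and the BERT
# word embedder above) are only instantiated on first use; loading this YAML
# therefore does not immediately train a tokenizer or download a model.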
out_phoneme_decoder_tok: !apply:speechbrain.lobes.models.g2p.dataio.char_map_detokenize
tokenizer: !ref <phoneme_tokenizer>
char_map: !ref <char_phn_map>
token_space_index: !ref <token_space_index>
wordwise: !ref <phn_token_wordwise>
out_phoneme_decoder_raw: !name:speechbrain.lobes.models.g2p.dataio.text_decode
encoder: !ref <phoneme_encoder>
out_phoneme_decoder: !apply:speechbrain.utils.hparams.choice
value: !ref <phn_tokenize>
choices:
True: !ref <out_phoneme_decoder_tok>
False: !ref <out_phoneme_decoder_raw>
encode_pipeline:
batch: False
use_padded_data: True
output_keys:
- grapheme_list
- grapheme_encoded_list
- grapheme_encoded
init:
- func: !name:speechbrain.lobes.models.g2p.dataio.enable_eos_bos
encoder: !ref <grapheme_encoder>
tokens: !ref <graphemes>
bos_index: !ref <bos_index>
eos_index: !ref <eos_index>
- func: !name:speechbrain.lobes.models.g2p.dataio.enable_eos_bos
encoder: !ref <phoneme_encoder>
tokens: !ref <phonemes>
bos_index: !ref <bos_index>
eos_index: !ref <eos_index>
steps:
- func: !name:speechbrain.lobes.models.g2p.dataio.clean_pipeline
graphemes: !ref <graphemes>
takes: txt
provides: txt_cleaned
- func: !name:speechbrain.lobes.models.g2p.dataio.grapheme_pipeline
grapheme_encoder: !ref <grapheme_encoder>
takes: txt_cleaned
provides:
- grapheme_list
- grapheme_encoded_list
- grapheme_encoded_raw
- func: !name:speechbrain.lobes.models.g2p.dataio.add_bos_eos
encoder: !ref <grapheme_encoder>
takes: grapheme_encoded_list
provides:
- grapheme_encoded
- grapheme_len
- grapheme_encoded_eos
- grapheme_len_eos
- func: !name:speechbrain.lobes.models.g2p.dataio.word_emb_pipeline
word_emb: !ref <word_emb>
grapheme_encoder: !ref <grapheme_encoder>
use_word_emb: !ref <use_word_emb>
takes:
- txt
- grapheme_encoded
- grapheme_len
provides: word_emb
decode_pipeline:
batch: True
output_keys:
- phonemes
steps:
- func: !name:speechbrain.lobes.models.g2p.dataio.beam_search_pipeline
beam_searcher: !ref <beam_searcher>
takes:
- char_lens
- encoder_out
provides:
- hyps
- scores
- func: !apply:speechbrain.utils.hparams.choice
value: !ref <phn_tokenize>
choices:
True: !apply:speechbrain.lobes.models.g2p.dataio.char_map_detokenize
tokenizer: !ref <phoneme_tokenizer>
char_map: !ref <char_phn_map>
token_space_index: !ref <token_space_index>
wordwise: !ref <phn_token_wordwise>
False: !name:speechbrain.lobes.models.g2p.dataio.phoneme_decoder_pipeline
phoneme_encoder: !ref <phoneme_encoder>
takes:
- hyps
provides:
- phonemes
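# Decode flow: encoder_out and char_lens from the model feed the beam
# searcher; the resulting hypotheses are converted back to phoneme strings
# either by detokenizing SentencePiece pieces (phn_tokenize: True) or by
# reversing the phoneme_encoder index mapping (phn_tokenize: False).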
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
loadables:
model: !ref <model>
ctc_lin: !ref <ctc_lin>
paths:
model: !ref <pretrained_path>/model.ckpt
ctc_lin: !ref <pretrained_path>/ctc_lin.ckpt
train_steps:
- name: lexicon
train_data: !ref <lexicon_train_data>
valid_data: !ref <lexicon_valid_data>
test_data: !ref <lexicon_test_data>
epoch_counter: !ref <lexicon_epoch_counter>
ctc_epochs: !ref <lexicon_ctc_epochs>
wer_file: !ref <save_folder>/wer_lexicon.txt
sample: !ref <lexicon_sample>
sample_random: !ref <lexicon_sample_random>
performance_key: PER
dataloader_opts:
batch_size: !ref <lexicon_batch_size>
- name: sentence
train_data: !ref <sentence_train_data>
valid_data: !ref <sentence_valid_data>
test_data: !ref <sentence_test_data>
epoch_counter: !ref <sentence_epoch_counter>
ctc_epochs: !ref <sentence_ctc_epochs>
wer_file: !ref <save_folder>/wer_sentence.txt
sample: !ref <sentence_sample>
sample_random: !ref <sentence_sample_random>
performance_key: PER
dataloader_opts:
batch_size: !ref <sentence_batch_size>
- name: homograph
train_data: !ref <homograph_train_data>
valid_data: !ref <homograph_valid_data>
test_data: !ref <homograph_test_data>
epoch_counter: !ref <homograph_epoch_counter>
ctc_epochs: !ref <homograph_ctc_epochs>
mode: homograph
balance: !ref <homograph_balance>
balance_on: homograph_wordid
sample: !ref <homograph_sample>
sample_random: !ref <homograph_sample_random>
wer_file: !ref <save_folder>/wer_homograph.txt
homograph_stats_file: !ref <save_folder>/homograph_stats.txt
performance_key: PER_homograph
dataloader_opts:
batch_size: !ref <homograph_batch_size>
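# The three steps above form a training curriculum: word-level lexicon
# first, then full sentences, then homograph fine-tuning, each with its own
# epoch counter, batch size, optional CTC warm-up epochs and WER report.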
deps_pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
collect_in: !ref <save_folder>
loadables:
lm: !ref <lm_model>
paths:
lm: !ref <pretrained_path>/lm.ckpt
conditions:
lm: !ref <use_language_model>
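# A usage sketch for the pretrainer (illustrative; the exact invocation
# lives in the training script):
#
#     deps_pretrainer = hparams["deps_pretrainer"]
#     deps_pretrainer.collect_files()   # fetch lm.ckpt into save_folder
#     deps_pretrainer.load_collected()  # load it into lm_model
#
# The `conditions` entry makes the LM optional: it is only collected and
# loaded when use_language_model is True.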