-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Some new functionality in nnet3; including drafts of example scripts
- Loading branch information
Showing
10 changed files
with
435 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
#!/bin/bash

# This script is called from scripts like run_ms.sh; it does the common stages
# of the build, such as feature extraction.
# This is actually the same as local/online/run_nnet2_common.sh, except
# for the directory names.
#
# Stages (a stage block runs when $stage is <= its number):
#   1  make high-resolution MFCC features + CMVN stats, and the si84 subset
#   2  align the si84 data with exp/tri4b
#   3  train a small LDA+MLLT system (only its transform is needed)
#   4  train the diagonal UBM
#   5  train the iVector extractor
#   6  extract online iVectors for the training data
#   7  extract online iVectors for the test sets
#
# Defaults below are set before utils/parse_options.sh is sourced;
# presumably they can be overridden on the command line (e.g. --stage 3).

mfccdir=mfcc
stage=1

# Source the configuration exactly once, with explicit ./ paths so that the
# files are taken from the recipe directory rather than found via $PATH.
. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if [ "$stage" -le 1 ]; then
  for datadir in train_si284 test_eval93 test_dev93 test_eval92; do
    utils/copy_data_dir.sh data/$datadir data/${datadir}_hires || exit 1;
    steps/make_mfcc.sh --nj 40 --mfcc-config conf/mfcc_hires.conf \
      --cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir "$mfccdir" || exit 1;
    steps/compute_cmvn_stats.sh data/${datadir}_hires exp/make_hires/$datadir "$mfccdir" || exit 1;
  done
  # train_si84_hires is taken as the first 7138 utterances of train_si284_hires.
  utils/subset_data_dir.sh --first data/train_si284_hires 7138 data/train_si84_hires || exit 1
fi

if [ "$stage" -le 2 ]; then
  # We need to build a small system just because we need the LDA+MLLT transform
  # to train the diag-UBM on top of.  We align the si84 data for this purpose.
  steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
    data/train_si84 data/lang exp/tri4b exp/nnet3/tri4b_ali_si84 || exit 1;
fi

if [ "$stage" -le 3 ]; then
  # Train a small system just for its LDA+MLLT transform.  We use --num-iters 13
  # because after we get the transform (12th iter is the last), any further
  # training is pointless.
  steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \
    --realign-iters "" \
    --splice-opts "--left-context=3 --right-context=3" \
    5000 10000 data/train_si84_hires data/lang \
    exp/nnet3/tri4b_ali_si84 exp/nnet3/tri5b || exit 1;
fi

if [ "$stage" -le 4 ]; then
  mkdir -p exp/nnet3

  steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 \
    --num-frames 400000 data/train_si84_hires 256 exp/nnet3/tri5b exp/nnet3/diag_ubm || exit 1;
fi

if [ "$stage" -le 5 ]; then
  # even though $nj is just 10, each job uses multiple processes and threads.
  steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
    data/train_si284_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;
fi

if [ "$stage" -le 6 ]; then
  # We extract iVectors on all the train_si284 data, which will be what we
  # train the system on.

  # having a larger number of speakers is helpful for generalization, and to
  # handle per-utterance decoding well (iVector starts at zero).
  steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/train_si284_hires \
    data/train_si284_hires_max2 || exit 1;

  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
    data/train_si284_hires_max2 exp/nnet3/extractor exp/nnet3/ivectors_train_si284 || exit 1;
fi

if [ "$stage" -le 7 ]; then
  # The test sets are processed in parallel background jobs; a failing job
  # leaves the marker file exp/nnet3/.error, which is checked after 'wait'.
  rm -f exp/nnet3/.error
  for data in test_eval92 test_dev93 test_eval93; do
    steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 8 \
      data/${data}_hires exp/nnet3/extractor exp/nnet3/ivectors_${data} || touch exp/nnet3/.error &
  done
  wait
  if [ -f exp/nnet3/.error ]; then
    echo "$0: error extracting iVectors."
    exit 1
  fi
fi

exit 0;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#!/bin/bash

# this is the standard "tdnn" system, built in nnet3; it's what we used to
# call multi-splice.
#
# Flow: run the common iVector/feature stages, train the TDNN (stage 8), then
# decode the eval92 and dev93 test sets with already-built tri4b graphs (stage 9).

# At this script level we don't support not running on GPU, as it would be painfully slow.
# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
# --num-threads 16 and --minibatch-size 128.

# Defaults; set before utils/parse_options.sh is sourced so that they can
# presumably be overridden on the command line (e.g. --stage 9).
stage=0
train_stage=-10
dir=exp/nnet3/nnet_ms_a

# Source the configuration exactly once, with explicit ./ paths so that the
# files are taken from the recipe directory rather than found via $PATH.
. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
  # Print the diagnostic to stderr and always exit, even if 'cat' itself fails.
  cat <<EOF >&2
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
  exit 1
fi

# NOTE(review): the stages below read exp/nnet3/... outputs, so this may be
# meant to be local/nnet3/run_ivector_common.sh -- confirm the script location.
local/online/run_ivector_common.sh --stage "$stage" || exit 1;

if [ "$stage" -le 8 ]; then
  # On the CLSP grid only: spread the egs storage over several disks.
  # The /export/b0{3,4,5,6} brace expansion must stay unquoted.
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
    utils/create_split_dir.pl \
      /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
  fi

  steps/nnet3/train_tdnn.sh --stage "$train_stage" \
    --num-epochs 8 --num-jobs-initial 2 --num-jobs-final 14 \
    --num-hidden-layers 4 \
    --splice-indexes "-4,-3,-2,-1,0,1,2,3,4  0  -2,2  0  -4,4 0" \
    --feat-type raw \
    --online-ivector-dir exp/nnet3/ivectors_train_si284 \
    --cmvn-opts "--norm-means=false --norm-vars=false" \
    --io-opts "-tc 12" \
    --initial-effective-lrate 0.005 --final-effective-lrate 0.0005 \
    --cmd "$decode_cmd" \
    --pnorm-input-dim 2000 \
    --pnorm-output-dim 250 \
    --mix-up 12000 \
    data/train_si284_hires data/lang exp/tri4b_ali_si284 $dir || exit 1;
fi

if [ "$stage" -le 9 ]; then
  # this does offline decoding that should give the same results as the real
  # online decoding.
  for lm_suffix in tgpr bd_tgpr; do
    graph_dir=exp/tri4b/graph_${lm_suffix}
    # use already-built graphs.
    for year in eval92 dev93; do
      # The test-set iVectors are read from exp/nnet3/ivectors_test_<year>,
      # matching the exp/nnet3 directories used for training above.
      steps/nnet3/decode.sh --nj 8 --cmd "$decode_cmd" \
        --online-ivector-dir exp/nnet3/ivectors_test_$year \
        $graph_dir data/test_${year}_hires $dir/decode_${lm_suffix}_${year} || exit 1;
    done
  done
fi
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.