diff --git a/egs/ami/s5/RESULTS_sdm b/egs/ami/s5/RESULTS_sdm index 919a5a02248..04fd3734669 100644 --- a/egs/ami/s5/RESULTS_sdm +++ b/egs/ami/s5/RESULTS_sdm @@ -17,3 +17,49 @@ exp/sdm1/tri2a/decode_eval_ami_fsh.o3g.kn.pr1-7/ascore_13/eval_o4.ctm.filt.dtl:P exp/sdm1/tri3a/decode_eval_ami_fsh.o3g.kn.pr1-7/ascore_12/eval_o4.ctm.filt.dtl:Percent Total Error = 69.5% (62576) exp/sdm1/tri3a_mmi_b0.1/decode_eval_3.mdl_ami_fsh.o3g.kn.pr1-7/ascore_10/eval_o4.ctm.filt.dtl:Percent Total Error = 67.2% (60447) + + +# TDNN results +for x in exp/$mic/nnet2_online/*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep Sum $x/ascore_*/*.sys | utils/best_wer.sh; done +# Cross entropy training +%WER 46.8 | 15053 94502 | 59.3 27.6 13.0 6.2 46.8 67.0 | -23.602 | exp/sdm1/nnet2_online/nnet_ms_sp_online/decode_dev_utt/ascore_12/dev_hires_o4.ctm.filt.sys +%WER 46.4 | 14210 94496 | 59.0 26.6 14.4 5.4 46.4 70.7 | -23.844 | exp/sdm1/nnet2_online/nnet_ms_sp_online/decode_dev_utt_offline/ascore_13/dev_hires_o4.ctm.filt.sys + +%WER 50.7 | 13180 89643 | 54.7 29.6 15.7 5.3 50.7 72.6 | -23.104 | exp/sdm1/nnet2_online/nnet_ms_sp_online/decode_eval_utt/ascore_12/eval_hires_o4.ctm.filt.sys +%WER 50.5 | 13099 89806 | 54.7 29.3 15.9 5.2 50.5 73.5 | -23.149 | exp/sdm1/nnet2_online/nnet_ms_sp_online/decode_eval_utt_offline/ascore_12/eval_hires_o4.ctm.filt.sys + + +# sMBR training +# dev set + # epoch 0 + %WER 46.8 | 15053 94502 | 59.3 27.6 13.0 6.2 46.8 67.0 | -23.602 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch0_dev_utt/ascore_12/dev_hires_o4.ctm.filt.sys + %WER 46.4 | 14210 94496 | 59.0 26.6 14.4 5.4 46.4 70.7 | -23.844 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch0_dev_utt_offline/ascore_13/dev_hires_o4.ctm.filt.sys + #epoch 1 + %WER 45.7 | 14207 94490 | 59.5 22.9 17.6 5.3 45.7 70.5 | -24.681 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch1_dev_utt/ascore_11/dev_hires_o4.ctm.filt.sys + %WER 45.9 | 15232 94491 | 59.9 23.2 17.0 5.7 45.9 65.9 | -24.541 | 
exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch1_dev_utt_offline/ascore_10/dev_hires_o4.ctm.filt.sys + #epoch 2 + %WER 45.9 | 14543 94497 | 59.3 22.5 18.2 5.3 45.9 68.8 | -24.748 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch2_dev_utt/ascore_12/dev_hires_o4.ctm.filt.sys + %WER 46.1 | 14125 94492 | 59.6 22.7 17.7 5.7 46.1 71.1 | -24.626 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch2_dev_utt_offline/ascore_11/dev_hires_o4.ctm.filt.sys + #epoch 3 + %WER 46.0 | 15128 94502 | 59.6 23.1 17.3 5.6 46.0 66.2 | -24.565 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch3_dev_utt/ascore_12/dev_hires_o4.ctm.filt.sys + %WER 46.2 | 14764 94498 | 59.3 22.3 18.4 5.5 46.2 68.0 | -24.723 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch3_dev_utt_offline/ascore_12/dev_hires_o4.ctm.filt.sys + #epoch 4 + %WER 46.1 | 15193 94485 | 58.5 21.4 20.1 4.6 46.1 65.8 | -25.114 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch4_dev_utt/ascore_15/dev_hires_o4.ctm.filt.sys + %WER 46.5 | 15169 94494 | 59.2 22.8 18.0 5.7 46.5 66.3 | -24.554 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch4_dev_utt_offline/ascore_12/dev_hires_o4.ctm.filt.sys + +# eval set + #epoch 0 + %WER 50.7 | 13180 89643 | 54.7 29.6 15.7 5.3 50.7 72.6 | -23.104 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch0_eval_utt/ascore_12/eval_hires_o4.ctm.filt.sys + %WER 50.5 | 13099 89806 | 54.7 29.3 15.9 5.2 50.5 73.5 | -23.149 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch0_eval_utt_offline/ascore_12/eval_hires_o4.ctm.filt.sys + #epoch 1 + %WER 49.3 | 13432 89977 | 55.4 25.2 19.4 4.7 49.3 70.7 | -23.885 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch1_eval_utt/ascore_12/eval_hires_o4.ctm.filt.sys + %WER 49.2 | 13497 89975 | 55.5 24.9 19.5 4.7 49.2 70.7 | -23.937 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch1_eval_utt_offline/ascore_12/eval_hires_o4.ctm.filt.sys + #epoch 2 + %WER 49.2 | 
13372 89987 | 55.6 25.3 19.0 4.9 49.2 71.0 | -23.850 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch2_eval_utt/ascore_13/eval_hires_o4.ctm.filt.sys + %WER 48.9 | 13318 89796 | 55.9 25.2 18.9 4.8 48.9 71.3 | -23.901 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch2_eval_utt_offline/ascore_13/eval_hires_o4.ctm.filt.sys + #epoch 3 + %WER 49.0 | 14307 89984 | 55.7 25.3 19.0 4.8 49.0 66.3 | -23.885 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch3_eval_utt/ascore_14/eval_hires_o4.ctm.filt.sys + %WER 48.9 | 14084 89798 | 55.9 25.3 18.8 4.8 48.9 67.4 | -23.884 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch3_eval_utt_offline/ascore_14/eval_hires_o4.ctm.filt.sys + #epoch 4 + %WER 49.1 | 13948 89977 | 55.6 25.2 19.2 4.8 49.1 68.2 | -23.902 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch4_eval_utt/ascore_15/eval_hires_o4.ctm.filt.sys + %WER 49.0 | 14259 89798 | 55.8 25.4 18.8 4.8 49.0 66.6 | -23.873 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch4_eval_utt_offline/ascore_15/eval_hires_o4.ctm.filt.sys diff --git a/egs/ami/s5/conf/mfcc_hires.conf b/egs/ami/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..434834a6725 --- /dev/null +++ b/egs/ami/s5/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so + # there might be some information at the low end. 
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600) diff --git a/egs/ami/s5/conf/online_cmvn.conf b/egs/ami/s5/conf/online_cmvn.conf new file mode 100644 index 00000000000..7748a4a4dd3 --- /dev/null +++ b/egs/ami/s5/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/ami/s5/conf/queue_no_k20.conf b/egs/ami/s5/conf/queue_no_k20.conf new file mode 100644 index 00000000000..f0cba4df971 --- /dev/null +++ b/egs/ami/s5/conf/queue_no_k20.conf @@ -0,0 +1,13 @@ +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* +option mem=* -l mem_free=$0,ram_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -pe smp $0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q +option gpu=* -l gpu=$0 -q g.q +default allow_k20=true +option allow_k20=true +option allow_k20=false -l 'hostname=!g01*&!g02*&!b06*' diff --git a/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh b/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh index 55d0380b5a8..4f6212f4b42 100755 --- a/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh +++ b/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh @@ -18,7 +18,9 @@ has_fisher=true mic=ihm nj=70 affix= +hidden_dim=950 num_threads_ubm=32 +use_sat_alignments=true . ./path.sh . ./utils/parse_options.sh @@ -33,6 +35,16 @@ EOF parallel_opts="--gpu 1" num_threads=1 minibatch_size=512 + if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then + parallel_opts="$parallel_opts --config conf/queue_no_k20.conf --allow-k20 false" + # that config is like the default config in the text of queue.pl, but adding the following lines. 
+ # default allow_k20=true + # option allow_k20=true + # option allow_k20=false -l 'hostname=!g01&!g02&!b06' + # It's a workaround for an NVidia CUDA library bug for our currently installed version + # of the CUDA toolkit, that only shows up on k20's + fi + # the _a is in case I want to change the parameters. else # Use 4 nnet jobs just like run_4d_gpu.sh so the results should be @@ -43,12 +55,21 @@ else fi dir=exp/$mic/nnet2_online/nnet_ms_sp${affix:+_$affix} + +if [ "$use_sat_alignments" == "true" ] ; then + gmm_dir=exp/$mic/tri4a + align_script=steps/align_fmllr.sh +else + gmm_dir=exp/$mic/tri3a + align_script=steps/align_si.sh +fi final_lm=`cat data/local/lm/final_lm` LM=$final_lm.pr1-7 -graph_dir=exp/$mic/tri4a/graph_${LM} +graph_dir=$gmm_dir/graph_${LM} # Run the common stages of training, including training the iVector extractor local/online/run_nnet2_common.sh --stage $stage --mic $mic \ + --use-sat-alignments $use_sat_alignments \ --num-threads-ubm $num_threads_ubm|| exit 1; if [ $stage -le 6 ]; then @@ -70,8 +91,8 @@ if [ $stage -le 6 ]; then fi if [ $stage -le 7 ]; then - steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ - data/$mic/train_sp data/lang exp/$mic/tri4a exp/$mic/tri4a_sp_ali || exit 1 + $align_script --nj $nj --cmd "$train_cmd" \ + data/$mic/train_sp data/lang $gmm_dir ${gmm_dir}_sp_ali || exit 1 fi if [ $stage -le 8 ]; then @@ -118,9 +139,9 @@ if [ $stage -le 10 ]; then --initial-effective-lrate 0.0015 --final-effective-lrate 0.00015 \ --cmd "$decode_cmd" \ --egs-dir "$common_egs_dir" \ - --pnorm-input-dim 950 \ - --pnorm-output-dim 950 \ - data/$mic/train_hires_sp data/lang exp/$mic/tri4a_sp_ali $dir || exit 1; + --pnorm-input-dim $hidden_dim \ + --pnorm-output-dim $hidden_dim \ + data/$mic/train_hires_sp data/lang ${gmm_dir}_sp_ali $dir || exit 1; fi if [ $stage -le 11 ]; then diff --git a/egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh b/egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh new file mode 100755 index 00000000000..c80edeb7dbe 
--- /dev/null +++ b/egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh @@ -0,0 +1,175 @@ +#!/bin/bash + + +# This script does discriminative training on top of the online, multi-splice +# system trained in run_nnet2_ms.sh. +# note: this relies on having a cluster that has plenty of CPUs as well as GPUs, +# since the lattice generation runs in about real-time, so takes of the order of +# 1000 hours of CPU time. +# +# Note: rather than using any features we have dumped on disk, this script +# regenerates them from the wav data three times-- when we do lattice +# generation, numerator alignment and discriminative training. This made the +# script easier to write and more generic, because we don't have to know where +# the features and the iVectors are, but of course it's a little inefficient. +# The time taken is dominated by the lattice generation anyway, so this isn't +# a huge deal. + +. cmd.sh + + +stage=0 +train_stage=-10 +use_gpu=true +criterion=smbr +drop_frames=false # only matters for MMI anyway. +effective_lrate=0.000005 +srcdir= +mic=ihm +num_jobs_nnet=6 +train_stage=-10 # can be used to start training in the middle. +decode_start_epoch=0 # can be used to avoid decoding all epochs, e.g. if we decided to run more. +num_epochs=4 +cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like denlats, + # alignments and degs). +gmm_dir=exp/$mic/tri4a + +set -e +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if $use_gpu; then + if ! cuda-compiled; then + cat <