From 6a93702e00ad54da91684ad77b7b32cfaa9bb86e Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 16 Nov 2017 13:36:17 -0500 Subject: [PATCH] modifications for adding resnet --- egs/iam/s5/local/chain/run_cnn_1a.sh | 12 ++++++------ egs/iam/s5/local/chain/run_cnn_chainali_1a.sh | 14 +++++++------- egs/iam/s5/local/chain/run_cnn_chainali_1b.sh | 13 +++++-------- egs/iam/s5/local/chain/run_cnn_chainali_1c.sh | 11 ++++------- egs/iam/s5/local/chain/run_cnn_chainali_1d.sh | 12 ++++-------- 5 files changed, 26 insertions(+), 36 deletions(-) diff --git a/egs/iam/s5/local/chain/run_cnn_1a.sh b/egs/iam/s5/local/chain/run_cnn_1a.sh index 02df03d1cd6..bc2fc4a0b47 100755 --- a/egs/iam/s5/local/chain/run_cnn_1a.sh +++ b/egs/iam/s5/local/chain/run_cnn_1a.sh @@ -29,8 +29,8 @@ alignment_subsampling_factor=1 chunk_width=340,300,200,100 num_leaves=500 # we don't need extra left/right context for TDNN systems. -chunk_left_context=32 -chunk_right_context=32 +chunk_left_context=0 +chunk_right_context=0 tdnn_dim=450 # training options srand=0 @@ -127,8 +127,8 @@ if [ $stage -le 4 ]; then num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - common1="required-time-offsets=0 height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="required-time-offsets=0 height-offsets=-2,-1,0,1,2 num-filters-out=70" + common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" mkdir -p $dir/configs cat <<EOF > $dir/configs/network.xconfig input dim=40 name=input @@ -223,8 +223,8 @@ if [ $stage -le 7 ]; then steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --extra-left-context $chunk_left_context \ --extra-right-context $chunk_right_context \ - --extra-left-context-initial 32 \ - --extra-right-context-final 32 \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$decode_cmd" \ $dir/graph 
data/test $dir/decode_test || exit 1; diff --git a/egs/iam/s5/local/chain/run_cnn_chainali_1a.sh b/egs/iam/s5/local/chain/run_cnn_chainali_1a.sh index 0f0296d7af6..ea62d88c3b0 100755 --- a/egs/iam/s5/local/chain/run_cnn_chainali_1a.sh +++ b/egs/iam/s5/local/chain/run_cnn_chainali_1a.sh @@ -33,8 +33,8 @@ alignment_subsampling_factor=1 chunk_width=340,300,200,100 num_leaves=500 # we don't need extra left/right context for TDNN systems. -chunk_left_context=32 -chunk_right_context=32 +chunk_left_context=0 +chunk_right_context=0 tdnn_dim=450 # training options srand=0 @@ -131,9 +131,9 @@ if [ $stage -le 4 ]; then num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - common1="required-time-offsets=0 height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="required-time-offsets=0 height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=70" + common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="height-offsets=-1,0,1 num-filters-out=70" mkdir -p $dir/configs cat <<EOF > $dir/configs/network.xconfig input dim=40 name=input @@ -228,8 +228,8 @@ if [ $stage -le 7 ]; then steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --extra-left-context $chunk_left_context \ --extra-right-context $chunk_right_context \ - --extra-left-context-initial 32 \ - --extra-right-context-final 32 \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$decode_cmd" \ $dir/graph data/test $dir/decode_test || exit 1; diff --git a/egs/iam/s5/local/chain/run_cnn_chainali_1b.sh b/egs/iam/s5/local/chain/run_cnn_chainali_1b.sh index d14e4d9b44a..35bbc4c0601 100755 --- a/egs/iam/s5/local/chain/run_cnn_chainali_1b.sh +++ b/egs/iam/s5/local/chain/run_cnn_chainali_1b.sh @@ -1,13 +1,10 @@ #!/bin/bash -# chainali_1a uses 
chain model for lattice instead of gmm-hmm model. It has more cnn layers as compared to 1a -# (18.34% -> 13.68%) +# chainali_1b uses chain model for lattice instead of gmm-hmm model. It has more cnn layers as compared to 1a +# (15.17% -> 14.54%) -# steps/info/chain_dir_info.pl exp/chain/cnn1a_chainali/ -# exp/chain/cnn_chainali_1a/: num-iters=21 nj=2..4 num-params=3.8M dim=40->380 combine=-0.009->-0.006 xent:train/valid[13,20,final]=(-0.870,-0.593,-0.568/-1.08,-0.889,-0.874) logprob:train/valid[13,20,final]=(-0.035,-0.003,-0.001/-0.077,-0.055,-0.054) - -# head exp/chain/cnn_chainali_1a/decode_test/scoring_kaldi/best_wer -# %WER 13.68 [ 2410 / 17616, 243 ins, 633 del, 1534 sub ] exp/chain/cnn_chainali_1a/decode_test/wer_8_1.0 +# steps/info/chain_dir_info.pl exp/chain/chainali_cnn_1b/ +# exp/chain/chainali_cnn_1b/: num-iters=21 nj=2..4 num-params=4.0M dim=40->364 combine=-0.009->-0.006 xent:train/valid[13,20,final]=(-0.870,-0.593,-0.568/-1.08,-0.889,-0.874) logprob:train/valid[13,20,final]=(-0.035,-0.003,-0.001/-0.077,-0.055,-0.054) set -e -o pipefail @@ -18,7 +15,7 @@ train_set=train gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. ali=tri3_ali chain_model_dir=exp/chain${nnet3_affix}/cnn${affix} common_egs_dir= diff --git a/egs/iam/s5/local/chain/run_cnn_chainali_1c.sh b/egs/iam/s5/local/chain/run_cnn_chainali_1c.sh index d158a69edc7..ccd24c9f807 100755 --- a/egs/iam/s5/local/chain/run_cnn_chainali_1c.sh +++ b/egs/iam/s5/local/chain/run_cnn_chainali_1c.sh @@ -1,13 +1,10 @@ #!/bin/bash # chainali_1a uses chain model for lattice instead of gmm-hmm model. 
It has more cnn layers as compared to 1a -# (18.34% -> 13.68%) +# (15.17% -> 14.51%) -# steps/info/chain_dir_info.pl exp/chain/cnn1a_chainali/ -# exp/chain/cnn_chainali_1a/: num-iters=21 nj=2..4 num-params=3.8M dim=40->380 combine=-0.009->-0.006 xent:train/valid[13,20,final]=(-0.870,-0.593,-0.568/-1.08,-0.889,-0.874) logprob:train/valid[13,20,final]=(-0.035,-0.003,-0.001/-0.077,-0.055,-0.054) - -# head exp/chain/cnn_chainali_1a/decode_test/scoring_kaldi/best_wer -# %WER 13.68 [ 2410 / 17616, 243 ins, 633 del, 1534 sub ] exp/chain/cnn_chainali_1a/decode_test/wer_8_1.0 +# steps/info/chain_dir_info.pl exp/chain/chainali_cnn_1c/ +# exp/chain/chainali_cnn_1c/: num-iters=21 nj=2..4 num-params=3.9M dim=40->364 combine=-0.009->-0.006 xent:train/valid[13,20,final]=(-0.870,-0.593,-0.568/-1.08,-0.889,-0.874) logprob:train/valid[13,20,final]=(-0.035,-0.003,-0.001/-0.077,-0.055,-0.054) set -e -o pipefail @@ -18,7 +15,7 @@ train_set=train gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +affix=_1c #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. ali=tri3_ali chain_model_dir=exp/chain${nnet3_affix}/cnn${affix} common_egs_dir= diff --git a/egs/iam/s5/local/chain/run_cnn_chainali_1d.sh b/egs/iam/s5/local/chain/run_cnn_chainali_1d.sh index 12dd6ea0dad..5260ee6448a 100755 --- a/egs/iam/s5/local/chain/run_cnn_chainali_1d.sh +++ b/egs/iam/s5/local/chain/run_cnn_chainali_1d.sh @@ -1,14 +1,10 @@ #!/bin/bash # chainali_1a uses chain model for lattice instead of gmm-hmm model. 
It has more cnn layers as compared to 1a -# (18.34% -> 13.68%) - -# steps/info/chain_dir_info.pl exp/chain/cnn1a_chainali/ -# exp/chain/cnn_chainali_1a/: num-iters=21 nj=2..4 num-params=3.8M dim=40->380 combine=-0.009->-0.006 xent:train/valid[13,20,final]=(-0.870,-0.593,-0.568/-1.08,-0.889,-0.874) logprob:train/valid[13,20,final]=(-0.035,-0.003,-0.001/-0.077,-0.055,-0.054) - -# head exp/chain/cnn_chainali_1a/decode_test/scoring_kaldi/best_wer -# %WER 13.68 [ 2410 / 17616, 243 ins, 633 del, 1534 sub ] exp/chain/cnn_chainali_1a/decode_test/wer_8_1.0 +# (19.10% -> 14.51%) +# steps/info/chain_dir_info.pl exp/chain/chainali_cnn_1d/ +# exp/chain/chainali_cnn_1d/: num-iters=21 nj=2..4 num-params=3.6M dim=40->364 combine=-0.009->-0.006 xent:train/valid[13,20,final]=(-0.870,-0.593,-0.568/-1.08,-0.889,-0.874) logprob:train/valid[13,20,final]=(-0.035,-0.003,-0.001/-0.077,-0.055,-0.054) set -e -o pipefail stage=0 @@ -18,7 +14,7 @@ train_set=train gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +affix=_1d #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. ali=tri3_ali chain_model_dir=exp/chain${nnet3_affix}/cnn${affix} common_egs_dir=