From b7efabaafdf3e1c17e75c7e476636deb46921bbb Mon Sep 17 00:00:00 2001 From: Nickolay Shmyrev Date: Sat, 17 Nov 2018 08:43:32 +0100 Subject: [PATCH] Allow segmentation with nnet3 chain models --- .../cleanup/clean_and_segment_data_nnet3.sh | 17 +++++++++++------ .../cleanup/segment_long_utterances_nnet3.sh | 9 +++++++-- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh b/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh index 467d1e92c7a..35b07d184f4 100755 --- a/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh +++ b/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh @@ -26,6 +26,10 @@ graph_opts= beam=15.0 lattice_beam=1.0 +acwt=0.1 # Just a default value, used for adaptation and beam-pruning.. +post_decode_acwt=1.0 # can be used in 'chain' systems to scale acoustics by 10 so the + # regular scoring script works. + # Contexts must ideally match training extra_left_context=0 # Set to some large value, typically 40 for LSTM (must match training) extra_right_context=0 @@ -36,7 +40,7 @@ frames_per_chunk=150 # i-vector options extractor= # i-Vector extractor. If provided, will extract i-vectors. # Required if the network was trained with i-vector extractor. -use_vad= # Use energy-based VAD for i-vector extraction +use_vad=false # Use energy-based VAD for i-vector extraction segmentation_opts= @@ -119,18 +123,18 @@ fi online_ivector_dir= if [ ! -z "$extractor" ]; then - online_ivector_dir=$dir/ivectors_$(basename $data_uniform_seg) + online_ivector_dir=$dir/ivectors_$(basename $data) if [ $stage -le 2 ]; then # Compute energy-based VAD if $use_vad; then - steps/compute_vad_decision.sh $data_uniform_seg \ - $data_uniform_seg/log $data_uniform_seg/data + steps/compute_vad_decision.sh $data \ + $data/log $data/data fi steps/online/nnet2/extract_ivectors_online.sh \ --nj $nj --cmd "$cmd --mem 4G" --use-vad $use_vad \ - $data_uniform_seg $extractor $online_ivector_dir + $data $extractor $online_ivector_dir fi fi @@ -138,6 +142,7 @@ if [ $stage -le 3 ]; then echo "$0: Decoding with biased language models..." steps/cleanup/decode_segmentation_nnet3.sh \ + --acwt $acwt --post-decode-acwt $post_decode_acwt \ --beam $beam --lattice-beam $lattice_beam --nj $nj --cmd "$cmd --mem 4G" \ --skip-scoring true --allow-partial false \ --extra-left-context $extra_left_context \ @@ -154,7 +159,7 @@ fi frame_shift_opt= if [ -f $srcdir/frame_subsampling_factor ]; then - frame_shift_opt="--frame-shift=0.0$(cat $srcdir/frame_subsampling_factor)" + frame_shift_opt="--frame-shift 0.0$(cat $srcdir/frame_subsampling_factor)" fi if [ $stage -le 4 ]; then diff --git a/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh b/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh index d21b94fc5fb..ae355c9f753 100755 --- a/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh +++ b/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh @@ -43,6 +43,10 @@ beam=15.0 lattice_beam=1.0 lmwt=10 +acwt=0.1 # Just a default value, used for adaptation and beam-pruning.. +post_decode_acwt=1.0 # can be used in 'chain' systems to scale acoustics by 10 so the + # regular scoring script works. + # Contexts must ideally match training extra_left_context=0 # Set to some large value, typically 40 for LSTM (must match training) extra_right_context=0 @@ -53,7 +57,7 @@ frames_per_chunk=150 # i-vector options extractor= # i-Vector extractor. If provided, will extract i-vectors. # Required if the network was trained with i-vector extractor. -use_vad= # Use energy-based VAD for i-vector extraction +use_vad=false # Use energy-based VAD for i-vector extraction # TF-IDF similarity search options max_words=1000 @@ -263,6 +267,7 @@ if [ $stage -le 5 ]; then echo "$0: Decoding with biased language models..." steps/cleanup/decode_segmentation_nnet3.sh \ + --acwt $acwt --post-decode-acwt $post_decode_acwt \ --beam $beam --lattice-beam $lattice_beam --nj $nj --cmd "$cmd --mem 4G" \ --skip-scoring true --allow-partial false \ --extra-left-context $extra_left_context \ @@ -276,7 +281,7 @@ fi frame_shift_opt= if [ -f $srcdir/frame_subsampling_factor ]; then - frame_shift_opt="--frame-shift=0.0$(cat $srcdir/frame_subsampling_factor)" + frame_shift_opt="--frame-shift 0.0$(cat $srcdir/frame_subsampling_factor)" fi if [ $stage -le 6 ]; then