From b7efabaafdf3e1c17e75c7e476636deb46921bbb Mon Sep 17 00:00:00 2001
From: Nickolay Shmyrev <nshmyrev@gmail.com>
Date: Sat, 17 Nov 2018 08:43:32 +0100
Subject: [PATCH] Allow segmentation with nnet3 chain models

---
 .../cleanup/clean_and_segment_data_nnet3.sh     | 17 +++++++++++------
 .../cleanup/segment_long_utterances_nnet3.sh    |  9 +++++++--
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh b/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh
index 467d1e92c7a..35b07d184f4 100755
--- a/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh
+++ b/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh
@@ -26,6 +26,10 @@ graph_opts=
 beam=15.0
 lattice_beam=1.0
 
+acwt=0.1  # Just a default value, used for adaptation and beam-pruning.
+post_decode_acwt=1.0  # Can be set to 10.0 in 'chain' systems to scale acoustics by 10
+                      # so that the regular scoring script works.
+
 # Contexts must ideally match training
 extra_left_context=0  # Set to some large value, typically 40 for LSTM (must match training)
 extra_right_context=0
@@ -36,7 +40,7 @@ frames_per_chunk=150
 # i-vector options
 extractor=    # i-Vector extractor. If provided, will extract i-vectors.
               # Required if the network was trained with i-vector extractor.
-use_vad=   # Use energy-based VAD for i-vector extraction
+use_vad=false # Use energy-based VAD for i-vector extraction
 
 segmentation_opts=
 
@@ -119,18 +123,18 @@ fi
 
 online_ivector_dir=
 if [ ! -z "$extractor" ]; then
-  online_ivector_dir=$dir/ivectors_$(basename $data_uniform_seg)
+  online_ivector_dir=$dir/ivectors_$(basename $data)
 
   if [ $stage -le 2 ]; then
     # Compute energy-based VAD
     if $use_vad; then
-      steps/compute_vad_decision.sh $data_uniform_seg \
-        $data_uniform_seg/log $data_uniform_seg/data
+      steps/compute_vad_decision.sh $data \
+        $data/log $data/data
     fi
 
     steps/online/nnet2/extract_ivectors_online.sh \
       --nj $nj --cmd "$cmd --mem 4G" --use-vad $use_vad \
-      $data_uniform_seg $extractor $online_ivector_dir
+      $data $extractor $online_ivector_dir
   fi
 fi
 
@@ -138,6 +142,7 @@ if [ $stage -le 3 ]; then
   echo "$0: Decoding with biased language models..."
 
   steps/cleanup/decode_segmentation_nnet3.sh \
+    --acwt $acwt --post-decode-acwt $post_decode_acwt \
     --beam $beam --lattice-beam $lattice_beam --nj $nj --cmd "$cmd --mem 4G" \
     --skip-scoring true --allow-partial false \
     --extra-left-context $extra_left_context \
@@ -154,7 +159,7 @@ fi
 
 frame_shift_opt=
 if [ -f $srcdir/frame_subsampling_factor ]; then
-  frame_shift_opt="--frame-shift=0.0$(cat $srcdir/frame_subsampling_factor)"
+  frame_shift_opt="--frame-shift 0.0$(cat $srcdir/frame_subsampling_factor)"
 fi
 
 if [ $stage -le 4 ]; then
diff --git a/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh b/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh
index d21b94fc5fb..ae355c9f753 100755
--- a/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh
+++ b/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh
@@ -43,6 +43,10 @@ beam=15.0
 lattice_beam=1.0
 lmwt=10
 
+acwt=0.1  # Just a default value, used for adaptation and beam-pruning.
+post_decode_acwt=1.0  # Can be set to 10.0 in 'chain' systems to scale acoustics by 10
+                      # so that the regular scoring script works.
+
 # Contexts must ideally match training
 extra_left_context=0  # Set to some large value, typically 40 for LSTM (must match training)
 extra_right_context=0  
@@ -53,7 +57,7 @@ frames_per_chunk=150
 # i-vector options
 extractor=    # i-Vector extractor. If provided, will extract i-vectors. 
               # Required if the network was trained with i-vector extractor. 
-use_vad=   # Use energy-based VAD for i-vector extraction
+use_vad=false # Use energy-based VAD for i-vector extraction
 
 # TF-IDF similarity search options
 max_words=1000
@@ -263,6 +267,7 @@ if [ $stage -le 5 ]; then
   echo "$0: Decoding with biased language models..."
 
   steps/cleanup/decode_segmentation_nnet3.sh \
+    --acwt $acwt --post-decode-acwt $post_decode_acwt \
     --beam $beam --lattice-beam $lattice_beam --nj $nj --cmd "$cmd --mem 4G" \
     --skip-scoring true --allow-partial false \
     --extra-left-context $extra_left_context \
@@ -276,7 +281,7 @@ fi
 
 frame_shift_opt=
 if [ -f $srcdir/frame_subsampling_factor ]; then
-  frame_shift_opt="--frame-shift=0.0$(cat $srcdir/frame_subsampling_factor)"
+  frame_shift_opt="--frame-shift 0.0$(cat $srcdir/frame_subsampling_factor)"
 fi
 
 if [ $stage -le 6 ]; then