[egs] wake-word recipes: use python3, add README (#4052)
freewym authored Apr 27, 2020
1 parent 03ae8c1 commit d4791c0
Showing 11 changed files with 37 additions and 16 deletions.
15 changes: 15 additions & 0 deletions egs/mobvoi/README.txt
@@ -0,0 +1,15 @@
+
+The Mobvoi dataset is a ~67-hour wake word corpus in Chinese covering
+523 speakers. It is currently not publicly available.
+The wake word is "Hi Xiaowen" (in Pinyin).
+Each speaker's collection includes positive and negative utterances,
+recorded at different speaker-to-microphone distances and different
+signal-to-noise ratios (SNRs), with noises from typical home environments.
+The dataset is provided by Mobvoi, Inc.
+
+The recipe is in v1/
+
+The E2E LF-MMI recipe does not require any prior alignments for LF-MMI
+training, making the alignment more flexible during training. It can
+optionally be followed by regular LF-MMI training to further improve performance.
+
2 changes: 1 addition & 1 deletion egs/mobvoi/v1/local/chain/tuning/run_e2e_tdnn_1a.sh
@@ -97,7 +97,7 @@ fi
 if [ $stage -le 2 ]; then
   echo "$0: creating neural net configs using the xconfig parser";
   num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
-  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+  learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
   affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
   tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
   linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
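Note: the same one-line fix recurs in five more tuning scripts below. The old
form pipes a Python 2 print statement into "python", which raises a
SyntaxError wherever "python" resolves to Python 3; the new form calls the
Python 3 print() function directly via "python3 -c". A minimal sketch of the
difference (xent_regularize=0.1 is a hypothetical value, for illustration only):

  echo "print 0.5/0.1" | python3    # SyntaxError: Missing parentheses in call to 'print'
  xent_regularize=0.1               # hypothetical value
  learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
  echo "$learning_rate_factor"      # prints 5.0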
2 changes: 1 addition & 1 deletion egs/mobvoi/v1/local/chain/tuning/run_tdnn_1a.sh
@@ -142,7 +142,7 @@ fi
 if [ $stage -le 5 ]; then
   echo "$0: creating neural net configs using the xconfig parser";
   num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
-  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+  learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
   affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
   tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
   linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
2 changes: 1 addition & 1 deletion egs/mobvoi/v1/local/chain/tuning/run_tdnn_e2eali_1a.sh
@@ -131,7 +131,7 @@ fi
 if [ $stage -le 4 ]; then
   echo "$0: creating neural net configs using the xconfig parser";
   num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
-  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+  learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
   affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
   tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
   linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
1 change: 1 addition & 0 deletions egs/mobvoi/v1/local/score_online.sh
@@ -31,6 +31,7 @@ done
 
 
 utils/data/get_utt2dur.sh $data
+rm $data/utt2dur_negative 2>/dev/null || true
 utils/filter_scp.pl <(grep -v $wake_word $data/text) $data/utt2dur > $data/utt2dur_negative && dur=`awk '{a+=$2} END{print a}' $data/utt2dur_negative`
 echo "total duration (in seconds) of negative examples in $data: $dur"
 
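Note: the added "rm ... || true" line clears any leftover utt2dur_negative
from a previous run before the file is regenerated, and the "2>/dev/null ||
true" guards mean a missing file cannot abort the script. For reference, an
utt2dur file holds one "utterance-id duration-in-seconds" pair per line, so
the awk one-liner above sums column 2. A minimal sketch with hypothetical
utterance ids and durations:

  printf 'utt001 1.52\nutt002 0.98\n' > utt2dur_negative   # hypothetical data
  dur=$(awk '{a+=$2} END{print a}' utt2dur_negative)
  echo "total duration (in seconds): $dur"                 # prints 2.5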
5 changes: 0 additions & 5 deletions egs/mobvoi/v1/run.sh
@@ -172,11 +172,6 @@ if [ $stage -le 10 ]; then
   (
   utils/mkgraph.sh data/lang \
     exp/mono exp/mono/graph
-  for test in dev eval; do
-    steps/decode.sh --nj 20 --cmd "$decode_cmd" \
-      --scoring-opts "--wake-word 嗨小问" \
-      exp/mono/graph data/$test exp/mono/decode_$test
-  done
   )&
 
   steps/align_si.sh --nj 50 --cmd "$train_cmd" \
15 changes: 15 additions & 0 deletions egs/snips/README.txt
@@ -0,0 +1,15 @@
+
+The SNIPS dataset is a ~54-hour wake word corpus covering 3300 speakers.
+The wake word is "Hey Snips", pronounced with no pause between the two words.
+It contains a large variety of English accents and recording environments.
+Negative samples were recorded in the same conditions as wake-word utterances.
+To download the dataset, follow the instructions at
+https://github.com/snipsco/keyword-spotting-research-datasets. It is provided
+by Snips, Paris, France (https://snips.ai).
+
+The recipe is in v1/
+
+The E2E LF-MMI recipe does not require any prior alignments for LF-MMI
+training, making the alignment more flexible during training. It can
+optionally be followed by regular LF-MMI training to further improve performance.
+
2 changes: 1 addition & 1 deletion egs/snips/v1/local/chain/tuning/run_e2e_tdnn_1a.sh
@@ -95,7 +95,7 @@ fi
 if [ $stage -le 2 ]; then
   echo "$0: creating neural net configs using the xconfig parser";
   num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
-  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+  learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
   affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
   tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
   linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
2 changes: 1 addition & 1 deletion egs/snips/v1/local/chain/tuning/run_tdnn_1a.sh
@@ -139,7 +139,7 @@ fi
 if [ $stage -le 5 ]; then
   echo "$0: creating neural net configs using the xconfig parser";
   num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
-  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+  learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
   affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
   tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
   linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
2 changes: 1 addition & 1 deletion egs/snips/v1/local/chain/tuning/run_tdnn_e2eali_1a.sh
@@ -129,7 +129,7 @@ fi
 if [ $stage -le 4 ]; then
   echo "$0: creating neural net configs using the xconfig parser";
   num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
-  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+  learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
   affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
   tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
   linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
5 changes: 0 additions & 5 deletions egs/snips/v1/run.sh
@@ -162,11 +162,6 @@ if [ $stage -le 8 ]; then
   (
   utils/mkgraph.sh data/lang \
     exp/mono exp/mono/graph
-  for test in dev eval; do
-    steps/decode.sh --nj 20 --cmd "$decode_cmd" \
-      --scoring-opts "--wake-word HeySnips" \
-      exp/mono/graph data/$test exp/mono/decode_$test
-  done
   )&
 
   steps/align_si.sh --nj 20 --cmd "$train_cmd" \
