Merge branch 'discophone' of https://github.com/pzelasko/kaldi into pzelasko-discophone
syfengcuhk · Sep 29, 2020 · 684e643 · 684e643
2 parents 30f9e17 + feb6d3e
commit 684e643
Show file tree

Hide file tree

Showing 206 changed files with 58,624 additions and 0 deletions.
diff --git a/egs/babel/s5d/conf/corpora_paths.sh b/egs/babel/s5d/conf/corpora_paths.sh
@@ -0,0 +1,9 @@
+BABEL_ROOT="/export/corpora5/Babel"  # common root; every *_ROOT below is built from it
+CANTONESE_ROOT="${BABEL_ROOT}/IARPA_BABEL_BP_101"
+BENGALI_ROOT="${BABEL_ROOT}/BABEL_OP1_103"
+VIETNAMESE_ROOT="${BABEL_ROOT}/BABEL_BP_107"
+LAO_ROOT="${BABEL_ROOT}/IARPA_Babel_203"
+ZULU_ROOT="${BABEL_ROOT}/IARPA_BABEL_OP1_206"
+AMHARIC_ROOT="${BABEL_ROOT}/IARPA-babel307b-v1.0b-build"  # NOTE(review): egs/discophone/v1/conf/corpora_paths.sh appends a BABEL_OP3_* subdirectory to this and the next two paths - confirm which layout is intended
+JAVANESE_ROOT="${BABEL_ROOT}/IARPA-babel402b-v1.0b-build"
+GEORGIAN_ROOT="${BABEL_ROOT}/IARPA-babel404b-v1.0a-build"
diff --git a/egs/discophone/v1/cmd.sh b/egs/discophone/v1/cmd.sh
@@ -0,0 +1,30 @@
+# Commands used to dispatch jobs; edit this file to match the type of queue you use.
+# If you have no queueing system and want to run on a local machine, keep the
+# run.pl defaults below (but be careful and run commands one by one: most
+# recipes will exhaust the memory on your machine).  queue.pl works with
+# GridEngine (qsub); slurm.pl works with slurm.  Different queues are configured
+# differently, with different queue names and different ways of specifying things
+# like memory; to account for these differences, create and edit conf/queue.conf.
+# Search for conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more
+# information, or for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd=run.pl
+export decode_cmd=run.pl
+export cuda_cmd=run.pl
+export mkgraph_cmd=run.pl
+
+# Site-specific overrides, selected by DNS domain.  The domain is looked up once;
+# stderr is silenced because some 'hostname' implementations reject -d.
+case "$(hostname -d 2>/dev/null)" in
+  clsp.jhu.edu)
+    export train_cmd=queue.pl
+    export decode_cmd="queue.pl --mem 2G"
+    export cuda_cmd="queue.pl --gpu 1"  # cuda_cmd is deprecated; used only in 'nnet1'
+    ;;
+  fit.vutbr.cz)
+    queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf
+    export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
+    export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
+    export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G"
+    ;;
+esac
diff --git a/egs/discophone/v1/conf/common.fullLP b/egs/discophone/v1/conf/common.fullLP
@@ -0,0 +1,124 @@
+# BNF training parameters
+bnf_num_hidden_layers=6
+bottleneck_dim=42
+bnf_hidden_layer_dim=2048
+bnf_minibatch_size=512
+bnf_init_learning_rate=0.008
+bnf_final_learning_rate=0.0008
+bnf_max_change=40
+bnf_num_jobs=4
+bnf_num_threads=1
+bnf_mixup=10000
+bnf_mpe_learning_rate=0.00009
+bnf_mpe_last_layer_factor=0.1
+bnf_num_gauss_ubm=550 # use fewer UBM Gaussians than the
+                      # non-bottleneck system (numGaussUBM=800 further down)
+bnf_num_gauss_sgmm=50000 # use fewer SGMM sub-states than the
+                         # non-bottleneck system (numGaussSGMM=80000 further down).
+bnf_decode_acwt=0.066666
+
+
+# DNN hybrid system training parameters
+dnn_num_hidden_layers=4
+dnn_input_dim=4000
+dnn_output_dim=400
+dnn_init_learning_rate=0.008
+dnn_final_learning_rate=0.0008
+dnn_mixup=12000
+
+dnn_mpe_learning_rate=0.00008
+dnn_mpe_last_layer_factor=0.1
+dnn_mpe_retroactive=true
+
+bnf_every_nth_frame=2 # take every 2nd frame.
+babel_type=full
+
+use_pitch=true
+
+lmwt_plp_extra_opts=( --min-lmwt 9 --max-lmwt 13 )  # bash array of option words (LM-weight range); likewise the three arrays below
+lmwt_bnf_extra_opts=( --min-lmwt 15 --max-lmwt 22 )
+lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 15 )
+lmwt_chain_extra_opts=( --min-lmwt 9 --max-lmwt 13 )
+
+dnn_beam=16.0
+dnn_lat_beam=8.5
+
+icu_opt=(--use-icu true --icu-transform Any-Lower)  # same transform name as icu_transform, which is set later in this file
+
+# Host-dependent option arrays: *.tacc.utexas.edu hosts get '-pe smp N' / '-l gpu=1' style options, all other hosts get '--num-threads' / '--gpu' style ones.
+if [[ "$(hostname)" == *.tacc.utexas.edu ]] ; then
+  decode_extra_opts=( --num-threads 4 --parallel-opts "-pe smp 4" )
+  sgmm_train_extra_opts=( )
+  sgmm_group_extra_opts=( --num_iters 25 )  # NOTE(review): underscore spelling, and set on the group opts here but on the train opts (as --num-iters) in the else branch - confirm
+  sgmm_denlats_extra_opts=( --num-threads 2 )
+  sgmm_mmi_extra_opts=(--cmd "local/lonestar.py -pe smp 2")
+  dnn_denlats_extra_opts=( --num-threads 2 )
+
+  dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
+                         --parallel-opts "-pe smp 16" )
+  dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 8 --num-threads 1)
+
+  dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1)
+  dnn_parallel_opts="-l gpu=1"
+else
+  decode_extra_opts=(--num-threads 6 --parallel-opts "--num-threads 6 --mem 4G")
+  sgmm_train_extra_opts=( --num-iters 25 )
+  sgmm_group_extra_opts=(--group 3 --parallel-opts "--num-threads 7 --mem 6G")
+  sgmm_denlats_extra_opts=(--num-threads 4 --parallel-opts "--num-threads 4" )
+  sgmm_mmi_extra_opts=()
+  dnn_denlats_extra_opts=(--num-threads 4 --parallel-opts "--num-threads 4")
+
+  dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
+                         --parallel-opts "--num-threads 16")
+  dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 8 --num-threads 1 \
+                         --parallel-opts "--gpu 1" )
+  dnn_parallel_opts="--gpu 1"
+  dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1 \
+                             --parallel-opts "--gpu 1")
+fi
+
+# ICU transform name; case_insensitive is set once, with the keyword search settings at the end of this section.
+icu_transform="Any-Lower"
+
+
+max_states=150000
+wip=0.5
+
+
+phoneme_mapping=
+
+minimize=true
+
+proxy_phone_beam=-1
+proxy_phone_nbest=-1
+proxy_beam=5
+proxy_nbest=500
+
+extlex_proxy_phone_beam=5
+extlex_proxy_phone_nbest=300
+extlex_proxy_beam=-1
+extlex_proxy_nbest=-1
+
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"  # NOTE(review): conf/common_vars.sh spells this "-oov <unk>" (single dash) - confirm which form the consumer expects
+
+# Keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/discophone/v1/conf/common_vars.sh b/egs/discophone/v1/conf/common_vars.sh
@@ -0,0 +1,29 @@
+# Keyword search defaults
+glmFile="conf/glm"
+duptime="0.5"
+case_insensitive="false"
+use_pitch="true"
+# Lexicon and language model parameters
+oovSymbol='<unk>'
+lexiconFlags='-oov <unk>'
+boost_sil="1.5" # note from Dan: I expect 1.0 might be better (equivalent to not
+# having the option)... should test.
+cer="0"
+
+# Associative arrays are declared here so that the definitions inside the
+# language conf files stay simple and readable.
+declare -A train_kwlists \
+           dev10h_kwlists \
+           dev2h_kwlists \
+           evalpart1_kwlists \
+           eval_kwlists \
+           shadow_kwlists
+
+# Kept only for backward compatibility.
+declare -A dev10h_more_kwlists \
+           dev2h_more_kwlists \
+           evalpart1_more_kwlists \
+           eval_more_kwlists \
+           shadow_more_kwlists
+[ -f ./path.sh ] && . ./path.sh # source the path.
+[ -f ./cmd.sh ] && . ./cmd.sh   # source train and decode cmds.
diff --git a/egs/discophone/v1/conf/corpora_paths.sh b/egs/discophone/v1/conf/corpora_paths.sh
@@ -0,0 +1,9 @@
+BABEL_ROOT="/export/corpora5/Babel"  # common root; every *_ROOT below is built from it
+CANTONESE_ROOT="${BABEL_ROOT}/IARPA_BABEL_BP_101"
+BENGALI_ROOT="${BABEL_ROOT}/BABEL_OP1_103"
+VIETNAMESE_ROOT="${BABEL_ROOT}/BABEL_BP_107"
+LAO_ROOT="${BABEL_ROOT}/IARPA_Babel_203"
+ZULU_ROOT="${BABEL_ROOT}/IARPA_BABEL_OP1_206"
+AMHARIC_ROOT="${BABEL_ROOT}/IARPA-babel307b-v1.0b-build/BABEL_OP3_307"  # NOTE(review): this and the next two paths go one directory deeper (BABEL_OP3_*) than egs/babel/s5d/conf/corpora_paths.sh - confirm which layout is intended
+JAVANESE_ROOT="${BABEL_ROOT}/IARPA-babel402b-v1.0b-build/BABEL_OP3_402"
+GEORGIAN_ROOT="${BABEL_ROOT}/IARPA-babel404b-v1.0a-build/BABEL_OP3_404"
diff --git a/egs/discophone/v1/conf/fbank.conf b/egs/discophone/v1/conf/fbank.conf
@@ -0,0 +1,2 @@
+--sample-frequency=16000 
+--num-mel-bins=80
diff --git a/egs/discophone/v1/conf/glm b/egs/discophone/v1/conf/glm
@@ -0,0 +1,13 @@
+;;
+;;  File: ma970904.glm
+;;  Desc: This file contains the transcript filtering rules for the ARPA
+;;        Mandarin Hub5-NE Evaluation.
+;;
+;;  Date: 970904
+;;         - initial creation 
+;;
+;;  Hesitation mappings
+<hes>        => %HESITATION     / [ ] __ [ ]
+<v-noise>    => %HESITATION     / [ ] __ [ ]
+<noise>      => %HESITATION     / [ ] __ [ ]
+