Skip to content

Commit

Permalink
Merge branch 'discophone' of https://github.com/pzelasko/kaldi into p…
Browse files Browse the repository at this point in the history
…zelasko-discophone
  • Loading branch information
syfengcuhk committed Sep 29, 2020
2 parents 30f9e17 + feb6d3e commit 684e643
Show file tree
Hide file tree
Showing 206 changed files with 58,624 additions and 0 deletions.
9 changes: 9 additions & 0 deletions egs/babel/s5d/conf/corpora_paths.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Locations of the IARPA Babel language packs.
# BABEL_ROOT falls back to the path below when it is unset or empty; export
# BABEL_ROOT before sourcing this file to use a different copy of the corpora
# without editing the file. Every per-language root is derived from it.
BABEL_ROOT="${BABEL_ROOT:-/export/corpora5/Babel}"
CANTONESE_ROOT="${BABEL_ROOT}/IARPA_BABEL_BP_101"
BENGALI_ROOT="${BABEL_ROOT}/BABEL_OP1_103"
VIETNAMESE_ROOT="${BABEL_ROOT}/BABEL_BP_107"
LAO_ROOT="${BABEL_ROOT}/IARPA_Babel_203"
ZULU_ROOT="${BABEL_ROOT}/IARPA_BABEL_OP1_206"
AMHARIC_ROOT="${BABEL_ROOT}/IARPA-babel307b-v1.0b-build"
JAVANESE_ROOT="${BABEL_ROOT}/IARPA-babel402b-v1.0b-build"
GEORGIAN_ROOT="${BABEL_ROOT}/IARPA-babel404b-v1.0a-build"
30 changes: 30 additions & 0 deletions egs/discophone/v1/cmd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Job-dispatch commands (train_cmd, decode_cmd, cuda_cmd, mkgraph_cmd),
# exported for whatever sources this file.
#
# You can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

# Defaults: run everything with run.pl.
export train_cmd=run.pl
export decode_cmd=run.pl
export cuda_cmd=run.pl
export mkgraph_cmd=run.pl

# Site-specific overrides, selected by the machine's DNS domain name.
# Not every hostname implementation accepts -d, so stderr is discarded; if the
# call fails the substitution is empty, no pattern matches, and the defaults
# above stay in effect.
case "$(hostname -d 2>/dev/null)" in
  clsp.jhu.edu)
    export train_cmd=queue.pl
    export decode_cmd="queue.pl --mem 2G"
    # the use of cuda_cmd is deprecated, used only in 'nnet1',
    export cuda_cmd="queue.pl --gpu 1"
    ;;
  fit.vutbr.cz)
    queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf,
    export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
    export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
    export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G"
    ;;
esac
124 changes: 124 additions & 0 deletions egs/discophone/v1/conf/common.fullLP
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# ---------------------------------------------------------------------------
# Bottleneck-feature (BNF) training parameters
# ---------------------------------------------------------------------------
bnf_num_hidden_layers=6
bottleneck_dim=42
bnf_hidden_layer_dim=2048
bnf_minibatch_size=512
bnf_init_learning_rate=0.008
bnf_final_learning_rate=0.0008
bnf_max_change=40
bnf_num_jobs=4
bnf_num_threads=1
bnf_mixup=10000
bnf_mpe_learning_rate=0.00009
bnf_mpe_last_layer_factor=0.1
# Fewer UBM Gaussians than the non-bottleneck system (which has 800).
bnf_num_gauss_ubm=550
# Fewer SGMM sub-states than the non-bottleneck system (which has 80000).
bnf_num_gauss_sgmm=50000
bnf_decode_acwt=0.066666
# Take every 2nd frame.
bnf_every_nth_frame=2

# ---------------------------------------------------------------------------
# DNN hybrid system training parameters
# ---------------------------------------------------------------------------
dnn_num_hidden_layers=4
dnn_input_dim=4000
dnn_output_dim=400
dnn_init_learning_rate=0.008
dnn_final_learning_rate=0.0008
dnn_mixup=12000

dnn_mpe_learning_rate=0.00008
dnn_mpe_last_layer_factor=0.1
dnn_mpe_retroactive="true"

dnn_beam=16.0
dnn_lat_beam=8.5

# ---------------------------------------------------------------------------
# General switches
# ---------------------------------------------------------------------------
babel_type="full"
use_pitch="true"

# ---------------------------------------------------------------------------
# Language-model-weight ranges, one option array per system type
# ---------------------------------------------------------------------------
lmwt_plp_extra_opts=(--min-lmwt 9 --max-lmwt 13)
lmwt_bnf_extra_opts=(--min-lmwt 15 --max-lmwt 22)
lmwt_dnn_extra_opts=(--min-lmwt 10 --max-lmwt 15)
lmwt_chain_extra_opts=(--min-lmwt 9 --max-lmwt 13)

# ICU options, kept as an array so each word stays a separate argument.
icu_opt=(
  --use-icu true
  --icu-transform Any-Lower
)

# Host-specific parallelisation options. Hosts whose name ends in
# .tacc.utexas.edu get "-pe smp N" / "-l gpu=1" style options; every other
# host gets "--num-threads N" / "--mem" / "--gpu 1" style options.
# Each *_opts array holds one command-line word per element, so option values
# containing spaces (e.g. the --parallel-opts argument) stay a single word.
if [[ "$(hostname)" == *.tacc.utexas.edu ]]; then
  decode_extra_opts=( --num-threads 4 --parallel-opts "-pe smp 4" )
  sgmm_train_extra_opts=( )
  # NOTE(review): the other branch passes "--num-iters 25" through
  # sgmm_train_extra_opts, not sgmm_group_extra_opts -- confirm that
  # putting it in the group options here is intended.
  sgmm_group_extra_opts=( --num_iters 25 )
  sgmm_denlats_extra_opts=( --num-threads 2 )
  sgmm_mmi_extra_opts=( --cmd "local/lonestar.py -pe smp 2" )
  dnn_denlats_extra_opts=( --num-threads 2 )

  dnn_cpu_parallel_opts=( --minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
                          --parallel-opts "-pe smp 16" )
  dnn_gpu_parallel_opts=( --minibatch-size 512 --num-jobs-nnet 8 --num-threads 1 )

  dnn_gpu_mpe_parallel_opts=( --num-jobs-nnet 8 --num-threads 1 )
  dnn_parallel_opts="-l gpu=1"
else
  decode_extra_opts=( --num-threads 6 --parallel-opts "--num-threads 6 --mem 4G" )
  sgmm_train_extra_opts=( --num-iters 25 )
  sgmm_group_extra_opts=( --group 3 --parallel-opts "--num-threads 7 --mem 6G" )
  sgmm_denlats_extra_opts=( --num-threads 4 --parallel-opts "--num-threads 4" )
  sgmm_mmi_extra_opts=( )
  dnn_denlats_extra_opts=( --num-threads 4 --parallel-opts "--num-threads 4" )

  dnn_cpu_parallel_opts=( --minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
                          --parallel-opts "--num-threads 16" )
  dnn_gpu_parallel_opts=( --minibatch-size 512 --num-jobs-nnet 8 --num-threads 1 \
                          --parallel-opts "--gpu 1" )
  dnn_parallel_opts="--gpu 1"
  dnn_gpu_mpe_parallel_opts=( --num-jobs-nnet 8 --num-threads 1 \
                              --parallel-opts "--gpu 1" )
fi

icu_transform="Any-Lower"


max_states=150000
wip=0.5


# Intentionally empty.
phoneme_mapping=

minimize=true

proxy_phone_beam=-1
proxy_phone_nbest=-1
proxy_beam=5
proxy_nbest=500

extlex_proxy_phone_beam=5
extlex_proxy_phone_nbest=300
extlex_proxy_beam=-1
extlex_proxy_nbest=-1


# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=1000
numGaussTri2=20000
numLeavesTri3=6000
numGaussTri3=75000
numLeavesMLLT=6000
numGaussMLLT=75000
numLeavesSAT=6000
numGaussSAT=75000
numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000

# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"

# Keyword search settings
duptime=0.5
# case_insensitive is assigned exactly once, here. An earlier identical
# assignment was dropped so that there is a single place to edit.
case_insensitive=true
29 changes: 29 additions & 0 deletions egs/discophone/v1/conf/common_vars.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Keyword search defaults
glmFile=conf/glm
duptime=0.5
case_insensitive=false
use_pitch=true
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="-oov <unk>"
boost_sil=1.5 # note from Dan: I expect 1.0 might be better (equivalent to not
              # having the option)... should test.
cer=0

# Declared here so that the definitions inside the language conf files are
# more transparent and nice.
declare -A train_kwlists
declare -A dev10h_kwlists
declare -A dev2h_kwlists
declare -A evalpart1_kwlists
declare -A eval_kwlists
declare -A shadow_kwlists

# just for back-compatibility
declare -A dev10h_more_kwlists
declare -A dev2h_more_kwlists
declare -A evalpart1_more_kwlists
declare -A eval_more_kwlists
declare -A shadow_more_kwlists

# Source path.sh and cmd.sh from the current directory when they exist.
# These are written as `if` blocks rather than `[ -f FILE ] && . FILE`
# because this is the last command of a file that is itself sourced: with the
# `&&` form a missing file leaves a non-zero status, which aborts a caller
# running under `set -e` or using `. conf/common_vars.sh || exit 1`.
if [ -f ./path.sh ]; then . ./path.sh; fi # source the path.
if [ -f ./cmd.sh ]; then . ./cmd.sh; fi # source train and decode cmds.
9 changes: 9 additions & 0 deletions egs/discophone/v1/conf/corpora_paths.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Locations of the IARPA Babel language packs.
# BABEL_ROOT falls back to the path below when it is unset or empty; export
# BABEL_ROOT before sourcing this file to use a different copy of the corpora
# without editing the file. Every per-language root is derived from it.
BABEL_ROOT="${BABEL_ROOT:-/export/corpora5/Babel}"
CANTONESE_ROOT="${BABEL_ROOT}/IARPA_BABEL_BP_101"
BENGALI_ROOT="${BABEL_ROOT}/BABEL_OP1_103"
VIETNAMESE_ROOT="${BABEL_ROOT}/BABEL_BP_107"
LAO_ROOT="${BABEL_ROOT}/IARPA_Babel_203"
ZULU_ROOT="${BABEL_ROOT}/IARPA_BABEL_OP1_206"
AMHARIC_ROOT="${BABEL_ROOT}/IARPA-babel307b-v1.0b-build/BABEL_OP3_307"
JAVANESE_ROOT="${BABEL_ROOT}/IARPA-babel402b-v1.0b-build/BABEL_OP3_402"
GEORGIAN_ROOT="${BABEL_ROOT}/IARPA-babel404b-v1.0a-build/BABEL_OP3_404"
2 changes: 2 additions & 0 deletions egs/discophone/v1/conf/fbank.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--sample-frequency=16000
--num-mel-bins=80
13 changes: 13 additions & 0 deletions egs/discophone/v1/conf/glm
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
;;
;; File: ma970904.glm
;; Desc: This file contains the transcript filtering rules for the ARPA
;; Mandarin Hub5-NE Evaluation.
;;
;; Date: 970904
;; - initial creation
;;
;; Hesitation mappings
<hes> => %HESITATION / [ ] __ [ ]
<v-noise> => %HESITATION / [ ] __ [ ]
<noise> => %HESITATION / [ ] __ [ ]

Loading

0 comments on commit 684e643

Please sign in to comment.