sync from official repo #11

Closed
wants to merge 7 commits
14 changes: 13 additions & 1 deletion egs/ami/s5/cmd.sh
@@ -15,4 +15,16 @@
export train_cmd=run.pl
export decode_cmd=run.pl
export highmem_cmd=run.pl
export cuda_cmd=run.pl

host=$(hostname -f)
if [ ${host#*.} == "fit.vutbr.cz" ]; then
# BUT cluster:
queue="all.q@@blade,all.q@@speech"
gpu_queue="long.q@supergpu*,long.q@dellgpu*,long.q@pcspeech-gpu,long.q@pcgpu*"
storage="matylda5"
export train_cmd="queue.pl -q $queue -l ram_free=1500M,mem_free=1500M,${storage}=1"
export decode_cmd="queue.pl -q $queue -l ram_free=2500M,mem_free=2500M,${storage}=0.5"
export cuda_cmd="queue.pl -q $gpu_queue -l gpu=1"
fi
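
For context on how these variables are used: cmd.sh only defines launcher strings, and the recipe scripts expand them in front of the command to be parallelized. A minimal, hypothetical sketch of the convention (job range and log path are illustrative, not part of this change):

# Run 4 parallel jobs with whichever launcher cmd.sh selected (run.pl locally,
# queue.pl on the BUT cluster); JOB is substituted into the args and the log name.
$train_cmd JOB=1:4 exp/example/log/demo.JOB.log \
  echo "hello from job JOB"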

2 changes: 2 additions & 0 deletions egs/ami/s5/conf/decode_dnn.conf
@@ -0,0 +1,2 @@
beam=13.0 # beam for decoding. Was 13.0 in the scripts.
lattice_beam=8.0 # this has most effect on size of the lattices.
3 changes: 3 additions & 0 deletions egs/ami/s5/conf/decode_tandem.conf
@@ -0,0 +1,3 @@
beam=16.0
lattice_beam=8.0
scoring_opts="--min-lmwt 20 --max-lmwt 30"
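
These .conf files are plain shell variable assignments. Decode scripts typically pick them up through the --config option handled by utils/parse_options.sh, which roughly amounts to sourcing the file; a hedged sketch of the effect:

# Roughly what passing --config conf/decode_dnn.conf to a decode script does:
. conf/decode_dnn.conf   # sets beam and lattice_beam for the rest of the script
echo "beam=$beam lattice_beam=$lattice_beam"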
4 changes: 2 additions & 2 deletions egs/ami/s5/local/ami_train_lms.sh
@@ -98,14 +98,14 @@ if [ ! -z "$swbd" ]; then
fi

if [ ! -z "$fisher" ]; then
-[ ! -d "$fisher/part1/data/trans" ] \
+[ ! -d "$fisher/data/trans" ] \
&& echo "Cannot find transcripts in Fisher directory: '$fisher'" \
&& exit 1;
mkdir -p $dir/fisher

find $fisher -follow -path '*/trans/*fe*.txt' -exec cat {} \; | grep -v ^# | grep -v ^$ \
| cut -d' ' -f4- | gzip -c > $dir/fisher/text0.gz
-gunzip -c $dir/fisher/text0.gz | fisher_map_words.pl \
+gunzip -c $dir/fisher/text0.gz | local/fisher_map_words.pl \
| gzip -c > $dir/fisher/text1.gz
ngram-count -debug 0 -text $dir/fisher/text1.gz -order $order -limit-vocab \
-vocab $dir/wordlist -unk -map-unk "<unk>" -kndiscount -interpolate \
117 changes: 117 additions & 0 deletions egs/ami/s5/local/nnet/run_dnn.sh
@@ -0,0 +1,117 @@
#!/bin/bash -u

. ./cmd.sh
. ./path.sh

# DNN training. This script is based on egs/swbd/s5b/local/run_dnn.sh
# Shinji Watanabe, Karel Vesely,

# Config:
nj=80
nj_decode=30
stage=0 # resume training with --stage=N
. utils/parse_options.sh || exit 1;
#

if [ $# -ne 1 ]; then
printf "\nUSAGE: %s <mic condition(ihm|sdm|mdm)>\n\n" `basename $0`
exit 1;
fi
mic=$1

gmmdir=exp/$mic/tri4a
data_fmllr=data-fmllr-tri4

final_lm=`cat data/local/lm/final_lm`
LM=$final_lm.pr1-7
graph_dir=exp/$mic/tri4a/graph_${LM}

# Set bash to 'debug' mode: exit on -e 'error', -u 'undefined variable',
# -o pipefail 'error in pipeline', and print commands with -x,
set -e
set -u
set -o pipefail
set -x

# Store fMLLR features, so we can train on them easily,
if [ $stage -le 0 ]; then
# eval
dir=$data_fmllr/$mic/eval
steps/nnet/make_fmllr_feats.sh --nj 15 --cmd "$train_cmd" \
--transform-dir $gmmdir/decode_eval_${LM} \
$dir data/$mic/eval $gmmdir $dir/log $dir/data
# dev
dir=$data_fmllr/$mic/dev
steps/nnet/make_fmllr_feats.sh --nj 15 --cmd "$train_cmd" \
--transform-dir $gmmdir/decode_dev_${LM} \
$dir data/$mic/dev $gmmdir $dir/log $dir/data
# train
dir=$data_fmllr/$mic/train
steps/nnet/make_fmllr_feats.sh --nj 15 --cmd "$train_cmd" \
--transform-dir ${gmmdir}_ali \
$dir data/$mic/train $gmmdir $dir/log $dir/data
# split the data : 90% train 10% cross-validation (held-out)
utils/subset_data_dir_tr_cv.sh $dir ${dir}_tr90 ${dir}_cv10
fi

# Pre-train DBN, i.e. a stack of RBMs,
if [ $stage -le 1 ]; then
dir=exp/$mic/dnn4_pretrain-dbn
$cuda_cmd $dir/log/pretrain_dbn.log \
steps/nnet/pretrain_dbn.sh --rbm-iter 1 $data_fmllr/$mic/train $dir
fi

# Train the DNN optimizing per-frame cross-entropy,
if [ $stage -le 2 ]; then
dir=exp/$mic/dnn4_pretrain-dbn_dnn
ali=${gmmdir}_ali
feature_transform=exp/$mic/dnn4_pretrain-dbn/final.feature_transform
dbn=exp/$mic/dnn4_pretrain-dbn/6.dbn
# Train
$cuda_cmd $dir/log/train_nnet.log \
steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \
$data_fmllr/$mic/train_tr90 $data_fmllr/$mic/train_cv10 data/lang $ali $ali $dir
# Decode (reuse HCLG graph)
steps/nnet/decode.sh --nj $nj_decode --cmd "$decode_cmd" --config conf/decode_dnn.conf --acwt 0.1 \
--num-threads 3 \
$graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${LM}
steps/nnet/decode.sh --nj $nj_decode --cmd "$decode_cmd" --config conf/decode_dnn.conf --acwt 0.1 \
--num-threads 3 \
$graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${LM}
fi


# Sequence training using the sMBR criterion: we do stochastic GD with
# per-utterance updates, using acwt 0.1 (usually a good value).
# Lattices are not regenerated (this is faster).

dir=exp/$mic/dnn4_pretrain-dbn_dnn_smbr
srcdir=exp/$mic/dnn4_pretrain-dbn_dnn
acwt=0.1

# Generate lattices and alignments,
if [ $stage -le 3 ]; then
steps/nnet/align.sh --nj $nj --cmd "$train_cmd" \
$data_fmllr/$mic/train data/lang $srcdir ${srcdir}_ali
steps/nnet/make_denlats.sh --nj $nj --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--acwt $acwt $data_fmllr/$mic/train data/lang $srcdir ${srcdir}_denlats
fi

# Re-train the DNN by 4 epochs of sMBR,
if [ $stage -le 4 ]; then
steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 4 --acwt $acwt --do-smbr true \
$data_fmllr/$mic/train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir
# Decode (reuse HCLG graph)
for ITER in 4 3 2 1; do
steps/nnet/decode.sh --nj $nj_decode --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--num-threads 3 --nnet $dir/${ITER}.nnet --acwt $acwt \
$graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${LM}
steps/nnet/decode.sh --nj $nj_decode --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--num-threads 3 --nnet $dir/${ITER}.nnet --acwt $acwt \
$graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${LM}
done
fi

# Getting results [see RESULTS file]
# for x in exp/$mic/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
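
The script above takes the microphone condition as its single positional argument (see the usage check near the top); a hypothetical invocation, with the stage number purely illustrative:

# Run the full DNN recipe on the IHM condition:
local/nnet/run_dnn.sh ihm
# Resume from the sMBR lattice/alignment stage after an interruption:
local/nnet/run_dnn.sh --stage 3 ihm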

50 changes: 50 additions & 0 deletions egs/ami/s5/local/remove_dup_utts.sh
@@ -0,0 +1,50 @@
#!/bin/bash

# Remove excess utterances from a data set once they appear more than a
# specified number of times with the same transcription.
# E.g. useful for removing excess "uh-huh" from training.

if [ $# != 3 ]; then
echo "Usage: remove_dup_utts.sh max-count src-data-dir dest-data-dir"
exit 1;
fi

maxcount=$1
srcdir=$2
destdir=$3
mkdir -p $destdir

[ ! -f $srcdir/text ] && echo "Invalid input directory $srcdir" && exit 1;

cp $srcdir/* $destdir
cat $srcdir/text | \
perl -e '
$maxcount = shift @ARGV;
@all = ();
$p1 = 103349; $p2 = 71147; $k = 0;
sub random { # our own random number generator: predictable.
$k = ($k + $p1) % $p2;
return ($k / $p2);
}
while(<>) {
push @all, $_;
@A = split(" ", $_);
shift @A;
$text = join(" ", @A);
$count{$text} ++;
}
foreach $line (@all) {
@A = split(" ", $line);
shift @A;
$text = join(" ", @A);
$n = $count{$text};
if ($n < $maxcount || random() < ($maxcount / $n)) {
print $line;
}
}' $maxcount >$destdir/text

echo "Reduced number of utterances from `cat $srcdir/text | wc -l` to `cat $destdir/text | wc -l`"

echo "Using fix_data_dir.sh to reconcile the other files."
utils/fix_data_dir.sh $destdir
rm -r $destdir/.backup
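
The keep rule above retains every utterance whose transcription occurs fewer than max-count times, and otherwise keeps each copy with probability max-count/n (so roughly max-count survive); the home-grown generator makes the subsampling deterministic across reruns. A hypothetical invocation (directory names are illustrative, not taken from this PR):

# Keep at most about 10 utterances per identical transcription:
local/remove_dup_utts.sh 10 data/ihm/train data/ihm/train_nodup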
142 changes: 0 additions & 142 deletions egs/ami/s5/local/run_dnn.sh

This file was deleted.
