sync from official repo #11

Closed
wants to merge 7 commits
14 changes: 13 additions & 1 deletion egs/ami/s5/cmd.sh
@@ -15,4 +15,16 @@
export train_cmd=run.pl
export decode_cmd=run.pl
export highmem_cmd=run.pl
export cuda_cmd=run.pl

host=$(hostname -f)
if [ ${host#*.} == "fit.vutbr.cz" ]; then
# BUT cluster:
queue="all.q@@blade,all.q@@speech"
gpu_queue="long.q@supergpu*,long.q@dellgpu*,long.q@pcspeech-gpu,long.q@pcgpu*"
storage="matylda5"
export train_cmd="queue.pl -q $queue -l ram_free=1500M,mem_free=1500M,${storage}=1"
export decode_cmd="queue.pl -q $queue -l ram_free=2500M,mem_free=2500M,${storage}=0.5"
export cuda_cmd="queue.pl -q $gpu_queue -l gpu=1"
fi
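
For context on how these variables are used: cmd.sh only defines launcher strings, and the recipe scripts expand them in front of the command to be parallelized. A minimal, hypothetical sketch of the convention (job range and log path are illustrative, not part of this change):

# Run 4 parallel jobs with whichever launcher cmd.sh selected (run.pl locally,
# queue.pl on the BUT cluster); JOB is substituted into the args and the log name.
$train_cmd JOB=1:4 exp/example/log/demo.JOB.log \
  echo "hello from job JOB"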

2 changes: 2 additions & 0 deletions egs/ami/s5/conf/decode_dnn.conf
@@ -0,0 +1,2 @@
beam=13.0 # beam for decoding. Was 13.0 in the scripts.
lattice_beam=8.0 # this has most effect on size of the lattices.
3 changes: 3 additions & 0 deletions egs/ami/s5/conf/decode_tandem.conf
@@ -0,0 +1,3 @@
beam=16.0
lattice_beam=8.0
scoring_opts="--min-lmwt 20 --max-lmwt 30"
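
These .conf files are plain shell variable assignments. Decode scripts typically pick them up through the --config option handled by utils/parse_options.sh, which roughly amounts to sourcing the file; a hedged sketch of the effect:

# Roughly what passing --config conf/decode_dnn.conf to a decode script does:
. conf/decode_dnn.conf   # sets beam and lattice_beam for the rest of the script
echo "beam=$beam lattice_beam=$lattice_beam"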
4 changes: 2 additions & 2 deletions egs/ami/s5/local/ami_train_lms.sh
@@ -98,14 +98,14 @@ if [ ! -z "$swbd" ]; then
fi

if [ ! -z "$fisher" ]; then
-[ ! -d "$fisher/part1/data/trans" ] \
+[ ! -d "$fisher/data/trans" ] \
&& echo "Cannot find transcripts in Fisher directory: '$fisher'" \
&& exit 1;
mkdir -p $dir/fisher

find $fisher -follow -path '*/trans/*fe*.txt' -exec cat {} \; | grep -v ^# | grep -v ^$ \
| cut -d' ' -f4- | gzip -c > $dir/fisher/text0.gz
-gunzip -c $dir/fisher/text0.gz | fisher_map_words.pl \
+gunzip -c $dir/fisher/text0.gz | local/fisher_map_words.pl \
| gzip -c > $dir/fisher/text1.gz
ngram-count -debug 0 -text $dir/fisher/text1.gz -order $order -limit-vocab \
-vocab $dir/wordlist -unk -map-unk "<unk>" -kndiscount -interpolate \
117 changes: 117 additions & 0 deletions egs/ami/s5/local/nnet/run_dnn.sh
@@ -0,0 +1,117 @@
#!/bin/bash -u

. ./cmd.sh
. ./path.sh

# DNN training. This script is based on egs/swbd/s5b/local/run_dnn.sh
# Shinji Watanabe, Karel Vesely,

# Config:
nj=80
nj_decode=30
stage=0 # resume training with --stage=N
. utils/parse_options.sh || exit 1;
#

if [ $# -ne 1 ]; then
printf "\nUSAGE: %s <mic condition(ihm|sdm|mdm)>\n\n" `basename $0`
exit 1;
fi
mic=$1

gmmdir=exp/$mic/tri4a
data_fmllr=data-fmllr-tri4

final_lm=`cat data/local/lm/final_lm`
LM=$final_lm.pr1-7
graph_dir=exp/$mic/tri4a/graph_${LM}

# Set bash to 'debug' mode: exit on -e 'error', -u 'undefined variable',
# -o pipefail 'error in pipeline', and print commands with -x,
set -e
set -u
set -o pipefail
set -x

# Store fMLLR features, so we can train on them easily,
if [ $stage -le 0 ]; then
# eval
dir=$data_fmllr/$mic/eval
steps/nnet/make_fmllr_feats.sh --nj 15 --cmd "$train_cmd" \
--transform-dir $gmmdir/decode_eval_${LM} \
$dir data/$mic/eval $gmmdir $dir/log $dir/data
# dev
dir=$data_fmllr/$mic/dev
steps/nnet/make_fmllr_feats.sh --nj 15 --cmd "$train_cmd" \
--transform-dir $gmmdir/decode_dev_${LM} \
$dir data/$mic/dev $gmmdir $dir/log $dir/data
# train
dir=$data_fmllr/$mic/train
steps/nnet/make_fmllr_feats.sh --nj 15 --cmd "$train_cmd" \
--transform-dir ${gmmdir}_ali \
$dir data/$mic/train $gmmdir $dir/log $dir/data
# split the data : 90% train 10% cross-validation (held-out)
utils/subset_data_dir_tr_cv.sh $dir ${dir}_tr90 ${dir}_cv10
fi

# Pre-train DBN, i.e. a stack of RBMs,
if [ $stage -le 1 ]; then
dir=exp/$mic/dnn4_pretrain-dbn
$cuda_cmd $dir/log/pretrain_dbn.log \
steps/nnet/pretrain_dbn.sh --rbm-iter 1 $data_fmllr/$mic/train $dir
fi

# Train the DNN optimizing per-frame cross-entropy,
if [ $stage -le 2 ]; then
dir=exp/$mic/dnn4_pretrain-dbn_dnn
ali=${gmmdir}_ali
feature_transform=exp/$mic/dnn4_pretrain-dbn/final.feature_transform
dbn=exp/$mic/dnn4_pretrain-dbn/6.dbn
# Train
$cuda_cmd $dir/log/train_nnet.log \
steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \
$data_fmllr/$mic/train_tr90 $data_fmllr/$mic/train_cv10 data/lang $ali $ali $dir
# Decode (reuse HCLG graph)
steps/nnet/decode.sh --nj $nj_decode --cmd "$decode_cmd" --config conf/decode_dnn.conf --acwt 0.1 \
--num-threads 3 \
$graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${LM}
steps/nnet/decode.sh --nj $nj_decode --cmd "$decode_cmd" --config conf/decode_dnn.conf --acwt 0.1 \
--num-threads 3 \
$graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${LM}
fi


# Sequence training using the sMBR criterion: we do stochastic GD with
# per-utterance updates, using acwt 0.1 (usually a good value).
# Lattices are not regenerated (this is faster).

dir=exp/$mic/dnn4_pretrain-dbn_dnn_smbr
srcdir=exp/$mic/dnn4_pretrain-dbn_dnn
acwt=0.1

# Generate lattices and alignments,
if [ $stage -le 3 ]; then
steps/nnet/align.sh --nj $nj --cmd "$train_cmd" \
$data_fmllr/$mic/train data/lang $srcdir ${srcdir}_ali
steps/nnet/make_denlats.sh --nj $nj --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--acwt $acwt $data_fmllr/$mic/train data/lang $srcdir ${srcdir}_denlats
fi

# Re-train the DNN by 4 epochs of sMBR,
if [ $stage -le 4 ]; then
steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 4 --acwt $acwt --do-smbr true \
$data_fmllr/$mic/train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir
# Decode (reuse HCLG graph)
for ITER in 4 3 2 1; do
steps/nnet/decode.sh --nj $nj_decode --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--num-threads 3 --nnet $dir/${ITER}.nnet --acwt $acwt \
$graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${LM}
steps/nnet/decode.sh --nj $nj_decode --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--num-threads 3 --nnet $dir/${ITER}.nnet --acwt $acwt \
$graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${LM}
done
fi

# Getting results [see RESULTS file]
# for x in exp/$mic/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
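
The script above takes the microphone condition as its single positional argument (see the usage check near the top); a hypothetical invocation, with the stage number purely illustrative:

# Run the full DNN recipe on the IHM condition:
local/nnet/run_dnn.sh ihm
# Resume from the sMBR lattice/alignment stage after an interruption:
local/nnet/run_dnn.sh --stage 3 ihm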

50 changes: 50 additions & 0 deletions egs/ami/s5/local/remove_dup_utts.sh
@@ -0,0 +1,50 @@
#!/bin/bash

# Remove excess utterances from a data set once they appear more than a
# specified number of times with the same transcription.
# E.g. useful for removing excess "uh-huh" from training.

if [ $# != 3 ]; then
echo "Usage: remove_dup_utts.sh max-count src-data-dir dest-data-dir"
exit 1;
fi

maxcount=$1
srcdir=$2
destdir=$3
mkdir -p $destdir

[ ! -f $srcdir/text ] && echo "Invalid input directory $srcdir" && exit 1;

cp $srcdir/* $destdir
cat $srcdir/text | \
perl -e '
$maxcount = shift @ARGV;
@all = ();
$p1 = 103349; $p2 = 71147; $k = 0;
sub random { # our own random number generator: predictable.
$k = ($k + $p1) % $p2;
return ($k / $p2);
}
while(<>) {
push @all, $_;
@A = split(" ", $_);
shift @A;
$text = join(" ", @A);
$count{$text} ++;
}
foreach $line (@all) {
@A = split(" ", $line);
shift @A;
$text = join(" ", @A);
$n = $count{$text};
if ($n < $maxcount || random() < ($maxcount / $n)) {
print $line;
}
}' $maxcount >$destdir/text

echo "Reduced number of utterances from `cat $srcdir/text | wc -l` to `cat $destdir/text | wc -l`"

echo "Using fix_data_dir.sh to reconcile the other files."
utils/fix_data_dir.sh $destdir
rm -r $destdir/.backup
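
The keep rule above retains every utterance whose transcription occurs fewer than max-count times, and otherwise keeps each copy with probability max-count/n (so roughly max-count survive); the home-grown generator makes the subsampling deterministic across reruns. A hypothetical invocation (directory names are illustrative, not taken from this PR):

# Keep at most about 10 utterances per identical transcription:
local/remove_dup_utts.sh 10 data/ihm/train data/ihm/train_nodup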
142 changes: 0 additions & 142 deletions egs/ami/s5/local/run_dnn.sh

This file was deleted.
