Commit
Some new functionality in nnet3; including drafts of example scripts
danpovey committed Jul 29, 2015
1 parent e150228 commit 53baf84

Showing 10 changed files with 435 additions and 30 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -6,6 +6,7 @@

# emacs saves
[#]*[#]
.[#]*
*~
# .depend.mk files
.depend.mk
@@ -59,6 +60,7 @@
/tools/CLAPACK_include
/tools/kaldi_lm
/tools/env.sh
/tools/rnnlm-hs-0.1b/rnnlm

# /src/
/src/kaldi.mk
84 changes: 84 additions & 0 deletions egs/wsj/s5/local/nnet3/run_ivector_common.sh
@@ -0,0 +1,84 @@
#!/bin/bash

# this script is called from scripts like run_ms.sh; it does the common stages
# of the build, such as feature extraction.
# This is actually the same as local/online/run_nnet2_common.sh, except
# for the directory names.
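# Example invocation (illustrative), run from egs/wsj/s5:
#   local/nnet3/run_ivector_common.sh --stage 5   # e.g. restart from iVector-extractor training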

. cmd.sh
mfccdir=mfcc

stage=1

. cmd.sh
. ./path.sh
. ./utils/parse_options.sh


if [ $stage -le 1 ]; then
for datadir in train_si284 test_eval93 test_dev93 test_eval92; do
utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
steps/make_mfcc.sh --nj 40 --mfcc-config conf/mfcc_hires.conf \
--cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1;
steps/compute_cmvn_stats.sh data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1;
done
utils/subset_data_dir.sh --first data/train_si284_hires 7138 data/train_si84_hires || exit 1
fi

if [ $stage -le 2 ]; then
# We need to build a small system just because we need the LDA+MLLT transform
# to train the diag-UBM on top of. We align the si84 data for this purpose.

steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
data/train_si84 data/lang exp/tri4b exp/nnet3/tri4b_ali_si84
fi

if [ $stage -le 3 ]; then
# Train a small system just for its LDA+MLLT transform. We use --num-iters 13
# because after we get the transform (12th iter is the last), any further
# training is pointless.
steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \
--realign-iters "" \
--splice-opts "--left-context=3 --right-context=3" \
5000 10000 data/train_si84_hires data/lang \
exp/nnet3/tri4b_ali_si84 exp/nnet3/tri5b
fi

if [ $stage -le 4 ]; then
mkdir -p exp/nnet3

steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 \
--num-frames 400000 data/train_si84_hires 256 exp/nnet3/tri5b exp/nnet3/diag_ubm
fi

if [ $stage -le 5 ]; then
# even though $nj is just 10, each job uses multiple processes and threads.
steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
data/train_si284_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;
fi

if [ $stage -le 6 ]; then
# We extract iVectors on all the train_si284 data, which will be what we
# train the system on.

# having a larger number of speakers is helpful for generalization, and to
# handle per-utterance decoding well (iVector starts at zero).
steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/train_si284_hires \
data/train_si284_hires_max2
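# (Roughly speaking, --utts-per-spk-max 2 splits each real speaker into
# pseudo-speakers with at most 2 utterances each, e.g. a 4-utterance speaker
# becomes two 2-utterance pseudo-speakers; this keeps the iVectors seen in
# training closer to what per-utterance decoding will see.)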

steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
data/train_si284_hires_max2 exp/nnet3/extractor exp/nnet3/ivectors_train_si284 || exit 1;
fi

if [ $stage -le 7 ]; then
rm exp/nnet3/.error 2>/dev/null
for data in test_eval92 test_dev93 test_eval93; do
steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 8 \
data/${data}_hires exp/nnet3/extractor exp/nnet3/ivectors_${data} || touch exp/nnet3/.error &
done
wait
[ -f exp/nnet3/.error ] && echo "$0: error extracting iVectors." && exit 1;
fi

exit 0;
68 changes: 68 additions & 0 deletions egs/wsj/s5/local/nnet3/run_tdnn.sh
@@ -0,0 +1,68 @@
#!/bin/bash

# this is the standard "tdnn" system, built in nnet3; it's what we used to
# call multi-splice.

. cmd.sh


# At this script level we don't support running without a GPU, as it would be painfully slow.
# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
# --num-threads 16 and --minibatch-size 128.
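# For reference, a CPU-only run would look roughly like the sketch below
# (untested; all other options as in the GPU invocation further down):
#   steps/nnet3/train_tdnn.sh --gpu false --num-threads 16 --minibatch-size 128 \
#     ... data/train_si284_hires data/lang exp/tri4b_ali_si284 $dir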

stage=0
train_stage=-10
dir=exp/nnet3/nnet_ms_a
. cmd.sh
. ./path.sh
. ./utils/parse_options.sh


if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

local/nnet3/run_ivector_common.sh --stage $stage || exit 1;

if [ $stage -le 8 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
utils/create_split_dir.pl \
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
fi


steps/nnet3/train_tdnn.sh --stage $train_stage \
--num-epochs 8 --num-jobs-initial 2 --num-jobs-final 14 \
--num-hidden-layers 4 \
--splice-indexes "-4,-3,-2,-1,0,1,2,3,4 0 -2,2 0 -4,4 0" \
--feat-type raw \
--online-ivector-dir exp/nnet3/ivectors_train_si284 \
--cmvn-opts "--norm-means=false --norm-vars=false" \
--io-opts "-tc 12" \
--initial-effective-lrate 0.005 --final-effective-lrate 0.0005 \
--cmd "$decode_cmd" \
--pnorm-input-dim 2000 \
--pnorm-output-dim 250 \
--mix-up 12000 \
data/train_si284_hires data/lang exp/tri4b_ali_si284 $dir || exit 1;
fi


if [ $stage -le 9 ]; then
# this does offline decoding that should give the same results as the real
# online decoding.
for lm_suffix in tgpr bd_tgpr; do
graph_dir=exp/tri4b/graph_${lm_suffix}
# use already-built graphs.
for year in eval92 dev93; do
steps/nnet3/decode.sh --nj 8 --cmd "$decode_cmd" \
--online-ivector-dir exp/nnet3/ivectors_test_$year \
$graph_dir data/test_${year}_hires $dir/decode_${lm_suffix}_${year} || exit 1;
done
done
fi

1 change: 0 additions & 1 deletion egs/wsj/s5/run.sh
@@ -108,7 +108,6 @@ for x in test_eval92 test_eval93 test_dev93 train_si284; do
steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
done


utils/subset_data_dir.sh --first data/train_si284 7138 data/train_si84 || exit 1

# Now make subset with the shortest 2k utterances from si-84.
43 changes: 22 additions & 21 deletions egs/wsj/s5/steps/nnet3/train_tdnn.sh
@@ -51,7 +51,7 @@ stage=-6
exit_stage=-100 # you can set this to terminate the training early. Exits before running this stage

# count space-separated fields in splice_indexes to get num-hidden-layers.
-splice_indexes="-4,-3,-2,-1,0,1,2,3,4 0 -2,2 0 -4,4 0"
+splice_indexes="-4,-3,-2,-1,0,1,2,3,4 0 -2,2 0 -4,4 0"
# Format : layer<hidden_layer>/<frame_indices>....layer<hidden_layer>/<frame_indices> "
# note: hidden layers are composed of one or more components,
# so hidden-layer indexing is different from the component count
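# Example reading of the default above (an illustrative sketch): there are 6
# space-separated groups, so by the rule above there are 6 hidden layers;
# "-4,-3,-2,-1,0,1,2,3,4" means the first layer splices the input at time
# offsets -4..+4, "-2,2" means that layer splices the previous layer's output
# at offsets -2 and +2, and a bare "0" means only the current frame is used.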
@@ -62,7 +62,7 @@ randprune=4.0 # speeds up LDA.
affine_opts=

gpu=true # if true, we run on GPU.
-cpu_num_threads=16 # if using CPU, the number of threads we use.
+num_threads=16 # if using CPU, the number of threads we use.
combine_num_threads=8 # number of threads for the "combine" operation
cleanup=true
egs_dir=
@@ -257,7 +257,7 @@ if [ $stage -le -3 ]; then

# Write stats with the same format as stats for LDA.
$cmd JOB=1:$num_lda_jobs $dir/log/get_lda_stats.JOB.log \
-nnet3-get-lda-stats --rand-prune=$rand_prune \
+nnet3-acc-lda-stats --rand-prune=$rand_prune \
$dir/init.raw $egs_dir/egs.JOB.ark $dir/JOB.lda_stats || exit 1;

all_lda_accs=$(for n in $(seq $num_lda_jobs); do echo $dir/$n.lda_stats; done)
@@ -327,25 +327,25 @@ finish_add_layers_iter=$[$num_hidden_layers * $add_layers_period]

echo "$0: Will train for $num_epochs epochs = $num_iters iterations"

if [ $num_threads -eq 1 ]; then
if $gpu; then
parallel_suffix="-simple" # this enables us to use GPU code if
# we have just one thread.
train_queue_opt="--gpu 1"
parallel_train_opts=
if !$gpu; then
train_gpu_opt="--gpu 1"
if ! cuda-compiled; then
echo "$0: WARNING: you are running with one thread but you have not compiled"
echo " for CUDA. You may be running a setup optimized for GPUs. If you have"
echo " GPUs and have nvcc installed, go to src/ and do ./configure; make"
exit 1
fi
else
echo "$0: WARNING: running with 1 thread and no GPU: this will be slow."
if ! cuda-compiled; then
echo "$0: WARNING: you are running with one thread but you have not compiled"
echo " for CUDA. You may be running a setup optimized for GPUs. If you have"
echo " GPUs and have nvcc installed, go to src/ and do ./configure; make"
exit 1
fi
else
$gpu && echo "$0: you must use --gpu false if you supply num-threads > 1" && exit 1;
parallel_suffix="-parallel"
parallel_train_opts="--num-threads=$num_threads"
if [ $num_threads -gt 1 ]; then
parallel_suffix="-parallel"
parallel_train_opts="--num-threads=$num_threads"
train_queue_opt="--num-threads $num_threads"
else
parallel_suffix="-simple"
fi
fi


@@ -367,7 +367,6 @@ first_model_combine=$[$num_iters-$num_iters_combine+1]

x=0


for realign_time in $realign_times; do
# Work out the iterations on which we will re-align, if the --realign-times
# option was used. This is slightly approximate.
@@ -407,7 +406,8 @@ while [ $x -lt $num_iters ]; do
$cmd JOB=1:$num_jobs_compute_prior $dir/log/get_post.$x.JOB.log \
nnet3-copy-egs --srand=JOB --frame=random $context_opts ark:$prev_egs_dir/egs.1.ark ark:- \| \
nnet3-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \
-nnet3-compute-from-egs "nnet3-to-raw $dir/$x.mdl -|" ark:- ark:- \| \
+nnet3-merge-egs ark:- ark:- \| \
+nnet3-compute-from-egs --apply-exp "nnet3-to-raw $dir/$x.mdl -|" ark:- ark:- \| \
matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$x.JOB.vec || exit 1;

sleep 3; # make sure there is time for $dir/post.$x.*.vec to appear.
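# (Roughly what the pipeline above does: it takes a random subset of the
# examples, merges them into minibatches, computes the exponentiated network
# outputs (posteriors) with nnet3-compute-from-egs, sums the posteriors over
# frames with matrix-sum-rows and over the subset with vector-sum, giving
# per-job count vectors post.$x.JOB.vec that are later combined to estimate
# per-pdf priors.)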
@@ -499,7 +499,7 @@ while [ $x -lt $num_iters ]; do
# same archive with different frame indexes will give similar gradients,
# so we want to separate them in time.
-$cmd $train_gpu_opt $dir/log/train.$x.$n.log \
+$cmd $train_queue_opt $dir/log/train.$x.$n.log \
nnet3-train$parallel_suffix $parallel_train_opts --minibatch-size=$this_minibatch_size --srand=$x "$raw" \
"ark:nnet3-copy-egs --frame=$frame $context_opts ark:$cur_egs_dir/egs.$archive.ark ark:- | nnet3-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-| nnet3-merge-egs --minibatch-size=$this_minibatch_size ark:- ark:- |" \
$dir/$[$x+1].$n.raw || touch $dir/.error &
@@ -600,7 +600,8 @@ if [ $stage -le $[$num_iters+1] ]; then
$cmd JOB=1:$num_jobs_compute_prior $dir/log/get_post.$x.JOB.log \
nnet3-copy-egs --frame=random $context_opts --srand=JOB ark:$cur_egs_dir/egs.1.ark ark:- \| \
nnet3-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \
-nnet3-compute-from-egs "nnet3-am-copy --raw=true $dir/final.mdl -|" ark:- ark:- \| \
+nnet3-merge-egs ark:- ark:- \| \
+nnet3-compute-from-egs --apply-exp=true "nnet3-am-copy --raw=true $dir/final.mdl -|" ark:- ark:- \| \
matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$x.JOB.vec || exit 1;
sleep 3; # make sure there is time for $dir/post.$x.*.vec to appear.
3 changes: 2 additions & 1 deletion src/nnet3bin/Makefile
@@ -7,7 +7,8 @@ LDFLAGS += $(CUDA_LDFLAGS)
LDLIBS += $(CUDA_LDLIBS)

BINFILES = nnet3-init nnet3-info nnet3-get-egs nnet3-copy-egs nnet3-subset-egs \
-nnet3-shuffle-egs nnet3-get-lda-stats
+nnet3-shuffle-egs nnet3-acc-lda-stats nnet3-merge-egs \
+nnet3-compute-from-egs

OBJFILES =

src/nnet3bin/{nnet3-get-lda-stats.cc → nnet3-acc-lda-stats.cc}
@@ -1,4 +1,4 @@
-// nnet3bin/nnet3-get-lda-stats.cc
+// nnet3bin/nnet3-acc-lda-stats.cc

// Copyright 2015 Johns Hopkins University (author: Daniel Povey)

@@ -125,16 +125,14 @@ int main(int argc, char *argv[]) {
"training examples is used for the class labels. Used in obtaining\n"
"feature transforms that help nnet training work better.\n"
"\n"
-"Usage: nnet3-get-lda-stats [options] <raw-nnet-in> <training-examples-in> <lda-stats-out>\n"
+"Usage: nnet3-acc-lda-stats [options] <raw-nnet-in> <training-examples-in> <lda-stats-out>\n"
"e.g.:\n"
-"nnet3-get-lda-stats 0.raw ark:1.egs 1.acc\n"
+"nnet3-acc-lda-stats 0.raw ark:1.egs 1.acc\n"
"See also: nnet-get-feature-transform\n";

bool binary_write = true;
BaseFloat rand_prune = 0.0;

LdaEstimate lda;

ParseOptions po(usage);
po.Register("binary", &binary_write, "Write output in binary mode");
po.Register("rand-prune", &rand_prune,