From 53baf84ba1c3ef0a2065d203d09a800cb2ab9005 Mon Sep 17 00:00:00 2001
From: Daniel Povey <dpovey@gmail.com>
Date: Wed, 29 Jul 2015 19:44:38 -0400
Subject: [PATCH] Some new functionality in nnet3; including drafts of example
 scripts

---
 .gitignore                                    |   2 +
 egs/wsj/s5/local/nnet3/run_ivector_common.sh  |  84 ++++++++++++
 egs/wsj/s5/local/nnet3/run_tdnn.sh            |  68 ++++++++++
 egs/wsj/s5/run.sh                             |   1 -
 egs/wsj/s5/steps/nnet3/train_tdnn.sh          |  43 +++---
 src/nnet3bin/Makefile                         |   3 +-
 ...et-lda-stats.cc => nnet3-acc-lda-stats.cc} |   8 +-
 src/nnet3bin/nnet3-compute-from-egs.cc        | 125 +++++++++++++++++
 src/nnet3bin/nnet3-copy-egs.cc                |   4 +-
 src/nnet3bin/nnet3-merge-egs.cc               | 127 ++++++++++++++++++
 10 files changed, 435 insertions(+), 30 deletions(-)
 create mode 100755 egs/wsj/s5/local/nnet3/run_ivector_common.sh
 create mode 100755 egs/wsj/s5/local/nnet3/run_tdnn.sh
 rename src/nnet3bin/{nnet3-get-lda-stats.cc => nnet3-acc-lda-stats.cc} (97%)
 create mode 100644 src/nnet3bin/nnet3-compute-from-egs.cc
 create mode 100644 src/nnet3bin/nnet3-merge-egs.cc

diff --git a/.gitignore b/.gitignore
index e1becd8ae6f..0fe7c4ff1a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@
 # emacs saves
 [#]*[#]
+.[#]*
 *~
 # .depend.mk files
 .depend.mk
@@ -59,6 +60,7 @@
 /tools/CLAPACK_include
 /tools/kaldi_lm
 /tools/env.sh
+/tools/rnnlm-hs-0.1b/rnnlm
 # /src/
 /src/kaldi.mk
diff --git a/egs/wsj/s5/local/nnet3/run_ivector_common.sh b/egs/wsj/s5/local/nnet3/run_ivector_common.sh
new file mode 100755
index 00000000000..e98213baa2e
--- /dev/null
+++ b/egs/wsj/s5/local/nnet3/run_ivector_common.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+# this script is called from scripts like run_tdnn.sh; it does the common
+# stages of the build, such as feature extraction.
+# This is actually the same as local/online/run_nnet2_common.sh, except
+# for the directory names.
+
+. ./cmd.sh
+mfccdir=mfcc
+
+stage=1
+
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if [ $stage -le 1 ]; then
+  for datadir in train_si284 test_eval93 test_dev93 test_eval92; do
+    utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+    steps/make_mfcc.sh --nj 40 --mfcc-config conf/mfcc_hires.conf \
+      --cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1;
+    steps/compute_cmvn_stats.sh data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1;
+  done
+  utils/subset_data_dir.sh --first data/train_si284_hires 7138 data/train_si84_hires || exit 1
+fi
+
+if [ $stage -le 2 ]; then
+  # We need to build a small system just because we need the LDA+MLLT transform
+  # to train the diag-UBM on top of.  We align the si84 data for this purpose.
+
+  steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
+    data/train_si84 data/lang exp/tri4b exp/nnet3/tri4b_ali_si84
+fi
+
+if [ $stage -le 3 ]; then
+  # Train a small system just for its LDA+MLLT transform.  We use --num-iters 13
+  # because after we get the transform (12th iter is the last), any further
+  # training is pointless.
+  steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \
+    --realign-iters "" \
+    --splice-opts "--left-context=3 --right-context=3" \
+    5000 10000 data/train_si84_hires data/lang \
+    exp/nnet3/tri4b_ali_si84 exp/nnet3/tri5b
+fi
+
+if [ $stage -le 4 ]; then
+  mkdir -p exp/nnet3
+
+  steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 \
+    --num-frames 400000 data/train_si84_hires 256 exp/nnet3/tri5b exp/nnet3/diag_ubm
+fi
+
+if [ $stage -le 5 ]; then
+  # even though $nj is just 10, each job uses multiple processes and threads.
+  steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
+    data/train_si284_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;
+fi
+
+if [ $stage -le 6 ]; then
+  # We extract iVectors on all the train_si284 data, which will be what we
+  # train the system on.
+
+  # having a larger number of speakers is helpful for generalization, and to
+  # handle per-utterance decoding well (the online iVector estimate starts at
+  # zero for each new speaker).
+  steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/train_si284_hires \
+    data/train_si284_hires_max2
+
+  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
+    data/train_si284_hires_max2 exp/nnet3/extractor exp/nnet3/ivectors_train_si284 || exit 1;
+fi
+
+if [ $stage -le 7 ]; then
+  rm exp/nnet3/.error 2>/dev/null
+  for data in test_eval92 test_dev93 test_eval93; do
+    steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 8 \
+      data/${data}_hires exp/nnet3/extractor exp/nnet3/ivectors_${data} || touch exp/nnet3/.error &
+  done
+  wait
+  [ -f exp/nnet3/.error ] && echo "$0: error extracting iVectors." && exit 1;
+fi
+
+exit 0;
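Once stage 6 has run, the extracted iVectors can be sanity-checked with feat-to-dim; a minimal sketch, assuming the standard ivector_online.scp layout written by extract_ivectors_online.sh (the expected dimension, typically 100, depends on the extractor configuration):

    feat-to-dim scp:exp/nnet3/ivectors_train_si284/ivector_online.scp -
    # prints the online-iVector dimension, e.g. 100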
diff --git a/egs/wsj/s5/local/nnet3/run_tdnn.sh b/egs/wsj/s5/local/nnet3/run_tdnn.sh
new file mode 100755
index 00000000000..95bc524a497
--- /dev/null
+++ b/egs/wsj/s5/local/nnet3/run_tdnn.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+# this is the standard "tdnn" system, built in nnet3; it's what we used to
+# call the multi-splice system.
+
+# At this script level we don't support running without a GPU, as it would be
+# painfully slow.  If you want to run without a GPU you'd have to call
+# train_tdnn.sh with --gpu false, --num-threads 16 and --minibatch-size 128.
+
+stage=0
+train_stage=-10
+dir=exp/nnet3/nnet_ms_a
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
...
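As noted at the top of run_tdnn.sh, a CPU-only run is possible one level down. A minimal sketch of such a call, using only the three options named in that comment (the remaining arguments are elided, since the full usage of train_tdnn.sh is not shown in this patch):

    steps/nnet3/train_tdnn.sh --gpu false --num-threads 16 \
      --minibatch-size 128 ...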
diff --git a/egs/wsj/s5/steps/nnet3/train_tdnn.sh b/egs/wsj/s5/steps/nnet3/train_tdnn.sh
...
 # Format : layer<hidden_layer>/<frame_indices>....layer<hidden_layer>/<frame_indices> "
 # note: hidden layers which are composed of one or more components,
 # so hidden layer indexing is different from component count
@@ -62,7 +62,7 @@ randprune=4.0 # speeds up LDA.
 affine_opts=
 
 gpu=true    # if true, we run on GPU.
-cpu_num_threads=16  # if using CPU, the number of threads we use.
+num_threads=16  # if using CPU, the number of threads we use.
 combine_num_threads=8  # number of threads for the "combine" operation
 cleanup=true
 egs_dir=
@@ -257,7 +257,7 @@ if [ $stage -le -3 ]; then
   # Write stats with the same format as stats for LDA.
   $cmd JOB=1:$num_lda_jobs $dir/log/get_lda_stats.JOB.log \
-    nnet3-get-lda-stats --rand-prune=$rand_prune \
+    nnet3-acc-lda-stats --rand-prune=$rand_prune \
     $dir/init.raw $egs_dir/egs.JOB.ark $dir/JOB.lda_stats || exit 1;
 
   all_lda_accs=$(for n in $(seq $num_lda_jobs); do echo $dir/$n.lda_stats; done)
@@ -327,25 +327,25 @@ finish_add_layers_iter=$[$num_hidden_layers * $add_layers_period]
 
 echo "$0: Will train for $num_epochs epochs = $num_iters iterations"
 
-if [ $num_threads -eq 1 ]; then
+if $gpu; then
   parallel_suffix="-simple" # this enables us to use GPU code if
                             # we have just one thread.
+  train_queue_opt="--gpu 1"
   parallel_train_opts=
-  if !$gpu; then
-    train_gpu_opt="--gpu 1"
-    if ! cuda-compiled; then
-      echo "$0: WARNING: you are running with one thread but you have not compiled"
-      echo "   for CUDA.  You may be running a setup optimized for GPUs.  If you have"
-      echo "   GPUs and have nvcc installed, go to src/ and do ./configure; make"
-      exit 1
-    fi
-  else
-    echo "$0: WARNING: running with 1 thread and no GPU: this will be slow."
+  if ! cuda-compiled; then
+    echo "$0: WARNING: you are running with one thread but you have not compiled"
+    echo "   for CUDA.  You may be running a setup optimized for GPUs.  If you have"
+    echo "   GPUs and have nvcc installed, go to src/ and do ./configure; make"
+    exit 1
   fi
 else
-  $gpu && echo "$0: you must use --gpu false if you supply num-threads > 1" && exit 1;
-  parallel_suffix="-parallel"
-  parallel_train_opts="--num-threads=$num_threads"
+  if [ $num_threads -gt 1 ]; then
+    parallel_suffix="-parallel"
+    parallel_train_opts="--num-threads=$num_threads"
+    train_queue_opt="--num-threads $num_threads"
+  else
+    parallel_suffix="-simple"
+  fi
 fi
 
@@ -367,7 +367,6 @@ first_model_combine=$[$num_iters-$num_iters_combine+1]
 
 x=0
 
-
 for realign_time in $realign_times; do
   # Work out the iterations on which we will re-align, if the --realign-times
   # option was used.  This is slightly approximate.
@@ -407,7 +406,8 @@ while [ $x -lt $num_iters ]; do
       $cmd JOB=1:$num_jobs_compute_prior $dir/log/get_post.$x.JOB.log \
         nnet3-copy-egs --srand=JOB --frame=random $context_opts ark:$prev_egs_dir/egs.1.ark ark:- \| \
         nnet3-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \
-        nnet3-compute-from-egs "nnet3-to-raw $dir/$x.mdl -|" ark:- ark:- \| \
+        nnet3-merge-egs ark:- ark:- \| \
+        nnet3-compute-from-egs --apply-exp "nnet3-to-raw $dir/$x.mdl -|" ark:- ark:- \| \
         matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$x.JOB.vec || exit 1;
 
       sleep 3;  # make sure there is time for $dir/post.$x.*.vec to appear.
@@ -499,7 +499,7 @@ while [ $x -lt $num_iters ]; do
       # same archive with different frame indexes will give similar gradients,
       # so we want to separate them in time.
 
-      $cmd $train_gpu_opt $dir/log/train.$x.$n.log \
+      $cmd $train_queue_opt $dir/log/train.$x.$n.log \
        nnet3-train$parallel_suffix $parallel_train_opts --minibatch-size=$this_minibatch_size --srand=$x "$raw" \
        "ark:nnet3-copy-egs --frame=$frame $context_opts ark:$cur_egs_dir/egs.$archive.ark ark:- | nnet3-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-| nnet3-merge-egs --minibatch-size=$this_minibatch_size ark:- ark:- |" \
        $dir/$[$x+1].$n.raw || touch $dir/.error &
@@ -600,7 +600,8 @@ if [ $stage -le $[$num_iters+1] ]; then
     $cmd JOB=1:$num_jobs_compute_prior $dir/log/get_post.$x.JOB.log \
       nnet3-copy-egs --frame=random $context_opts --srand=JOB ark:$cur_egs_dir/egs.1.ark ark:- \| \
       nnet3-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \
-      nnet3-compute-from-egs "nnet3-am-copy --raw=true $dir/final.mdl -|" ark:- ark:- \| \
+      nnet3-merge-egs ark:- ark:- \| \
+      nnet3-compute-from-egs --apply-exp=true "nnet3-am-copy --raw=true $dir/final.mdl -|" ark:- ark:- \| \
      matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$x.JOB.vec || exit 1;
 
     sleep 3;  # make sure there is time for $dir/post.$x.*.vec to appear.
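The get_post pipeline added above can also be run by hand; a minimal sketch with illustrative paths (the \| escapes in the script are only needed because the pipeline is passed to $cmd; run interactively, plain pipes are used):

    nnet3-copy-egs --frame=random ark:exp/nnet3/tdnn/egs/egs.1.ark ark:- | \
      nnet3-subset-egs --n=1000 ark:- ark:- | \
      nnet3-merge-egs ark:- ark:- | \
      nnet3-compute-from-egs --apply-exp=true \
        "nnet3-am-copy --raw=true exp/nnet3/tdnn/final.mdl -|" ark:- ark:- | \
      matrix-sum-rows ark:- ark:- | \
      vector-sum ark:- exp/nnet3/tdnn/post.vec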
diff --git a/src/nnet3bin/Makefile b/src/nnet3bin/Makefile
index 1234306b753..8a3ba2d67af 100644
--- a/src/nnet3bin/Makefile
+++ b/src/nnet3bin/Makefile
@@ -7,7 +7,8 @@ LDFLAGS += $(CUDA_LDFLAGS)
 LDLIBS += $(CUDA_LDLIBS)
 
 BINFILES = nnet3-init nnet3-info nnet3-get-egs nnet3-copy-egs nnet3-subset-egs \
-           nnet3-shuffle-egs nnet3-get-lda-stats
+           nnet3-shuffle-egs nnet3-acc-lda-stats nnet3-merge-egs \
+           nnet3-compute-from-egs
 
 OBJFILES =
diff --git a/src/nnet3bin/nnet3-get-lda-stats.cc b/src/nnet3bin/nnet3-acc-lda-stats.cc
similarity index 97%
rename from src/nnet3bin/nnet3-get-lda-stats.cc
rename to src/nnet3bin/nnet3-acc-lda-stats.cc
index 409a0e8c926..b9b222fdec5 100644
--- a/src/nnet3bin/nnet3-get-lda-stats.cc
+++ b/src/nnet3bin/nnet3-acc-lda-stats.cc
@@ -1,4 +1,4 @@
-// nnet3bin/nnet3-get-lda-stats.cc
+// nnet3bin/nnet3-acc-lda-stats.cc
 
 // Copyright 2015  Johns Hopkins University (author: Daniel Povey)
 
@@ -125,16 +125,14 @@ int main(int argc, char *argv[]) {
         "training examples is used for the class labels.  Used in obtaining\n"
         "feature transforms that help nnet training work better.\n"
         "\n"
-        "Usage: nnet3-get-lda-stats [options] <raw-nnet-in> <training-examples-in> <lda-stats-out>\n"
+        "Usage: nnet3-acc-lda-stats [options] <raw-nnet-in> <training-examples-in> <lda-stats-out>\n"
         "e.g.:\n"
-        "nnet3-get-lda-stats 0.raw ark:1.egs 1.acc\n"
+        "nnet3-acc-lda-stats 0.raw ark:1.egs 1.acc\n"
        "See also: nnet-get-feature-transform\n";
 
     bool binary_write = true;
     BaseFloat rand_prune = 0.0;
 
-    LdaEstimate lda;
-
     ParseOptions po(usage);
     po.Register("binary", &binary_write, "Write output in binary mode");
     po.Register("rand-prune", &rand_prune,
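Run standalone for a single job, the renamed binary looks like this (paths are illustrative; this mirrors the get_lda_stats command in the train_tdnn.sh hunk above):

    nnet3-acc-lda-stats --rand-prune=4.0 exp/nnet3/tdnn/init.raw \
      ark:exp/nnet3/tdnn/egs/egs.1.ark exp/nnet3/tdnn/1.lda_stats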
diff --git a/src/nnet3bin/nnet3-compute-from-egs.cc b/src/nnet3bin/nnet3-compute-from-egs.cc
new file mode 100644
index 00000000000..35ecb6b0b8a
--- /dev/null
+++ b/src/nnet3bin/nnet3-compute-from-egs.cc
@@ -0,0 +1,125 @@
+// nnet3bin/nnet3-compute-from-egs.cc
+
+// Copyright 2015  Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "hmm/transition-model.h"
+#include "nnet3/nnet-nnet.h"
+#include "nnet3/nnet-example-utils.h"
+#include "nnet3/nnet-optimize.h"
+#include "transform/lda-estimate.h"
+
+
+namespace kaldi {
+namespace nnet3 {
+
+class NnetComputerFromEg {
+ public:
+  NnetComputerFromEg(const Nnet &nnet):
+      nnet_(nnet), compiler_(nnet) { }
+
+  // Compute the output (which will have the same number of rows as the number
+  // of Indexes in the output of the eg), and put it in "output".
+  void Compute(const NnetExample &eg, Matrix<BaseFloat> *output) {
+    ComputationRequest request;
+    bool need_backprop = false, store_stats = false;
+    GetComputationRequest(nnet_, eg, need_backprop, store_stats, &request);
+    const NnetComputation &computation = *(compiler_.Compile(request));
+    NnetComputeOptions options;
+    if (GetVerboseLevel() >= 3)
+      options.debug = true;
+    NnetComputer computer(options, computation, nnet_, NULL);
+    computer.AcceptInputs(nnet_, eg);
+    computer.Forward();
+    const CuMatrixBase<BaseFloat> &nnet_output = computer.GetOutput("output");
+    output->Resize(nnet_output.NumRows(), nnet_output.NumCols());
+    nnet_output.CopyToMat(output);
+  }
+ private:
+  const Nnet &nnet_;
+  CachingOptimizingCompiler compiler_;
+
+};
+
+}
+}
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace kaldi::nnet3;
+    typedef kaldi::int32 int32;
+    typedef kaldi::int64 int64;
+
+    const char *usage =
+        "Read input nnet training examples, and compute the output for each one.\n"
+        "If --apply-exp=true, apply the Exp() function to the output before writing\n"
+        "it out.\n"
+        "\n"
+        "Usage:  nnet3-compute-from-egs [options] <raw-nnet-in> <training-examples-in> <matrices-out>\n"
+        "e.g.:\n"
+        "nnet3-compute-from-egs --apply-exp=true 0.raw ark:1.egs ark:- | matrix-sum-rows ark:- ... \n"
+        "See also: nnet3-compute\n";
+
+    bool binary_write = true,
+        apply_exp = false;
+
+    ParseOptions po(usage);
+    po.Register("binary", &binary_write, "Write output in binary mode");
+    po.Register("apply-exp", &apply_exp, "If true, apply exp function to "
+                "output");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string nnet_rxfilename = po.GetArg(1),
+        examples_rspecifier = po.GetArg(2),
+        matrix_wspecifier = po.GetArg(3);
+
+    Nnet nnet;
+    ReadKaldiObject(nnet_rxfilename, &nnet);
+
+    NnetComputerFromEg computer(nnet);
+
+    int64 num_egs = 0;
+
+    SequentialNnetExampleReader example_reader(examples_rspecifier);
+    BaseFloatMatrixWriter matrix_writer(matrix_wspecifier);
+
+    for (; !example_reader.Done(); example_reader.Next(), num_egs++) {
+      Matrix<BaseFloat> output;
+      computer.Compute(example_reader.Value(), &output);
+      KALDI_ASSERT(output.NumRows() != 0);
+      if (apply_exp)
+        output.ApplyExp();
+      matrix_writer.Write(example_reader.Key(), output);
+    }
+    KALDI_LOG << "Processed " << num_egs << " examples.";
+    return 0;
+  } catch(const std::exception &e) {
+    std::cerr << e.what() << '\n';
+    return -1;
+  }
+}
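Completing the example from the usage string into the prior-estimation form used by train_tdnn.sh (output names are illustrative):

    nnet3-compute-from-egs --apply-exp=true 0.raw ark:1.egs ark:- | \
      matrix-sum-rows ark:- ark:- | \
      vector-sum ark:- post.vec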
diff --git a/src/nnet3bin/nnet3-copy-egs.cc b/src/nnet3bin/nnet3-copy-egs.cc
index daa83789868..e521e3a5498 100644
--- a/src/nnet3bin/nnet3-copy-egs.cc
+++ b/src/nnet3bin/nnet3-copy-egs.cc
@@ -253,9 +253,9 @@ int main(int argc, char *argv[]) {
         "Usage:  nnet3-copy-egs [options] <egs-rspecifier> <egs-wspecifier1> [<egs-wspecifier2> ...]\n"
         "\n"
         "e.g.\n"
-        "nnet-copy-egs ark:train.egs ark,t:text.egs\n"
+        "nnet3-copy-egs ark:train.egs ark,t:text.egs\n"
         "or:\n"
-        "nnet-copy-egs ark:train.egs ark:1.egs ark:2.egs\n";
+        "nnet3-copy-egs ark:train.egs ark:1.egs ark:2.egs\n";
 
     bool random = false;
     int32 srand_seed = 0;
diff --git a/src/nnet3bin/nnet3-merge-egs.cc b/src/nnet3bin/nnet3-merge-egs.cc
new file mode 100644
index 00000000000..352f67a7b70
--- /dev/null
+++ b/src/nnet3bin/nnet3-merge-egs.cc
@@ -0,0 +1,127 @@
+// nnet3bin/nnet3-merge-egs.cc
+
+// Copyright 2012-2015  Johns Hopkins University (author: Daniel Povey)
+//                2014  Vimal Manohar
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "hmm/transition-model.h"
+#include "nnet3/nnet-example.h"
+#include "nnet3/nnet-example-utils.h"
+
+namespace kaldi {
+namespace nnet3 {
+// returns the number of indexes/frames in the NnetIo named "output" in the eg,
+// or crashes if it is not there.
+int32 NumOutputIndexes(const NnetExample &eg) {
+  for (size_t i = 0; i < eg.io.size(); i++)
+    if (eg.io[i].name == "output")
+      return eg.io[i].indexes.size();
+  KALDI_ERR << "No output named 'output' in the eg.";
+  return 0;  // Suppress compiler warning.
+}
+
+}
+}
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace kaldi::nnet3;
+    typedef kaldi::int32 int32;
+    typedef kaldi::int64 int64;
+
+    const char *usage =
+        "This copies nnet training examples from input to output, but while doing so it\n"
+        "merges many NnetExample objects into one, forming a minibatch consisting of a\n"
+        "single NnetExample.  Note: if --measure-output-frames=true, which it is by default,\n"
+        "the --minibatch-size option will be interpreted as a target number of output frames;\n"
+        "otherwise as a number of input examples to combine.  This makes a difference\n"
+        "if the input examples have multiple supervised frames in them.\n"
+        "\n"
+        "Usage:  nnet3-merge-egs [options] <egs-rspecifier> <egs-wspecifier>\n"
+        "e.g.\n"
+        "nnet3-merge-egs --minibatch-size=512 ark:1.egs ark:- | nnet3-train-simple ... \n"
+        "See also nnet3-copy-egs\n";
+
+    bool compress = false;
+    int32 minibatch_size = 512;
+    bool measure_output_frames = true;
+
+    ParseOptions po(usage);
+    po.Register("minibatch-size", &minibatch_size, "Target size of minibatches "
+                "when merging (see also --measure-output-frames)");
+    po.Register("measure-output-frames", &measure_output_frames, "If true, "
+                "--minibatch-size is a target number of total output frames; if "
+                "false, --minibatch-size is the number of input examples to "
+                "merge.");
+    po.Register("compress", &compress, "If true, compress the output examples "
+                "(not recommended unless you are writing to disk)");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string examples_rspecifier = po.GetArg(1),
+        examples_wspecifier = po.GetArg(2);
+
+    SequentialNnetExampleReader example_reader(examples_rspecifier);
+    NnetExampleWriter example_writer(examples_wspecifier);
+
+    std::vector<NnetExample> examples;
+    examples.reserve(minibatch_size);
+
+    int32 cur_num_output_frames = 0;
+
+    int64 num_read = 0, num_written = 0;
+    while (!example_reader.Done()) {
+      const NnetExample &cur_eg = example_reader.Value();
+      examples.resize(examples.size() + 1);
+      examples.back() = cur_eg;
+      cur_num_output_frames += NumOutputIndexes(cur_eg);
+      bool minibatch_ready =
+          (measure_output_frames ?
+           cur_num_output_frames >= minibatch_size :
+           static_cast<int32>(examples.size()) >= minibatch_size);
+
+      // Do Next() now, so we can test example_reader.Done() below.
+      example_reader.Next();
+      num_read++;
+
+      if (minibatch_ready || (example_reader.Done() && !examples.empty())) {
+        NnetExample merged_eg;
+        MergeExamples(examples, compress, &merged_eg);
+        std::ostringstream ostr;
+        ostr << "merged-" << num_written;
+        num_written++;
+        std::string output_key = ostr.str();
+        example_writer.Write(output_key, merged_eg);
+        // Reset the accumulators so the next minibatch starts empty.
+        examples.clear();
+        cur_num_output_frames = 0;
+      }
+    }
+    KALDI_LOG << "Merged " << num_read << " egs to " << num_written << '.';
+    return (num_written != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what() << '\n';
+    return -1;
+  }
+}
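Typical invocations of the new binary, following its usage string and option docs; the second form sizes minibatches by input-example count rather than by output frames:

    nnet3-merge-egs --minibatch-size=512 ark:1.egs ark:- | nnet3-train-simple ...

    nnet3-merge-egs --measure-output-frames=false --minibatch-size=128 \
      ark:1.egs ark:merged.egs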