diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i_dp.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i_dp.sh
index ea7c01b79ec..ee247135b67 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i_dp.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i_dp.sh
@@ -5,13 +5,13 @@
 # same as 1i but with frame level dropout
 # (num-params 1g: 21309812, 1i: 43447156)
 # results on sdm1 using ihm ali
-#System                     tdnn_lstm1i_sp_bi_ihmali_ld5
-#WER on dev                 37.6       36.7
-#WER on eval                40.9       39.9
-#Final train prob           -0.114135  -0.118
-#Final valid prob           -0.245208  -0.246
-#Final train prob (xent)    -1.47648   -1.54
-#Final valid prob (xent)    -2.16365   -2.10
+#System                     tdnn_lstm1i_sp_bi_ihmali_ld5  tdnn_lstm1i_dp_sp_bi_ihmali_ld5
+#WER on dev                 37.6                          36.5
+#WER on eval                40.9                          39.7
+#Final train prob           -0.114135                     -0.124
+#Final valid prob           -0.245208                     -0.249
+#Final train prob (xent)    -1.47648                      -1.55
+#Final valid prob (xent)    -2.16365                      -2.11

 set -e -o pipefail

@@ -28,7 +28,7 @@ gmm=tri3_cleaned # the gmm for the target data
 ihm_gmm=tri3  # the gmm for the IHM system (if --use-ihm-ali true).
 num_threads_ubm=32
 nnet3_affix=_cleaned  # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
-dropout_schedule='0,0@0.20,0.5@0.50,0@0.50,0'
+dropout_schedule='0,0@0.20,0.5@0.5,0@0.75,0'
 chunk_width=150
 chunk_left_context=40
 chunk_right_context=0
@@ -37,7 +37,7 @@ label_delay=5
 # are just hardcoded at this level, in the commands below.
 train_stage=-10
 tree_affix=  # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
-tlstm_affix=1i  # affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
+tlstm_affix=1i_dp  # affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
 common_egs_dir=  # you can set this to use previously dumped egs.
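Note: dropout_schedule is a comma-separated list of value@fraction pairs; the change above makes the proportion ramp from 0 at 20% of training up to 0.5 at 50% and back down to 0 at 75%, rather than snapping back to 0 at the halfway point. A bare value with no '@' is anchored at half of training, which is exactly the case the _parse_dropout_string fix below touches. A minimal sketch of how such a string could map a training fraction to a dropout proportion (the function name and the linear interpolation between anchors are illustrative assumptions, not the exact Kaldi implementation):

    def dropout_at(schedule, fraction):
        """schedule: str like '0,0@0.20,0.5@0.5,0@0.75,0'.  The first entry is
        anchored at fraction 0.0, the last at 1.0; a bare mid-schedule value
        defaults to 0.5, i.e. half of training."""
        parts = schedule.split(',')
        points = []
        for i, part in enumerate(parts):
            pair = part.split('@')
            value = float(pair[0])
            if i == 0:
                frac = 0.0
            elif i == len(parts) - 1:
                frac = 1.0
            elif len(pair) == 2:
                frac = float(pair[1])
            else:
                frac = 0.5  # bare value: anchored at half of training
            points.append((frac, value))
        # piecewise-linear interpolation between neighboring anchor points
        for (f0, v0), (f1, v1) in zip(points, points[1:]):
            if f0 <= fraction <= f1:
                return v0 + (v1 - v0) * (fraction - f0) / max(f1 - f0, 1e-9)
        return points[-1][1]

    print(dropout_at('0,0@0.20,0.5@0.5,0@0.75,0', 0.35))  # ramping up: 0.25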
@@ -193,15 +193,15 @@ if [ $stage -le 15 ]; then
   relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024
   # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults
-  lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+  lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false
   relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024
   relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024
   relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024
-  lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+  lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false
   relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024
   relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024
   relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024
-  lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+  lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false

   ## adding the layers for chain branch
   output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py
index 6d255186cf4..9de29017d45 100644
--- a/egs/wsj/s5/steps/libs/nnet3/train/common.py
+++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py
@@ -401,7 +401,7 @@ def _parse_dropout_string(num_archives_to_process, dropout_str):
         value_x_pair = parts[i].split('@')
         if len(value_x_pair) == 1:
             # Dropout proportion at half of training
-            dropout_proportion = float(value_x_pair)
+            dropout_proportion = float(value_x_pair[0])
             num_archives = int(0.5 * num_archives_to_process)
         else:
             assert len(value_x_pair) == 2
diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py
index 023eece93da..be4e93291af 100644
--- a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py
+++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py
@@ -250,7 +250,8 @@ def set_default_configs(self):
                         'self-repair-scale-nonlinearity' : 0.00001,
                         'zeroing-interval' : 20,
                         'zeroing-threshold' : 15.0,
-                        'dropout-proportion' : -1.0 # -1.0 stands for no dropout will be added
+                        'dropout-proportion' : -1.0,  # -1.0 means no dropout will be added
+                        'dropout-per-frame' : 'false'
                        }

     def set_derived_configs(self):
@@ -285,6 +286,10 @@ def check_configs(self):
             self.config['dropout-proportion'] < 0.0) and
             self.config['dropout-proportion'] != -1.0):
             raise xparser_error("dropout-proportion has invalid value {0}.".format(self.config['dropout-proportion']))
+
+        if (self.config['dropout-per-frame'] != 'false' and
+                self.config['dropout-per-frame'] != 'true'):
+            raise xparser_error("dropout-per-frame has invalid value {0}.".format(self.config['dropout-per-frame']))

     def auxiliary_outputs(self):
         return ['c_t']
@@ -347,7 +352,8 @@ def generate_lstm_config(self):
         pes_str = self.config['ng-per-element-scale-options']
         lstm_dropout_value = self.config['dropout-proportion']
         lstm_dropout_str = 'dropout-proportion=' + str(self.config['dropout-proportion'])
-
+        lstm_dropout_per_frame_value = self.config['dropout-per-frame']
+        lstm_dropout_per_frame_str = 'dropout-per-frame=' + str(self.config['dropout-per-frame'])
         # Natural gradient per element scale parameters
         # TODO: decide if we want to keep exposing these options
         if re.search('param-mean', pes_str) is None and \
@@ -427,7 +433,7 @@ def generate_lstm_config(self):
         # add the recurrent connections
         configs.append("# projection matrices : Wrm and Wpm")
         if lstm_dropout_value != -1.0:
-            configs.append("component name={0}.W_rp.m.dropout type=DropoutComponent dim={1} {2}".format(name, cell_dim, lstm_dropout_str))
+            configs.append("component name={0}.rp_t.dropout type=DropoutComponent dim={1} {2} {3}".format(name, cell_dim, lstm_dropout_str, lstm_dropout_per_frame_str))
         configs.append("component name={0}.W_rp.m type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, cell_dim, rec_proj_dim + nonrec_proj_dim, affine_str))
         configs.append("component name={0}.r type=BackpropTruncationComponent dim={1} {2}".format(name, rec_proj_dim, bptrunc_str))
diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py
index 7aabf02e86b..3135cab5ecf 100755
--- a/egs/wsj/s5/steps/nnet3/chain/train.py
+++ b/egs/wsj/s5/steps/nnet3/chain/train.py
@@ -202,7 +202,6 @@ def process_args(args):
                         "value={0}. We recommend using the option "
                         "--trainer.deriv-truncate-margin.".format(
                             args.deriv_truncate_margin))
-
     if (not os.path.exists(args.dir)
             or not os.path.exists(args.dir+"/configs")):
         raise Exception("This scripts expects {0} to exist and have a configs "
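Note: the C++ changes below implement the two masking modes. With the default element-wise dropout, every coordinate of every frame gets an independent keep/drop decision; with dropout-per-frame=true, one decision per row (frame) is broadcast across all columns, so each frame's activations survive or are zeroed together. A rough numpy mirror of the two modes (illustration only, not the Kaldi code; Kaldi does this on the GPU with CuRand, ApplyHeaviside and AddVecToCols):

    import numpy as np

    def dropout_mask(num_rows, num_cols, proportion, per_frame, rng=np.random):
        if not per_frame:
            # element-wise: independent 0/1 decision per coordinate
            return (rng.uniform(size=(num_rows, num_cols)) > proportion).astype(float)
        # per-frame: one 0/1 decision per row, broadcast across all columns,
        # analogous to CopyColFromMat + AddVecToCols in the C++ below
        col = (rng.uniform(size=(num_rows, 1)) > proportion).astype(float)
        return np.tile(col, (1, num_cols))

    x = np.ones((4, 3))
    print(x * dropout_mask(4, 3, 0.5, per_frame=True))  # whole rows zeroed together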
diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc
index 58908a0fe09..a94486fe309 100644
--- a/src/nnet3/nnet-simple-component.cc
+++ b/src/nnet3/nnet-simple-component.cc
@@ -87,27 +87,33 @@ void PnormComponent::Write(std::ostream &os, bool binary) const {
 }

-void DropoutComponent::Init(int32 dim, BaseFloat dropout_proportion) {
+void DropoutComponent::Init(int32 dim, BaseFloat dropout_proportion,
+                            bool dropout_per_frame) {
   dropout_proportion_ = dropout_proportion;
+  dropout_per_frame_ = dropout_per_frame;
   dim_ = dim;
 }

 void DropoutComponent::InitFromConfig(ConfigLine *cfl) {
   int32 dim = 0;
   BaseFloat dropout_proportion = 0.0;
+  bool dropout_per_frame = false;
   bool ok = cfl->GetValue("dim", &dim) &&
       cfl->GetValue("dropout-proportion", &dropout_proportion);
+  // dropout-per-frame is optional, so existing config lines keep working;
+  // it defaults to the old element-wise behavior.
+  cfl->GetValue("dropout-per-frame", &dropout_per_frame);
   if (!ok || cfl->HasUnusedValues() || dim <= 0 ||
       dropout_proportion < 0.0 || dropout_proportion > 1.0)
     KALDI_ERR << "Invalid initializer for layer of type "
               << Type() << ": \"" << cfl->WholeLine() << "\"";
-  Init(dim, dropout_proportion);
+  Init(dim, dropout_proportion, dropout_per_frame);
 }

 std::string DropoutComponent::Info() const {
   std::ostringstream stream;
   stream << Type() << ", dim=" << dim_
-         << ", dropout-proportion=" << dropout_proportion_;
+         << ", dropout-proportion=" << dropout_proportion_
+         << ", dropout-per-frame=" << dropout_per_frame_;
   return stream.str();
 }

@@ -119,16 +125,30 @@ void DropoutComponent::Propagate(const ComponentPrecomputedIndexes *indexes,
                                  const CuMatrixBase<BaseFloat> &in,
                                  CuMatrixBase<BaseFloat> *out) const {
   BaseFloat dropout = dropout_proportion_;
   KALDI_ASSERT(dropout >= 0.0 && dropout <= 1.0);
-  // This const_cast is only safe assuming you don't attempt
-  // to use multi-threaded code with the GPU.
-  const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
-
-  out->Add(-dropout); // now, a proportion "dropout" will be <0.0
-  out->ApplyHeaviside(); // apply the function (x>0?1:0).  Now, a proportion "dropout" will
-                         // be zero and (1 - dropout) will be 1.0.
-
-  out->MulElements(in);
+  if (!dropout_per_frame_) {
+    // Element-wise dropout: an independent keep/drop decision per coordinate.
+    // This const_cast is only safe assuming you don't attempt
+    // to use multi-threaded code with the GPU.
+    const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
+    out->Add(-dropout);  // now, a proportion "dropout" will be <0.0
+    out->ApplyHeaviside();  // apply the function (x>0?1:0).  Now, a proportion
+                            // "dropout" will be zero and (1 - dropout) will be 1.0.
+    out->MulElements(in);
+  } else {
+    // Frame-level dropout: draw one random value per row (frame) and
+    // broadcast it across the columns, so each frame is kept or zeroed
+    // as a whole.
+    const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
+    out->Add(-dropout);
+    out->ApplyHeaviside();
+    CuVector<BaseFloat> random_drop_vector(in.NumRows(), kSetZero);
+    random_drop_vector.CopyColFromMat(*out, 0);  // column 0 holds the per-frame mask
+    out->SetZero();
+    out->AddVecToCols(1.0, random_drop_vector, 1.0);  // copy the mask to every column
+    out->MulElements(in);
+  }
 }

@@ -150,11 +170,24 @@ void DropoutComponent::Backprop(const std::string &debug_info,

 void DropoutComponent::Read(std::istream &is, bool binary) {
-  ExpectOneOrTwoTokens(is, binary, "<DropoutComponent>", "<Dim>");
-  ReadBasicType(is, binary, &dim_);
-  ExpectToken(is, binary, "<DropoutProportion>");
-  ReadBasicType(is, binary, &dropout_proportion_);
-  ExpectToken(is, binary, "</DropoutComponent>");
+  // Back-compatibility code: models written before dropout-per-frame was
+  // added have no <DropoutPerFrame> token.
+  std::string token;
+  ReadToken(is, binary, &token);
+  if (token == "<DropoutComponent>")
+    ReadToken(is, binary, &token);
+  KALDI_ASSERT(token == "<Dim>");
+  ReadBasicType(is, binary, &dim_);  // read dimension.
+  ExpectToken(is, binary, "<DropoutProportion>");
+  ReadBasicType(is, binary, &dropout_proportion_);  // read dropout proportion.
+  ReadToken(is, binary, &token);
+  if (token == "<DropoutPerFrame>") {
+    ReadBasicType(is, binary, &dropout_per_frame_);  // read dropout mode.
+    ReadToken(is, binary, &token);
+  } else {
+    dropout_per_frame_ = false;  // older models used element-wise dropout.
+  }
+  KALDI_ASSERT(token == "</DropoutComponent>");
 }

@@ -163,6 +196,8 @@ void DropoutComponent::Write(std::ostream &os, bool binary) const {
   WriteBasicType(os, binary, dim_);
   WriteToken(os, binary, "<DropoutProportion>");
   WriteBasicType(os, binary, dropout_proportion_);
+  WriteToken(os, binary, "<DropoutPerFrame>");
+  WriteBasicType(os, binary, dropout_per_frame_);
   WriteToken(os, binary, "</DropoutComponent>");
 }
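Note: DropoutComponent::Read above accepts both the old on-disk format, which ends right after the dropout proportion, and the new one that inserts <DropoutPerFrame> before the closing token. A toy sketch of this optional-trailing-token pattern (plain Python for illustration, not the Kaldi I/O API):

    def read_dropout_tokens(tokens):
        """tokens: list like ['<Dim>', '512', '<DropoutProportion>', '0.5',
        '</DropoutComponent>'], optionally with '<DropoutPerFrame>', 'true'
        inserted before the closing token."""
        it = iter(tokens)
        assert next(it) == '<Dim>'
        dim = int(next(it))
        assert next(it) == '<DropoutProportion>'
        proportion = float(next(it))
        token = next(it)
        if token == '<DropoutPerFrame>':
            per_frame = next(it) == 'true'
            token = next(it)
        else:
            per_frame = False  # old format: default, as in the C++ above
        assert token == '</DropoutComponent>'
        return dim, proportion, per_frame

    # An old-format stream still parses, defaulting per_frame to False:
    print(read_dropout_tokens(['<Dim>', '512', '<DropoutProportion>', '0.5',
                               '</DropoutComponent>']))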
diff --git a/src/nnet3/nnet-simple-component.h b/src/nnet3/nnet-simple-component.h
index f09a989759a..7bc74911a4b 100644
--- a/src/nnet3/nnet-simple-component.h
+++ b/src/nnet3/nnet-simple-component.h
@@ -87,11 +87,13 @@ class PnormComponent: public Component {
 // "Dropout: A Simple Way to Prevent Neural Networks from Overfitting".
 class DropoutComponent : public RandomComponent {
  public:
-  void Init(int32 dim, BaseFloat dropout_proportion = 0.0);
+  void Init(int32 dim, BaseFloat dropout_proportion = 0.0, bool dropout_per_frame = false);

-  DropoutComponent(int32 dim, BaseFloat dropout = 0.0) { Init(dim, dropout); }
+  DropoutComponent(int32 dim, BaseFloat dropout = 0.0, bool dropout_per_frame = false) {
+    Init(dim, dropout, dropout_per_frame);
+  }

-  DropoutComponent(): dim_(0), dropout_proportion_(0.0) { }
+  DropoutComponent(): dim_(0), dropout_proportion_(0.0), dropout_per_frame_(false) { }

   virtual int32 Properties() const {
     return kLinearInInput|kBackpropInPlace|kSimpleComponent|kBackpropNeedsInput|
            kBackpropNeedsOutput;
@@ -120,17 +122,20 @@ class DropoutComponent : public RandomComponent {
                         Component *to_update,
                         CuMatrixBase<BaseFloat> *in_deriv) const;

   virtual Component* Copy() const { return new DropoutComponent(dim_,
-                                                                dropout_proportion_); }
+                                                                dropout_proportion_,
+                                                                dropout_per_frame_); }
   virtual std::string Info() const;

-  void SetDropoutProportion(BaseFloat dropout_proportion) { dropout_proportion_ = dropout_proportion; }
+  void SetDropoutProportion(BaseFloat dropout_proportion) {
+    dropout_proportion_ = dropout_proportion;
+  }

  private:
   int32 dim_;
   /// dropout-proportion is the proportion that is dropped out,
   /// e.g. if 0.1, we set 10% to zero value.
   BaseFloat dropout_proportion_;
-
+  bool dropout_per_frame_;
 };

 class ElementwiseProductComponent: public Component {
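Note: with these changes, each lstmp-layer line in the run script above expands (via the configs.append format string in lstm.py) to a component line of the form

    component name=lstm1.rp_t.dropout type=DropoutComponent dim=1024 dropout-proportion=0.0 dropout-per-frame=false

using the lstm1 layer's values as an example; the dropout-proportion is then adjusted over the course of training according to the --trainer.dropout-schedule option that dropout_schedule feeds into.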