add test code for LmOutputComponent
hainan-xv committed Apr 28, 2017
1 parent 12f119e commit af2a4c2
Showing 5 changed files with 182 additions and 96 deletions.
4 changes: 4 additions & 0 deletions src/rnnlm/rnnlm-component-itf.h
@@ -113,6 +113,8 @@ class LmInputComponent {
/// sets is_gradient_ to true and sets learning_rate_ to 1, ignoring
/// learning_rate_factor_.
virtual void SetZero(bool treat_as_gradient) = 0;
virtual void SetAsGradient() { learning_rate_ = 1.0; is_gradient_ = true; }
virtual void PerturbParams(BaseFloat stddev) = 0;

virtual int32 InputDim() const = 0;
virtual int32 OutputDim() const = 0;
@@ -200,6 +202,8 @@ class LmOutputComponent {
/// sets is_gradient_ to true and sets learning_rate_ to 1, ignoring
/// learning_rate_factor_.
virtual void SetZero(bool treat_as_gradient) = 0;
virtual void SetAsGradient() { learning_rate_ = 1.0; is_gradient_ = true; }
virtual void PerturbParams(BaseFloat stddev) = 0;

virtual int32 InputDim() const = 0;
virtual int32 OutputDim() const = 0;
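For context, here is a minimal sketch (not part of the commit; the function name is hypothetical) of how these new hooks are meant to combine in a finite-difference gradient check, using only calls that appear in this diff or in the test file below:

#include "rnnlm/rnnlm-component.h"

namespace kaldi {
namespace rnnlm {

void ExampleModelDerivativeSetup(
    const NaturalGradientAffineImportanceSamplingComponent &c) {
  LmOutputComponent *grad = c.Copy();
  grad->Scale(0.0);       // zero out the parameters of the copy
  grad->SetAsGradient();  // learning_rate_ = 1 and is_gradient_ = true, so a
                          // Backprop into 'grad' accumulates a raw gradient
  LmOutputComponent *perturbed = c.Copy();
  perturbed->PerturbParams(1.0e-04);  // add Gaussian noise with stddev 1e-4
  // First-order prediction of the objf change under this perturbation:
  //   grad->DotProduct(*perturbed) - grad->DotProduct(c)
  delete grad;
  delete perturbed;
}

}  // namespace rnnlm
}  // namespace kaldi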
58 changes: 14 additions & 44 deletions src/rnnlm/rnnlm-component.cc
@@ -6,6 +6,7 @@
#include "rnnlm/nnet-parse.h"
#include "rnnlm/rnnlm-component.h"
#include "rnnlm/rnnlm-utils.h"
#include "rnnlm/rnnlm-training.h"

namespace kaldi {
namespace rnnlm {
@@ -180,7 +181,7 @@ void NaturalGradientAffineImportanceSamplingComponent::Backprop(
CuMatrixBase<BaseFloat> *input_deriv) const {
CuSubMatrix<BaseFloat> bias_params(params_.ColRange(params_.NumCols() - 1, 1));
CuSubMatrix<BaseFloat> linear_params(params_.ColRange(0, params_.NumCols() - 1));
-    CuMatrix<BaseFloat> tmp(out_value.NumRows(), out_value.NumCols());
+    CuMatrix<BaseFloat> tmp(output_deriv.NumRows(), output_deriv.NumCols());
tmp.Row(0).CopyColFromMat(bias_params, 0);
if (tmp.NumRows() > 1)
tmp.RowRange(1, tmp.NumRows() - 1).CopyRowsFromVec(tmp.Row(0));
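// (The two calls above broadcast the bias column into every row of tmp;
// tmp then serves as the pre-softmax activation that the log-softmax
// derivative is computed against further down in this function.)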
@@ -199,9 +200,9 @@ void NaturalGradientAffineImportanceSamplingComponent::Backprop(
if (to_update != NULL) {
// need to add natural gradient TODO(hxu)
CuMatrix<BaseFloat> delta(1, params_.NumRows(), kSetZero);
-      delta.Row(0).AddRowSumMat(learning_rate_, output_deriv, 1.0);
+      delta.Row(0).AddRowSumMat(to_update->learning_rate_, output_deriv, 1.0);
to_update->params_.ColRange(params_.NumCols() - 1, 1).AddMat(1.0, delta, kTrans);
-      to_update->params_.ColRange(0, params_.NumCols() - 1).AddMatMat(learning_rate_, output_deriv, kTrans,
+      to_update->params_.ColRange(0, params_.NumCols() - 1).AddMatMat(to_update->learning_rate_, output_deriv, kTrans,
in_value, kNoTrans, 1.0);
}
}
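(Note on the recurring fix in this file: after SetAsGradient() is called on a to_update copy, that copy's learning_rate_ is 1, so reading the rate off to_update rather than off the component being backpropagated through makes Backprop accumulate an unscaled gradient into the copy, which is exactly what the new derivative tests below rely on.)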
@@ -272,7 +273,7 @@ void NaturalGradientAffineImportanceSamplingComponent::Backprop(

precon_ones.CopyColFromMat(in_value_temp, in_value_temp.NumCols() - 1);

-    BaseFloat local_lrate = scale * learning_rate_;
+    BaseFloat local_lrate = scale * to_update->learning_rate_;
to_update->update_count_ += 1.0;
delta_bias.Row(0).AddMatVec(local_lrate, out_deriv_temp, kTrans,
precon_ones, 1.0);
@@ -862,6 +863,11 @@ void AffineImportanceSamplingComponent::Propagate(const CuMatrixBase<BaseFloat>
if (normalize) {
out->ApplyLogSoftMaxPerRow(*out);
}
// TODO(hxu)
CuMatrix<BaseFloat> test_norm(*out);
// test_norm.ApplyExp();
ComputeSamplingNonlinearity(*out, &test_norm);
KALDI_LOG << "average normalization term is " << exp(test_norm.Sum() / test_norm.NumRows() - 1);
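// (How to read this diagnostic, assuming ComputeSamplingNonlinearity acts
// like exp() on the non-positive log-softmax outputs: each row of test_norm
// then sums to about 1, so test_norm.Sum() / test_norm.NumRows() is about 1,
// and the logged value should stay near exp(0) == 1 whenever the outputs
// are properly normalized.)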
}

BaseFloat AffineImportanceSamplingComponent::ComputeLogprobOfWordGivenHistory(
@@ -890,50 +896,14 @@ void AffineImportanceSamplingComponent::Backprop(
= dynamic_cast<AffineImportanceSamplingComponent*>(to_update_0);

if (to_update != NULL) {
// linear_params.SetZero(); // clear the contents
// linear_params.AddMatMat(learning_rate_, new_out_deriv, kTrans,
// in_value, kNoTrans, 1.0);
CuMatrix<BaseFloat> delta_bias(1, output_deriv.NumCols(), kSetZero);
-      delta_bias.Row(0).AddRowSumMat(learning_rate_, new_out_deriv, kTrans);
+      delta_bias.Row(0).AddRowSumMat(to_update->learning_rate_, new_out_deriv, kTrans);

// to_update->params_.ColRange(0, params_.NumCols() - 1).AddMat(1.0, linear_params);
-      to_update->params_.ColRange(0, params_.NumCols() - 1).AddMatMat(learning_rate_, new_out_deriv, kTrans,
+      to_update->params_.ColRange(0, params_.NumCols() - 1).AddMatMat(to_update->learning_rate_, new_out_deriv, kTrans,
in_value, kNoTrans, 1.0);

// CuMatrix<BaseFloat> delta_bias_trans(output_deriv.NumCols(), 1, kSetZero);
// delta_bias_trans.AddMat(1.0, delta_bias, kTrans);

to_update->params_.ColRange(params_.NumCols() - 1, 1).AddMat(1.0, delta_bias, kTrans); // TODO(hxu)
}
//
// CuSubMatrix<BaseFloat> bias_params(params_.ColRange(params_.NumCols() - 1, 1));
// CuSubMatrix<BaseFloat> linear_params(params_.ColRange(0, params_.NumCols() - 1));
//
// CuMatrix<BaseFloat> tmp(out_value);
// tmp.Set(0.0);
// tmp.Row(0).CopyColFromMat(bias_params, 0);
// if (tmp.NumRows() > 1)
// tmp.RowRange(1, tmp.NumRows() - 1).CopyRowsFromVec(tmp.Row(0));
// tmp.AddMatMat(1.0, in_value, kNoTrans, linear_params, kTrans, 1.0);
//
// // now tmp is the in_value for log-softmax
//
// tmp.DiffLogSoftmaxPerRow(tmp, output_deriv);
//
// if (input_deriv != NULL)
// input_deriv->AddMatMat(1.0, output_deriv, kNoTrans, linear_params, kNoTrans,
// 1.0);
//
// AffineImportanceSamplingComponent* to_update
// = dynamic_cast<AffineImportanceSamplingComponent*>(to_update_0);
//
// if (to_update != NULL) {
// CuMatrix<BaseFloat> delta(1, params_.NumRows(), kSetZero);
// delta.Row(0).AddRowSumMat(learning_rate_, output_deriv, 1.0);
// to_update->params_.ColRange(params_.NumCols() - 1, 1).AddMat(1.0, delta, kTrans);
// to_update->params_.ColRange(0, params_.NumCols() - 1).AddMatMat(learning_rate_, output_deriv, kTrans,
// in_value, kNoTrans, 1.0);
// }
}

void AffineImportanceSamplingComponent::Backprop(
@@ -960,10 +930,10 @@ void AffineImportanceSamplingComponent::Backprop(

if (to_update != NULL) {
new_linear.SetZero(); // clear the contents
-      new_linear.AddMatMat(learning_rate_, new_out_deriv, kTrans,
+      new_linear.AddMatMat(to_update->learning_rate_, new_out_deriv, kTrans,
in_value, kNoTrans, 1.0);
CuMatrix<BaseFloat> delta_bias(1, output_deriv.NumCols(), kSetZero);
-      delta_bias.Row(0).AddRowSumMat(learning_rate_, new_out_deriv, kTrans);
+      delta_bias.Row(0).AddRowSumMat(to_update->learning_rate_, new_out_deriv, kTrans);

vector<int> indexes_2(bias_params.NumRows(), -1);
for (int i = 0; i < indexes.size(); i++) {
163 changes: 126 additions & 37 deletions src/rnnlm/rnnlm-sampling-test.cc
@@ -8,6 +8,125 @@
namespace kaldi {
namespace rnnlm {

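// Finite-difference check of the data derivative: the input_deriv produced
// by Backprop() should predict, to first order, the change in
// objf = TraceMatMat(output_deriv, output_data, kTrans) when the input is
// shifted by a small random offset.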
void TestSimpleComponentDataDerivative(BaseFloat perturb_delta) {
for (int t = 0; t < 100; t++) {
int32 input_dim = rand() % 200 + 50,
output_dim = rand() % 200 + 50;

int32 num_rows = RandInt(1, 100);

NaturalGradientAffineImportanceSamplingComponent c;
c.Init(input_dim, output_dim, 0.1, 0.1);

CuMatrix<BaseFloat> input_data(num_rows, input_dim, kSetZero),
output_data(num_rows, output_dim, kSetZero),
output_deriv(num_rows, output_dim, kSetZero);
input_data.SetRandn();
output_deriv.SetRandn();

// ResetSeed(rand_seed, c);
c.Propagate(input_data, false, &output_data);

CuMatrix<BaseFloat> input_deriv(num_rows, input_dim, kSetZero),
empty_mat;
c.Backprop(input_data, empty_mat, output_deriv, NULL,
&input_deriv);

int32 test_dim = 3;
BaseFloat original_objf = TraceMatMat(output_deriv, output_data, kTrans);
Vector<BaseFloat> measured_objf_change(test_dim),
predicted_objf_change(test_dim);
for (int32 i = 0; i < test_dim; i++) {
CuMatrix<BaseFloat> perturbed_input_data(num_rows, input_dim,
kSetZero),
perturbed_output_data(num_rows, output_dim,
kSetZero);
perturbed_input_data.SetRandn();
perturbed_input_data.Scale(perturb_delta);
// at this point, perturbed_input_data contains the offset at the input data.
predicted_objf_change(i) = TraceMatMat(perturbed_input_data, input_deriv,
kTrans);
perturbed_input_data.AddMat(1.0, input_data);

// ResetSeed(rand_seed, c);
c.Propagate(perturbed_input_data, false, &perturbed_output_data);
measured_objf_change(i) = TraceMatMat(output_deriv, perturbed_output_data,
kTrans) - original_objf;
}
KALDI_LOG << "Predicted objf-change = " << predicted_objf_change;
KALDI_LOG << "Measured objf-change = " << measured_objf_change;
BaseFloat threshold = 0.1;
bool ans = ApproxEqual(predicted_objf_change, measured_objf_change, threshold);
if (!ans)
KALDI_WARN << "Data-derivative test failed, component-type="
<< c.Type() << ", input-dim=" << input_dim
<< ", output-dim=" << output_dim;
}
}

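// Finite-difference check of the model derivative: a zeroed copy prepared
// with Scale(0.0) + SetAsGradient() accumulates d(objf)/d(params) during
// Backprop(), and its DotProduct() with a perturbed copy of the component
// predicts the measured change in objf.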
void TestSimpleComponentModelDerivative(BaseFloat perturb_delta) {
for (int t = 0; t < 100; t++) {
int32 input_dim = rand() % 200 + 50,
output_dim = rand() % 200 + 50;

int32 num_rows = RandInt(1, 100);

NaturalGradientAffineImportanceSamplingComponent c;
c.Init(input_dim, output_dim, 0.1, 0.1);

CuMatrix<BaseFloat> input_data(num_rows, input_dim, kSetZero),
output_data(num_rows, output_dim, kSetZero),
output_deriv(num_rows, output_dim, kSetZero);
input_data.SetRandn();
output_deriv.SetRandn();

c.Propagate(input_data, false, &output_data);

BaseFloat original_objf = TraceMatMat(output_deriv, output_data, kTrans);

LmOutputComponent *c_copy = c.Copy();

c_copy->Scale(0.0);
c_copy->SetAsGradient();

CuMatrix<BaseFloat> input_deriv(num_rows, input_dim,
kSetZero),
empty_mat;
c.Backprop(input_data, empty_mat, output_deriv, c_copy,
&input_deriv);

// check that the model derivative is accurate.
int32 test_dim = 3;

Vector<BaseFloat> measured_objf_change(test_dim),
predicted_objf_change(test_dim);
for (int32 i = 0; i < test_dim; i++) {
CuMatrix<BaseFloat> perturbed_output_data(num_rows, output_dim,
kSetZero);
LmOutputComponent *c_perturbed = c.Copy();
c_perturbed->PerturbParams(perturb_delta);

predicted_objf_change(i) = c_copy->DotProduct(*c_perturbed) -
c_copy->DotProduct(c);
c_perturbed->Propagate(input_data, &perturbed_output_data);
measured_objf_change(i) = TraceMatMat(output_deriv, perturbed_output_data,
kTrans) - original_objf;
delete c_perturbed;
}
KALDI_LOG << "Predicted objf-change = " << predicted_objf_change;
KALDI_LOG << "Measured objf-change = " << measured_objf_change;
BaseFloat threshold = 0.1;

bool ans = ApproxEqual(predicted_objf_change, measured_objf_change,
threshold);
if (!ans)
KALDI_WARN << "Model-derivative test failed, component-type="
<< c.Type() << ", input-dim=" << input_dim
<< ", output-dim=" << output_dim;
delete c_copy;
}
}

void PrepareVector(int n, int ones_size, int num_bigrams, std::set<int>* must_sample_set,
vector<double>* u, std::map<int, double> *bigrams) {
u->resize(n, 0);
@@ -33,7 +152,7 @@ void PrepareVector(int n, int ones_size, int num_bigrams, std::set<int>* must_sa
}

for (std::map<int, double>::iterator iter = bigrams->begin();
-      iter != bigrams->end(); iter++) {
+       iter != bigrams->end(); iter++) {
iter->second = iter->second / bigram_sum * bigram_total_sum;
}

@@ -44,34 +163,6 @@
}

void UnitTestCDFGrouping() {
//// int dim = 16;
//// vector<BaseFloat> u(dim);
////
//// for (int i = 0; i < dim / 2; i++) {
//// u[i] = 0.9 / dim * 2;
//// }
//// for (int i = dim / 2; i < dim; i++) {
//// u[i] = 0.1 / dim * 2;
//// }
////
//// vector<BaseFloat> cdf(u.size(), 0);
//// cdf[0] = u[0];
//// for (int i = 1; i < u.size(); i++) {
//// cdf[i] = cdf[i - 1] + u[i];
//// }
////
//// int k = 6;
////
//// std::set<int> must_sample;
//// std::map<int, BaseFloat> bigrams;
////
//// for (int i = 3; i < dim; i += 2) {
//// bigrams[i] = 2.0 / dim;
//// }
////
//// for (int i = 5; i < dim; i += 5) {
//// must_sample.insert(i);
//// }
for (int t = 0; t < 100; t++) {

int dim = rand() % 3000 + 2000;
@@ -95,13 +186,7 @@ void UnitTestCDFGrouping() {
std::vector<interval> groups;
DoGroupingCDF(u, cdf, k, must_sample, bigrams, &groups);

// for (int i = 0; i < groups.size(); i++) {
// KALDI_LOG << "group " << i << ": " << groups[i].L << " " << groups[i].R << " " << groups[i].selection_prob;
// }

// CheckValidGrouping(groups, k);

-      CheckValidGrouping(u, must_sample, bigrams, k, groups);
+      CheckValidGrouping(u, cdf, must_sample, bigrams, k, groups);
}
}

@@ -114,7 +199,7 @@ void UnitTestSamplingNonlinearity() {
CuMatrix<BaseFloat> in1(num_rows, num_cols);
CuMatrix<BaseFloat> out1(num_rows, num_cols);
in1.SetRandn();
-    in1.Add(-1.0);
+    in1.Add(-10.0);
ComputeSamplingNonlinearity(in1, &out1);
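// (With the larger negative shift the inputs are almost surely below zero,
// which, assuming the nonlinearity is exponential on the negative side,
// keeps this forward test in that branch.)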

// testing the forward non-linearity
@@ -140,6 +225,7 @@ void UnitTestSamplingNonlinearity() {
probs.Scale(0.5);
probs.Add(0.5);
probs.InvertElements();
+    probs.Set(1);
probs.Scale(-1);

BaseFloat objf = 0.0;
@@ -206,6 +292,9 @@ int main() {
using namespace rnnlm;

UnitTestCDFGrouping();
TestSimpleComponentModelDerivative(0.0001);
TestSimpleComponentDataDerivative(0.0001);
// UnitTestSamplingNonlinearity();

return 0;

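Usage note (an assumption based on Kaldi's usual test conventions, not stated in this commit): test files like rnnlm-sampling-test.cc are listed under TESTFILES in the directory Makefile and build into a same-named binary, so the new checks would run via "make test" in src/rnnlm, or by invoking ./rnnlm-sampling-test directly.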
(Diffs for the remaining two changed files are not shown.)
