From c4123c1725edfff90ca7f82052aa2776bc1cde64 Mon Sep 17 00:00:00 2001
From: David Snyder
Date: Wed, 2 Mar 2016 15:02:05 -0500
Subject: [PATCH] xvector: Adding binary nnet3-xvector-scoring, and a function
 in xvector/xvector.h for scoring individual pairs of xvectors

---
 src/xvector/xvector.cc                  |  12 ++
 src/xvector/xvector.h                   |  19 ++-
 src/xvectorbin/Makefile                 |   2 +-
 src/xvectorbin/nnet3-xvector-scoring.cc | 150 ++++++++++++++++++++++++
 4 files changed, 181 insertions(+), 2 deletions(-)
 create mode 100644 src/xvectorbin/nnet3-xvector-scoring.cc

diff --git a/src/xvector/xvector.cc b/src/xvector/xvector.cc
index b29d99ac497..10e05f8eef6 100644
--- a/src/xvector/xvector.cc
+++ b/src/xvector/xvector.cc
@@ -115,4 +115,16 @@ void ComputeXvectorObjfAndDeriv(
   (*tot_weight) = N;
 }
 
+// Returns L(v, w) = v' w - v' S v - w' S w + b; v, w and S must agree in dim.
+BaseFloat SimilarityScore(const Vector<BaseFloat> &v,
+  const Vector<BaseFloat> &w, const SpMatrix<BaseFloat> &S,
+  BaseFloat b) {
+  KALDI_ASSERT(v.Dim() == w.Dim() && v.Dim() == S.NumRows());
+  Vector<BaseFloat> Sv(v.Dim());
+  Sv.AddSpVec(1.0, S, v, 0);  // Sv = S v
+  Vector<BaseFloat> Sw(w.Dim());
+  Sw.AddSpVec(1.0, S, w, 0);  // Sw = S w
+  return VecVec(v, w) - VecVec(v, Sv) - VecVec(w, Sw) + b;
+}
+
 } // namespace kaldi
diff --git a/src/xvector/xvector.h b/src/xvector/xvector.h
index aa464c94f0c..fa6c580ab43 100644
--- a/src/xvector/xvector.h
+++ b/src/xvector/xvector.h
@@ -44,7 +44,7 @@ namespace kaldi {
     the objective function correctly.
     Let the log-odds L(v,w) [interpreted as log(p_same(v,w) / p_different(v,w))]
     be defined as:
-      L(v, w) = v' w - v' S v - w' S w
+      L(v, w) = v' w - v' S v - w' S w + b
     then p_same(v, w) = -log(1 + exp(-l(v, w)), and
     p_different(v, w) = 1 - p_same(v, w) = -log(1 + exp(-l(v, w)).
 
@@ -72,6 +72,23 @@ namespace kaldi {
                                   CuMatrixBase<BaseFloat> *scores_out,
                                   BaseFloat *tot_objf,
                                   BaseFloat *tot_weight);
+
+  /*
+    Compute the similarity score between two input xvectors.  The score is
+    defined as:
+      L(v, w) = v' w - v' S v - w' S w + b
+    @param [in] v    The first xvector.
+    @param [in] w    The second xvector.
+    @param [in] S    A symmetric matrix, usually a constant output of the
+      Nnet the xvectors came from.
+    @param [in] b    A scalar offset, usually a constant output of the Nnet
+      the xvectors came from.
+    @return The score between vectors v and w.
+  */
+  BaseFloat SimilarityScore(const Vector<BaseFloat> &v,
+    const Vector<BaseFloat> &w, const SpMatrix<BaseFloat> &S,
+    BaseFloat b);
+
 } // namespace kaldi
 
 #endif
diff --git a/src/xvectorbin/Makefile b/src/xvectorbin/Makefile
index 3c962d874e7..be87b75fac4 100644
--- a/src/xvectorbin/Makefile
+++ b/src/xvectorbin/Makefile
@@ -8,7 +8,7 @@ LDLIBS += $(CUDA_LDLIBS)
 
 BINFILES = nnet3-xvector-get-egs nnet3-xvector-compute-prob \
            nnet3-xvector-show-progress nnet3-xvector-train \
-           nnet3-xvector-compute
+           nnet3-xvector-compute nnet3-xvector-scoring
 
 OBJFILES =
 
diff --git a/src/xvectorbin/nnet3-xvector-scoring.cc b/src/xvectorbin/nnet3-xvector-scoring.cc
new file mode 100644
index 00000000000..38a4a70c808
--- /dev/null
+++ b/src/xvectorbin/nnet3-xvector-scoring.cc
@@ -0,0 +1,150 @@
+// xvectorbin/nnet3-xvector-scoring.cc
+
+// Copyright 2013  Daniel Povey
+//           2016  David Snyder
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "nnet3/nnet-utils.h"
+#include "xvector/xvector.h"
+
+// Scores xvector trial pairs; returns 0 if at least one trial was scored.
+int main(int argc, char *argv[]) {
+  using namespace kaldi;
+  using namespace kaldi::nnet3;
+  typedef kaldi::int32 int32;
+  typedef kaldi::int64 int64;
+  try {
+    const char *usage =
+        "Computes scores between pairs of xvectors.\n"
+        "The 'trials-file' has lines of the form\n"
+        "<key1> <key2>\n"
+        "and the output will have the form\n"
+        "<key1> <key2> <score>\n"
+        "(if either key could not be found, the trial is left out of the\n"
+        "output, and this program will print a warning)\n"
+        "\n"
+        "Usage: nnet3-xvector-scoring [options] <nnet-rxfilename> "
+        "<trials-rxfilename> <xvector1-rspecifier> <xvector2-rspecifier> "
+        "<scores-wxfilename>\n"
+        "e.g.: \n"
+        " nnet3-xvector-scoring nnet.final trials ark:spk_xvectors.scp "
+        "ark:test_xvectors.scp trials.scored\n"
+        "See also: ivector-plda-scoring and ivector-compute-dot-products\n";
+
+    ParseOptions po(usage);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 5) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string nnet_rxfilename = po.GetArg(1),
+        trials_rxfilename = po.GetArg(2),
+        xvector1_rspecifier = po.GetArg(3),
+        xvector2_rspecifier = po.GetArg(4),
+        scores_wxfilename = po.GetArg(5);
+
+    int64 num_done = 0, num_err = 0;
+    // Load the network; its outputs 's' and 'b' supply S and b for scoring.
+    Nnet nnet;
+    ReadKaldiObject(nnet_rxfilename, &nnet);
+    // We need to ensure that the Nnet has outputs called 's' and 'b'
+    // and that 'b' is a scalar and 's' can be interpreted as a symmetric
+    // matrix.
+    int32 s_index = nnet.GetNodeIndex("s"),
+        b_index = nnet.GetNodeIndex("b");
+    if (s_index == -1 || b_index == -1)
+      KALDI_ERR << "The input Nnet cannot be used for xvector scoring "
+                << "because it has no output called 's' or 'b'.";
+    if (!nnet.IsOutputNode(s_index) || !nnet.IsOutputNode(b_index))
+      KALDI_ERR << "The nodes 's' and 'b' must be output nodes.";
+
+    int32 s_dim = nnet.OutputDim("s"),
+        b_dim = nnet.OutputDim("b");
+    if (b_dim != 1)
+      KALDI_ERR << "The output 'b' is a scalar offset. Input Nnet has an "
+                << "output called 'b' but it has a dimension of " << b_dim;
+    int32 d = (0.5) * (1 + sqrt(1 + 8 * s_dim)) - 1;  // solves s_dim = d(d+1)/2
+    if (((d + 1) * d) / 2 != s_dim)
+      KALDI_ERR << "Output 's' cannot be interpreted as a symmetric matrix.";
+    Vector<BaseFloat> s_vec(s_dim);
+    Vector<BaseFloat> b_vec(1);
+    GetConstantOutput(nnet, "s", &s_vec);
+    GetConstantOutput(nnet, "b", &b_vec);
+    SpMatrix<BaseFloat> S(d);
+    S.CopyFromVec(s_vec);
+    BaseFloat b = b_vec(0);
+
+    RandomAccessBaseFloatVectorReader xvector1_reader(xvector1_rspecifier);
+    RandomAccessBaseFloatVectorReader xvector2_reader(xvector2_rspecifier);
+
+    Input ki(trials_rxfilename);
+
+    bool binary = false;
+    Output ko(scores_wxfilename, binary);
+    double sum = 0.0, sumsq = 0.0;
+    // Score the trials line by line, writing "key1 key2 score" for each.
+    std::string line;
+    while (std::getline(ki.Stream(), line)) {
+      std::vector<std::string> fields;
+      SplitStringToVector(line, " \t\n\r", true, &fields);
+      if (fields.size() != 2) {
+        KALDI_ERR << "Bad line " << (num_done + num_err) << " in input "
+                  << "(expected two fields: key1 key2): " << line;
+      }
+      std::string key1 = fields[0], key2 = fields[1];
+      if (!xvector1_reader.HasKey(key1)) {
+        KALDI_WARN << "Key " << key1 << " not present in 1st table of xvectors.";
+        num_err++;
+        continue;
+      }
+      if (!xvector2_reader.HasKey(key2)) {
+        KALDI_WARN << "Key " << key2 << " not present in 2nd table of xvectors.";
+        num_err++;
+        continue;
+      }
+      const Vector<BaseFloat> &xvector1 = xvector1_reader.Value(key1),
+          &xvector2 = xvector2_reader.Value(key2);
+      // The following will crash if the dimensions differ, but
+      // they would likely also differ for all the xvectors so it's probably
+      // best to just crash.
+      BaseFloat score = SimilarityScore(xvector1, xvector2, S, b);
+      sum += score;
+      sumsq += score * score;
+      num_done++;
+      ko.Stream() << key1 << ' ' << key2 << ' ' << score << std::endl;
+    }
+
+    if (num_done != 0) {
+      double mean = sum / num_done, variance = sumsq / num_done - mean * mean,
+          stddev = (variance > 0.0 ? sqrt(variance) : 0.0);  // rounding guard
+      KALDI_LOG << "Mean score was " << mean << ", standard deviation was "
+                << stddev;
+    }
+    KALDI_LOG << "Processed " << num_done << " trials, " << num_err
+              << " had errors.";
+    return (num_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}