diff --git a/src/lat/sausages.cc b/src/lat/sausages.cc index 7cb7a273b98..16a61b3f5eb 100644 --- a/src/lat/sausages.cc +++ b/src/lat/sausages.cc @@ -114,11 +114,11 @@ double MinimumBayesRisk::EditDistance(int32 N, int32 Q, for (int32 q = 0; q <= Q; q++) { if (q == 0) { alpha_dash_arc(q) = // line 15. - alpha_dash(s_a, q) + l(w_a, 0) + delta(); + alpha_dash(s_a, q) + l(w_a, 0, true); } else { // a1,a2,a3 are the 3 parts of min expression of line 17. int32 r_q = r(q); double a1 = alpha_dash(s_a, q-1) + l(w_a, r_q), - a2 = alpha_dash(s_a, q) + l(w_a, 0) + delta(), + a2 = alpha_dash(s_a, q) + l(w_a, 0, true), a3 = alpha_dash_arc(q-1) + l(0, r_q); alpha_dash_arc(q) = std::min(a1, std::min(a2, a3)); } @@ -166,11 +166,11 @@ void MinimumBayesRisk::AccStats() { const Arc &arc = arcs_[pre_[n][i]]; int32 s_a = arc.start_node, w_a = arc.word; BaseFloat p_a = arc.loglike; - alpha_dash_arc(0) = alpha_dash(s_a, 0) + l(w_a, 0) + delta(); // line 14. + alpha_dash_arc(0) = alpha_dash(s_a, 0) + l(w_a, 0, true); // line 14. for (int32 q = 1; q <= Q; q++) { // this loop == lines 15-18. int32 r_q = r(q); double a1 = alpha_dash(s_a, q-1) + l(w_a, r_q), - a2 = alpha_dash(s_a, q) + l(w_a, 0) + delta(), + a2 = alpha_dash(s_a, q) + l(w_a, 0, true), a3 = alpha_dash_arc(q-1) + l(0, r_q); if (a1 <= a2) { if (a1 <= a3) { b_arc[q] = 1; alpha_dash_arc(q) = a1; } diff --git a/src/lat/sausages.h b/src/lat/sausages.h index a6af91cc12f..9dab0b68713 100644 --- a/src/lat/sausages.h +++ b/src/lat/sausages.h @@ -128,8 +128,18 @@ class MinimumBayesRisk { /// Minimum-Bayes-Risk Decode. Top-level algorithm. Figure 6 of the paper. void MbrDecode(); - /// The basic edit-distance function l(a,b), as in the paper. - inline double l(int32 a, int32 b) { return (a == b ? 0.0 : 1.0); } + /// Without the 'penalize' argument this gives us the basic edit-distance + /// function l(a,b), as in the paper. 
+ /// With the 'penalize' argument it can be interpreted as the edit distance + /// plus the 'delta' from the paper, except that we make a kind of conceptual + /// bug-fix and only apply the delta if the edit-distance was not already + /// zero. This bug-fix was necessary in order to force all the stats to show + /// up, that should show up, and applying the bug-fix makes the sausage stats + /// significantly less sparse. + inline double l(int32 a, int32 b, bool penalize = false) { + if (a == b) return 0.0; + else return (penalize ? 1.0 + delta() : 1.0); + } /// returns r_q, in one-based indexing, as in the paper. inline int32 r(int32 q) { return R_[q-1]; } @@ -151,8 +161,14 @@ // epsilon (0). (But if no words in vec, just one epsilon) static void NormalizeEps(std::vector<int32> *vec); - static inline BaseFloat delta() { return 1.0e-05; } // A constant - // used in the algorithm. + // delta() is a constant used in the algorithm, which penalizes + // the use of certain epsilon transitions in the edit-distance which would cause + // words not to show up in the accumulated edit-distance statistics. + // There has been a conceptual bug-fix versus the way it was presented in + // the paper: we now add delta only if the edit-distance was not already + // zero. + static inline BaseFloat delta() { return 1.0e-05; } + /// Function used to increment map. static inline void AddToMap(int32 i, double d, std::map<int32, double> *gamma) {