From f353913420b6ef8a31ecc55d2ec0c988178698e0 Mon Sep 17 00:00:00 2001
From: Myle Ott <myleott@fb.com>
Date: Tue, 24 Mar 2020 11:33:44 -0700
Subject: [PATCH] Print tokenized hypothesis in fairseq-interactive (fixes
 #1881) (#1888)

Summary: Pull Request resolved: https://github.com/pytorch/fairseq/pull/1888

Reviewed By: ngoyal2707

Differential Revision: D20596497

Pulled By: myleott

fbshipit-source-id: 689a7b8d352aac87e080773676f55a3bd89f555f
---
 fairseq_cli/interactive.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fairseq_cli/interactive.py b/fairseq_cli/interactive.py
index edcc704067..12efdf8fbb 100644
--- a/fairseq_cli/interactive.py
+++ b/fairseq_cli/interactive.py
@@ -182,9 +182,12 @@ def decode_fn(x):
                     tgt_dict=tgt_dict,
                     remove_bpe=args.remove_bpe,
                 )
-                hypo_str = decode_fn(hypo_str)
+                detok_hypo_str = decode_fn(hypo_str)
                 score = hypo['score'] / math.log(2)  # convert to base 2
+                # original hypothesis (after tokenization and BPE)
                 print('H-{}\t{}\t{}'.format(id, score, hypo_str))
+                # detokenized hypothesis
+                print('D-{}\t{}\t{}'.format(id, score, detok_hypo_str))
                 print('P-{}\t{}'.format(
                     id,
                     ' '.join(map(