Print tokenized hypothesis in fairseq-interactive (fixes #1881) (#1888)

Summary:
Pull Request resolved: #1888

Reviewed By: ngoyal2707

Differential Revision: D20596497

Pulled By: myleott

fbshipit-source-id: 689a7b8d352aac87e080773676f55a3bd89f555f
facebookresearch · Mar 24, 2020 · f353913 · f353913
1 parent 01e5ab5
commit f353913
Showing 1 changed file with 4 additions and 1 deletion.
diff --git a/fairseq_cli/interactive.py b/fairseq_cli/interactive.py
@@ -182,9 +182,12 @@ def decode_fn(x):
                     tgt_dict=tgt_dict,
                     remove_bpe=args.remove_bpe,
                 )
-                hypo_str = decode_fn(hypo_str)
+                detok_hypo_str = decode_fn(hypo_str)
                 score = hypo['score'] / math.log(2)  # convert to base 2
+                # original hypothesis (after tokenization and BPE)
                 print('H-{}\t{}\t{}'.format(id, score, hypo_str))
+                # detokenized hypothesis
+                print('D-{}\t{}\t{}'.format(id, score, detok_hypo_str))
                 print('P-{}\t{}'.format(
                     id,
                     ' '.join(map(