From f353913420b6ef8a31ecc55d2ec0c988178698e0 Mon Sep 17 00:00:00 2001
From: Myle Ott
Date: Tue, 24 Mar 2020 11:33:44 -0700
Subject: [PATCH] Print tokenized hypothesis in fairseq-interactive (fixes
 #1881) (#1888)

Summary: Pull Request resolved: https://github.com/pytorch/fairseq/pull/1888

Reviewed By: ngoyal2707

Differential Revision: D20596497

Pulled By: myleott

fbshipit-source-id: 689a7b8d352aac87e080773676f55a3bd89f555f
---
 fairseq_cli/interactive.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fairseq_cli/interactive.py b/fairseq_cli/interactive.py
index edcc704067..12efdf8fbb 100644
--- a/fairseq_cli/interactive.py
+++ b/fairseq_cli/interactive.py
@@ -182,9 +182,12 @@ def decode_fn(x):
                     tgt_dict=tgt_dict,
                     remove_bpe=args.remove_bpe,
                 )
-                hypo_str = decode_fn(hypo_str)
+                detok_hypo_str = decode_fn(hypo_str)
                 score = hypo['score'] / math.log(2)  # convert to base 2
+                # original hypothesis (after tokenization and BPE)
                 print('H-{}\t{}\t{}'.format(id, score, hypo_str))
+                # detokenized hypothesis
+                print('D-{}\t{}\t{}'.format(id, score, detok_hypo_str))
                 print('P-{}\t{}'.format(
                     id,
                     ' '.join(map(