-
Notifications
You must be signed in to change notification settings - Fork 1
/
compute_isometric_slt_stat.sh
42 lines (33 loc) · 1.35 KB
/
compute_isometric_slt_stat.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/bin/bash
# SCRIPT FOR MT EVALUATION (BERTScore, SACREBleu, LC/Length compliance stat)
# For more detail on the metrics, please see ./scripts/README
#
# Usage: compute_isometric_slt_stat.sh TGT MT_INPUT MT_OUTPUT REFERENCE
#   TGT        target language code (handed to bert-score via --lang)
#   MT_INPUT   input file of the MT system
#   MT_OUTPUT  output file of the MT system
#   REFERENCE  reference translation file
#
# Both the log file (log.stat.<TGT>) and the length-compliance helper
# (isometric_slt_stat.py) are resolved against the current working directory.
set -e

# Language pair: the source side is fixed, the target comes from the command line.
SRC="en"
TGT=$1

# Files to evaluate.
MT_INPUT=$2
MT_OUTPUT=$3
REFERENCE=$4

# $PWD is maintained by the shell itself, so it is used directly here.
LOG=$PWD/log.stat.$TGT #${MT_OUTPUT}.stat
LC_STAT=$PWD/isometric_slt_stat.py

# best model for bert-score considering a higher correlation with human eval (see https://github.com/Tiiiger/bert_score)
BERTSCORE_MODEL='microsoft/deberta-xlarge-mnli'
BATCH=32
#######################################
# Compute and report the evaluation stats for one MT output.
# Runs the length-compliance script on the MT input/output pair, then
# sacrebleu (BLEU) and bert-score (rescaled with baseline) on the MT output
# against the reference, prints a summary and appends it to the log file.
# Globals (read): SRC, TGT, LC_STAT, BERTSCORE_MODEL, BATCH, LOG
# Arguments:
#   $1 - MT input file
#   $2 - MT output file
#   $3 - reference file
# Outputs:
#   A two-line preview of each file and the summary on stdout; the summary
#   is also appended to $LOG.
# Returns:
#   Non-zero as soon as one of the metric commands fails.
#######################################
compute_stat () {
  local INPUT=$1
  local MT=$2
  local REF=$3
  # Declared separately from the assignments below so that the exit status of
  # a failing metric command is not masked by `local`.
  local LC_INPUT_MT SACREBLEU BERTSCORE

  # Preview the first two lines of every file. %s prints the file content
  # verbatim, so backslashes in the data are not treated as escape sequences.
  printf 'Computing stat for:\n%s\n' "$(head -n 2 -- "$INPUT" "$MT" "$REF")"

  # compute length compliance (length ratio and range) b/n the mt output and input
  LC_INPUT_MT=$(python "$LC_STAT" -s "$INPUT" -t "$MT") || return

  # compute detok-bleu and rescaled bert-score b/n the mt output and reference
  SACREBLEU=$(sacrebleu "$REF" -i "$MT" -m bleu) || return
  # NOTE(review): `--m` presumably resolves to bert-score's model option via
  # option-prefix matching -- confirm against the installed CLI.
  BERTSCORE=$(bert-score --rescale_with_baseline --m "$BERTSCORE_MODEL" \
    -r "$REF" -c "$MT" --lang "$TGT" --batch_size "$BATCH") || return

  # Only the format string carries escapes; paths and tool output are passed
  # as %s arguments and therefore reported exactly as received.
  printf 'PAIR: %s->%s \nINPUT: %s \nMT: %s \nREF: %s\n\nSACRE Bleu: %s\n\nBERT Score: %s\n\nLength Stat: %s\n' \
    "$SRC" "$TGT" "$INPUT" "$MT" "$REF" \
    "$SACREBLEU" "$BERTSCORE" "$LC_INPUT_MT" \
    | tee -a "$LOG"
}
# Entry point. Refuse to start unless all four positional arguments (target
# language, MT input, MT output, reference) were supplied, so that a missing
# argument is reported up front instead of as a failure of one of the tools.
if (( $# < 4 )); then
  printf 'Usage: %s TGT MT_INPUT MT_OUTPUT REFERENCE\n' "${0##*/}" >&2
  exit 2
fi
compute_stat "${MT_INPUT}" "${MT_OUTPUT}" "${REFERENCE}"
# Separator between runs. $LOG is quoted because it is built from $PWD, which
# may contain spaces.
printf '===\n\n' | tee -a "$LOG"