forked from EdinburghNLP/nematus
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gen_lattice.sh
executable file
·72 lines (52 loc) · 2.14 KB
/
gen_lattice.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/bin/bash
#
# Build a pruned, BPE-segmented lattice in OpenFst text format for a single
# sentence of a decoder search graph.
#
# Usage:
#   gen_lattice.sh SEARCH_GRAPH OUTPUT_DIR THRESHOLD BPE_CODES BPE_TYPE ID
#
# Arguments:
#   SEARCH_GRAPH  search-graph file; the lines whose first field is ID are used
#   OUTPUT_DIR    directory for every generated file (created if missing)
#   THRESHOLD     value passed to `fstprune --weight`
#   BPE_CODES     codes file passed to `apply_bpe.py -c`
#   BPE_TYPE      label that only appears in output file names
#   ID            sentence id to extract from SEARCH_GRAPH
#
# Optional environment overrides (defaults are the original hard-coded paths):
#   SUBWORD_NMT         directory containing apply_bpe.py
#   SEARCHGRAPH_TO_FST  path to searchgraph_to_fst.py
#
# Files written, with stem = OUTPUT_DIR/<basename of SEARCH_GRAPH>.ID:
#   stem.fst                                  compiled FST
#   stem.p=THRESHOLD.fst                      pruned, det-min, topsorted FST
#   stem.BPE_TYPE-bpe.keys                    symbol table with BPE applied
#   stem.p=THRESHOLD.BPE_TYPE-bpe.fst.txt     final text lattice
# stem.fst.txt and stem.keys are expected to be produced by
# searchgraph_to_fst.py (NOTE(review): inferred from how they are consumed
# below; that script is not part of this file).
#
# Every step is skipped when its non-empty output already exists, except the
# prune/det-min step, which is always re-run.
#
# Strict mode (set -e / -u / pipefail) is deliberately not enabled: grep
# finding no line for ID is handled by the -s checks below, not by aborting.

# shellcheck source=/dev/null
source ~/.bashrc

if (( $# < 6 )); then
  printf 'Usage: %s SEARCH_GRAPH OUTPUT_DIR THRESHOLD BPE_CODES BPE_TYPE ID\n' \
    "${0##*/}" >&2
  exit 2
fi

SEARCH_GRAPH_=$1
SEARCH_GRAPH=$(basename -- "$SEARCH_GRAPH_")
OUTPUT_DIR=$2
THRESHOLD=$3
BPE_CODES=$4
BPE_TYPE=$5
ID=$6

# Tool locations; export SUBWORD_NMT / SEARCHGRAPH_TO_FST to point elsewhere.
subword_nmt=${SUBWORD_NMT:-$HOME/subword-nmt}
searchgraph_to_fst=${SEARCHGRAPH_TO_FST:-/home/hltcoe/mpost/expts/whale17/scripts/searchgraph_to_fst.py}

mkdir -p -- "$OUTPUT_DIR" || exit 1

stem=$OUTPUT_DIR/${SEARCH_GRAPH}.$ID
keys=${stem}.keys
bpe_keys=${stem}.${BPE_TYPE}-bpe.keys
pruned_fst=${stem}.p=${THRESHOLD}.fst
bpe_fst_txt=${stem}.p=${THRESHOLD}.${BPE_TYPE}-bpe.fst.txt

# Remove the scratch files of the BPE step on every exit path. The glob is
# anchored on "<stem>.keys_" so that a run for ID 1 cannot delete the scratch
# files of a concurrent run for ID 10; -f keeps it quiet when none exist.
cleanup() {
  rm -f -- "${keys}_"*TEMP
}
trap cleanup EXIT

if [[ ! -s $bpe_fst_txt ]]; then
  if [[ ! -s ${stem}.fst.txt ]]; then
    # Get the sentence graph from the graph file for the test set.
    # Next, convert the search graph into the FSM text format and write out
    # the symbol table.
    grep -- "^$ID " "$SEARCH_GRAPH_" \
      | python "$searchgraph_to_fst" --prefix "$OUTPUT_DIR/${SEARCH_GRAPH}"
  fi

  if [[ -s ${stem}.fst.txt ]]; then
    if [[ ! -s ${stem}.fst ]]; then
      # Compile the text FST using the word-level symbol table.
      fstcompile --isymbols="$keys" --osymbols="$keys" \
        "${stem}.fst.txt" > "${stem}.fst"
    fi

    # Apply pruning before det-min, then determinize and minimize the graph.
    # Garnish with topsort.
    fstprune --weight="$THRESHOLD" < "${stem}.fst" \
      | fstrmepsilon | fstdeterminize | fstminimize \
      | fsttopsort > "$pruned_fst"

    if [[ ! -s $bpe_keys ]]; then
      # Apply BPE to the symbol table: column 1 holds the symbols, column 2
      # their numeric ids.
      awk '{ print $1 }' "$keys" > "${keys}_words_TEMP"
      awk '{ print $2 }' "$keys" > "${keys}_nums_TEMP"

      # Replace '|' with spaces so apply_bpe.py sees space-separated tokens.
      sed 's/|/ /g' "${keys}_words_TEMP" > "${keys}_words_spaces_TEMP"

      # Stop here on failure: otherwise paste would still write a non-empty
      # keys file without symbols, and later runs would treat it as a valid
      # cached result.
      if ! "$subword_nmt/apply_bpe.py" -c "$BPE_CODES" \
        < "${keys}_words_spaces_TEMP" \
        > "${keys}_words.${BPE_TYPE}-bpe_TEMP"; then
        printf '%s: apply_bpe.py failed for %s\n' "${0##*/}" "$keys" >&2
        exit 1
      fi

      # Replace spaces with '|' again, re-attach the ids, and restore the
      # end-of-sentence symbol that BPE split into pieces.
      sed 's/ /|/g' "${keys}_words.${BPE_TYPE}-bpe_TEMP" \
        > "${keys}_words.${BPE_TYPE}-bpe_pipes_TEMP"
      paste -d ' ' "${keys}_words.${BPE_TYPE}-bpe_pipes_TEMP" \
        "${keys}_nums_TEMP" \
        | sed 's,<@@|/@@|e@@|os@@|>,<eos>,g' > "$bpe_keys"
    fi

    # Print the pruned FST with the BPE-level symbols.
    fstprint --isymbols="$bpe_keys" --osymbols="$bpe_keys" \
      --fst_field_separator=" " < "$pruned_fst" > "$bpe_fst_txt"
  fi
fi