-
Notifications
You must be signed in to change notification settings - Fork 0
/
4gram.sh
executable file
·55 lines (37 loc) · 1.36 KB
/
4gram.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/bin/bash
#
# Train and evaluate a CRFsuite model on the 4-gram data set.
#
# Pipeline:
#   1. Featurize ./data/train_4gram and ./data/dev_4gram with featurizer.py.
#   2. Train a model with `crfsuite learn -a ap` (averaged perceptron).
#   3. Dump the trained model in text form to <model>.txt.
#   4. Tag the dev features with `crfsuite tag -r` and store the predictions.
#   5. Pipe the predictions through ngram2token.pl and connlleval.pl, write the
#      result to <dev feats>.SUMMARY and print it.
#
# All artifacts are written to ./result/4gram/. Run the script from the
# directory that contains featurizer.py, ngram2token.pl, connlleval.pl and
# ./data, because every path below is relative to the working directory.

# Abort on the first failing command, unset variable or failing pipeline stage,
# so a broken step never feeds stale or empty files to the next one.
set -euo pipefail

readonly FLAG="4gram"
readonly DATA_DIR="./data"
readonly OUT_DIR="./result/${FLAG}"

readonly TRAIN_DATA="train_4gram"
readonly TEST_DATA="dev_4gram"

readonly TRAIN_FEAT="${OUT_DIR}/${TRAIN_DATA}.feats"
readonly TEST_FEAT="${OUT_DIR}/${TEST_DATA}.feats"
readonly MODEL="${OUT_DIR}/${TRAIN_DATA}.model"

# Commands are kept as arrays so they can be executed without `eval` and
# without relying on word splitting of a flat string.
readonly FEATURIZER=(python ./featurizer.py)
readonly CRF="crfsuite"
readonly EVAL=(perl connlleval.pl)

#######################################
# Run the featurizer on one data file.
# Globals:   DATA_DIR, FEATURIZER (read)
# Arguments: $1 - data file name inside DATA_DIR
#            $2 - path of the feature file to write
# Outputs:   progress banner on stdout; features written to $2
#######################################
featurize() {
  local name=$1
  local out=$2
  echo "***** Running ${FEATURIZER[*]} on ${name} ($(date)) *****"
  "${FEATURIZER[@]}" < "${DATA_DIR}/${name}" > "${out}"
}

mkdir -p "${OUT_DIR}"

featurize "${TRAIN_DATA}" "${TRAIN_FEAT}"
featurize "${TEST_DATA}" "${TEST_FEAT}"

# Training.
"${CRF}" learn -a ap -m "${MODEL}" "${TRAIN_FEAT}"

# Dump the model in text form.
"${CRF}" dump "${MODEL}" > "${MODEL}.txt"

# Prediction; -r makes crfsuite emit the reference label next to each
# predicted label, which the evaluation step below consumes.
"${CRF}" tag -r -m "${MODEL}" "${TEST_FEAT}" > "${TEST_FEAT}.results"
echo "${TEST_FEAT} finish prediction"

# Evaluation: turn tabs into spaces, strip carriage returns, then pass the
# output through ngram2token.pl before scoring.
# NOTE(review): ngram2token.pl presumably maps the 4-gram predictions back to
# token level using ${DATA_DIR}/dev -- confirm against that script. To score
# the raw n-gram output instead, drop the ngram2token.pl stage.
tr '\t' ' ' < "${TEST_FEAT}.results" \
  | perl -ne '{chomp;s/\r//g;print $_,"\n";}' \
  | perl ngram2token.pl 4 "${DATA_DIR}/dev" \
  | "${EVAL[@]}" > "${TEST_FEAT}.SUMMARY"

cat "${TEST_FEAT}.SUMMARY"