forked from nytimes/ingredient-phrase-tagger
-
Notifications
You must be signed in to change notification settings - Fork 78
/
test_e2e
executable file
·43 lines (34 loc) · 1.4 KB
/
test_e2e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/bin/bash
# End-to-end test for ingredient-phrase-tagger.
#
# Trains a new model using a known set of labelled data then verifies that all
# generated outputs match the golden outputs.
#
# Run from a directory that contains bin/train-model and tests/golden; both
# are referenced by relative path.
#
# Exit status: non-zero as soon as any command fails, including the first
# diff that reports a difference. The temporary output directory is removed
# on every exit path.

# Exit build script on first failure.
set -e

# Echo commands as they are executed (bash writes the trace to stderr).
set -x

# Settings exported to the environment of bin/train-model.
# NOTE(review): presumably consumed by bin/train-model; confirm there.
export LABELLED_DATA_FILE=nyt-ingredients-snapshot-2015.csv
export LABELLED_EXAMPLE_COUNT=22000
export TRAINING_DATA_PERCENT=0.9

# This needs to be explicit so that there is consistent training between
# different machines.
export CRF_TRAINING_THREADS=2

# Assign separately from `export`: with `export VAR=$(cmd)` the exit status is
# that of `export`, so a mktemp failure would go unnoticed by `set -e`.
OUTPUT_DIR=$(mktemp -d)
export OUTPUT_DIR

# Remove the temporary output directory on any exit, successful or not, so a
# failed training run or a golden mismatch does not leak it.
cleanup() {
  # `:?` aborts instead of expanding to nothing if OUTPUT_DIR is ever empty.
  rm -rf -- "${OUTPUT_DIR:?}"
}
trap cleanup EXIT

# Files this test expects to find under OUTPUT_DIR after training.
ACTUAL_CRF_TRAINING_FILE="${OUTPUT_DIR}/training_data.crf"
ACTUAL_CRF_TESTING_FILE="${OUTPUT_DIR}/testing_data.crf"
ACTUAL_TESTING_OUTPUT_FILE="${OUTPUT_DIR}/testing_output"
ACTUAL_EVAL_OUTPUT_FILE="${OUTPUT_DIR}/eval_output"

# Train a new model.
bin/train-model

# Print the evaluation output so it appears in the test log.
cat "$ACTUAL_EVAL_OUTPUT_FILE"

# Check against golden output.
GOLDEN_DIR=tests/golden
GOLDEN_CRF_TRAINING_FILE="${GOLDEN_DIR}/training_data.crf"
GOLDEN_CRF_TESTING_FILE="${GOLDEN_DIR}/testing_data.crf"
GOLDEN_TESTING_OUTPUT_FILE="${GOLDEN_DIR}/testing_output"
GOLDEN_EVAL_OUTPUT_FILE="${GOLDEN_DIR}/eval_output"

# diff exits non-zero when the files differ, which aborts the script via
# `set -e`; only the first mismatching pair is therefore reported.
diff --context=2 "$GOLDEN_CRF_TRAINING_FILE" "$ACTUAL_CRF_TRAINING_FILE"
diff --context=2 "$GOLDEN_CRF_TESTING_FILE" "$ACTUAL_CRF_TESTING_FILE"
diff --context=2 "$GOLDEN_TESTING_OUTPUT_FILE" "$ACTUAL_TESTING_OUTPUT_FILE"
diff "$GOLDEN_EVAL_OUTPUT_FILE" "$ACTUAL_EVAL_OUTPUT_FILE"