forked from udieckmann/Kielipankki-utilities
-
Notifications
You must be signed in to change notification settings - Fork 0
/
REPACK
60 lines (48 loc) · 1.76 KB
/
REPACK
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# -*- mode: Shell-script; -*-
# Repack a fragment file into smaller fragments to facilitate ocular
# inspection of whichever data may have made a tool malfunction,
# because doing so by hand has become tedious.
# Allow DIR/data/a000/mwww.vrf for hunting further in the output,
# because repacking DIR/a000/mwww.vrf results in
# - DIR.XXXX/temp/a000/mwww.vrt
# - DIR.XXXX/data/a000/*.vrf
# where "data/" has been inserted in the relevant output path.
# Enable extended globs: the path check further down uses the *(...)
# and ?(...) operators, and bash must have extglob switched on before
# it parses that case pattern.
shopt -s extglob
# Print the usage summary plus the reason for the failure, then quit.
# Arguments: $1 - short tag naming the check that failed
# Outputs:   five lines on stderr, nothing on stdout
# Exits:     always, with status 2 (never returns to the caller)
usage () {
    printf '%s\n' \
        "usage: bash -e bin/REPACK <N> DIR/a000/mwww.vrf ==> DIR.XXXX/" \
        "- also allow data/ as in DIR/data/a000/mwww.vrf" \
        "- <N> = 10, 20, 50, 100, 200, 500, 1000, 2000, 5000 (tokens)" \
        "- fragment file must exist, bin/vrt-pack must exist" \
        "[failed with $1]" 1>&2
    exit 2
}
# Validate the command line before touching the file system.  Each
# failed check calls usage with a short tag naming the check; usage
# prints the help text to stderr and exits with status 2.

# Exactly two arguments are required: <N> and the fragment path.
case "$#" in
    2) ;;
    *) usage nargs ;;
esac

# <N> must be one of the listed token counts.
case "$1" in
    10 | 20 | 50 | 100 | 200 | 500 | 1000 | 2000 | 5000) ;;
    *) usage number ;;
esac

# The path must have the shape DIR/a000/mwww.vrf or
# DIR/data/a000/mwww.vrf, where DIR starts with a letter and www are
# three digits.  The pattern relies on extglob, enabled earlier in the
# file.  The character class used to contain the range "A-z", which in
# ASCII order also let [ \ ] ^ _ and backtick through; it now names the
# intended characters explicitly (underscore is kept on purpose).
case "$2" in
    [a-zA-Z]*([a-zA-Z0-9._-])/?(data/)a000/m[0-9][0-9][0-9].vrf) ;;
    *) usage path ;;
esac
# The two ":" lines below are no-op commands that serve as a note to
# whoever edits this file; they are kept exactly as they were.
: keep moving this line to the left margin again and again
: because the extended glob pattern confuses the indenter

# Both the fragment file and the packer must be present.
test -e "$2" || usage exist
test -e "bin/vrt-pack" || usage bin/vrt-pack
# Derive the working names from the (already validated) fragment path.
VRF="$2"             # DIR/a000/mwww.vrf OR DIR/data/a000/mwww.vrf
MAIN="${VRF%%/*}"    # DIR
HIVE="${VRF#*/}"     # a000/mwww.vrf OR data/a000/mwww.vrf
HIVE="${HIVE%/*}"    # a000 OR data/a000
FRAG="${VRF##*/}"    # mwww.vrf
FRAG="${FRAG%.vrf}"  # mwww

# Create a fresh output directory DIR.XXXX.  The template is quoted so
# that it is never word-split or glob-expanded.  Every preparatory step
# is checked explicitly, so a failure stops the script with that
# command's status even when it was not started with "bash -e".
DIR="$(mktemp --directory "${MAIN}.XXXX")" || exit
VRT="${DIR}/temp/${HIVE}/${FRAG}.vrt"
mkdir --parents "${DIR}/temp/${HIVE}" || exit

# Copy the fragment under temp/ as a .vrt file and delete, in place,
# every line that starts with "<..." or "</..." (three literal dots).
# NOTE(review): presumably these are marker lines added when the
# fragment was packed -- confirm against bin/vrt-pack.
echo "make ${VRT}"
cp "${VRF}" "${VRT}" || exit
sed -i -E '\=^</?[.][.][.]=d' "${VRT}" || exit

# Pack the temp/ tree into data/ with the requested token limit; the
# exit status of the packer becomes the exit status of the script.
echo "pack ${DIR}/temp in ${DIR}/data at $1 tokens"
bin/vrt-pack "--tokens=$1" --out="${DIR}/data" "${DIR}/temp"