run-train.sh
#!/usr/bin/env bash
# set -xe
usage="$(basename "$0") [-h|--help] [-r|--run_id <string>] [-d|--destination <path>] [-x|--decoder <string>] [-l|--lm <path>] [-a|--lm_vocab <path>] [-t|--train_files <path>] [-v|--valid_files <path>] [-m|--minutes <int>] [-g|--gpu <int>] [-b|--batch_size <int>] [-e|--epochs <int>]
where:
-h|--help show this help text
-r|--run_id <string> run-id to use (used to resume training)
-d|--destination <path> destination directory to store results
-x|--decoder <'beamsearch'|'bestpath'> decoder to use (default: beamsearch)
-l|--lm <path> path to an n-gram KenLM model (preferably in binary format)
-a|--lm_vocab <path> path to a file containing the vocabulary of the LM specified by -l|--lm. The file must contain the words used for training, delimited by spaces (no newlines)
-t|--train_files <path> one or more comma-separated paths to CSV files containing the corpus files to use for training
-v|--valid_files <path> one or more comma-separated paths to CSV files containing the corpus files to use for validation
-m|--minutes <int> number of minutes of audio for training. If not set or set to 0, all training data will be used (default: 0)
-g|--gpu <int> GPU to use (default: 2)
-b|--batch_size <int> batch size
-e|--epochs <int> number of epochs to train
Train a simplified DeepSpeech RNN model on a given corpus of training and validation data.
"
# Defaults
run_id=''
target_dir='/home/daniel_tiefenauer'
minutes=0
gpu='2'
batch_size='16'
epochs='20'
decoder='beamsearch'
lm='./lm/libri-timit-lm.klm'
lm_vocab='./lm/words.txt'
train_files='/media/D1/readylingua-en/readylingua-en-train.csv'
valid_files='/media/D1/readylingua-en/readylingua-en-dev.csv'
POSITIONAL=()
while [[ $# -gt 0 ]]
do
key="$1"
case $key in
-h|--help)
echo "${usage}"
shift # past argument
exit
;;
-r|--run_id)
run_id="$2"
shift # past argument
shift # past value
;;
-d|--destination)
target_dir="$2"
shift # past argument
shift # past value
;;
-x|--decoder)
decoder="$2"
shift # past argument
shift # past value
;;
-l|--lm)
lm="$2"
shift
shift
;;
-a|--lm_vocab)
lm_vocab="$2"
shift
shift
;;
-t|--train_files)
train_files="$2"
shift # past argument
shift # past value
;;
-v|--valid_files)
valid_files="$2"
shift # past argument
shift # past value
;;
-m|--minutes)
minutes="$2"
shift # past argument
shift # past value
;;
-g|--gpu)
gpu="$2"
shift # past argument
shift # past value
;;
-b|--batch_size)
batch_size="$2"
shift
shift
;;
-e|--epochs)
epochs="$2"
shift
shift
;;
*) # unknown option
POSITIONAL+=("$1") # save it in an array for later
shift # past argument
;;
esac
done
set -- "${POSITIONAL[@]}" # restore positional parameters
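# Optional sanity check (a sketch, not part of the original script): fail early on an
# unsupported decoder value instead of passing it through to run-train.py.
if [[ "${decoder}" != 'beamsearch' && "${decoder}" != 'bestpath' ]]; then
    echo "Unknown decoder '${decoder}' (expected 'beamsearch' or 'bestpath')" >&2
    exit 1
fi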
echo ' '
echo '-----------------------------------------------------'
echo ' starting training run with the following parameters'
echo '-----------------------------------------------------'
echo run_id = "${run_id}"
echo target_dir = "${target_dir}"
echo minutes = "${minutes}"
echo decoder = "${decoder}"
echo lm = "${lm}"
echo lm_vocab = "${lm_vocab}"
echo train_files = "${train_files}"
echo valid_files = "${valid_files}"
echo gpu = "${gpu}"
echo batch_size = "${batch_size}"
echo epochs = "${epochs}"
echo '-----------------------------------------------------'
echo ' '
cd ./src/ || exit 1
python3 run-train.py \
    --run_id "${run_id}" \
    --target_dir "${target_dir}" \
    --minutes "${minutes}" \
    --decoder "${decoder}" \
    --lm "${lm}" \
    --lm_vocab "${lm_vocab}" \
    --gpu "${gpu}" \
    --batch_size "${batch_size}" \
    --epochs "${epochs}" \
    --train_files "${train_files}" \
    --valid_files "${valid_files}" \
    2>&1 | tee "${target_dir}/${run_id}.log" # write to stdout and log file
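# Note: tee without -a truncates the log file on each invocation. If a run is resumed
# with the same -r|--run_id (as described in the usage text), consider `tee -a` so the
# log from the resumed run is appended rather than overwritten.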