forked from mozilla/DeepSpeech
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile.train.tmpl
70 lines (56 loc) · 2.07 KB
/
Dockerfile.train.tmpl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Please refer to the TRAINING documentation, "Basic Dockerfile for training"

# GPU-enabled TensorFlow 1.15.2 (Python 3) base image.
FROM tensorflow/tensorflow:1.15.2-gpu-py3

# Build-time only: stops apt/debconf from prompting during the RUN steps.
# Declared as ARG rather than ENV so the setting is visible to RUN instructions
# but is not baked into the environment of containers started from the image.
ARG DEBIAN_FRONTEND=noninteractive

# Repository URL and commit to build. The #...# tokens are template
# placeholders (presumably substituted when this .tmpl is rendered into a
# Dockerfile -- confirm against the build tooling).
ENV MOZILLA_VOICE_STT_REPO=#MOZILLA_VOICE_STT_REPO#
ENV MOZILLA_VOICE_STT_SHA=#MOZILLA_VOICE_STT_SHA#
# Install the build toolchain and runtime libraries in a single layer:
#  - `apt-get update` and `apt-get install` share one RUN, so a cached layer
#    can never leave a later install working from a stale package index;
#  - libopus0 and libsndfile1 are the dependencies for audio augmentation;
#  - python3-xdg is purged because it breaks the deepspeech install later with
#    weird errors about setuptools;
#  - the apt lists are deleted in this same layer, because removing them in a
#    separate, later RUN would not make the image any smaller.
RUN apt-get update && apt-get install -y --no-install-recommends \
        apt-utils \
        bash-completion \
        build-essential \
        cmake \
        curl \
        git \
        libboost-all-dev \
        libbz2-dev \
        libopus0 \
        libsndfile1 \
        locales \
        python3-venv \
        unzip \
        wget \
    && apt-get purge -y python3-xdg \
    && rm -rf /var/lib/apt/lists/*
# Clone the requested repository into /STT. The destination is passed
# explicitly so the checkout always lands where the following steps expect it,
# even when the repository URL does not end in "STT" (e.g. a renamed fork).
# The echo records the exact command in the build log.
RUN echo git clone "$MOZILLA_VOICE_STT_REPO" \
    && git clone "$MOZILLA_VOICE_STT_REPO" /STT
WORKDIR /STT

# Pin the working tree to the requested commit.
RUN echo git checkout "$MOZILLA_VOICE_STT_SHA" \
    && git checkout "$MOZILLA_VOICE_STT_SHA"
# Build CTC decoder first, to avoid clashes on incompatible versions upgrades.
# The wheel produced under native_client/ctcdecode/dist is installed right away.
# --no-cache-dir on every pip call keeps pip's download/wheel cache out of the
# image layers.
RUN cd native_client/ctcdecode && make NUM_PROCESSES="$(nproc)" bindings
RUN pip3 install --no-cache-dir --upgrade native_client/ctcdecode/dist/*.whl

# Prepare deps: pin the packaging toolchain to known versions.
RUN pip3 install --no-cache-dir --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3

# Install Mozilla Voice STT (editable install of the checked-out tree)
#  - No need for the decoder since we did it earlier (DS_NODECODER=y)
#  - There is already correct TensorFlow GPU installed on the base image,
#    we don't want to break that (DS_NOTENSORFLOW=y)
RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --no-cache-dir --upgrade -e .
# Tool to convert output graph for inference: util/taskcluster.py is asked for
# the convert_graphdef_memmapped_format artifact (tensorflow source, r1.15
# branch), with the current directory as its target.
RUN python3 util/taskcluster.py \
        --source tensorflow \
        --branch r1.15 \
        --artifact convert_graphdef_memmapped_format \
        --target .
# Build KenLM to generate new scorers.
# Any existing kenlm directory is discarded, the sources are cloned afresh and
# pinned to a fixed commit, then configured and compiled out-of-tree in
# kenlm/build using one make job per available CPU.
WORKDIR /STT/native_client
RUN rm -rf kenlm \
    && git clone https://github.com/kpu/kenlm \
    && git -C kenlm checkout 87e85e66c99ceff1fab2500a7c60c01da7315eec \
    && mkdir -p kenlm/build \
    && cd kenlm/build \
    && cmake .. \
    && make -j "$(nproc)"
# Return to the repository root so the relative ./bin path below resolves.
WORKDIR /STT
# Execute the LDC93S1 script as part of the image build; a non-zero exit status
# fails the build.
# NOTE(review): presumably a short smoke-test training run -- confirm.
RUN ./bin/run-ldc93s1.sh