# .travis.yml - Travis CI configuration (repository forked from horovod/horovod).
# Build host settings: Travis `trusty` image with a Python job per version below.
dist: trusty
language: python
# Versions are quoted so YAML keeps them as strings rather than floats.
python:
  - "2.7"
  - "3.5"
  - "3.6"
# The Docker service is required: later phases run everything through
# `docker exec` inside a Debian container.
services:
  - docker
# Prepares the Debian container that every later phase runs in and exports its
# ID as CONTAINER. Reads TRAVIS_PYTHON_VERSION and TF_PACKAGE from the job
# environment.
before_install:
  # Pick the Debian release: Python 3.6 and nightly TensorFlow (which requires
  # a new glibc) get `sid`, Python 3.5 gets `stretch`, anything else `jessie`.
  - |
    if [[ ${TRAVIS_PYTHON_VERSION} == "3.6" || ${TF_PACKAGE} == "tf-nightly" ]]; then
      export DEBIAN=sid
    elif [[ ${TRAVIS_PYTHON_VERSION} == "3.5" ]]; then
      export DEBIAN=stretch
    else
      export DEBIAN=jessie
    fi
  - docker pull debian:${DEBIAN}
  # Start a detached container that stays alive for an hour, with the checkout
  # mounted at /horovod. `docker run -d` prints the ID of the container it
  # created, so the ID is captured directly instead of backgrounding the
  # client, sleeping, and taking whatever `docker ps` lists first.
  - export CONTAINER=$(docker run -d -v "$(pwd)":/horovod debian:${DEBIAN} /bin/sh -c "sleep 3600")
  - docker exec ${CONTAINER} /bin/sh -c "apt-get update -qq"
  # install necessary network tools
  - docker exec ${CONTAINER} /bin/sh -c "apt-get install -y wget openssh-client git build-essential"
  # install Python and add a proper symlink
  - |
    if [[ ${TRAVIS_PYTHON_VERSION} == "3.6" ]]; then
      # Python 3.6 requires special handling in Debian `sid` since the default is Python 3.7.
      docker exec ${CONTAINER} /bin/sh -c "apt-get install -y python${TRAVIS_PYTHON_VERSION} python${TRAVIS_PYTHON_VERSION}-dev python3-distutils"
    else
      docker exec ${CONTAINER} /bin/sh -c "apt-get install -y python${TRAVIS_PYTHON_VERSION} python${TRAVIS_PYTHON_VERSION}-dev"
    fi
  # Make the selected interpreter reachable as plain `python` for later steps.
  - docker exec ${CONTAINER} /bin/sh -c "ln -s /usr/bin/python${TRAVIS_PYTHON_VERSION} /usr/bin/python"
  # Bootstrap pip with get-pip.py, then force-upgrade pip, setuptools and requests.
  - docker exec ${CONTAINER} /bin/sh -c "wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py && rm get-pip.py"
  - docker exec ${CONTAINER} /bin/sh -c "pip install -U --force pip setuptools requests"
# Job configurations. Each entry sets the TensorFlow, Keras and PyTorch
# packages to install and the MPI implementation to run with. Entries are
# folded scalars: the continuation lines join with single spaces into the same
# one-line string.
env:
  matrix:
    - >-
      TF_PACKAGE=tensorflow==1.1.0 KERAS_PACKAGE=keras==2.0.0
      PYTORCH_PACKAGE=torch==0.4.0 MPI=OpenMPI
    - >-
      TF_PACKAGE=tensorflow==1.6.0 KERAS_PACKAGE=keras==2.1.2
      PYTORCH_PACKAGE=torch==0.4.0 MPI=OpenMPI
    - >-
      TF_PACKAGE=tensorflow==1.9.0 KERAS_PACKAGE=keras==2.2.2
      PYTORCH_PACKAGE=torch==0.4.1 MPI=OpenMPI
    - >-
      TF_PACKAGE=tf-nightly
      KERAS_PACKAGE=git+https://github.com/keras-team/keras.git
      PYTORCH_PACKAGE=torch-nightly MPI=OpenMPI
    - >-
      TF_PACKAGE=tensorflow==1.9.0 KERAS_PACKAGE=keras==2.2.2
      PYTORCH_PACKAGE=torch==0.4.1 MPI=MPICH
# Build-matrix tuning: which python/env combinations are skipped.
matrix:
  # Ask Travis to mark the build as finished as soon as its result is known.
  fast_finish: true
  # Each `env` value must equal the corresponding job-configuration string;
  # the folded scalars below join into that same one-line string.
  exclude:
    # TensorFlow 1.6.0 configuration: skipped on Python 3.5 and 3.6.
    - python: "3.5"
      env: >-
        TF_PACKAGE=tensorflow==1.6.0 KERAS_PACKAGE=keras==2.1.2
        PYTORCH_PACKAGE=torch==0.4.0 MPI=OpenMPI
    - python: "3.6"
      env: >-
        TF_PACKAGE=tensorflow==1.6.0 KERAS_PACKAGE=keras==2.1.2
        PYTORCH_PACKAGE=torch==0.4.0 MPI=OpenMPI
    # MPICH configuration: skipped on Python 3.5 and 3.6.
    - python: "3.5"
      env: >-
        TF_PACKAGE=tensorflow==1.9.0 KERAS_PACKAGE=keras==2.2.2
        PYTORCH_PACKAGE=torch==0.4.1 MPI=MPICH
    - python: "3.6"
      env: >-
        TF_PACKAGE=tensorflow==1.9.0 KERAS_PACKAGE=keras==2.2.2
        PYTORCH_PACKAGE=torch==0.4.1 MPI=MPICH
    # Nightly configuration: skipped on Python 3.5.
    - python: "3.5"
      env: >-
        TF_PACKAGE=tf-nightly
        KERAS_PACKAGE=git+https://github.com/keras-team/keras.git
        PYTORCH_PACKAGE=torch-nightly MPI=OpenMPI
# Installs MPI, the deep-learning frameworks and Horovod inside the container
# named by CONTAINER. Reads MPI, TF_PACKAGE, KERAS_PACKAGE and PYTORCH_PACKAGE
# from the job environment.
install:
  # MPI: a prebuilt OpenMPI 3.0.0 tarball unpacked under /usr/local when
  # MPI=OpenMPI, otherwise the distribution's mpich package.
  - |
    case "${MPI}" in
      OpenMPI)
        docker exec ${CONTAINER} /bin/sh -c "wget -O /tmp/openmpi-3.0.0-bin.tar.gz https://github.com/uber/horovod/files/1596799/openmpi-3.0.0-bin.tar.gz"
        docker exec ${CONTAINER} /bin/sh -c "cd /usr/local && tar -zxf /tmp/openmpi-3.0.0-bin.tar.gz && ldconfig"
        ;;
      *)
        # original note says this installs mpich 3.0.4; presumably that
        # depends on the Debian release in use
        docker exec ${CONTAINER} /bin/sh -c "apt-get install -y mpich"
        ;;
    esac
  # Frameworks under test: TensorFlow first, then Keras.
  - docker exec ${CONTAINER} /bin/sh -c "pip install ${TF_PACKAGE}"
  - docker exec ${CONTAINER} /bin/sh -c "pip install ${KERAS_PACKAGE}"
  # Supporting packages: h5py (Keras model saving), scipy (Keras image
  # preprocessing), future (needed by PyTorch, see
  # https://github.com/pytorch/pytorch/pull/12504).
  - docker exec ${CONTAINER} /bin/sh -c "pip install h5py"
  - docker exec ${CONTAINER} /bin/sh -c "pip install scipy"
  - docker exec ${CONTAINER} /bin/sh -c "pip install future"
  # PyTorch: for the nightly configuration, install torchvision, remove the
  # torch it brought in, then install torch_nightly from the nightly wheel
  # index; otherwise install the pinned torch together with torchvision.
  - |
    case "${PYTORCH_PACKAGE}" in
      torch-nightly)
        docker exec ${CONTAINER} /bin/sh -c "pip install torchvision"
        docker exec ${CONTAINER} /bin/sh -c "pip uninstall -y torch"
        docker exec ${CONTAINER} /bin/sh -c "pip install torch_nightly -v -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html"
        ;;
      *)
        docker exec ${CONTAINER} /bin/sh -c "pip install ${PYTORCH_PACKAGE} torchvision"
        ;;
    esac
  # Horovod: build a source distribution from the mounted checkout and
  # install that tarball with pip.
  - docker exec ${CONTAINER} /bin/sh -c "cd /horovod && python setup.py sdist"
  - docker exec ${CONTAINER} /bin/sh -c "pip install -v /horovod/dist/horovod-*.tar.gz"
# Runs the unit tests and the shrunken MNIST examples inside the container
# named by CONTAINER, each launched through the mpirun command line in MPIRUN.
script:
  # Build the two-process mpirun command line for the selected MPI flavour.
  - |
    case "${MPI}" in
      OpenMPI)
        export MPIRUN="mpirun -allow-run-as-root -np 2 -H localhost:2 -bind-to none -map-by slot"
        ;;
      *)
        export MPIRUN="mpirun -np 2"
        ;;
    esac
  # Unit tests: install pytest, then run it under mpirun from /horovod/test.
  - docker exec ${CONTAINER} /bin/sh -c "pip install pytest && cd /horovod/test && ${MPIRUN} pytest -v"
  # TensorFlow MNIST example, with last_step cut from 20000 to 100.
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/last_step=20000/last_step=100/\" /horovod/examples/tensorflow_mnist.py"
  - docker exec ${CONTAINER} /bin/sh -c "${MPIRUN} python /horovod/examples/tensorflow_mnist.py"
  # Fetch the Keras MNIST dataset in a single process before the MPI run.
  - docker exec ${CONTAINER} /bin/sh -c "python -c \"from keras.datasets import mnist; mnist.load_data()\""
  # Keras MNIST advanced example, made smaller: epochs set to 12, the first
  # Conv2D reduced from 32 filters to 1, and the second Conv2D layer removed.
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/epochs = .*/epochs = 12/\" /horovod/examples/keras_mnist_advanced.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/model.add(Conv2D(32, kernel_size=(3, 3),/model.add(Conv2D(1, kernel_size=(3, 3),/\" /horovod/examples/keras_mnist_advanced.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/model.add(Conv2D(64, (3, 3), activation='relu'))//\" /horovod/examples/keras_mnist_advanced.py"
  - docker exec ${CONTAINER} /bin/sh -c "${MPIRUN} python /horovod/examples/keras_mnist_advanced.py"
  # PyTorch MNIST example, made smaller: both conv/max-pool lines in the
  # forward pass are blanked out, and fc1 plus the view() reshape are changed
  # from 320 to 784 inputs.
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/self.fc1 = nn.Linear(320, 50)/self.fc1 = nn.Linear(784, 50)/\" /horovod/examples/pytorch_mnist.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/x = F.relu(F.max_pool2d(self.conv1(x), 2))//\" /horovod/examples/pytorch_mnist.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))//\" /horovod/examples/pytorch_mnist.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/x = x.view(-1, 320)/x = x.view(-1, 784)/\" /horovod/examples/pytorch_mnist.py"
  - docker exec ${CONTAINER} /bin/sh -c "${MPIRUN} python /horovod/examples/pytorch_mnist.py"