Skip to content

Commit

Permalink
remake nvidia docker (#6686)
Browse files Browse the repository at this point in the history
* use latest

* remake

* examples
  • Loading branch information
Borda authored Mar 29, 2021
1 parent f0c5479 commit dcf6e4e
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 22 deletions.
5 changes: 3 additions & 2 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,9 @@ jobs:
- script: |
set -e
python -m pytest pl_examples -v --maxfail=2 --durations=0
python setup.py install --user --quiet
bash pl_examples/run_ddp-example.sh
pip install . --user --quiet
bash pl_examples/run_examples-args.sh --gpus 1 --max_epochs 1 --batch_size 64 --limit_train_batches 5 --limit_val_batches 3
bash pl_examples/run_ddp-examples.sh --max_epochs 1 --batch_size 32 --limit_train_batches 2 --limit_val_batches 2
# cd pl_examples/basic_examples
# bash submit_ddp_job.sh
# bash submit_ddp2_job.sh
Expand Down
54 changes: 48 additions & 6 deletions dockers/nvidia/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,68 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM nvcr.io/nvidia/pytorch:21.02-py3
FROM nvcr.io/nvidia/cuda:11.1.1-runtime-ubuntu20.04

# Image author metadata; LABEL replaces the deprecated MAINTAINER instruction.
LABEL maintainer="PyTorchLightning <https://github.com/PyTorchLightning>"

ARG LIGHTNING_VERSION=""

COPY ./ ./pytorch-lightning/
SHELL ["/bin/bash", "-c"]
# Environment for the build steps below; values set with ENV also persist into
# containers started from the resulting image.
# - DEBIAN_FRONTEND / TZ: keep the tzdata package from asking for interactive
#   input during apt installs, see:
# https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/
# - PATH: appends /root/.local/bin
#   (presumably where `pip install --user` places console scripts - confirm).
# - CUDA_TOOLKIT_ROOT_DIR: points at /usr/local/cuda
#   NOTE(review): presumably consumed by CMake-based extension builds - confirm.
# - MKL_THREADING_LAYER: NOTE(review): reason for forcing GNU is not visible here - confirm.
ENV \
DEBIAN_FRONTEND=noninteractive \
TZ=Europe/Prague \
PATH="$PATH:/root/.local/bin" \
CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \
MKL_THREADING_LAYER=GNU

# Install the OS-level build toolchain and Python 3, then bootstrap pip.
# Everything runs in a single layer so that the apt lists and caches removed
# below never persist in the image.
RUN apt-get update -qq && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        pkg-config \
        python3 \
        python3-dev \
        python3-distutils \
        unzip \
        wget \
    && \
    # Cleaning
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /root/.cache && \
    rm -rf /var/lib/apt/lists/* && \
    # Setup PIP: expose python3 as `python`, then fetch and run the pip bootstrap script
    update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
    # ca-certificates is installed above, so TLS verification is left enabled:
    # the downloaded script is executed as root and must not be fetched unverified
    wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll && \
    python get-pip.py && \
    rm get-pip.py && \
    pip --version

COPY ./ /home/pytorch-lightning/

# install dependencies
RUN \
#conda install "pip>20.1" && \
pip list | grep torch && \
cd /home && \
mv pytorch-lightning/notebooks . && \
mv pytorch-lightning/pl_examples . && \
# replace by specific version if asked
if [ ! -z "$LIGHTNING_VERSION" ] ; then \
rm -rf pytorch-lightning ; \
wget https://github.com/PyTorchLightning/pytorch-lightning/archive/${LIGHTNING_VERSION}.zip --progress=bar:force:noscroll ; \
unzip ${LIGHTNING_VERSION}.zip ; \
mv pytorch-lightning-*/ pytorch-lightning ; \
rm *.zip ; \
fi && \
pip install ./pytorch-lightning["extra"] --no-cache-dir && \

# Installations
python -c "fname = './pytorch-lightning/requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \
pip install -r ./pytorch-lightning/requirements/extra.txt -U --no-cache-dir && \
pip install -r ./pytorch-lightning/requirements/examples.txt -U --no-cache-dir && \
pip install ./pytorch-lightning --no-cache-dir && \
rm -rf pytorch-lightning

RUN python --version && \
Expand Down
7 changes: 5 additions & 2 deletions dockers/release/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,14 @@ MAINTAINER PyTorchLightning <https://github.com/PyTorchLightning>

ARG LIGHTNING_VERSION=""

COPY ./ ./pytorch-lightning/
COPY ./ /home/pytorch-lightning/

# install dependencies
RUN \
#conda install "pip>20.1" && \
cd /home && \
mv pytorch-lightning/notebooks . && \
mv pytorch-lightning/pl_examples . && \
# replace by specific version if asked
if [ ! -z "$LIGHTNING_VERSION" ] ; then \
rm -rf pytorch-lightning ; \
wget https://github.com/PyTorchLightning/pytorch-lightning/archive/${LIGHTNING_VERSION}.zip --progress=bar:force:noscroll ; \
Expand Down
12 changes: 0 additions & 12 deletions pl_examples/run_ddp-example.sh

This file was deleted.

13 changes: 13 additions & 0 deletions pl_examples/run_ddp-examples.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Runs each basic example with 2-GPU DDP, first at default precision and then
# with 16-bit precision. Every argument passed to this script is forwarded
# unchanged to each example invocation.
# Paths are relative to the current working directory, so call it from the
# repository root.

# Stop at the first failing example; otherwise only the exit status of the
# last command would reach the caller and earlier failures would be hidden.
set -e

# Arrays keep each flag a separate word without relying on unquoted expansion.
ARGS_EXTRA_DDP=(--gpus 2 --accelerator ddp)
ARGS_EXTRA_AMP=(--precision 16)

for example in simple_image_classifier backbone_image_classifier autoencoder; do
    script="pl_examples/basic_examples/${example}.py"
    # "$@" (quoted) forwards the caller's arguments without re-splitting or globbing
    python "${script}" "$@" "${ARGS_EXTRA_DDP[@]}"
    python "${script}" "$@" "${ARGS_EXTRA_DDP[@]}" "${ARGS_EXTRA_AMP[@]}"
done
15 changes: 15 additions & 0 deletions pl_examples/run_examples-args.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
# Runs every basic example once, forwarding all script arguments unchanged.
# The examples are resolved relative to this script's own location, so it can
# be invoked from any working directory.

# Stop at the first failing example; otherwise only the exit status of the
# last command would reach the caller and earlier failures would be hidden.
set -e

# Log the forwarded arguments and the resolved locations for easier debugging.
echo "$@"

full_path=$(realpath "$0")
echo "$full_path"

dir_path=$(dirname "$full_path")
echo "$dir_path"

# Quoting keeps paths with spaces intact and forwards arguments without
# re-splitting or globbing.
python "${dir_path}/basic_examples/simple_image_classifier.py" "$@"

python "${dir_path}/basic_examples/backbone_image_classifier.py" "$@"

python "${dir_path}/basic_examples/autoencoder.py" "$@"

0 comments on commit dcf6e4e

Please sign in to comment.