Skip to content

Commit

Permalink
Update unix gpu toolchain (apache#18186)
Browse files Browse the repository at this point in the history
* update nvidiadocker command & remove cuda compat

* replace cu101 with cuda since compat is no longer to be used

* skip flaky tests

* get rid of ubuntu_build_cuda and point ubuntu_cu101 to base gpu instead of cuda compat

* Revert "skip flaky tests"

This reverts commit 1c720fa.

* revert removal of ubuntu_build_cuda

* add linux gpu g4 node to all steps using g3 in unix-gpu pipeline
  • Loading branch information
ChaiBapchya committed Jul 24, 2020
1 parent e6de5ae commit 752a57d
Show file tree
Hide file tree
Showing 6 changed files with 394 additions and 33 deletions.
1 change: 1 addition & 0 deletions ci/Jenkinsfile_utils.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ def assign_node_labels(args) {
// knowing about the limitations.
NODE_LINUX_CPU = args.linux_cpu
NODE_LINUX_GPU = args.linux_gpu
NODE_LINUX_GPU_G4 = args.linux_gpu_g4
NODE_LINUX_GPU_P3 = args.linux_gpu_p3
NODE_WINDOWS_CPU = args.windows_cpu
NODE_WINDOWS_GPU = args.windows_gpu
Expand Down
3 changes: 2 additions & 1 deletion ci/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,9 @@ def container_run(docker_client: SafeDockerClient,

# Equivalent command
docker_cmd_list = [
get_docker_binary(nvidia_runtime),
"docker",
'run',
"--gpus all" if nvidia_runtime else "",
"--cap-add",
"SYS_PTRACE", # Required by ASAN
'--rm',
Expand Down
166 changes: 166 additions & 0 deletions ci/docker/Dockerfile.build.ubuntu
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# -*- mode: dockerfile -*-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Dockerfile for Ubuntu based builds.
#
# See docker-compose.yml for supported BASE_IMAGE ARGs and targets.

####################################################################################################
# The Dockerfile uses a dynamic BASE_IMAGE (for example ubuntu:18.04,
# nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04, etc.).
# On top of BASE_IMAGE we install all dependencies shared by all MXNet build
# environments into a "base" target. At the end of this file, we can specialize
# "base" for specific usecases. The target built by docker can be selected via
# "--target" option or docker-compose.yml
####################################################################################################
# BASE_IMAGE has no default value here, so it must be supplied at build time
# (for example with --build-arg, or through the "args" section of
# docker-compose.yml).
ARG BASE_IMAGE
# First stage of the multi-stage build. Later stages in this file start
# "FROM base" and therefore inherit everything installed below.
FROM $BASE_IMAGE AS base

# Instructions that follow run relative to /work/deps until WORKDIR is set again.
WORKDIR /work/deps

# Install the system packages shared by all MXNet Ubuntu build environments.
#
# Steps, all in a single layer:
#   1. Install wget and software-properties-common (provides apt-add-repository).
#   2. Register the LLVM and Intel MKL apt repositories (signing key + source
#      entry) so that clang-10 and intel-mkl can be resolved afterwards.
#   3. Install the actual toolchain, libraries and language frontends.
#   4. Drop the apt package index so it does not end up in the image layer.
#
# Both signing keys are fetched over HTTPS; each wget call receives exactly one
# URL so that no spurious request is made.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update && \
    apt-get install -y wget software-properties-common && \
    wget -qO - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
    wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB | apt-key add - && \
    apt-add-repository "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-10 main" && \
    apt-add-repository "deb https://apt.repos.intel.com/mkl all main" && \
    apt-get update && \
    apt-get install -y \
        ## Utilities
        curl \
        unzip \
        ## Development tools
        build-essential \
        ninja-build \
        git \
        protobuf-compiler \
        libprotobuf-dev \
        clang-6.0 \
        clang-tidy-6.0 \
        python-yaml \
        clang-10 \
        g++ \
        g++-8 \
        intel-mkl-2020.0-088 \
        ## Dependencies
        libgomp1 \
        libturbojpeg0-dev \
        libopenblas-dev \
        libcurl4-openssl-dev \
        libatlas-base-dev \
        libzmq3-dev \
        liblapack-dev \
        libopencv-dev \
        # Caffe
        caffe-cpu \
        libcaffe-cpu-dev \
        ## Frontend languages
        # Python
        python3 \
        python3-pip \
        python3-nose \
        python3-nose-timer \
        # Scala
        openjdk-8-jdk \
        openjdk-8-jre \
        maven \
        scala \
        # Clojure
        clojure \
        leiningen \
        # R
        r-base-core \
        r-cran-devtools \
        libcairo2-dev \
        libxml2-dev \
        ## Documentation
        doxygen \
        pandoc \
        ## Build-dependencies for ccache 3.7.9
        gperf \
        libb2-dev \
        libzstd-dev && \
    rm -rf /var/lib/apt/lists/*

# ccache 3.7.9 has fixes for caching nvcc outputs
# ccache is built from the v3.7.9 git tag and installed via "make install".
# Man page generation is turned off (--disable-man). The source checkout under
# /usr/local/src is deleted in the same RUN so it does not remain in the layer.
RUN cd /usr/local/src && \
git clone --recursive https://github.com/ccache/ccache.git && \
cd ccache && \
git checkout v3.7.9 && \
./autogen.sh && \
./configure --disable-man && \
make -j$(nproc) && \
make install && \
cd /usr/local/src && \
rm -rf ccache

# Python & cmake
# cmake is installed from PyPI, pinned to 3.16.6, followed by the Python
# packages listed in the copied requirements file.
COPY install/requirements /work/
RUN python3 -m pip install cmake==3.16.6 && \
python3 -m pip install -r /work/requirements

# Only OpenJDK 8 supported at this time..
# Selects the OpenJDK 8 installation as the system-wide Java alternative.
RUN update-java-alternatives -s java-1.8.0-openjdk-amd64

# julia not available on 18.04
# NOTE(review): presumably the script installs Julia by other means - confirm
# in install/ubuntu_julia.sh.
COPY install/ubuntu_julia.sh /work/
RUN /work/ubuntu_julia.sh

# PDL::CCS missing on 18.04
# NOTE(review): presumably the script provides the missing Perl module -
# confirm in install/ubuntu_perl.sh.
COPY install/ubuntu_perl.sh /work/
RUN /work/ubuntu_perl.sh

# MXNetJS nightly needs emscripten for wasm
COPY install/ubuntu_emscripten.sh /work/
RUN /work/ubuntu_emscripten.sh

# USER_ID defaults to 0 and can be overridden at build time.
# NOTE(review): presumably read by docker_filepermissions.sh to adjust file
# ownership/permissions for that user - confirm in the script.
ARG USER_ID=0
COPY install/docker_filepermissions.sh /work/
RUN /work/docker_filepermissions.sh

# PYTHONPATH is a relative path, so it is resolved against the current working
# directory of whichever process reads it; with the WORKDIR below that is
# /work/mxnet/python for commands started from the default directory.
ENV PYTHONPATH=./python/
WORKDIR /work/mxnet

# NOTE(review): presumably copied last so that edits to runtime_functions.sh
# invalidate as few cached layers as possible - TODO confirm.
COPY runtime_functions.sh /work/

####################################################################################################
# Specialize base image to install more gpu specific dependencies.
# The target built by docker can be selected via "--target" option or docker-compose.yml
####################################################################################################
# GPU stage: everything from "base" plus a Thrust checkout under /usr/local/thrust.
FROM base AS gpu
# Install Thrust 1.9.8 (the release expected to ship with CUDA 11).
# Fixes https://github.com/thrust/thrust/issues/1072 for Clang 10.
# This step can be removed once CI uses CUDA 11.
RUN cd /usr/local && \
    git clone https://github.com/thrust/thrust.git && \
    cd thrust && \
    git checkout 1.9.8


# GPU stage that additionally carries the full CUDA 10.1 installation.
FROM gpu AS gpuwithcudaruntimelibs
# Special case because the CPP-Package requires the CUDA runtime libs
# and not only stubs (which are provided by the base image).
# This prevents usage of this image for actual GPU tests with Docker.
# This is a bug in CPP-Package and should be fixed.
#
# apt-get is used (rather than apt) because it is the stable, script-oriented
# front end and matches the rest of this file.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        cuda-10-1 && \
    rm -rf /var/lib/apt/lists/*
208 changes: 208 additions & 0 deletions ci/docker/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# We use the cache_from feature introduced in file format version 3.4 (released 2017-11-01)
version: "3.4"

# For simplicity, only the centos7_cpu is commented. But the comments apply to
# all other services as well.
services:
###################################################################################################
# Dockerfile.build.centos7 based images used for building on CentOS7. On
# CentOS7, we test the oldest supported toolchain and dependency versions.
###################################################################################################
centos7_cpu:
# The resulting image will be named build.centos7_cpu:latest and will be
# pushed to the dockerhub user specified in the environment variable
# ${DOCKER_CACHE_REGISTRY} (typically "mxnetci") under this name
image: ${DOCKER_CACHE_REGISTRY}/build.centos7_cpu:latest
build:
context: .
dockerfile: Dockerfile.build.centos7
# Use "base" target declared in Dockerfile.build.centos7 as "build.centos7_cpu:latest"
target: base
args:
# BASE_IMAGE is used to dynamically specify the FROM image in Dockerfile.build.centos7
BASE_IMAGE: centos:7
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.centos7_cpu:latest
centos7_gpu_cu92:
image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu92:latest
build:
context: .
dockerfile: Dockerfile.build.centos7
target: gpu
args:
BASE_IMAGE: nvidia/cuda:9.2-cudnn7-devel-centos7
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu92:latest
centos7_gpu_cu100:
image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu100:latest
build:
context: .
dockerfile: Dockerfile.build.centos7
target: gpu
args:
BASE_IMAGE: nvidia/cuda:10.0-cudnn7-devel-centos7
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu100:latest
centos7_gpu_cu101:
image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu101:latest
build:
context: .
dockerfile: Dockerfile.build.centos7
target: gpu
args:
BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-centos7
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu101:latest
centos7_gpu_cu102:
image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu102:latest
build:
context: .
dockerfile: Dockerfile.build.centos7
target: gpu
args:
BASE_IMAGE: nvidia/cuda:10.2-cudnn7-devel-centos7
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu102:latest
###################################################################################################
# Dockerfile.build.ubuntu based images. On Ubuntu we test more recent
# toolchain and dependency versions compared to CentOS7. We attempt to update
# the Ubuntu base image every 6 months, following the Ubuntu release cycle,
# and testing the dependencies in their version provided by the respective
# Ubuntu release.
###################################################################################################
ubuntu_cpu:
image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_cpu:latest
build:
context: .
dockerfile: Dockerfile.build.ubuntu
target: base
args:
BASE_IMAGE: ubuntu:18.04
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.ubuntu_cpu:latest
ubuntu_gpu_cu101:
image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu101:latest
build:
context: .
dockerfile: Dockerfile.build.ubuntu
target: gpu
args:
BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu101:latest
ubuntu_build_cuda:
image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_build_cuda:latest
build:
context: .
dockerfile: Dockerfile.build.ubuntu
target: gpuwithcudaruntimelibs
args:
BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.ubuntu_build_cuda:latest
###################################################################################################
# Dockerfile.build.arm based images used for testing cross-compilation for plain ARM
###################################################################################################
armv6:
image: ${DOCKER_CACHE_REGISTRY}/build.armv6:latest
build:
context: .
dockerfile: Dockerfile.build.arm
target: armv6
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.armv6:latest
armv7:
image: ${DOCKER_CACHE_REGISTRY}/build.armv7:latest
build:
context: .
dockerfile: Dockerfile.build.arm
target: armv7
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.armv7:latest
armv8:
image: ${DOCKER_CACHE_REGISTRY}/build.armv8:latest
build:
context: .
dockerfile: Dockerfile.build.arm
target: armv8
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.armv8:latest
###################################################################################################
# Dockerfile.test.arm based images for testing ARM artefacts via QEMU
###################################################################################################
test.armv7:
image: ${DOCKER_CACHE_REGISTRY}/test.armv7:latest
build:
context: .
dockerfile: Dockerfile.test.arm
args:
BASE_IMAGE: arm32v7/ubuntu:20.04
cache_from:
- ${DOCKER_CACHE_REGISTRY}/test.armv7:latest
test.armv8:
image: ${DOCKER_CACHE_REGISTRY}/test.armv8:latest
build:
context: .
dockerfile: Dockerfile.test.arm
args:
BASE_IMAGE: arm64v8/ubuntu:20.04
cache_from:
- ${DOCKER_CACHE_REGISTRY}/test.armv8:latest
###################################################################################################
# Dockerfile.build.android based images used for testing cross-compilation for Android
###################################################################################################
android_armv7:
image: ${DOCKER_CACHE_REGISTRY}/build.android_armv7:latest
build:
context: .
dockerfile: Dockerfile.build.android
target: armv7
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.android_armv7:latest
android_armv8:
image: ${DOCKER_CACHE_REGISTRY}/build.android_armv8:latest
build:
context: .
dockerfile: Dockerfile.build.android
target: armv8
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.android_armv8:latest
###################################################################################################
# Dockerfile.publish.test based images used for testing binary artifacts on minimal systems.
###################################################################################################
publish.test.centos7_cpu:
image: ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_cpu:latest
build:
context: .
dockerfile: Dockerfile.publish.test.centos7
args:
BASE_IMAGE: centos:7
cache_from:
- ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_cpu:latest
publish.test.centos7_gpu:
image: ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_gpu:latest
build:
context: .
dockerfile: Dockerfile.publish.test.centos7
args:
BASE_IMAGE: nvidia/cuda:9.2-cudnn7-devel-centos7
cache_from:
- ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_gpu:latest
Loading

0 comments on commit 752a57d

Please sign in to comment.