Skip to content

Commit

Permalink
CI: Migrate remaining Dockerfiles to docker-compose.yml and remove un…
Browse files Browse the repository at this point in the history
…used code (apache#18771)

* Migrate remaining Dockerfiles to docker-compose.yml

- Delete unused Dockerfiles
- Delete unused install/*.sh scripts
- Consolidate ubuntu_gpu_tensorrt and ubuntu_gpu
- Remove deprecated logic in ci/build.py (no longer needed with
  docker-compose)
- Remove ci/docker_cache.py (no longer needed with docker-compose)

* Fix

* Fix

* Fix ubuntu_cpu_jekyll
  • Loading branch information
leezu authored Jul 23, 2020
1 parent 1928117 commit 18af71e
Show file tree
Hide file tree
Showing 41 changed files with 105 additions and 1,881 deletions.
1 change: 0 additions & 1 deletion ci/Jenkinsfile_docker_cache
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ core_logic: {
ws('workspace/docker_cache') {
timeout(time: total_timeout, unit: 'MINUTES') {
utils.init_git()
sh "python3 ./ci/docker_cache.py --docker-registry ${env.DOCKER_CACHE_REGISTRY}"
sh "cd ci && python3 ./docker_login.py --secret-name ${env.DOCKERHUB_SECRET_NAME} && docker-compose -f docker/docker-compose.yml pull && docker-compose -f docker/docker-compose.yml build --parallel && COMPOSE_HTTP_TIMEOUT=600 docker-compose -f docker/docker-compose.yml push && docker logout"
}
}
Expand Down
189 changes: 30 additions & 159 deletions ci/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,73 +18,37 @@
# specific language governing permissions and limitations
# under the License.

"""Multi arch dockerized build tool.
"""Multi arch dockerized build tool."""

"""

__author__ = 'Marco de Abreu, Kellen Sunderland, Anton Chernov, Pedro Larroy'
__version__ = '0.3'
__author__ = 'Marco de Abreu, Kellen Sunderland, Anton Chernov, Pedro Larroy, Leonard Lausen'
__version__ = '0.4'

import argparse
import glob
import pprint
import re
import os
import shutil
import signal
import subprocess
from itertools import chain
from subprocess import check_call, check_output
from subprocess import check_call
from typing import *

import yaml

from safe_docker_run import SafeDockerClient
from util import *

# NOTE: Temporary whitelist used until all Dockerfiles are refactored for docker compose
DOCKER_COMPOSE_WHITELIST = ('centos7_cpu', 'centos7_gpu_cu92', 'centos7_gpu_cu100',
'centos7_gpu_cu101', 'centos7_gpu_cu102', 'ubuntu_cpu',
'ubuntu_build_cuda', 'ubuntu_gpu_cu101', 'publish.test.centos7_cpu',
'publish.test.centos7_gpu', 'android_armv7', 'android_armv8',
'armv6', 'armv7', 'armv8', 'test.armv7', 'test.armv8')
# Files for docker compose
DOCKER_COMPOSE_FILES = set(('docker/build.centos7', 'docker/build.ubuntu', 'docker/build.android',
'docker/build.arm', 'docker/test.arm', 'docker/publish.test.centos7'))


def get_dockerfiles_path() -> str:
    """Return the directory (relative to ci/) holding the CI Dockerfiles."""
    return "docker"


def get_platforms(path: str = get_dockerfiles_path(), legacy_only=False) -> List[str]:
"""Get a list of architectures given our dockerfiles"""
dockerfiles = glob.glob(os.path.join(path, "Dockerfile.*"))
dockerfiles = set(filter(lambda x: x[-1] != '~', dockerfiles))
files = set(map(lambda x: re.sub(r"Dockerfile.(.*)", r"\1", x), dockerfiles))
if legacy_only:
files = files - DOCKER_COMPOSE_FILES
platforms = list(map(lambda x: os.path.split(x)[1], sorted(files)))
return platforms

def get_platforms() -> List[str]:
    """Return the platform names declared as services in docker/docker-compose.yml."""
    compose_path = "docker/docker-compose.yml"
    with open(compose_path, "r") as fin:
        # SafeLoader: the compose file is plain data, no python object tags expected
        compose_config = yaml.load(fin.read(), yaml.SafeLoader)
    return [service for service in compose_config["services"]]

def get_docker_tag(platform: str, registry: str) -> str:
""":return: docker tag to be used for the container"""
if platform in DOCKER_COMPOSE_WHITELIST:
with open("docker/docker-compose.yml", "r") as f:
compose_config = yaml.load(f.read(), yaml.SafeLoader)
return compose_config["services"][platform]["image"].replace('${DOCKER_CACHE_REGISTRY}', registry)

platform = platform if any(x in platform for x in ['build.', 'publish.']) else 'build.{}'.format(platform)
if not registry:
registry = "mxnet_local"
return "{0}/{1}".format(registry, platform)


def get_dockerfile(platform: str, path=get_dockerfiles_path()) -> str:
platform = platform if any(x in platform for x in ['build.', 'publish.']) else 'build.{}'.format(platform)
return os.path.join(path, "Dockerfile.{0}".format(platform))

with open("docker/docker-compose.yml", "r") as f:
compose_config = yaml.load(f.read(), yaml.SafeLoader)
return compose_config["services"][platform]["image"].replace('${DOCKER_CACHE_REGISTRY}', registry)

def build_docker(platform: str, registry: str, num_retries: int, no_cache: bool,
cache_intermediate: bool = False) -> str:
Expand All @@ -96,50 +60,18 @@ def build_docker(platform: str, registry: str, num_retries: int, no_cache: bool,
:param no_cache: pass no-cache to docker to rebuild the images
:return: Id of the top level image
"""
tag = get_docker_tag(platform=platform, registry=registry)
logging.info('Building docker container \'%s\' based on ci/docker/docker-compose.yml', platform)
# We add a user with the same group as the executing non-root user so files created in the
# container match permissions of the local user. Same for the group.
cmd = ['docker-compose', '-f', 'docker/docker-compose.yml', 'build',
"--build-arg", "USER_ID={}".format(os.getuid()),
"--build-arg", "GROUP_ID={}".format(os.getgid())]
if cache_intermediate:
cmd.append('--no-rm')
cmd.append(platform)

env = os.environ.copy()

# Case 1: docker-compose
if platform in DOCKER_COMPOSE_WHITELIST:
logging.info('Building docker container tagged \'%s\' based on ci/docker/docker-compose.yml', tag)
# We add a user with the same group as the executing non-root user so files created in the
# container match permissions of the local user. Same for the group.
cmd = ['docker-compose', '-f', 'docker/docker-compose.yml', 'build',
"--build-arg", "USER_ID={}".format(os.getuid()),
"--build-arg", "GROUP_ID={}".format(os.getgid())]
if cache_intermediate:
cmd.append('--no-rm')
cmd.append(platform)
env["DOCKER_CACHE_REGISTRY"] = registry
else: # Case 2: Deprecated way, will be removed
# We add a user with the same group as the executing non-root user so files created in the
# container match permissions of the local user. Same for the group.
#
# These variables are used in the docker files to create user and group with these ids.
# see: docker/install/ubuntu_adduser.sh
#
# cache-from is needed so we use the cached images tagged from the remote via
# docker pull see: docker_cache.load_docker_cache
#
# This also prevents using local layers for caching: https://github.com/moby/moby/issues/33002
# So to use local caching, we should omit the cache-from by using --no-dockerhub-cache argument to this
# script.
#
# This doesn't work with multi head docker files.
logging.info("Building docker container tagged '%s'", tag)
cmd = ["docker", "build",
"-f", get_dockerfile(platform),
"--build-arg", "USER_ID={}".format(os.getuid()),
"--build-arg", "GROUP_ID={}".format(os.getgid())]
if no_cache:
cmd.append("--no-cache")
if cache_intermediate:
cmd.append("--rm=false")
elif registry:
cmd.extend(["--cache-from", tag])
cmd.extend(["-t", tag, get_dockerfiles_path()])

env["DOCKER_CACHE_REGISTRY"] = registry

@retry(subprocess.CalledProcessError, tries=num_retries)
def run_cmd(env=None):
Expand All @@ -148,27 +80,6 @@ def run_cmd(env=None):

run_cmd(env=env)

# Get image id by reading the tag. It's guaranteed (except race condition) that the tag exists. Otherwise, the
# check_call would have failed
image_id = _get_local_image_id(docker_tag=tag)
if not image_id:
raise FileNotFoundError('Unable to find docker image id matching with {}'.format(tag))
return image_id


def _get_local_image_id(docker_tag):
    """
    Look up the id of the local docker image carrying the given tag.
    :param docker_tag: docker tag to resolve
    :return: image id as a string
    :raises RuntimeError: if no local image matches the tag
    """
    # `docker images -q` prints only the image id, or nothing when the tag is absent
    output = check_output(["docker", "images", "-q", docker_tag])
    image_id = output.decode('utf-8').strip()
    if not image_id:
        raise RuntimeError('Unable to find docker image id matching with tag {}'.format(docker_tag))
    return image_id


def buildir() -> str:
    """Return the path of the build directory under the MXNet source root."""
    mxnet_root = get_mxnet_root()
    return os.path.join(mxnet_root, "build")
Expand Down Expand Up @@ -291,21 +202,11 @@ def list_platforms() -> str:
def load_docker_cache(platform, tag, docker_registry) -> None:
"""Imports tagged container from the given docker registry"""
if docker_registry:
if platform in DOCKER_COMPOSE_WHITELIST:
env = os.environ.copy()
env["DOCKER_CACHE_REGISTRY"] = docker_registry
cmd = ['docker-compose', '-f', 'docker/docker-compose.yml', 'pull', platform]
logging.info("Running command: 'DOCKER_CACHE_REGISTRY=%s %s'", docker_registry, ' '.join(cmd))
check_call(cmd, env=env)
return

# noinspection PyBroadException
try:
import docker_cache
logging.info('Docker cache download is enabled from registry %s', docker_registry)
docker_cache.load_docker_cache(registry=docker_registry, docker_tag=tag)
except Exception:
logging.exception('Unable to retrieve Docker cache. Continue without...')
env = os.environ.copy()
env["DOCKER_CACHE_REGISTRY"] = docker_registry
cmd = ['docker-compose', '-f', 'docker/docker-compose.yml', 'pull', platform]
logging.info("Running command: 'DOCKER_CACHE_REGISTRY=%s %s'", docker_registry, ' '.join(cmd))
check_call(cmd, env=env)
else:
logging.info('Distributed docker cache disabled')

Expand All @@ -327,9 +228,9 @@ def main() -> int:

parser = argparse.ArgumentParser(description="""Utility for building and testing MXNet on docker
containers""", epilog="")
parser.add_argument("-p", "--platform",
help="platform",
type=str)
parser.add_argument("-p", "--platform", type=str, help= \
"Platform. See ci/docker/docker-compose.yml for list of supported " \
"platforms (services).")

parser.add_argument("-b", "--build-only",
help="Only build the container, don't build the project",
Expand All @@ -339,10 +240,6 @@ def main() -> int:
help="Only run the container, don't rebuild the container",
action='store_true')

parser.add_argument("-a", "--all",
help="build for all platforms",
action='store_true')

parser.add_argument("-n", "--nvidiadocker",
help="Use nvidia docker",
action='store_true')
Expand Down Expand Up @@ -443,32 +340,6 @@ def main() -> int:
logging.critical("Execution of %s failed with status: %d", command, ret)
return ret

elif args.all:
platforms = get_platforms()
platforms = [platform for platform in platforms if 'build.' in platform]
logging.info("Building for all architectures: %s", platforms)
logging.info("Artifacts will be produced in the build/ directory.")
for platform in platforms:
tag = get_docker_tag(platform=platform, registry=args.docker_registry)
load_docker_cache(platform=platform, tag=tag, docker_registry=args.docker_registry)
build_docker(platform, registry=args.docker_registry, num_retries=args.docker_build_retries,
no_cache=args.no_cache)
if args.build_only:
continue
shutil.rmtree(buildir(), ignore_errors=True)
build_platform = "build_{}".format(platform)
plat_buildir = os.path.abspath(os.path.join(get_mxnet_root(), '..',
"mxnet_{}".format(build_platform)))
if os.path.exists(plat_buildir):
logging.warning("%s already exists, skipping", plat_buildir)
continue
command = ["/work/mxnet/ci/docker/runtime_functions.sh", build_platform]
container_run(
docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
local_ccache_dir=args.ccache_dir, environment=environment)
shutil.move(buildir(), plat_buildir)
logging.info("Built files left in: %s", plat_buildir)

else:
parser.print_help()
Expand Down
1 change: 0 additions & 1 deletion ci/dev_menu.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ def provision_virtualenv(venv_path=DEFAULT_PYENV):
('[Docker] sanity_check. Check for linting and code formatting and licenses.',
[
"ci/build.py --platform ubuntu_cpu /work/runtime_functions.sh sanity_check",
"ci/build.py --platform ubuntu_rat /work/runtime_functions.sh nightly_test_rat_check",
]),
('[Docker] Python3 CPU unittests',
[
Expand Down
51 changes: 24 additions & 27 deletions ci/docker/Dockerfile.build.ubuntu
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
libzmq3-dev \
liblapack-dev \
libopencv-dev \
# Caffe
caffe-cpu \
libcaffe-cpu-dev \
libxml2-dev \
# BytePS
numactl \
libnuma-dev \
Expand All @@ -80,23 +78,11 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
python3-pip \
python3-nose \
python3-nose-timer \
# Scala
openjdk-8-jdk \
openjdk-8-jre \
maven \
scala \
# Clojure
clojure \
leiningen \
# R
r-base-core \
r-cran-devtools \
libcairo2-dev \
libxml2-dev \
## Documentation
doxygen \
pandoc \
## Build-dependencies for ccache 3.7.9
autoconf \
gperf \
libb2-dev \
libzstd-dev && \
Expand All @@ -114,22 +100,16 @@ RUN cd /usr/local/src && \
cd /usr/local/src && \
rm -rf ccache

# RAT License Checker tool
RUN cd /usr/local/src && \
wget https://archive.apache.org/dist/creadur/apache-rat-0.13/apache-rat-0.13-bin.tar.gz && \
tar xf apache-rat-0.13-bin.tar.gz

# Python & cmake
COPY install/requirements /work/
RUN python3 -m pip install cmake==3.16.6 && \
python3 -m pip install -r /work/requirements

# Only OpenJDK 8 supported at this time..
RUN update-java-alternatives -s java-1.8.0-openjdk-amd64

# julia not available on 18.04
COPY install/ubuntu_julia.sh /work/
RUN /work/ubuntu_julia.sh

# MXNetJS nightly needs emscripten for wasm
COPY install/ubuntu_emscripten.sh /work/
RUN /work/ubuntu_emscripten.sh

ARG USER_ID=0
COPY install/docker_filepermissions.sh /work/
RUN /work/docker_filepermissions.sh
Expand All @@ -152,6 +132,23 @@ RUN cd /usr/local && \
cd thrust && \
git checkout 1.9.8

# Install TensorRT
# We need to redeclare ARG due to
# https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact
ARG BASE_IMAGE
RUN export SHORT_CUDA_VERSION=${CUDA_VERSION%.*} && \
apt-get update && \
if [ ${SHORT_CUDA_VERSION} = 10.0 ]; then \
apt-get install -y "libnvinfer-dev=5.1.5-1+cuda10.0"; \
elif [ ${SHORT_CUDA_VERSION} = 10.1 ]; then \
apt-get install -y "libnvinfer-dev=5.1.5-1+cuda10.1"; \
elif [ ${SHORT_CUDA_VERSION} = 10.2 ]; then \
apt-get install -y "libnvinfer-dev=6.0.1-1+cuda10.2"; \
else \
echo "ERROR: Cuda ${SHORT_CUDA_VERSION} not yet supported in Dockerfile.build.ubuntu"; \
exit 1; \
fi && \
rm -rf /var/lib/apt/lists/*

FROM gpu as gpuwithcudaruntimelibs
# Special case because the CPP-Package requires the CUDA runtime libs
Expand Down
35 changes: 0 additions & 35 deletions ci/docker/Dockerfile.build.ubuntu_cpu_c

This file was deleted.

Loading

0 comments on commit 18af71e

Please sign in to comment.