Skip to content

Commit

Permalink
[aarch64][CICD]Add aarch64 docker image build. (#1472)
Browse files Browse the repository at this point in the history
* Add aarch64 docker image build

* removing ulimit for PT workflow

* set aarch64 worker for docker build
  • Loading branch information
Mike Schneider committed Aug 9, 2023
1 parent 14851d9 commit bb821d4
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 54 deletions.
17 changes: 17 additions & 0 deletions .github/workflows/build-manywheel-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@ on:
paths:
- .github/workflows/build-manywheel-images.yml
- manywheel/Dockerfile
- manywheel/Dockerfile_aarch64
- manywheel/Dockerfile_cxx11-abi
- manywheel/build_docker.sh
- 'common/*'
pull_request:
paths:
- .github/workflows/build-manywheel-images.yml
- manywheel/Dockerfile
- manywheel/Dockerfile_aarch64
- manywheel/Dockerfile_cxx11-abi
- 'common/*'
- manywheel/build_docker.sh
Expand Down Expand Up @@ -82,6 +84,21 @@ jobs:
- name: Build Docker Image
run: |
manywheel/build_docker.sh
# Job: build the cpu-aarch64 manywheel Docker image.
# Runs on a linux.t4g.2xlarge runner; GPU_ARCH_TYPE selects the cpu-aarch64
# branch of manywheel/build_docker.sh.
build-docker-cpu-aarch64:
runs-on: linux.t4g.2xlarge
env:
GPU_ARCH_TYPE: cpu-aarch64
steps:
- name: Checkout PyTorch
uses: actions/checkout@v3
# Log in to the Docker registry only when WITH_PUSH is "true".
# NOTE(review): WITH_PUSH, DOCKER_TOKEN and DOCKER_ID are not set in this
# job; presumably they come from workflow-level env -- confirm.
- name: Authenticate if WITH_PUSH
run: |
if [[ "${WITH_PUSH}" == true ]]; then
echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
fi
- name: Build Docker Image
run: |
manywheel/build_docker.sh
build-docker-cpu-cxx11-abi:
runs-on: ubuntu-22.04
env:
Expand Down
21 changes: 1 addition & 20 deletions aarch64_linux/aarch64_ci_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,6 @@ CONDA_EXE=/opt/conda/bin/conda
PATH=/opt/conda/bin:$PATH
LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH

###############################################################################
# Install OS dependent packages
###############################################################################
yum -y install epel-release
yum -y install less zstd libgomp

###############################################################################
# Install conda
# disable SSL_verify due to getting "Could not find a suitable TLS CA certificate bundle, invalid path"
Expand All @@ -26,19 +20,6 @@ chmod +x /mambaforge.sh
/mambaforge.sh -b -p /opt/conda
rm /mambaforge.sh
/opt/conda/bin/conda config --set ssl_verify False
/opt/conda/bin/conda install -y -c conda-forge python=${DESIRED_PYTHON} numpy pyyaml setuptools patchelf pygit2 openblas
/opt/conda/bin/conda install -y -c conda-forge python=${DESIRED_PYTHON} numpy pyyaml setuptools patchelf pygit2 openblas ninja scons
python --version
conda --version

###############################################################################
# Exec libgfortran.a hack
#
# libgfortran.a from quay.io/pypa/manylinux2014_aarch64 is not compiled with -fPIC.
# This causes __stack_chk_guard@@GLIBC_2.17 on pytorch build. To solve, get
# ubuntu's libgfortran.a which is compiled with -fPIC
###############################################################################
cd ~/
curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-1ubuntu1_arm64.deb
ar x ~/libgfortran-10-dev.deb
tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/
cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/
67 changes: 33 additions & 34 deletions aarch64_linux/aarch64_wheel_ci_build.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,42 @@
#!/usr/bin/env python3
# encoding: UTF-8

import os
import subprocess
from pygit2 import Repository
from typing import List


''''
Helper for getting paths for Python
'''
def list_dir(path: str) -> List[str]:
return subprocess.check_output(["ls", "-1", path]).decode().split("\n")
'''
Helper for getting paths for Python
'''
return subprocess.check_output(["ls", "-1", path]).decode().split("\n")


'''
Using ArmComputeLibrary for aarch64 PyTorch
'''
def build_ArmComputeLibrary(git_clone_flags: str = "") -> None:
'''
Using ArmComputeLibrary for aarch64 PyTorch
'''
print('Building Arm Compute Library')
os.system("cd / && mkdir /acl")
os.system(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.05.1 {git_clone_flags}")
os.system('sed -i -e \'s/"armv8.2-a"/"armv8-a"/g\' ComputeLibrary/SConscript; '
'sed -i -e \'s/-march=armv8.2-a+fp16/-march=armv8-a/g\' ComputeLibrary/SConstruct; '
'sed -i -e \'s/"-march=armv8.2-a"/"-march=armv8-a"/g\' ComputeLibrary/filedefs.json')
os.system(f"cd ComputeLibrary; export acl_install_dir=/acl; " \
f"scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8.2-a multi_isa=1 build=native build_dir=$acl_install_dir/build; " \
f"cp -r arm_compute $acl_install_dir; " \
f"cp -r include $acl_install_dir; " \
f"cp -r utils $acl_install_dir; " \
f"cp -r support $acl_install_dir; " \
f"cp -r src $acl_install_dir; cd /")


'''
Complete wheel build and put in artifact location
'''
os.system("cd ComputeLibrary; export acl_install_dir=/acl; "
"scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8.2-a multi_isa=1 build=native build_dir=$acl_install_dir/build; "
"cp -r arm_compute $acl_install_dir; "
"cp -r include $acl_install_dir; "
"cp -r utils $acl_install_dir; "
"cp -r support $acl_install_dir; "
"cp -r src $acl_install_dir; cd /")


def complete_wheel(folder: str):
'''
Complete wheel build and put in artifact location
'''
wheel_name = list_dir(f"/{folder}/dist")[0]

if "pytorch" in folder:
Expand All @@ -54,10 +55,10 @@ def complete_wheel(folder: str):
return repaired_wheel_name


'''
Parse inline arguments
'''
def parse_arguments():
'''
Parse inline arguments
'''
from argparse import ArgumentParser
parser = ArgumentParser("AARCH64 wheels python CD")
parser.add_argument("--debug", action="store_true")
Expand All @@ -67,11 +68,10 @@ def parse_arguments():
return parser.parse_args()


'''
Entry Point
'''
if __name__ == '__main__':

'''
Entry Point
'''
args = parse_arguments()
enable_mkldnn = args.enable_mkldnn
repo = Repository('/pytorch')
Expand All @@ -80,26 +80,25 @@ def parse_arguments():
branch = 'master'

git_clone_flags = " --depth 1 --shallow-submodules"
os.system(f"conda install -y ninja scons")

print('Building PyTorch wheel')
build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
os.system(f"python setup.py clean")
os.system("python setup.py clean")

if branch == 'nightly' or branch == 'master':
build_date = subprocess.check_output(['git','log','--pretty=format:%cs','-1'], cwd='/pytorch').decode().replace('-','')
version = subprocess.check_output(['cat','version.txt'], cwd='/pytorch').decode().strip()[:-2]
build_date = subprocess.check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '')
version = subprocess.check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2]
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 "
if branch.startswith("v1.") or branch.startswith("v2."):
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "
if enable_mkldnn:
build_ArmComputeLibrary(git_clone_flags)
print("build pytorch with mkldnn+acl backend")
build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " \
"ACL_ROOT_DIR=/acl " \
"LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " \
"ACL_INCLUDE_DIR=/acl/build " \
"ACL_LIBRARY=/acl/build "
"ACL_ROOT_DIR=/acl " \
"LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " \
"ACL_INCLUDE_DIR=/acl/build " \
"ACL_LIBRARY=/acl/build "
else:
print("build pytorch without mkldnn backend")

Expand Down
86 changes: 86 additions & 0 deletions manywheel/Dockerfile_aarch64
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Multi-stage image for aarch64 manywheel builds, layered on the upstream
# manylinux2014_aarch64 image. Stages: base -> openssl -> final.

# Stage "base": OS packages, devtoolset toolchain, git config, libgfortran
# replacement and cmake.
FROM quay.io/pypa/manylinux2014_aarch64 as base


# Graviton needs GCC 10 for the build
ARG DEVTOOLSET_VERSION=10

# Language variables
ENV LC_ALL=en_US.UTF-8
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US.UTF-8

# Install needed OS packages. This is to support all
# the binary builds (torch, vision, audio, text, data)
RUN yum -y install epel-release
RUN yum -y update
RUN yum install -y \
autoconf \
automake \
bison \
bzip2 \
curl \
diffutils \
file \
git \
make \
patch \
perl \
unzip \
util-linux \
wget \
which \
xz \
yasm \
less \
zstd \
libgomp \
devtoolset-${DEVTOOLSET_VERSION}-gcc \
devtoolset-${DEVTOOLSET_VERSION}-gcc-c++ \
devtoolset-${DEVTOOLSET_VERSION}-gcc-gfortran \
devtoolset-${DEVTOOLSET_VERSION}-binutils

# Ensure the expected devtoolset is used
ENV PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH


# git236+ would refuse to run git commands in repos owned by other users
# Which causes version check to fail, as pytorch repo is bind-mounted into the image
# Override this behaviour by treating every folder as safe
# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
RUN git config --global --add safe.directory "*"


###############################################################################
# libgfortran.a hack
#
# libgfortran.a from quay.io/pypa/manylinux2014_aarch64 is not compiled with -fPIC.
# This causes __stack_chk_guard@@GLIBC_2.17 on pytorch build. To solve, get
# ubuntu's libgfortran.a which is compiled with -fPIC
# NOTE: Need a better way to get this library as Ubuntu's package can be removed by the vendor, or changed
# NOTE(review): the package URL and the copy destination below hard-code
# GCC/devtoolset 10 instead of using ${DEVTOOLSET_VERSION}; overriding the
# build arg would leave this step pointing at the devtoolset-10 path.
# NOTE(review): the package is fetched over plain http with no checksum
# verification, and the downloaded/extracted files are left in ~/.
###############################################################################
RUN cd ~/ \
&& curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-1ubuntu1_arm64.deb \
&& ar x ~/libgfortran-10-dev.deb \
&& tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/ \
&& cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/

# Install cmake (cmake3 package, exposed as `cmake` via symlink)
RUN yum install -y cmake3 && \
ln -s /usr/bin/cmake3 /usr/bin/cmake

# Stage "openssl": runs common/install_openssl.sh on top of base.
FROM base as openssl
# Install openssl (this must precede `build python` step)
# (In order to have a proper SSL module, Python is compiled
# against a recent openssl [see env vars above], which is linked
# statically. We delete openssl afterwards.)
# NOTE(review): no `build python` step or openssl env vars are visible in this
# file; the comment above may be inherited from another Dockerfile -- confirm.
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh
ENV SSL_CERT_FILE=/opt/_internal/certs.pem

# Stage "final": the openssl stage with old Python installs removed.
FROM openssl as final
# Remove unnecessary Python versions
RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
2 changes: 2 additions & 0 deletions manywheel/build_all_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ TOPDIR=$(git rev-parse --show-toplevel)
GPU_ARCH_TYPE=cpu "${TOPDIR}/manywheel/build_docker.sh"
MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=cpu "${TOPDIR}/manywheel/build_docker.sh"

GPU_ARCH_TYPE=cpu-aarch64 "${TOPDIR}/manywheel/build_docker.sh"

GPU_ARCH_TYPE=cpu-cxx11-abi "${TOPDIR}/manywheel/build_docker.sh"

for cuda_version in 12.1 11.8; do
Expand Down
8 changes: 8 additions & 0 deletions manywheel/build_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ case ${GPU_ARCH_TYPE} in
GPU_IMAGE=centos:7
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9"
;;
cpu-aarch64)
# CPU-only aarch64 image; builds the "final" stage and passes
# DEVTOOLSET_VERSION=10 as a Docker build arg.
# NOTE(review): MANY_LINUX_VERSION="aarch64" presumably selects
# manywheel/Dockerfile_aarch64 further down in this script -- confirm.
# NOTE(review): Dockerfile_aarch64 uses a fixed manylinux2014_aarch64 base
# image, so GPU_IMAGE may be unused for this target -- confirm.
TARGET=final
DOCKER_TAG=cpu-aarch64
LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux-cpu-aarch64
GPU_IMAGE=arm64v8/centos:7
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=10"
MANY_LINUX_VERSION="aarch64"
;;
cpu-cxx11-abi)
TARGET=final
DOCKER_TAG=cpu-cxx11-abi
Expand Down

0 comments on commit bb821d4

Please sign in to comment.