diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index 69af67803..df890f038 100644 --- a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -12,6 +12,7 @@ on: paths: - .github/workflows/build-manywheel-images.yml - manywheel/Dockerfile + - manywheel/Dockerfile_aarch64 - manywheel/Dockerfile_cxx11-abi - manywheel/build_docker.sh - 'common/*' @@ -19,6 +20,7 @@ on: paths: - .github/workflows/build-manywheel-images.yml - manywheel/Dockerfile + - manywheel/Dockerfile_aarch64 - manywheel/Dockerfile_cxx11-abi - 'common/*' - manywheel/build_docker.sh @@ -82,6 +84,21 @@ jobs: - name: Build Docker Image run: | manywheel/build_docker.sh + build-docker-cpu-aarch64: + runs-on: linux.t4g.2xlarge + env: + GPU_ARCH_TYPE: cpu-aarch64 + steps: + - name: Checkout PyTorch + uses: actions/checkout@v3 + - name: Authenticate if WITH_PUSH + run: | + if [[ "${WITH_PUSH}" == true ]]; then + echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin + fi + - name: Build Docker Image + run: | + manywheel/build_docker.sh build-docker-cpu-cxx11-abi: runs-on: ubuntu-22.04 env: diff --git a/aarch64_linux/aarch64_ci_setup.sh b/aarch64_linux/aarch64_ci_setup.sh index c7065056a..6d2d780fe 100755 --- a/aarch64_linux/aarch64_ci_setup.sh +++ b/aarch64_linux/aarch64_ci_setup.sh @@ -9,12 +9,6 @@ CONDA_EXE=/opt/conda/bin/conda PATH=/opt/conda/bin:$PATH LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH -############################################################################### -# Install OS dependent packages -############################################################################### -yum -y install epel-release -yum -y install less zstd libgomp - ############################################################################### # Install conda # disable SSL_verify due to getting "Could not find a suitable TLS CA certificate bundle, invalid path" @@ -26,19 +20,6 @@ chmod +x /mambaforge.sh /mambaforge.sh -b -p /opt/conda rm /mambaforge.sh /opt/conda/bin/conda config --set ssl_verify False -/opt/conda/bin/conda install -y -c conda-forge python=${DESIRED_PYTHON} numpy pyyaml setuptools patchelf pygit2 openblas +/opt/conda/bin/conda install -y -c conda-forge python=${DESIRED_PYTHON} numpy pyyaml setuptools patchelf pygit2 openblas ninja scons python --version conda --version - -############################################################################### -# Exec libglfortran.a hack -# -# libgfortran.a from quay.io/pypa/manylinux2014_aarch64 is not compiled with -fPIC. -# This causes __stack_chk_guard@@GLIBC_2.17 on pytorch build. To solve, get -# ubuntu's libgfortran.a which is compiled with -fPIC -############################################################################### -cd ~/ -curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-1ubuntu1_arm64.deb -ar x ~/libgfortran-10-dev.deb -tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/ -cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/ diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index 125cfe9fd..5d80a95e4 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# encoding: UTF-8 import os import subprocess @@ -6,36 +7,36 @@ from typing import List -'''' -Helper for getting paths for Python -''' def list_dir(path: str) -> List[str]: - return subprocess.check_output(["ls", "-1", path]).decode().split("\n") + '''' + Helper for getting paths for Python + ''' + return subprocess.check_output(["ls", "-1", path]).decode().split("\n") -''' -Using ArmComputeLibrary for aarch64 PyTorch -''' def build_ArmComputeLibrary(git_clone_flags: str = "") -> None: + ''' + Using ArmComputeLibrary for aarch64 PyTorch + ''' print('Building Arm Compute Library') os.system("cd / && mkdir /acl") os.system(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.05.1 {git_clone_flags}") os.system('sed -i -e \'s/"armv8.2-a"/"armv8-a"/g\' ComputeLibrary/SConscript; ' 'sed -i -e \'s/-march=armv8.2-a+fp16/-march=armv8-a/g\' ComputeLibrary/SConstruct; ' 'sed -i -e \'s/"-march=armv8.2-a"/"-march=armv8-a"/g\' ComputeLibrary/filedefs.json') - os.system(f"cd ComputeLibrary; export acl_install_dir=/acl; " \ - f"scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8.2-a multi_isa=1 build=native build_dir=$acl_install_dir/build; " \ - f"cp -r arm_compute $acl_install_dir; " \ - f"cp -r include $acl_install_dir; " \ - f"cp -r utils $acl_install_dir; " \ - f"cp -r support $acl_install_dir; " \ - f"cp -r src $acl_install_dir; cd /") - - -''' -Complete wheel build and put in artifact location -''' + os.system("cd ComputeLibrary; export acl_install_dir=/acl; " + "scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8.2-a multi_isa=1 build=native build_dir=$acl_install_dir/build; " + "cp -r arm_compute $acl_install_dir; " + "cp -r include $acl_install_dir; " + "cp -r utils $acl_install_dir; " + "cp -r support $acl_install_dir; " + "cp -r src $acl_install_dir; cd /") + + def complete_wheel(folder: str): + ''' + Complete wheel build and put in artifact location + ''' wheel_name = list_dir(f"/{folder}/dist")[0] if "pytorch" in folder: @@ -54,10 +55,10 @@ def complete_wheel(folder: str): return repaired_wheel_name -''' -Parse inline arguments -''' def parse_arguments(): + ''' + Parse inline arguments + ''' from argparse import ArgumentParser parser = ArgumentParser("AARCH64 wheels python CD") parser.add_argument("--debug", action="store_true") @@ -67,11 +68,10 @@ def parse_arguments(): return parser.parse_args() -''' -Entry Point -''' if __name__ == '__main__': - + ''' + Entry Point + ''' args = parse_arguments() enable_mkldnn = args.enable_mkldnn repo = Repository('/pytorch') @@ -80,15 +80,14 @@ def parse_arguments(): branch = 'master' git_clone_flags = " --depth 1 --shallow-submodules" - os.system(f"conda install -y ninja scons") print('Building PyTorch wheel') build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " - os.system(f"python setup.py clean") + os.system("python setup.py clean") if branch == 'nightly' or branch == 'master': - build_date = subprocess.check_output(['git','log','--pretty=format:%cs','-1'], cwd='/pytorch').decode().replace('-','') - version = subprocess.check_output(['cat','version.txt'], cwd='/pytorch').decode().strip()[:-2] + build_date = subprocess.check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '') + version = subprocess.check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2] build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " if branch.startswith("v1.") or branch.startswith("v2."): build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " @@ -96,10 +95,10 @@ def parse_arguments(): build_ArmComputeLibrary(git_clone_flags) print("build pytorch with mkldnn+acl backend") build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " \ - "ACL_ROOT_DIR=/acl " \ - "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " \ - "ACL_INCLUDE_DIR=/acl/build " \ - "ACL_LIBRARY=/acl/build " + "ACL_ROOT_DIR=/acl " \ + "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " \ + "ACL_INCLUDE_DIR=/acl/build " \ + "ACL_LIBRARY=/acl/build " else: print("build pytorch without mkldnn backend") diff --git a/manywheel/Dockerfile_aarch64 b/manywheel/Dockerfile_aarch64 new file mode 100644 index 000000000..abfc2fd84 --- /dev/null +++ b/manywheel/Dockerfile_aarch64 @@ -0,0 +1,86 @@ +FROM quay.io/pypa/manylinux2014_aarch64 as base + + +# Graviton needs GCC 10 for the build +ARG DEVTOOLSET_VERSION=10 + +# Language variabes +ENV LC_ALL=en_US.UTF-8 +ENV LANG=en_US.UTF-8 +ENV LANGUAGE=en_US.UTF-8 + +# Installed needed OS packages. This is to support all +# the binary builds (torch, vision, audio, text, data) +RUN yum -y install epel-release +RUN yum -y update +RUN yum install -y \ + autoconf \ + automake \ + bison \ + bzip2 \ + curl \ + diffutils \ + file \ + git \ + make \ + patch \ + perl \ + unzip \ + util-linux \ + wget \ + which \ + xz \ + yasm \ + less \ + zstd \ + libgomp \ + devtoolset-${DEVTOOLSET_VERSION}-gcc \ + devtoolset-${DEVTOOLSET_VERSION}-gcc-c++ \ + devtoolset-${DEVTOOLSET_VERSION}-gcc-gfortran \ + devtoolset-${DEVTOOLSET_VERSION}-binutils + +# Ensure the expected devtoolset is used +ENV PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH +ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH + + +# git236+ would refuse to run git commands in repos owned by other users +# Which causes version check to fail, as pytorch repo is bind-mounted into the image +# Override this behaviour by treating every folder as safe +# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327 +RUN git config --global --add safe.directory "*" + + +############################################################################### +# libglfortran.a hack +# +# libgfortran.a from quay.io/pypa/manylinux2014_aarch64 is not compiled with -fPIC. +# This causes __stack_chk_guard@@GLIBC_2.17 on pytorch build. To solve, get +# ubuntu's libgfortran.a which is compiled with -fPIC +# NOTE: Need a better way to get this library as Ubuntu's package can be removed by the vender, or changed +############################################################################### +RUN cd ~/ \ + && curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-1ubuntu1_arm64.deb \ + && ar x ~/libgfortran-10-dev.deb \ + && tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/ \ + && cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/ + +# install cmake +RUN yum install -y cmake3 && \ + ln -s /usr/bin/cmake3 /usr/bin/cmake + +FROM base as openssl +# Install openssl (this must precede `build python` step) +# (In order to have a proper SSL module, Python is compiled +# against a recent openssl [see env vars above], which is linked +# statically. We delete openssl afterwards.) +ADD ./common/install_openssl.sh install_openssl.sh +RUN bash ./install_openssl.sh && rm install_openssl.sh +ENV SSL_CERT_FILE=/opt/_internal/certs.pem + +FROM openssl as final +# remove unncessary python versions +RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2 +RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4 +RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6 +RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6 diff --git a/manywheel/build_all_docker.sh b/manywheel/build_all_docker.sh index 97c1f89ab..2bd720f2f 100644 --- a/manywheel/build_all_docker.sh +++ b/manywheel/build_all_docker.sh @@ -7,6 +7,8 @@ TOPDIR=$(git rev-parse --show-toplevel) GPU_ARCH_TYPE=cpu "${TOPDIR}/manywheel/build_docker.sh" MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=cpu "${TOPDIR}/manywheel/build_docker.sh" +GPU_ARCH_TYPE=cpu-aarch64 "${TOPDIR}/manywheel/build_docker.sh" + GPU_ARCH_TYPE=cpu-cxx11-abi "${TOPDIR}/manywheel/build_docker.sh" for cuda_version in 12.1 11.8; do diff --git a/manywheel/build_docker.sh b/manywheel/build_docker.sh index 38d043ff4..e547b4275 100755 --- a/manywheel/build_docker.sh +++ b/manywheel/build_docker.sh @@ -20,6 +20,14 @@ case ${GPU_ARCH_TYPE} in GPU_IMAGE=centos:7 DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9" ;; + cpu-aarch64) + TARGET=final + DOCKER_TAG=cpu-aarch64 + LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux-cpu-aarch64 + GPU_IMAGE=arm64v8/centos:7 + DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=10" + MANY_LINUX_VERSION="aarch64" + ;; cpu-cxx11-abi) TARGET=final DOCKER_TAG=cpu-cxx11-abi