Skip to content

Commit

Permalink
One-step ROCm manywheel/libtorch docker build (#1418)
Browse files (browse the repository at this point in the history)
* Use rocm/dev-centos-7:<rocm_version>-complete image with full ROCm install

* Remove ROCm install step and reinstate magma and MIOpen build steps

* Install full package for MIOpen, including headers and db files

Retained some of the disk-cleanup-related code from commit cb0912c.

* Use rocm/dev-ubuntu-20.04:<rocm_version>-complete image with full ROCm install

* Remove ROCm install and reinstate magma build from source

* Use --offload-arch instead of --amdgpu-target to silence warnings

* Use beefier runner instance for ROCm docker builds

* Typo

* Simplify ROCm targets
  • Loading branch information
jithunnair-amd committed Jun 8, 2023
1 parent 52541e8 commit 3f9562d
Show file tree
Hide file tree
Showing 8 changed files with 21 additions and 45 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-libtorch-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
run: |
libtorch/build_docker.sh
build-docker-rocm:
runs-on: ubuntu-22.04
runs-on: linux.12xlarge
strategy:
matrix:
rocm_version: ["5.4.2", "5.5"]
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-manywheel-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
run: |
manywheel/build_docker.sh
build-docker-rocm:
runs-on: ubuntu-22.04
runs-on: linux.12xlarge
strategy:
matrix:
rocm_version: ["5.4.2", "5.5"]
Expand Down
25 changes: 4 additions & 21 deletions common/install_miopen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@ if [[ $ROCM_INT -lt 40001 ]]; then
exit 0
fi

# CHANGED: Do not uninstall. To avoid out of disk space issues, we will copy lib over existing.
# Uninstall existing package, to avoid errors during later yum install indicating packages did not change.
#yum remove -y miopen-hip
yum remove -y miopen-hip

# Function to retry functions that sometimes timeout or have flaky failures
retry () {
Expand Down Expand Up @@ -77,24 +75,14 @@ elif [[ $ROCM_INT -ge 50100 ]] && [[ $ROCM_INT -lt 50200 ]]; then
elif [[ $ROCM_INT -ge 50000 ]] && [[ $ROCM_INT -lt 50100 ]]; then
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"
MIOPEN_BRANCH="release/rocm-rel-5.0-staging"
elif [[ $ROCM_INT -ge 40500 ]] && [[ $ROCM_INT -lt 50000 ]]; then
MIOPEN_CMAKE_COMMON_FLAGS="${MIOPEN_CMAKE_COMMON_FLAGS} -DMIOPEN_USE_HIP_KERNELS=Off -DMIOPEN_DEFAULT_FIND_MODE=Normal"
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"
MIOPEN_BRANCH="release/rocm-rel-4.5-staging"
elif [[ $ROCM_INT -ge 40300 ]] && [[ $ROCM_INT -lt 40500 ]]; then
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx900_64;gfx906_60;gfx906_64;gfx90878;gfx1030_36"
MIOPEN_BRANCH="release/rocm-rel-4.3-staging"
elif [[ $ROCM_INT -ge 40200 ]] && [[ $ROCM_INT -lt 40300 ]]; then
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx803_36;gfx803_64;gfx900_56;gfx900_64;gfx906_60;gfx906_64;gfx90878"
MIOPEN_BRANCH="rocm-4.2.x-staging"
else
echo "Unhandled ROCM_VERSION ${ROCM_VERSION}"
exit 1
fi

git clone https://github.com/ROCmSoftwarePlatform/MIOpen -b ${MIOPEN_BRANCH}
pushd MIOpen
# remove .git to save disk space ince CI runner was running out
# remove .git to save disk space since CI runner was running out
rm -rf .git
# Don't build MLIR to save docker build time
# since we are disabling MLIR backend for MIOpen anyway
Expand Down Expand Up @@ -122,18 +110,13 @@ PKG_CONFIG_PATH=/usr/local/lib/pkgconfig CXX=${ROCM_INSTALL_PATH}/llvm/bin/clang
-DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}/hip;${ROCM_INSTALL_PATH}"
make MIOpen -j $(nproc)

# CHANGED: Do not build package.
# Build MIOpen package
#make -j $(nproc) package
make -j $(nproc) package

# clean up since CI runner was running out of disk space
rm -rf /usr/local/cget

# CHANGED: Do not install package, just copy lib over existing.
#yum install -y miopen-*.rpm
dest=$(ls ${ROCM_INSTALL_PATH}/lib/libMIOpen.so.1.0.*)
rm -f ${dest}
cp lib/libMIOpen.so.1.0 ${dest}
yum install -y miopen-*.rpm

popd
rm -rf MIOpen
2 changes: 1 addition & 1 deletion common/install_rocm_magma.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ else
amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
fi
for arch in $amdgpu_targets; do
echo "DEVCCFLAGS += --amdgpu-target=$arch" >> make.inc
echo "DEVCCFLAGS += --offload-arch=$arch" >> make.inc
done
# hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
Expand Down
14 changes: 4 additions & 10 deletions libtorch/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -60,24 +60,18 @@ FROM cpu as rocm
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
ENV MKLROOT /opt/intel
ADD ./common/install_rocm.sh install_rocm.sh
# No need to install ROCm as base docker image should have full ROCm install
#ADD ./common/install_rocm.sh install_rocm.sh
ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
#ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
# gfortran and python needed for building magma from source for ROCm
RUN apt-get update -y && \
apt-get install gfortran -y && \
apt-get install python -y && \
apt-get clean

FROM rocm as rocm5.4.2
RUN ROCM_VERSION=5.4.2 bash ./install_rocm.sh && rm install_rocm.sh
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
#RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh

FROM rocm as rocm5.5
RUN ROCM_VERSION=5.5 bash ./install_rocm.sh && rm install_rocm.sh
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
#RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh

FROM ${BASE_TARGET} as final
# Install LLVM
Expand Down
4 changes: 2 additions & 2 deletions libtorch/build_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ case ${GPU_ARCH_TYPE} in
DOCKER_GPU_BUILD_ARG=""
;;
rocm)
BASE_TARGET=rocm${GPU_ARCH_VERSION}
BASE_TARGET=rocm
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-magma
GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
Expand Down
15 changes: 7 additions & 8 deletions manywheel/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,14 @@ FROM cpu_final as rocm_final
ARG ROCM_VERSION=3.7
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
# Install ROCm
ADD ./common/install_rocm.sh install_rocm.sh
RUN ROCM_VERSION=${ROCM_VERSION} bash ./install_rocm.sh && rm install_rocm.sh
# No need to install ROCm as base docker image should have full ROCm install
#ADD ./common/install_rocm.sh install_rocm.sh
#RUN ROCM_VERSION=${ROCM_VERSION} bash ./install_rocm.sh && rm install_rocm.sh
ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
# cmake3 is needed for the MIOpen build
RUN ln -sf /usr/local/bin/cmake /usr/bin/cmake3
### The following is now performed beforehand in a new GPU_IMAGE with magma and miopen preinstalled
#ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
#RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
#ADD ./common/install_miopen.sh install_miopen.sh
#RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
ADD ./common/install_miopen.sh install_miopen.sh
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
2 changes: 1 addition & 1 deletion manywheel/build_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ case ${GPU_ARCH_TYPE} in
TARGET=rocm_final
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux-rocm:${GPU_ARCH_VERSION}
GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-magma-miopen-staging
GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-complete
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
Expand Down

0 comments on commit 3f9562d

Please sign in to comment.