diff --git a/.github/workflows/build-conda-images.yml b/.github/workflows/build-conda-images.yml
index 1d388c4fa..533aea817 100644
--- a/.github/workflows/build-conda-images.yml
+++ b/.github/workflows/build-conda-images.yml
@@ -26,7 +26,7 @@ jobs:
     runs-on: linux.2xlarge
     strategy:
       matrix:
-        cuda_version: ["10.2", "11.3", "11.5", "11.6", "11.7", "cpu"]
+        cuda_version: ["10.2", "11.3", "11.5", "11.6", "11.7", "11.8", "cpu"]
     env:
       CUDA_VERSION: ${{ matrix.cuda_version }}
     steps:
diff --git a/common/install_cuda.sh b/common/install_cuda.sh
index 77d190011..4e9a41e9d 100644
--- a/common/install_cuda.sh
+++ b/common/install_cuda.sh
@@ -98,7 +98,7 @@ function install_116 {
 }
 
 function install_117 {
-    echo "Installing CUDA 11.7 and CuDNN 8.3"
+    echo "Installing CUDA 11.7 and CuDNN 8.5"
     rm -rf /usr/local/cuda-11.7 /usr/local/cuda
     # install CUDA 11.7.0 in the same container
     wget -q https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run
@@ -118,6 +118,29 @@ function install_117 {
     ldconfig
 }
 
+# Install the CUDA 11.8.0 toolkit plus cuDNN 8.5.0.96 and repoint the
+# /usr/local/cuda symlink at the new /usr/local/cuda-11.8 tree.
+function install_118 {
+    echo "Installing CUDA 11.8 and cuDNN 8.5"
+    rm -rf /usr/local/cuda-11.8 /usr/local/cuda
+    # install CUDA 11.8.0 in the same container
+    wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
+    chmod +x cuda_11.8.0_520.61.05_linux.run
+    ./cuda_11.8.0_520.61.05_linux.run --toolkit --silent
+    rm -f cuda_11.8.0_520.61.05_linux.run
+    rm -f /usr/local/cuda && ln -s /usr/local/cuda-11.8 /usr/local/cuda
+
+    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
+    mkdir tmp_cudnn && cd tmp_cudnn
+    wget -q https://ossci-linux.s3.amazonaws.com/cudnn-linux-x86_64-8.5.0.96_cuda11-archive.tar.xz -O cudnn-linux-x86_64-8.5.0.96_cuda11-archive.tar.xz
+    tar xf cudnn-linux-x86_64-8.5.0.96_cuda11-archive.tar.xz
+    cp -a cudnn-linux-x86_64-8.5.0.96_cuda11-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-x86_64-8.5.0.96_cuda11-archive/lib/* /usr/local/cuda/lib64/
+    cd ..
+    rm -rf tmp_cudnn
+    ldconfig
+}
+
 function prune_102 {
     echo "Pruning CUDA 10.2 and CuDNN"
     #####################################################################################
@@ -275,6 +298,41 @@ function prune_117 {
     rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.2.0 $CUDA_BASE/nsight-systems-2022.1.3
 }
 
+# Shrink the CUDA 11.8 install: run nvprune over the static libraries so they
+# keep only the architectures in GENCODE / GENCODE_CUDNN (GENCODE is replaced
+# by OVERRIDE_GENCODE when that is set), then delete the bundled visual tools.
+function prune_118 {
+    echo "Pruning CUDA 11.8 and cuDNN"
+    #####################################################################################
+    # CUDA 11.8 prune static libs
+    #####################################################################################
+    export NVPRUNE="/usr/local/cuda-11.8/bin/nvprune"
+    export CUDA_LIB_DIR="/usr/local/cuda-11.8/lib64"
+
+    # CUDA 11.8 adds Hopper (sm_90); list it so nvprune keeps that device code
+    export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
+    export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
+
+    if [[ -n "$OVERRIDE_GENCODE" ]]; then
+        export GENCODE=$OVERRIDE_GENCODE
+    fi
+
+    # all CUDA libs except CuDNN and CuBLAS (cudnn and cublas need arch 3.7 included)
+    ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
+      | xargs -I {} bash -c \
+                "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
+
+    # prune the cuBLAS static libs with the wider GENCODE_CUDNN list (no cuDNN archive is pruned here)
+    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
+    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
+
+    #####################################################################################
+    # CUDA 11.8 prune visual tools
+    #####################################################################################
+    export CUDA_BASE="/usr/local/cuda-11.8/"
+    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.3.0 $CUDA_BASE/nsight-systems-2022.4.2/
+}
+
 # idiomatic parameter and option handling in sh
 while test $# -gt 0
 do
@@ -289,6 +347,8 @@ do
         ;;
     11.7) install_117; prune_117
         ;;
+    11.8) install_118; prune_118
+        ;;
     *) echo "bad argument $1"; exit 1
         ;;
     esac
diff --git a/conda/Dockerfile b/conda/Dockerfile
index f4f4c834a..dbbb85c1c 100644
--- a/conda/Dockerfile
+++ b/conda/Dockerfile
@@ -64,6 +64,10 @@ FROM cuda as cuda11.7
 RUN bash ./install_cuda.sh 11.7
 ENV DESIRED_CUDA=11.7
 
+FROM cuda as cuda11.8
+RUN bash ./install_cuda.sh 11.8
+ENV DESIRED_CUDA=11.8
+
 # Install MNIST test data
 FROM base as mnist
 ADD ./common/install_mnist.sh install_mnist.sh
@@ -75,6 +79,7 @@ COPY --from=cuda11.3 /usr/local/cuda-11.3 /usr/local/cuda-11.3
 COPY --from=cuda11.5 /usr/local/cuda-11.5 /usr/local/cuda-11.5
 COPY --from=cuda11.6 /usr/local/cuda-11.6 /usr/local/cuda-11.6
 COPY --from=cuda11.7 /usr/local/cuda-11.7 /usr/local/cuda-11.7
+COPY --from=cuda11.8 /usr/local/cuda-11.8 /usr/local/cuda-11.8
 
 FROM ${BASE_TARGET} as final
 # Install LLVM
diff --git a/conda/build_all_docker.sh b/conda/build_all_docker.sh
index bc4397675..c91951b40 100755
--- a/conda/build_all_docker.sh
+++ b/conda/build_all_docker.sh
@@ -4,6 +4,6 @@ set -eou pipefail
 
 TOPDIR=$(git rev-parse --show-toplevel)
 
-for CUDA_VERSION in 11.7 11.6 11.5 11.3 10.2 cpu; do
+for CUDA_VERSION in 11.8 11.7 11.6 11.5 11.3 10.2 cpu; do
   CUDA_VERSION="${CUDA_VERSION}" conda/build_docker.sh
 done