From 01b8210c12417b908f5783154ac1805b62a0bd48 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Mon, 17 Jun 2024 09:19:57 -0400 Subject: [PATCH] Python 313 work - rebase on main (#1868) * Remove triton constraint for py312 (#1846) * Cache OpenBLAS to docker image for SBSA builds (#1842) * apply openblas cache for cpu-aarch64 * reapply for cuda-aarch64 * [MacOS] Don't build wheel while building libtorch Not sure why this was ever done twice * Allow validate doker images to be called from different workflow (#1850) * Allow validate doker images to be called from different workflow * Revert "[MacOS] Don't build wheel while building libtorch" This reverts commit d88495a830ea56fad85e71affc44b29da319bf37. * [MacOS] Don't build libtorch twice (take 2) By not invoking `tools/build_libtorch.py` as as it's not done on Linux * [MacOs][LibTorch] Copy libomp.dylib into libtorch package * Update cudnn from v8 to v9 across CUDA versions and x86/arm (#1847) * Update cudnn to v9.1.0.70 for cuda11.8, cuda12.1, and cuda12.4 * Add CUDNN_VERSION variable * Remove 2 spaces for install_cu124 * trivial fix * Fix DEPS_LIST and DEPS_SONAME for x86 Update cudnn to v9 for arm cuda binary as well * libcudnn_adv_infer/libcudnn_adv_train becomes libcudnn_adv * Change DEPS due to cudnn v9 libraries name changes (and additions) * Fix lint * Add missing changes to cu121/cu124 * Change OpenSSL URL (#1854) * Change OpenSSL URL * Change to use openssl URL (but no longer ftp!) * Update build-manywheel-images.yml - Add a note about manylinux_2_28 state * Revert "Update cudnn from v8 to v9 across CUDA versions and x86/arm" (#1855) This reverts commit 5783bcca9b058422ef8cc947df29f53df1b3eedd. 
* Don't run torch.compile on runtime images in docker validations (#1858) * Don't run torch.compile on runtime images * test * Don't run torch.compile on runtime images in docker validations * Update cudnn from v8 to v9 across CUDA versions and x86/arm (#1857) * Update cudnn to v9.1.0.70 for cuda11.8, cuda12.1, and cuda12.4 * Add CUDNN_VERSION variable * Remove 2 spaces for install_cu124 * trivial fix * Fix DEPS_LIST and DEPS_SONAME for x86 Update cudnn to v9 for arm cuda binary as well * libcudnn_adv_infer/libcudnn_adv_train becomes libcudnn_adv * Change DEPS due to cudnn v9 libraries name changes (and additions) * Fix lint * Add missing changes to cu121/cu124 * Fix aarch64 cuda typos * Update validate-docker-images.yml - disable runtime error check for now * Update validate-docker-images.yml - use validation_runner rather then hardcoded one * Update validate-docker-images.yml - fix MATRIX_GPU_ARCH_TYPE setting for cpu only workflows * [aarch64 cuda cudnn] Add RUNPATH to libcudnn_graph.so.9 (#1859) * Add executorch to pypi prep, promotion and validation scripts (#1860) * Add AOTriton install step for ROCm manylinux images (#1862) * Add AOTriton install step for ROCm * No common_utils.sh needed * temporary disable runtime error check * Add python 3.13 builder (#1845) --------- Co-authored-by: Ting Lu <92425201+tinglvv@users.noreply.github.com> Co-authored-by: Nikita Shulga Co-authored-by: Wei Wang <143543872+nWEIdia@users.noreply.github.com> Co-authored-by: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> --- .github/scripts/validate_binaries.sh | 2 +- .github/workflows/build-manywheel-images.yml | 1 + ..._images.yml => validate-docker-images.yml} | 43 +++++++++++-- aarch64_linux/aarch64_wheel_ci_build.py | 57 ++++-------------- analytics/validate_pypi_staging.py | 15 +++-- common/aotriton_version.txt | 5 ++ common/install_aotriton.sh | 21 +++++++ common/install_cuda.sh | 32 +++++----- common/install_cuda_aarch64.sh | 10 ++-- 
common/install_openblas.sh | 21 +++++++ conda/build_pytorch.sh | 4 +- conda/pytorch-nightly/build.sh | 6 +- libtorch/Dockerfile | 6 ++ manywheel/Dockerfile | 6 ++ manywheel/Dockerfile_aarch64 | 7 +++ manywheel/Dockerfile_cuda_aarch64 | 9 ++- manywheel/build_cuda.sh | 60 ++++++++++--------- manywheel/build_scripts/build_utils.sh | 2 +- release/pypi/promote_pypi_to_production.sh | 6 +- release/pypi/promote_pypi_to_staging.sh | 9 ++- release/release_versions.sh | 7 ++- test/smoke_test/smoke_test.py | 13 +++- wheel/build_wheel.sh | 7 +-- windows/internal/cuda_install.bat | 6 +- 24 files changed, 224 insertions(+), 131 deletions(-) rename .github/workflows/{validate_docker_images.yml => validate-docker-images.yml} (54%) create mode 100644 common/aotriton_version.txt create mode 100644 common/install_aotriton.sh create mode 100644 common/install_openblas.sh diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index bf5c15690..3d367e399 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -62,7 +62,7 @@ else if [[ ${TARGET_OS} == 'windows' ]]; then python ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} else - python3 ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} + python3 ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} --runtime-error-check "disabled" fi if [[ ${TARGET_OS} == 'macos-arm64' ]]; then diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index 36b21cd9b..243e302f4 100644 --- a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -60,6 +60,7 @@ jobs: - name: Build Docker Image run: | manywheel/build_docker.sh + # NOTE: manylinux_2_28 are still experimental, see https://github.com/pytorch/pytorch/issues/123649 build-docker-cuda-manylinux_2_28: runs-on: linux.12xlarge strategy: diff --git a/.github/workflows/validate_docker_images.yml b/.github/workflows/validate-docker-images.yml 
similarity index 54% rename from .github/workflows/validate_docker_images.yml rename to .github/workflows/validate-docker-images.yml index d82af1a26..02fb55e73 100644 --- a/.github/workflows/validate_docker_images.yml +++ b/.github/workflows/validate-docker-images.yml @@ -1,5 +1,22 @@ -name: Validate Docker Images (with Matrix Generation) +name: Validate Nightly Docker Images on: + workflow_call: + inputs: + channel: + description: 'PyTorch channel to use (nightly, test, release, all)' + required: true + type: string + default: 'nightly' + generate_dockerhub_images: + description: 'Generate Docker Hub images (strip ghcr.io/ prefix for release)' + default: false + required: false + type: boolean + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string workflow_dispatch: inputs: channel: @@ -15,8 +32,13 @@ on: description: 'Generate Docker Hub images (strip ghcr.io/ prefix for release)' default: false required: false - type: boolean - + type: boolean + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string + jobs: generate-matrix: uses: pytorch/test-infra/.github/workflows/generate_docker_release_matrix.yml@main @@ -31,7 +53,7 @@ jobs: fail-fast: false uses: pytorch/test-infra/.github/workflows/linux_job.yml@main with: - runner: linux.g5.4xlarge.nvidia.gpu + runner: ${{ matrix.validation_runner }} repository: "pytorch/builder" ref: ${{ inputs.ref || github.ref }} job-name: cuda${{ matrix.cuda }}-cudnn${{ matrix.cudnn_version }}-${{ matrix.image_type }} @@ -40,7 +62,16 @@ jobs: timeout: 180 script: | set -ex - export MATRIX_GPU_ARCH_TYPE="cuda" + export MATRIX_GPU_ARCH_VERSION="${{ matrix.cuda }}" + export MATRIX_IMAGE_TYPE="${{ matrix.image_type }}" export TARGET_OS="linux" - python test/smoke_test/smoke_test.py --package torchonly --runtime-error-check enabled + TORCH_COMPILE_CHECK="--torch-compile-check enabled" + if [[ ${MATRIX_IMAGE_TYPE} == "runtime" ]]; 
then + TORCH_COMPILE_CHECK="--torch-compile-check disabled" + fi + export MATRIX_GPU_ARCH_TYPE="cuda" + if [[ ${MATRIX_GPU_ARCH_VERSION} == "cpu" ]]; then + export MATRIX_GPU_ARCH_TYPE="cpu" + fi + python test/smoke_test/smoke_test.py --package torchonly --runtime-error-check disabled ${TORCH_COMPILE_CHECK} diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index 458de5464..151a2fe7e 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -14,44 +14,6 @@ def list_dir(path: str) -> List[str]: """ return check_output(["ls", "-1", path]).decode().split("\n") - -def build_OpenBLAS() -> None: - ''' - Building OpenBLAS, because the package in many linux is old - ''' - print('Building OpenBLAS') - openblas_build_flags = [ - "NUM_THREADS=128", - "USE_OPENMP=1", - "NO_SHARED=0", - "DYNAMIC_ARCH=1", - "TARGET=ARMV8", - "CFLAGS=-O3", - ] - openblas_checkout_dir = "OpenBLAS" - - check_call( - [ - "git", - "clone", - "https://github.com/OpenMathLib/OpenBLAS.git", - "-b", - "v0.3.25", - "--depth", - "1", - "--shallow-submodules", - ] - ) - - check_call(["make", "-j8"] - + openblas_build_flags, - cwd=openblas_checkout_dir) - check_call(["make", "-j8"] - + openblas_build_flags - + ["install"], - cwd=openblas_checkout_dir) - - def build_ArmComputeLibrary() -> None: """ Using ArmComputeLibrary for aarch64 PyTorch @@ -103,7 +65,7 @@ def update_wheel(wheel_path) -> None: os.system(f"unzip {wheel_path} -d {folder}/tmp") libs_to_copy = [ "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12", - "/usr/local/cuda/lib64/libcudnn.so.8", + "/usr/local/cuda/lib64/libcudnn.so.9", "/usr/local/cuda/lib64/libcublas.so.12", "/usr/local/cuda/lib64/libcublasLt.so.12", "/usr/local/cuda/lib64/libcudart.so.12", @@ -116,12 +78,13 @@ def update_wheel(wheel_path) -> None: "/usr/local/cuda/lib64/libnvJitLink.so.12", "/usr/local/cuda/lib64/libnvrtc.so.12", "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.4", - 
"/usr/local/cuda/lib64/libcudnn_adv_infer.so.8", - "/usr/local/cuda/lib64/libcudnn_adv_train.so.8", - "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8", - "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8", - "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8", - "/usr/local/cuda/lib64/libcudnn_ops_train.so.8", + "/usr/local/cuda/lib64/libcudnn_adv.so.9", + "/usr/local/cuda/lib64/libcudnn_cnn.so.9", + "/usr/local/cuda/lib64/libcudnn_graph.so.9", + "/usr/local/cuda/lib64/libcudnn_ops.so.9", + "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9", + "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9", + "/usr/local/cuda/lib64/libcudnn_heuristic.so.9", "/opt/conda/envs/aarch64_env/lib/libgomp.so.1", "/opt/OpenBLAS/lib/libopenblas.so.0", "/acl/build/libarm_compute.so", @@ -134,6 +97,9 @@ def update_wheel(wheel_path) -> None: os.system( f"cd {folder}/tmp/torch/lib/; patchelf --set-rpath '$ORIGIN' {folder}/tmp/torch/lib/libtorch_cuda.so" ) + os.system( + f"cd {folder}/tmp/torch/lib/; patchelf --set-rpath '$ORIGIN' {folder}/tmp/torch/lib/libcudnn_graph.so.9" + ) os.mkdir(f"{folder}/cuda_wheel") os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *") shutil.move( @@ -227,7 +193,6 @@ def parse_arguments(): elif branch.startswith(("v1.", "v2.")): build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " - build_OpenBLAS() if enable_mkldnn: build_ArmComputeLibrary() print("build pytorch with mkldnn+acl backend") diff --git a/analytics/validate_pypi_staging.py b/analytics/validate_pypi_staging.py index 9314aa204..5321313df 100644 --- a/analytics/validate_pypi_staging.py +++ b/analytics/validate_pypi_staging.py @@ -15,13 +15,20 @@ "win_amd64", "macosx_11_0_arm64", ] -PYTHON_VERSIONS = ["cp38", "cp39", "cp310", "cp311", "cp312"] +PYTHON_VERSIONS = [ + "cp38", + "cp39", + "cp310", + "cp311", + "cp312" + ] S3_PYPI_STAGING = "pytorch-backup" PACKAGE_RELEASES = { - "torch": "2.3.0", - "torchvision": "0.18.0", - 
"torchaudio": "2.3.0", + "torch": "2.3.1", + "torchvision": "0.18.1", + "torchaudio": "2.3.1", "torchtext": "0.18.0", + "executorch": "0.2.1" } PATTERN_V = "Version:" diff --git a/common/aotriton_version.txt b/common/aotriton_version.txt new file mode 100644 index 000000000..d13e9d756 --- /dev/null +++ b/common/aotriton_version.txt @@ -0,0 +1,5 @@ +0.6b +manylinux_2_17 +rocm6 +04b5df8c8123f90cba3ede7e971e6fbc6040d506 +3db6ecbc915893ff967abd6e1b43bd5f54949868873be60dc802086c3863e648 diff --git a/common/install_aotriton.sh b/common/install_aotriton.sh new file mode 100644 index 000000000..0241548b8 --- /dev/null +++ b/common/install_aotriton.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +set -ex + +TARBALL='aotriton.tar.bz2' +# This read command alwasy returns with exit code 1 +read -d "\n" VER MANYLINUX ROCMBASE PINNED_COMMIT SHA256 < aotriton_version.txt || true +ARCH=$(uname -m) +AOTRITON_INSTALL_PREFIX="$1" +AOTRITON_URL="https://github.com/ROCm/aotriton/releases/download/${VER}/aotriton-${VER}-${MANYLINUX}_${ARCH}-${ROCMBASE}.tar.bz2" + +cd "${AOTRITON_INSTALL_PREFIX}" +# Must use -L to follow redirects +curl -L --retry 3 -o "${TARBALL}" "${AOTRITON_URL}" +ACTUAL_SHA256=$(sha256sum "${TARBALL}" | cut -d " " -f 1) +if [ "${SHA256}" != "${ACTUAL_SHA256}" ]; then + echo -n "Error: The SHA256 of downloaded tarball is ${ACTUAL_SHA256}," + echo " which does not match the expected value ${SHA256}." 
+ exit 1 # non-zero status so the Docker RUN step fails on checksum mismatch +fi +tar xf "${TARBALL}" && rm -rf "${TARBALL}" diff --git a/common/install_cuda.sh b/common/install_cuda.sh index ad06342be..d79c8c1e7 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -2,6 +2,8 @@ set -ex +CUDNN_VERSION=9.1.0.70 + function install_cusparselt_040 { # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html mkdir tmp_cusparselt && pushd tmp_cusparselt @@ -25,7 +27,7 @@ function install_cusparselt_052 { } function install_118 { - echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15 and cuSparseLt-0.4.0" + echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL 2.15 and cuSparseLt-0.4.0" rm -rf /usr/local/cuda-11.8 /usr/local/cuda # install CUDA 11.8.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run @@ -36,10 +38,10 @@ function install_118 { # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement mkdir tmp_cudnn && cd tmp_cudnn - wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz -O cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz - tar xf cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz - cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/include/* /usr/local/cuda/include/ - cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/lib/* /usr/local/cuda/lib64/ + wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz + tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz + cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/include/* /usr/local/cuda/include/ + cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/lib/* /usr/local/cuda/lib64/ cd ..
rm -rf tmp_cudnn @@ -58,7 +60,7 @@ function install_118 { } function install_121 { - echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2" + echo "Installing CUDA 12.1 and cuDNN ${CUDNN_VERSION} and NCCL 2.20.5 and cuSparseLt-0.5.2" rm -rf /usr/local/cuda-12.1 /usr/local/cuda # install CUDA 12.1.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run @@ -69,10 +71,10 @@ function install_121 { # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement mkdir tmp_cudnn && cd tmp_cudnn - wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz - tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz - cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/ - cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/ + wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz + tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz + cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ + cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ cd .. 
rm -rf tmp_cudnn @@ -91,7 +93,7 @@ function install_121 { } function install_124 { - echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2" + echo "Installing CUDA 12.4 and cuDNN ${CUDNN_VERSION} and NCCL 2.20.5 and cuSparseLt-0.5.2" rm -rf /usr/local/cuda-12.4 /usr/local/cuda # install CUDA 12.4.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run @@ -102,10 +104,10 @@ function install_124 { # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement mkdir tmp_cudnn && cd tmp_cudnn - wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz - tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz - cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/ - cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/ + wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz + tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz + cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ + cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ cd .. 
rm -rf tmp_cudnn diff --git a/common/install_cuda_aarch64.sh b/common/install_cuda_aarch64.sh index ba97385bd..51fd8c1f2 100644 --- a/common/install_cuda_aarch64.sh +++ b/common/install_cuda_aarch64.sh @@ -14,7 +14,7 @@ function install_cusparselt_052 { } function install_124 { - echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2" + echo "Installing CUDA 12.4 and cuDNN 9.1 and NCCL 2.20.5 and cuSparseLt-0.5.2" rm -rf /usr/local/cuda-12.4 /usr/local/cuda # install CUDA 12.4.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux_sbsa.run @@ -25,10 +25,10 @@ function install_124 { # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement mkdir tmp_cudnn && cd tmp_cudnn - wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz - tar xf cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz - cp -a cudnn-linux-sbsa-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/ - cp -a cudnn-linux-sbsa-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/ + wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz -O cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz + tar xf cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz + cp -a cudnn-linux-sbsa-9.1.0.70_cuda12-archive/include/* /usr/local/cuda/include/ + cp -a cudnn-linux-sbsa-9.1.0.70_cuda12-archive/lib/* /usr/local/cuda/lib64/ cd .. 
rm -rf tmp_cudnn diff --git a/common/install_openblas.sh b/common/install_openblas.sh new file mode 100644 index 000000000..e2deec811 --- /dev/null +++ b/common/install_openblas.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +set -ex + +cd / +git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.25 --depth 1 --shallow-submodules + + +OPENBLAS_BUILD_FLAGS=" +NUM_THREADS=128 +USE_OPENMP=1 +NO_SHARED=0 +DYNAMIC_ARCH=1 +TARGET=ARMV8 +CFLAGS=-O3 +" + +OPENBLAS_CHECKOUT_DIR="OpenBLAS" + +make -j8 ${OPENBLAS_BUILD_FLAGS} -C ${OPENBLAS_CHECKOUT_DIR} +make -j8 ${OPENBLAS_BUILD_FLAGS} install -C ${OPENBLAS_CHECKOUT_DIR} \ No newline at end of file diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 56bb7654c..9f0649c50 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -287,9 +287,9 @@ else TRITON_VERSION=$(cat $pytorch_rootdir/.ci/docker/triton_version.txt) if [[ -n "$OVERRIDE_PACKAGE_VERSION" && "$OVERRIDE_PACKAGE_VERSION" =~ .*dev.* ]]; then TRITON_SHORTHASH=$(cut -c1-10 $pytorch_rootdir/.github/ci_commit_pins/triton.txt) - export CONDA_TRITON_CONSTRAINT=" - torchtriton==${TRITON_VERSION}+${TRITON_SHORTHASH} # [py < 312]" + export CONDA_TRITON_CONSTRAINT=" - torchtriton==${TRITON_VERSION}+${TRITON_SHORTHASH} # [py < 313]" else - export CONDA_TRITON_CONSTRAINT=" - torchtriton==${TRITON_VERSION} # [py < 312]" + export CONDA_TRITON_CONSTRAINT=" - torchtriton==${TRITON_VERSION} # [py < 313]" fi fi diff --git a/conda/pytorch-nightly/build.sh b/conda/pytorch-nightly/build.sh index 63a8fc501..eee3a3d86 100755 --- a/conda/pytorch-nightly/build.sh +++ b/conda/pytorch-nightly/build.sh @@ -59,12 +59,12 @@ if [[ -n "$build_with_cuda" ]]; then if [[ $CUDA_VERSION == 11.8* ]]; then TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;3.7+PTX;9.0" #for cuda 11.8 include all dynamic loading libraries - DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.8/extras/CUPTI/lib64/libcupti.so.11.8 /usr/local/cuda/lib64/libcusparseLt.so.0) + 
DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.9 /usr/local/cuda-11.8/extras/CUPTI/lib64/libcupti.so.11.8 /usr/local/cuda/lib64/libcusparseLt.so.0) elif [[ $CUDA_VERSION == 12.1* || $CUDA_VERSION == 12.4* ]]; then # cuda 12 does not support sm_3x TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;9.0" - # for cuda 12.1 (12.4) we use cudnn 8.8 (8.9) and include all dynamic loading libraries - DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12 /usr/local/cuda/lib64/libcusparseLt.so.0) + # for cuda 12.1 (12.4) we use cudnn 9.1 and include all dynamic loading libraries + DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.9 /usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12 /usr/local/cuda/lib64/libcusparseLt.so.0) fi if [[ -n "$OVERRIDE_TORCH_CUDA_ARCH_LIST" ]]; then TORCH_CUDA_ARCH_LIST="$OVERRIDE_TORCH_CUDA_ARCH_LIST" diff --git a/libtorch/Dockerfile b/libtorch/Dockerfile index 27c6db287..c10449fc1 100644 --- a/libtorch/Dockerfile +++ b/libtorch/Dockerfile @@ -81,6 +81,12 @@ RUN apt-get update -y && \ RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh +# Install AOTriton +COPY ./common/aotriton_version.txt aotriton_version.txt +COPY ./common/install_aotriton.sh install_aotriton.sh +RUN bash ./install_aotriton.sh /opt/rocm && rm install_aotriton.sh aotriton_version.txt +ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton + FROM ${BASE_TARGET} as final COPY --from=openssl /opt/openssl /opt/openssl # Install patchelf diff --git a/manywheel/Dockerfile b/manywheel/Dockerfile index df8f5acd8..2c805baa3 100644 --- a/manywheel/Dockerfile +++ b/manywheel/Dockerfile @@ -173,3 +173,9 @@ ADD ./common/install_rocm_magma.sh install_rocm_magma.sh RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh ADD ./common/install_miopen.sh install_miopen.sh RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh + +# Install AOTriton +COPY ./common/aotriton_version.txt 
aotriton_version.txt +COPY ./common/install_aotriton.sh install_aotriton.sh +RUN bash ./install_aotriton.sh /opt/rocm && rm install_aotriton.sh aotriton_version.txt +ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton diff --git a/manywheel/Dockerfile_aarch64 b/manywheel/Dockerfile_aarch64 index abfc2fd84..b0716d158 100644 --- a/manywheel/Dockerfile_aarch64 +++ b/manywheel/Dockerfile_aarch64 @@ -78,9 +78,16 @@ ADD ./common/install_openssl.sh install_openssl.sh RUN bash ./install_openssl.sh && rm install_openssl.sh ENV SSL_CERT_FILE=/opt/_internal/certs.pem +FROM base as openblas +# Install openblas +ADD ./common/install_openblas.sh install_openblas.sh +RUN bash ./install_openblas.sh && rm install_openblas.sh + FROM openssl as final # remove unncessary python versions RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2 RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4 RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6 RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6 +COPY --from=openblas /opt/OpenBLAS/ /opt/OpenBLAS/ +ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH \ No newline at end of file diff --git a/manywheel/Dockerfile_cuda_aarch64 b/manywheel/Dockerfile_cuda_aarch64 index 74c60b299..d243b06b8 100644 --- a/manywheel/Dockerfile_cuda_aarch64 +++ b/manywheel/Dockerfile_cuda_aarch64 @@ -74,10 +74,17 @@ ARG BASE_CUDA_VERSION ADD ./common/install_magma.sh install_magma.sh RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh +FROM base as openblas +# Install openblas +ADD ./common/install_openblas.sh install_openblas.sh +RUN bash ./install_openblas.sh && rm install_openblas.sh + FROM final as cuda_final ARG BASE_CUDA_VERSION RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION} COPY --from=cuda /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION} COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION} +COPY --from=openblas 
/opt/OpenBLAS/ /opt/OpenBLAS/ RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda -ENV PATH=/usr/local/cuda/bin:$PATH \ No newline at end of file +ENV PATH=/usr/local/cuda/bin:$PATH +ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH \ No newline at end of file diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index ffc280e42..1d8e8b295 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -149,13 +149,14 @@ if [[ $CUDA_VERSION == "12.1" || $CUDA_VERSION == "12.4" ]]; then if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then echo "Bundling with cudnn and cublas." DEPS_LIST+=( - "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_adv_train.so.8" - "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8" - "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_ops_train.so.8" - "/usr/local/cuda/lib64/libcudnn.so.8" + "/usr/local/cuda/lib64/libcudnn_adv.so.9" + "/usr/local/cuda/lib64/libcudnn_cnn.so.9" + "/usr/local/cuda/lib64/libcudnn_graph.so.9" + "/usr/local/cuda/lib64/libcudnn_ops.so.9" + "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9" + "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9" + "/usr/local/cuda/lib64/libcudnn_heuristic.so.9" + "/usr/local/cuda/lib64/libcudnn.so.9" "/usr/local/cuda/lib64/libcublas.so.12" "/usr/local/cuda/lib64/libcublasLt.so.12" "/usr/local/cuda/lib64/libcudart.so.12" @@ -164,13 +165,14 @@ if [[ $CUDA_VERSION == "12.1" || $CUDA_VERSION == "12.4" ]]; then "/usr/local/cuda/lib64/libnvrtc-builtins.so" ) DEPS_SONAME+=( - "libcudnn_adv_infer.so.8" - "libcudnn_adv_train.so.8" - "libcudnn_cnn_infer.so.8" - "libcudnn_cnn_train.so.8" - "libcudnn_ops_infer.so.8" - "libcudnn_ops_train.so.8" - "libcudnn.so.8" + "libcudnn_adv.so.9" + "libcudnn_cnn.so.9" + "libcudnn_graph.so.9" + "libcudnn_ops.so.9" + "libcudnn_engines_runtime_compiled.so.9" + "libcudnn_engines_precompiled.so.9" + 
"libcudnn_heuristic.so.9" + "libcudnn.so.9" "libcublas.so.12" "libcublasLt.so.12" "libcudart.so.12" @@ -215,13 +217,14 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then echo "Bundling with cudnn and cublas." DEPS_LIST+=( - "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_adv_train.so.8" - "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8" - "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_ops_train.so.8" - "/usr/local/cuda/lib64/libcudnn.so.8" + "/usr/local/cuda/lib64/libcudnn_adv.so.9" + "/usr/local/cuda/lib64/libcudnn_cnn.so.9" + "/usr/local/cuda/lib64/libcudnn_graph.so.9" + "/usr/local/cuda/lib64/libcudnn_ops.so.9" + "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9" + "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9" + "/usr/local/cuda/lib64/libcudnn_heuristic.so.9" + "/usr/local/cuda/lib64/libcudnn.so.9" "/usr/local/cuda/lib64/libcublas.so.11" "/usr/local/cuda/lib64/libcublasLt.so.11" "/usr/local/cuda/lib64/libcudart.so.11.0" @@ -230,13 +233,14 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8" ) DEPS_SONAME+=( - "libcudnn_adv_infer.so.8" - "libcudnn_adv_train.so.8" - "libcudnn_cnn_infer.so.8" - "libcudnn_cnn_train.so.8" - "libcudnn_ops_infer.so.8" - "libcudnn_ops_train.so.8" - "libcudnn.so.8" + "libcudnn_adv.so.9" + "libcudnn_cnn.so.9" + "libcudnn_graph.so.9" + "libcudnn_ops.so.9" + "libcudnn_engines_runtime_compiled.so.9" + "libcudnn_engines_precompiled.so.9" + "libcudnn_heuristic.so.9" + "libcudnn.so.9" "libcublas.so.11" "libcublasLt.so.11" "libcudart.so.11.0" diff --git a/manywheel/build_scripts/build_utils.sh b/manywheel/build_scripts/build_utils.sh index 7f607ed44..548f36c6e 100755 --- a/manywheel/build_scripts/build_utils.sh +++ b/manywheel/build_scripts/build_utils.sh @@ -1,7 +1,7 @@ #!/bin/bash # Helper utilities for build 
-OPENSSL_DOWNLOAD_URL=https://ftp.openssl.org/source/old/1.1.1/ +OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source/old/1.1.1/ CURL_DOWNLOAD_URL=https://curl.askapache.com/download AUTOCONF_DOWNLOAD_URL=https://ftp.gnu.org/gnu/autoconf diff --git a/release/pypi/promote_pypi_to_production.sh b/release/pypi/promote_pypi_to_production.sh index a14fbb637..728bd3853 100644 --- a/release/pypi/promote_pypi_to_production.sh +++ b/release/pypi/promote_pypi_to_production.sh @@ -33,5 +33,7 @@ promote_staging_binaries() { promote_staging_binaries torch "${PYTORCH_VERSION}" promote_staging_binaries torchvision "${TORCHVISION_VERSION}" promote_staging_binaries torchaudio "${TORCHAUDIO_VERSION}" -promote_staging_binaries torchtext "${TORCHTEXT_VERSION}" -promote_staging_binaries torchdata "${TORCHDATA_VERSION}" + +promote_staging_binaries executorch "${EXECUTORCH_VERSION}" +#promote_staging_binaries torchtext "${TORCHTEXT_VERSION}" +#promote_staging_binaries torchdata "${TORCHDATA_VERSION}" diff --git a/release/pypi/promote_pypi_to_staging.sh b/release/pypi/promote_pypi_to_staging.sh index 3753d70ab..578cb3999 100644 --- a/release/pypi/promote_pypi_to_staging.sh +++ b/release/pypi/promote_pypi_to_staging.sh @@ -41,6 +41,9 @@ PLATFORM="linux_aarch64" VERSION_SUFFIX="" upload PLATFORM="win_amd64" VERSION_SUFFIX="${CPU_VERSION_SUFFIX}" upload_pypi_to_staging torchaudio "${TORCHAUDIO_VERSION}" PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torchaudio "${TORCHAUDIO_VERSION}" -PLATFORM="linux_x86" VERSION_SUFFIX="${CPU_VERSION_SUFFIX}" upload_pypi_to_staging torchtext "${TORCHTEXT_VERSION}" -PLATFORM="win_amd64" VERSION_SUFFIX="${CPU_VERSION_SUFFIX}" upload_pypi_to_staging torchtext "${TORCHTEXT_VERSION}" -PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torchtext "${TORCHTEXT_VERSION}" +PLATFORM="linux_x86" VERSION_SUFFIX="${WIN_VERSION_SUFFIX}" upload_pypi_to_staging executorch "${EXECUTORCH_VERSION}" +PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" 
upload_pypi_to_staging executorch "${EXECUTORCH_VERSION}" + +#PLATFORM="linux_x86" VERSION_SUFFIX="${CPU_VERSION_SUFFIX}" upload_pypi_to_staging torchtext "${TORCHTEXT_VERSION}" +#PLATFORM="win_amd64" VERSION_SUFFIX="${CPU_VERSION_SUFFIX}" upload_pypi_to_staging torchtext "${TORCHTEXT_VERSION}" +#PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torchtext "${TORCHTEXT_VERSION}" diff --git a/release/release_versions.sh b/release/release_versions.sh index f092786d6..e73549b90 100644 --- a/release/release_versions.sh +++ b/release/release_versions.sh @@ -1,12 +1,13 @@ #!/usr/bin/env bash # Make sure to update these versions when doing a release first -PYTORCH_VERSION=${PYTORCH_VERSION:-2.3.0} -TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.18.0} -TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.3.0} +PYTORCH_VERSION=${PYTORCH_VERSION:-2.3.1} +TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.18.1} +TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.3.1} TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.18.0} TORCHREC_VERSION=${TORCHREC_VERSION:-0.7.0} TENSORRT_VERSION=${TENSORRT_VERSION:-2.2.0} +EXECUTORCH_VERSION=${EXECUTORCH_VERSION:-0.2.1} # NB: FBGEMMGPU uses the practice of keeping rc version in the filename, i.e. # fbgemm_gpu-0.6.0rc1+cpu-cp311-cp311. On the other hand, its final RC will diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index 3e95c79ae..fdf8ba6a4 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -146,7 +146,7 @@ def test_cuda_runtime_errors_captured() -> None: raise RuntimeError("Expected CUDA RuntimeError but have not received!") -def smoke_test_cuda(package: str, runtime_error_check: str) -> None: +def smoke_test_cuda(package: str, runtime_error_check: str, torch_compile_check: str) -> None: if not torch.cuda.is_available() and is_cuda_system: raise RuntimeError(f"Expected CUDA {gpu_arch_ver}. 
However CUDA is not loaded.") @@ -163,7 +163,7 @@ def smoke_test_cuda(package: str, runtime_error_check: str) -> None: print(f"{module['name']} CUDA: {version}") # torch.compile is available on macos-arm64 and Linux for python 3.8-3.11 - if sys.version_info < (3, 12, 0) and ( + if torch_compile_check == "enabled" and sys.version_info < (3, 12, 0) and ( (target_os == "linux" and torch.cuda.is_available()) or target_os == "macos-arm64"): smoke_test_compile() @@ -310,6 +310,13 @@ def main() -> None: choices=["enabled", "disabled"], default="enabled", ) + parser.add_argument( + "--torch-compile-check", + help="Check torch compile", + type=str, + choices=["enabled", "disabled"], + default="enabled", + ) options = parser.parse_args() print(f"torch: {torch.__version__}") @@ -323,7 +330,7 @@ def main() -> None: if options.package == "all": smoke_test_modules() - smoke_test_cuda(options.package, options.runtime_error_check) + smoke_test_cuda(options.package, options.runtime_error_check, options.torch_compile_check) if __name__ == "__main__": diff --git a/wheel/build_wheel.sh b/wheel/build_wheel.sh index 04c82ad2e..cef59693b 100755 --- a/wheel/build_wheel.sh +++ b/wheel/build_wheel.sh @@ -246,11 +246,6 @@ if [[ -z "$BUILD_PYTHONLESS" ]]; then fi else pushd "$pytorch_rootdir" - mkdir -p build - pushd build - # TODO: Remove this flag once https://github.com/pytorch/pytorch/issues/55952 is closed - CFLAGS='-Wno-deprecated-declarations' python ../tools/build_libtorch.py - popd mkdir -p libtorch/{lib,bin,include,share} cp -r "$(pwd)/build/lib" "$(pwd)/libtorch/" @@ -270,6 +265,8 @@ else else cp -r "$(pwd)/any_wheel/torch/lib/libiomp5.dylib" "$(pwd)/libtorch/lib/" fi + else + cp -r "$(pwd)/any_wheel/torch/lib/libomp.dylib" "$(pwd)/libtorch/lib/" fi rm -rf "$(pwd)/any_wheel" diff --git a/windows/internal/cuda_install.bat b/windows/internal/cuda_install.bat index 62790a51f..d94c2c484 100644 --- a/windows/internal/cuda_install.bat +++ b/windows/internal/cuda_install.bat @@ -36,7 
+36,7 @@ if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" ( set "ARGS=cuda_profiler_api_11.8 thrust_11.8 nvcc_11.8 cuobjdump_11.8 nvprune_11.8 nvprof_11.8 cupti_11.8 cublas_11.8 cublas_dev_11.8 cudart_11.8 cufft_11.8 cufft_dev_11.8 curand_11.8 curand_dev_11.8 cusolver_11.8 cusolver_dev_11.8 cusparse_11.8 cusparse_dev_11.8 npp_11.8 npp_dev_11.8 nvrtc_11.8 nvrtc_dev_11.8 nvml_dev_11.8 nvtx_11.8" ) -set CUDNN_FOLDER=cudnn-windows-x86_64-8.7.0.84_cuda11-archive +set CUDNN_FOLDER=cudnn-windows-x86_64-9.1.0.70_cuda11-archive set CUDNN_LIB_FOLDER="lib" set "CUDNN_INSTALL_ZIP=%CUDNN_FOLDER%.zip" if not exist "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" ( @@ -63,7 +63,7 @@ if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" ( set "ARGS=cuda_profiler_api_12.1 thrust_12.1 nvcc_12.1 cuobjdump_12.1 nvprune_12.1 nvprof_12.1 cupti_12.1 cublas_12.1 cublas_dev_12.1 cudart_12.1 cufft_12.1 cufft_dev_12.1 curand_12.1 curand_dev_12.1 cusolver_12.1 cusolver_dev_12.1 cusparse_12.1 cusparse_dev_12.1 npp_12.1 npp_dev_12.1 nvrtc_12.1 nvrtc_dev_12.1 nvml_dev_12.1 nvjitlink_12.1 nvtx_12.1" ) -set CUDNN_FOLDER=cudnn-windows-x86_64-8.9.2.26_cuda12-archive +set CUDNN_FOLDER=cudnn-windows-x86_64-9.1.0.70_cuda12-archive set CUDNN_LIB_FOLDER="lib" set "CUDNN_INSTALL_ZIP=%CUDNN_FOLDER%.zip" if not exist "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" ( @@ -90,7 +90,7 @@ if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" ( set "ARGS=cuda_profiler_api_12.4 thrust_12.4 nvcc_12.4 cuobjdump_12.4 nvprune_12.4 nvprof_12.4 cupti_12.4 cublas_12.4 cublas_dev_12.4 cudart_12.4 cufft_12.4 cufft_dev_12.4 curand_12.4 curand_dev_12.4 cusolver_12.4 cusolver_dev_12.4 cusparse_12.4 cusparse_dev_12.4 npp_12.4 npp_dev_12.4 nvrtc_12.4 nvrtc_dev_12.4 nvml_dev_12.4 nvjitlink_12.4 nvtx_12.4" ) -set CUDNN_FOLDER=cudnn-windows-x86_64-8.9.7.29_cuda12-archive +set CUDNN_FOLDER=cudnn-windows-x86_64-9.1.0.70_cuda12-archive set CUDNN_LIB_FOLDER="lib" set "CUDNN_INSTALL_ZIP=%CUDNN_FOLDER%.zip" if not exist 
"%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" (