Patch summary (free text before the first "diff --git" header is ignored by
`git apply` and `patch`):

  * Replaces the disabled workflow .github/workflows/main.disabled with an
    active .github/workflows/main.yaml that builds wheels over a matrix of
    Python, PyTorch and CUDA versions using pypa/cibuildwheel.
  * Parameterises scripts/manylinux2014_x86_64/Dockerfile through build args
    (Python version, CUDA version, PyTorch version) and adds
    remove_unused_python.sh, which deletes /usr/local/bin/python* entries that
    do not match the selected interpreter.
  * Switches AT_DISPATCH_FLOATING_TYPES call sites from X.type() to
    X.scalar_type(), raises the required Torch version in CMake to 2.0, and
    adds diagnostic output to CMakeLists.txt and setup.py.

Reviewer fix applied in this revision:
  The workflow derived PIP_EXTRA_INDEX_URL from `env.CUVERSION`, but CUVERSION
  is only a shell-local variable inside the "Github Actions Envs Setup" step;
  it is never written to $GITHUB_ENV, so the expression expanded to an empty
  string and the index URL ended in ".../whl/cu". Both occurrences now use
  `env.CU_VERSION_NO_DOT`, which that step does export (e.g. 12.4 becomes 124).
  Two stale Dockerfile comments (wrong script name, hard-coded "Python 3.11")
  were corrected as well. Hunk line counts are unchanged.

diff --git a/.github/workflows/main.disabled b/.github/workflows/main.disabled
deleted file mode 100644
index a2cfbb6..0000000
--- a/.github/workflows/main.disabled
+++ /dev/null
@@ -1,67 +0,0 @@
-name: Build PyTorch Wheels
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-
-jobs:
-  build-wheels:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        cibw-arch: ["x86_64"]
-        cibw-python: ['cp312-*']
-        python-version: [3.11]
-        pytorch-version: [2.4.0, 2.5.0]
-        cuda-version: [12.4.0]
-        cuversion: [124]
-    env:
-      CIBW_SKIP: cp36-* cp37-* cp38-* cp39-* cp310-*
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      # Build the custom Manylinux Docker image
-      - name: Build Manylinux Docker Image
-        run: |
-          docker build --no-cache -t manylinux2014_${{ matrix.cibw-arch }} \
-            scripts/manylinux2014_${{ matrix.cibw-arch }}
-
-      # Set up Python environment
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      # Install cibuildwheel and other required tools
-      - name: Install Dependencies
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install cibuildwheel build
-
-      - name: Build wheels
-        run: |
-          echo "Building wheels with CUDA ${{ matrix.cuversion}} and PyTorch ${{ matrix.pytorch-version }}"
-          python -m cibuildwheel --platform linux .
-          mkdir -p dist
-          cp wheelhouse/*.whl dist/
-        env:
-          CIBW_BUILD_VERBOSITY: 3
-          CIBW_BUILD: ${{ matrix.cibw-python }}
-          CIBW_SKIP: "*-musllinux* *-win32 *-manylinux_i686"
-          CIBW_ARCHS: ${{ matrix.cibw-arch }}
-          CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014_${{ matrix.cibw-arch }}
-          CIBW_ENVIRONMENT: >
-            CUDA_HOME=/usr/local/cuda
-            PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu${{ matrix.cuversion }}
-          CIBW_REPAIR_WHEEL_COMMAND_LINUX: |
-            auditwheel repair --exclude libtorch.so --exclude libtorch_cpu.so --exclude libtorch_cuda.so --exclude libc10.so --exclude libc10_cuda.so -w {dest_dir} {wheel}
-
-      - name: Upload artifacts
-        uses: actions/upload-artifact@v3
-        with:
-          name: wheels
-          path: dist/
\ No newline at end of file
diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
new file mode 100644
index 0000000..d64dce9
--- /dev/null
+++ b/.github/workflows/main.yaml
@@ -0,0 +1,78 @@
+name: Build PyTorch Wheels
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+jobs:
+  build-wheels:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        cibw-arch: ["x86_64"]
+        python-version: ["3.10", "3.11", "3.12"]
+        pytorch-version: ["2.4.0", "2.5.0"]
+        cuda-version: ["12.1", "12.4"]
+    env:
+      CIBW_SKIP: cp36-* cp37-* cp38-* cp39-*
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Github Actions Envs Setup
+        run: |
+          CUVERSION="${{ matrix.cuda-version }}"
+          PYTHONVERSION="${{ matrix.python-version }}"
+
+          CU_VERSION_NO_DOT=${CUVERSION//./}
+          echo CU_VERSION_NO_DOT=${CU_VERSION_NO_DOT} >> $GITHUB_ENV
+
+          CU_VERSION_DASH=${CUVERSION//./-}
+          echo CU_VERSION_DASH=${CU_VERSION_DASH} >> $GITHUB_ENV
+
+          PYTHON_VER_NO_DOT=${PYTHONVERSION//./}
+          echo PYTHON_VER_NO_DOT=${PYTHON_VER_NO_DOT} >> $GITHUB_ENV
+
+      # Build the custom Manylinux Docker image
+      - name: Build Manylinux Docker Image
+        run: |
+          docker build --no-cache \
+            -t manylinux2014_"${{ matrix.cibw-arch }}" \
+            --build-arg PYTHON_VER="${{ matrix.python-version }}" \
+            --build-arg PYTHON_VER_NO_DOT="${{ env.PYTHON_VER_NO_DOT }}" \
+            --build-arg CUDA_VER="${{ matrix.cuda-version }}" \
+            --build-arg CUDA_VER_NO_DOT="${{ env.CU_VERSION_NO_DOT }}" \
+            --build-arg CUDA_VER_DASH="${{ env.CU_VERSION_DASH }}" \
+            --build-arg PYTORCH_VERSION="${{ matrix.pytorch-version }}" \
+            scripts/manylinux2014_"${{ matrix.cibw-arch }}"
+
+      # Set up Python environment
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "${{ matrix.python-version }}"
+
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v2.22.0
+        env:
+          CUDA_HOME: /usr/local/cuda
+          PIP_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cu${{ env.CU_VERSION_NO_DOT }}"
+          CIBW_BUILD_VERBOSITY: 3
+          CIBW_BUILD: "cp${{ env.PYTHON_VER_NO_DOT }}-*"
+          CIBW_BUILD_FRONTEND: "pip; args: --no-build-isolation"
+          CIBW_SKIP: "*-musllinux* *-win32 *-manylinux_i686"
+          CIBW_ARCHS: "${{ matrix.cibw-arch }}"
+          CIBW_MANYLINUX_X86_64_IMAGE: "manylinux2014_${{ matrix.cibw-arch }}"
+          CIBW_ENVIRONMENT: >
+            CUDA_HOME=/usr/local/cuda
+            PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu${{ env.CU_VERSION_NO_DOT }}"
+          CIBW_REPAIR_WHEEL_COMMAND_LINUX: |
+            auditwheel repair --exclude libcuda.so --exclude libcuda.so.1 --exclude libc10.so --exclude libtorch.so --exclude libtorch_cpu.so --exclude libtorch_cuda.so --exclude libc10_cuda.so --exclude libcudart.so --exclude libnvToolsExt.so --exclude libnvrtc.so --exclude libnvrtc.so.12 -w {dest_dir} {wheel}
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: "cuda_mace-py-${{ env.PYTHON_VER_NO_DOT }}-torch-${{matrix.pytorch-version}}+cu${{ env.CU_VERSION_NO_DOT }}-${{ matrix.cibw-arch }}"
+          path: ./wheelhouse/*.whl
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 1e82c87..c1b4ecf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,9 @@
 build/*
 local/*
 tests/*
+dist/*
 *.pyc
 *.pt
 .vscode
 cuda_mace.egg-info/
-*.model
\ No newline at end of file
+*.model
diff --git a/cuda_mace/CMakeLists.txt b/cuda_mace/CMakeLists.txt
index 77c373b..fc533bd 100644
--- a/cuda_mace/CMakeLists.txt
+++ b/cuda_mace/CMakeLists.txt
@@ -9,6 +9,8 @@ set(BIN_INSTALL_DIR "bin" CACHE PATH "Path relative to CMAKE_INSTALL_PREFIX wher
 set(INCLUDE_INSTALL_DIR "include" CACHE PATH "Path relative to CMAKE_INSTALL_PREFIX where to install headers")
 
 find_package(Python COMPONENTS Interpreter REQUIRED)
+message(STATUS "Python Version: ${Python_VERSION}")
+message (STATUS "Python Path: ${Python_EXECUTABLE}")
 
 include(CheckLanguage)
 check_language(CUDA)
@@ -33,7 +35,9 @@ endif()
 string(STRIP ${TORCH_CMAKE_PATH_OUTPUT} TORCH_CMAKE_PATH_OUTPUT)
 set(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH};${TORCH_CMAKE_PATH_OUTPUT}")
 
-find_package(Torch 1.13 REQUIRED)
+message(STATUS "TORCH_CMAKE_PATH_OUTPUT: ${TORCH_CMAKE_PATH_OUTPUT}")
+
+find_package(Torch 2.0 REQUIRED)
 
 add_library(cuda_mace SHARED
     "jit_wrappers/src/cubic_spline_wrapper.cpp"
diff --git a/cuda_mace/jit_wrappers/src/invariant_message_passing_wrapper.cpp b/cuda_mace/jit_wrappers/src/invariant_message_passing_wrapper.cpp
index e8b0d95..e0f40df 100644
--- a/cuda_mace/jit_wrappers/src/invariant_message_passing_wrapper.cpp
+++ b/cuda_mace/jit_wrappers/src/invariant_message_passing_wrapper.cpp
@@ -100,7 +100,7 @@ jit_forward_message_passing(torch::Tensor X, torch::Tensor Y, torch::Tensor radi
   dim3 bdim(NWARPS_PER_BLOCK * WARP_SIZE, 1, 1);
 
   AT_DISPATCH_FLOATING_TYPES(
-      X.type(), "forward_gpu",
+      X.scalar_type(), "forward_gpu",
       ([&] {
         unsigned int space = 0;
         void *sptr;
@@ -187,7 +187,7 @@ jit_backward_message_passing(torch::Tensor X, torch::Tensor Y, torch::Tensor rad
       Y, torch::TensorOptions().dtype(Y.dtype()).device(Y.device()));
 
   AT_DISPATCH_FLOATING_TYPES(
-      X.type(), "backward_gpu", ([&] {
+      X.scalar_type(), "backward_gpu", ([&] {
         dim3 bdim(NWARPS_PER_BLOCK * WARP_SIZE, 1, 1);
 
         dim3 gdim(nnodes, 1);
diff --git a/cuda_mace/jit_wrappers/src/symmetric_contraction_wrapper.cpp b/cuda_mace/jit_wrappers/src/symmetric_contraction_wrapper.cpp
index b258b6f..c92ee20 100644
--- a/cuda_mace/jit_wrappers/src/symmetric_contraction_wrapper.cpp
+++ b/cuda_mace/jit_wrappers/src/symmetric_contraction_wrapper.cpp
@@ -66,7 +66,7 @@ std::vector jit_symmetric_contraction_forward(
   dim3 bdim(WARP_SIZE, NWARPS_PER_BLOCK, 1);
 
   AT_DISPATCH_FLOATING_TYPES(
-      X.type(), "symmetric_contraction_forwards", ([&] {
+      X.scalar_type(), "symmetric_contraction_forwards", ([&] {
         unsigned int shared_size = 0;
 
         void *sptr = nullptr;
@@ -173,7 +173,7 @@ torch::Tensor jit_symmetric_contraction_backward(torch::Tensor gradX,
   dim3 bdim(WARP_SIZE, 4, 1);
 
   AT_DISPATCH_FLOATING_TYPES(
-      gradX.type(), "symm_contraction_backward", ([&] {
+      gradX.scalar_type(), "symm_contraction_backward", ([&] {
         unsigned int space =
             WARP_SIZE * 16 * sizeof(scalar_t); // buffer_grad storage
 
diff --git a/scripts/manylinux2014_x86_64/Dockerfile b/scripts/manylinux2014_x86_64/Dockerfile
index 44891f4..182252e 100644
--- a/scripts/manylinux2014_x86_64/Dockerfile
+++ b/scripts/manylinux2014_x86_64/Dockerfile
@@ -1,26 +1,34 @@
-# Use manylinux docker image as a base
 FROM quay.io/pypa/manylinux2014_x86_64
 
-# ------------
-# Install cuda
-# ------------
+# Set environment variables for Python and CUDA versions
+ARG PYTHON_VER="3.11"
+ARG PYTHON_VER_NO_DOT="311"
+ARG CUDA_VER="12.4"
+ARG CUDA_VER_NO_DOT="124"
+ARG CUDA_VER_DASH="12-4"
+ARG PYTORCH_VERSION="2.4.1"
 
-ARG VER="12-4"
-ARG ARCH="x86_64"
+RUN echo "PYTHON_VERSION: ${PYTHON_VER} NO-DOT: ${PYTHON_VER_NO_DOT}"
 
+# Install system dependencies and CUDA
+RUN yum install -y yum-utils gcc gcc-c++ make zlib-devel bzip2-devel libffi-devel \
+    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
+    && yum install -y \
+    cuda-toolkit-${CUDA_VER_DASH} \
+    && yum clean all \
+    && rm -rf /var/cache/yum/* \
+    && echo "/usr/local/cuda/lib64" >> /etc/ld.so.conf.d/999_nvidia_cuda.conf
 
-RUN yum install -y yum-utils
-RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
-RUN yum -y install cuda-compiler-${VER}.${ARCH} \
-                   cuda-libraries-${VER}.${ARCH} \
-                   cuda-libraries-devel-${VER}.${ARCH}
-RUN yum clean all
-RUN rm -rf /var/cache/yum/*
-RUN echo "/usr/local/cuda/lib64" >> /etc/ld.so.conf.d/999_nvidia_cuda.conf
+ # Remove all other Python versions
+#ENV PATH="/opt/python/cp311-cp311/bin:${PATH}"
+#RUN ln -sf /opt/python/cp311-cp311/bin/python3.11 /opt/python/cp311-cp311/bin/python
 
-# -------------------------
-# Set environment variables
-# -------------------------
+# Set Python version environment variables dynamically
+ENV PATH="/opt/python/cp${PYTHON_VER_NO_DOT}-cp${PYTHON_VER_NO_DOT}/bin:${PATH}"
+RUN ln -sf /opt/python/cp${PYTHON_VER_NO_DOT}-cp${PYTHON_VER_NO_DOT}/bin/python${PYTHON_VER} /opt/python/cp${PYTHON_VER_NO_DOT}-cp${PYTHON_VER_NO_DOT}/bin/python
+RUN python -m ensurepip --upgrade
+
+# Set environment variables for CUDA
 ENV PATH="/usr/local/cuda/bin:${PATH}"
 ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
 ENV CUDA_HOME=/usr/local/cuda
@@ -28,9 +36,28 @@ ENV CUDA_ROOT=/usr/local/cuda
 ENV CUDA_PATH=/usr/local/cuda
 ENV CUDADIR=/usr/local/cuda
 
-RUN echo "CUDA_HOME: ${CUDA_HOME}"
-# --------
-# Commands
-# --------
+RUN yum install -y git-all
+
+# Verify the CUDA installation
+RUN echo "CUDA_HOME: ${CUDA_HOME}" && \
+    nvcc --version
+
+#RUN pip install torch==2.4.1+cu124 --extra-index-url https://download.pytorch.org/whl/cu124
+#RUN pip install numpy cmake
+RUN pip install torch==${PYTORCH_VERSION}+cu${CUDA_VER_NO_DOT} --extra-index-url https://download.pytorch.org/whl/cu${CUDA_VER_NO_DOT}
+RUN pip install numpy cmake
+
+RUN mkdir /workspace
+
+# Add the remove_unused_python.sh script to the container
+COPY remove_unused_python.sh /scripts/remove_unused_python.sh
+# Make the script executable
+RUN chmod +x /scripts/remove_unused_python.sh
+# Run the script to remove every /usr/local/bin/python* except the selected PYTHON_VER
+RUN /scripts/remove_unused_python.sh python${PYTHON_VER}
+
+# Clean up to reduce image size
+#RUN yum clean all && rm -rf /var/cache/yum/*
 
-CMD ["/bin/bash"]
\ No newline at end of file
+# Default command (bash)
+CMD ["/bin/bash"]
diff --git a/scripts/manylinux2014_x86_64/remove_unused_python.sh b/scripts/manylinux2014_x86_64/remove_unused_python.sh
new file mode 100644
index 0000000..30e4c6f
--- /dev/null
+++ b/scripts/manylinux2014_x86_64/remove_unused_python.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+#FindPython is weird and doesn't respect $PATH or even CMAKE $Python_Exec variables.
+for python in /usr/local/bin/python*; do
+if [[ "$python" != *"$1"* ]]; then
+    echo "Removing $python";
+    rm -f "$python";
+fi;
+done
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 40063dc..cd724a4 100755
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@
 
 __author__ = "Nicholas J. Browning"
 __credits__ = "Nicholas J. Browning (2023), https://github.com/nickjbrowning"
-__license__ = "MIT"
+__license__ = "Academic Software License"
 __version__ = "0.1"
 __maintainer__ = "Nicholas J. Browning"
 __email__ = "nickjbrowning@gmail.com"
@@ -67,7 +67,7 @@ def run(self):
         os.makedirs(build_dir, exist_ok=True)
 
         cmake_options = [
-            f"-DCMAKE_INSTALL_PREFIX={install_dir}"
+            f"-DCMAKE_INSTALL_PREFIX={install_dir}",
             #f"-DPYTHON_EXECUTABLE={sys.executable}",
         ]
 
@@ -82,6 +82,13 @@ def run(self):
             cmake_options.append(f"-DCMAKE_C_FLAGS={ARCHFLAGS}")
             cmake_options.append(f"-DCMAKE_CXX_FLAGS={ARCHFLAGS}")
 
+        subprocess.run(
+            ["cmake", "--version"],
+            cwd=build_dir,
+            check=True,
+        )
+
+        print (["cmake", source_dir, *cmake_options])
         subprocess.run(
             ["cmake", source_dir, *cmake_options],
             cwd=build_dir,