Merge branch 'devel' into devel

deepmodeling · Oct 31, 2024 · 39d6d17 · 39d6d17
2 parents 2ce4356 + d165fee
commit 39d6d17
Show file tree

Hide file tree

Showing 307 changed files with 3,709 additions and 1,060 deletions.
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
@@ -0,0 +1,6 @@
+FROM mcr.microsoft.com/devcontainers/cpp:1-ubuntu-24.04
+
+# Install uv/uvx by copying the binaries from the official uv image.
+# Pin the uv release instead of using `latest` so container rebuilds are
+# reproducible; bump the tag deliberately when upgrading.
+COPY --from=ghcr.io/astral-sh/uv:0.4.29 /uv /uvx /bin/
diff --git a/.devcontainer/READMD.md b/.devcontainer/READMD.md
@@ -0,0 +1,35 @@
+# DeePMD-kit devcontainer environment
+
+This [devcontainer](https://vscode.js.cn/docs/devcontainers/devcontainer-cli) environment sets up Python and C++ environments for developing DeePMD-kit.
+One can set it up locally or use [GitHub Codespaces](https://docs.github.com/en/codespaces) by clicking the Code button on the DeePMD-kit repository page.
+The whole setup process requires about 10 minutes, so one needs to be patient.
+
+## Python environment
+
+The following packages are installed into the Python environment `.venv`:
+
+- DeePMD-kit (in edit mode)
+- Backends including TensorFlow, PyTorch, JAX
+- LAMMPS
+- MPICH
+- CMake
+- pre-commit (including hooks)
+- Test packages including pytest
+- Doc packages including sphinx
+
+## C++ interface
+
+The C++ interface with TensorFlow and PyTorch support is installed into the `dp` directory.
+
+When calling and debugging LAMMPS with DeePMD-kit, use the following scripts instead of the regular `lmp`:
+
+- `.devcontainer/lmp`
+- `.devcontainer/gdb_lmp`
+
+## Rebuild
+
+Usually the Python package does not need to be reinstalled.
+But when one wants to recompile the C++ code, the following scripts can be executed.
+
+- `.devcontainer/build_cxx.sh`
+- `.devcontainer/build_py.sh`
diff --git a/.devcontainer/build_cxx.sh b/.devcontainer/build_cxx.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+set -ev
+
+NPROC=$(nproc --all)
+SCRIPT_PATH=$(dirname $(realpath -s $0))
+
+export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch
+TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
+
+mkdir -p ${SCRIPT_PATH}/../buildcxx/
+cd ${SCRIPT_PATH}/../buildcxx/
+cmake -D ENABLE_TENSORFLOW=ON \
+	-D ENABLE_PYTORCH=ON \
+	-D CMAKE_INSTALL_PREFIX=${SCRIPT_PATH}/../dp/ \
+	-D LAMMPS_VERSION=stable_29Aug2024_update1 \
+	-D CMAKE_BUILD_TYPE=Debug \
+	-D BUILD_TESTING:BOOL=TRUE \
+	-D TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
+	${SCRIPT_PATH}/../source
+cmake --build . -j${NPROC}
+cmake --install .
diff --git a/.devcontainer/build_py.sh b/.devcontainer/build_py.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+set -ev
+
+SCRIPT_PATH=$(dirname $(realpath -s $0))
+cd ${SCRIPT_PATH}/..
+
+uv sync --dev --python 3.12 --extra cpu --extra torch --extra jax --extra lmp --extra test --extra docs
+pre-commit install
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -0,0 +1,17 @@
+{
+  "name": "DeePMD-kit",
+  "build": {
+    "dockerfile": "Dockerfile"
+  },
+  "features": {
+    "ghcr.io/devcontainers/features/github-cli:1": {}
+  },
+  "postCreateCommand": ".devcontainer/build_py.sh && .devcontainer/download_libtorch.sh && .devcontainer/build_cxx.sh && pre-commit install-hooks",
+  "remoteEnv": {
+    "PATH": "${containerEnv:PATH}:${containerWorkspaceFolder}/.venv/bin",
+    "DP_ENABLE_PYTORCH": "1",
+    "DP_VARIANT": "cpu",
+    "LMP_CXX11_ABI_0": "1",
+    "UV_EXTRA_INDEX_URL": "https://download.pytorch.org/whl/cpu"
+  }
+}
diff --git a/.devcontainer/download_libtorch.sh b/.devcontainer/download_libtorch.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+set -ev
+
+SCRIPT_PATH=$(dirname $(realpath -s $0))
+cd ${SCRIPT_PATH}/..
+
+wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.5.0%2Bcpu.zip -O ~/libtorch.zip
+unzip ~/libtorch.zip
diff --git a/.devcontainer/gdb_lmp b/.devcontainer/gdb_lmp
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Start gdb on the LAMMPS executable from the Python environment, with the
+# DeePMD-kit plugin path and the dp/libtorch/TensorFlow library directories
+# set in the environment. Extra arguments are forwarded to gdb.
+SCRIPT_PATH=$(dirname "$(realpath -s "$0")")
+
+export CMAKE_PREFIX_PATH="${SCRIPT_PATH}/../libtorch"
+TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
+
+env LAMMPS_PLUGIN_PATH="${SCRIPT_PATH}/../dp/lib/deepmd_lmp" \
+	LD_LIBRARY_PATH="${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT}" \
+	gdb "${SCRIPT_PATH}/../.venv/lib/python3.12/site-packages/lammps/lmp" "$@"
diff --git a/.devcontainer/lmp b/.devcontainer/lmp
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Run the `lmp` executable from the Python environment, with the DeePMD-kit
+# plugin path and the dp/libtorch/TensorFlow library directories set in the
+# environment. All arguments are forwarded to `lmp`.
+SCRIPT_PATH=$(dirname "$(realpath -s "$0")")
+
+export CMAKE_PREFIX_PATH="${SCRIPT_PATH}/../libtorch"
+TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
+
+env LAMMPS_PLUGIN_PATH="${SCRIPT_PATH}/../dp/lib/deepmd_lmp" \
+	LD_LIBRARY_PATH="${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT}" \
+	"${SCRIPT_PATH}/../.venv/bin/lmp" "$@"
diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
@@ -148,6 +148,7 @@ jobs:
       - uses: actions/download-artifact@v4
         with:
           path: source/install/docker/dist
+          pattern: cibw-*-manylinux_x86_64-cu${{ matrix.cuda_version }}*
           merge-multiple: true
       - name: Log in to the Container registry
         uses: docker/login-action@v3
@@ -180,6 +181,7 @@ jobs:
       - uses: actions/download-artifact@v4
         with:
           path: dist/packages
+          pattern: cibw-*
           merge-multiple: true
       - uses: actions/setup-python@v5
         name: Install Python

diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml
@@ -18,7 +18,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - tensorflow_build_version: "2.15"
+          - tensorflow_build_version: "2.18"
             tensorflow_version: ""
             filename: libdeepmd_c.tar.gz
           - tensorflow_build_version: "2.14"

diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
@@ -47,7 +47,7 @@ jobs:
          && sudo apt-get -y install cuda-12-3 libcudnn8=8.9.5.*-1+cuda12.3
       if: false  # skip as we use nvidia image
     - run: python -m pip install -U uv
-    - run: source/install/uv_with_retry.sh pip install --system "tensorflow>=2.15.0rc0" "torch==2.3.1.*"
+    - run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.5.0"
     - run: |
         export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])')
         export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
@@ -63,7 +63,7 @@ jobs:
         CUDA_VISIBLE_DEVICES: 0
     - name: Download libtorch
       run: |
-         wget https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.2.1%2Bcu121.zip -O libtorch.zip
+         wget https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.5.0%2Bcu124.zip -O libtorch.zip
          unzip libtorch.zip
     - run: |
         export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch

diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml
@@ -26,7 +26,7 @@ jobs:
     - run: python -m pip install -U uv
     - run: |
         source/install/uv_with_retry.sh pip install --system mpich
-        source/install/uv_with_retry.sh pip install --system "torch==2.3.0+cpu.cxx11.abi" -i https://download.pytorch.org/whl/
+        source/install/uv_with_retry.sh pip install --system torch -i https://download.pytorch.org/whl/cpu
         export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])')
         source/install/uv_with_retry.sh pip install --system --only-binary=horovod -e .[cpu,test,jax] horovod[tensorflow-cpu] mpi4py
       env:

diff --git a/.gitignore b/.gitignore
@@ -45,3 +45,8 @@ build_c_tests
 build_c/
 libdeepmd_c/
 .uv/
+libtorch/
+uv.lock
+buildcxx/
+node_modules/
+*.bib.original
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
         exclude: ^source/3rdparty
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.7.0
+    rev: v0.7.1
     hooks:
       - id: ruff
         args: ["--fix"]
@@ -38,6 +38,14 @@ repos:
       - id: ruff-format
         exclude: ^source/3rdparty
         types_or: [python, pyi, jupyter]
+  - repo: https://github.com/pycqa/flake8
+    # flake8 cannot autofix
+    rev: "7.1.1"
+    hooks:
+      - id: flake8
+        additional_dependencies:
+          - torchfix==0.6.0
+          - flake8-pyproject==1.2.3
   # numpydoc
   - repo: https://github.com/Carreau/velin
     rev: 0.0.12
@@ -47,7 +55,7 @@ repos:
         exclude: ^source/3rdparty
   # Python inside docs
   - repo: https://github.com/asottile/blacken-docs
-    rev: 1.19.0
+    rev: 1.19.1
     hooks:
       - id: blacken-docs
   # C++

diff --git a/README.md b/README.md
@@ -19,7 +19,7 @@ For more information, check the [documentation](https://deepmd.readthedocs.io/).
 
 ### Highlighted features
 
-- **interfaced with multiple backends**, including TensorFlow and PyTorch, the most popular deep learning frameworks, making the training process highly automatic and efficient.
+- **interfaced with multiple backends**, including TensorFlow, PyTorch, and JAX, the most popular deep learning frameworks, making the training process highly automatic and efficient.
 - **interfaced with high-performance classical MD and quantum (path-integral) MD packages**, including LAMMPS, i-PI, AMBER, CP2K, GROMACS, OpenMM, and ABUCUS.
 - **implements the Deep Potential series models**, which have been successfully applied to finite and extended systems, including organic molecules, metals, semiconductors, insulators, etc.
 - **implements MPI and GPU supports**, making it highly efficient for high-performance parallel and distributed computing.
@@ -72,7 +72,7 @@ See [our latest paper](https://doi.org/10.1063/5.0155600) for details of all fea
 
 #### v3
 
-- Multiple backends supported. Add a PyTorch backend.
+- Multiple backends supported. Add PyTorch and JAX backends.
 - The DPA-2 model.
 
 ## Install and use DeePMD-kit

diff --git a/backend/find_pytorch.py b/backend/find_pytorch.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import importlib
 import os
+import platform
 import site
 from functools import (
     lru_cache,
@@ -22,6 +23,9 @@
     Union,
 )
 
+from packaging.specifiers import (
+    SpecifierSet,
+)
 from packaging.version import (
     Version,
 )
@@ -104,6 +108,20 @@ def get_pt_requirement(pt_version: str = "") -> dict:
     """
     if pt_version is None:
         return {"torch": []}
+    if (
+        os.environ.get("CIBUILDWHEEL", "0") == "1"
+        and platform.system() == "Linux"
+        and platform.machine() == "x86_64"
+    ):
+        cuda_version = os.environ.get("CUDA_VERSION", "12.2")
+        if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"):
+            # CUDA 12.2, cudnn 9
+            pt_version = "2.5.0"
+        elif cuda_version in SpecifierSet(">=11,<12"):
+            # CUDA 11.8, cudnn 8
+            pt_version = "2.3.1"
+        else:
+            raise RuntimeError("Unsupported CUDA version") from None
     if pt_version == "":
         pt_version = os.environ.get("PYTORCH_VERSION", "")
 

diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py
@@ -85,14 +85,14 @@ def find_tensorflow() -> tuple[Optional[str], list[str]]:
         if os.environ.get("CIBUILDWHEEL", "0") == "1":
             cuda_version = os.environ.get("CUDA_VERSION", "12.2")
             if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"):
-                # CUDA 12.2
+                # CUDA 12.2, cudnn 9
                 requires.extend(
                     [
-                        "tensorflow-cpu>=2.15.0rc0; platform_machine=='x86_64' and platform_system == 'Linux'",
+                        "tensorflow-cpu>=2.18.0rc0; platform_machine=='x86_64' and platform_system == 'Linux'",
                     ]
                 )
             elif cuda_version in SpecifierSet(">=11,<12"):
-                # CUDA 11.8
+                # CUDA 11.8, cudnn 8
                 requires.extend(
                     [
                         "tensorflow-cpu>=2.5.0rc0,<2.15; platform_machine=='x86_64' and platform_system == 'Linux'",

diff --git a/backend/read_env.py b/backend/read_env.py
@@ -43,7 +43,7 @@ def get_argument_from_env() -> tuple[str, list, list, dict, str, str]:
     """
     cmake_args = []
     extra_scripts = {}
-    # get variant option from the environment varibles, available: cpu, cuda, rocm
+    # get variant option from the environment variables, available: cpu, cuda, rocm
     dp_variant = os.environ.get("DP_VARIANT", "cpu").lower()
     if dp_variant == "cpu" or dp_variant == "":
         cmake_minimum_required_version = "3.16"

diff --git a/deepmd/__init__.py b/deepmd/__init__.py
@@ -17,7 +17,7 @@
 
 
 def DeepPotential(*args, **kwargs):
-    """Factory function that forwards to DeepEval (for compatbility
+    """Factory function that forwards to DeepEval (for compatibility
     and performance).
 
     Parameters

diff --git a/deepmd/backend/jax.py b/deepmd/backend/jax.py
@@ -32,14 +32,13 @@ class JAXBackend(Backend):
     name = "JAX"
     """The formal name of the backend."""
     features: ClassVar[Backend.Feature] = (
-        Backend.Feature(0)
-        # Backend.Feature.ENTRY_POINT
-        # | Backend.Feature.DEEP_EVAL
-        # | Backend.Feature.NEIGHBOR_STAT
-        # | Backend.Feature.IO
+        Backend.Feature.IO
+        | Backend.Feature.ENTRY_POINT
+        | Backend.Feature.DEEP_EVAL
+        | Backend.Feature.NEIGHBOR_STAT
     )
     """The features of the backend."""
-    suffixes: ClassVar[list[str]] = []
+    suffixes: ClassVar[list[str]] = [".hlo", ".jax"]
     """The suffixes of the backend."""
 
     def is_available(self) -> bool:
@@ -72,7 +71,11 @@ def deep_eval(self) -> type["DeepEvalBackend"]:
         type[DeepEvalBackend]
             The Deep Eval backend of the backend.
         """
-        raise NotImplementedError
+        from deepmd.jax.infer.deep_eval import (
+            DeepEval,
+        )
+
+        return DeepEval
 
     @property
     def neighbor_stat(self) -> type["NeighborStat"]:
@@ -83,7 +86,11 @@ def neighbor_stat(self) -> type["NeighborStat"]:
         type[NeighborStat]
             The neighbor statistics of the backend.
         """
-        raise NotImplementedError
+        from deepmd.jax.utils.neighbor_stat import (
+            NeighborStat,
+        )
+
+        return NeighborStat
 
     @property
     def serialize_hook(self) -> Callable[[str], dict]:
@@ -94,7 +101,11 @@ def serialize_hook(self) -> Callable[[str], dict]:
         Callable[[str], dict]
             The serialize hook of the backend.
         """
-        raise NotImplementedError
+        from deepmd.jax.utils.serialization import (
+            serialize_from_file,
+        )
+
+        return serialize_from_file
 
     @property
     def deserialize_hook(self) -> Callable[[str, dict], None]:
@@ -105,4 +116,8 @@ def deserialize_hook(self) -> Callable[[str, dict], None]:
         Callable[[str, dict], None]
             The deserialize hook of the backend.
         """
-        raise NotImplementedError
+        from deepmd.jax.utils.serialization import (
+            deserialize_to_file,
+        )
+
+        return deserialize_to_file
diff --git a/deepmd/backend/suffix.py b/deepmd/backend/suffix.py
@@ -23,7 +23,7 @@ def format_model_suffix(
     """Check and format the suffixes of a filename.
 
     When preferred_backend is not given, this method checks the suffix of the filename
-    is within the suffixes of the any backends (with the given feature) and doesn't do formating.
+    is within the suffixes of the any backends (with the given feature) and doesn't do formatting.
     When preferred_backend is given, strict_prefer must be given.
     If strict_prefer is True and the suffix is not within the suffixes of the preferred backend,
     or strict_prefer is False and the suffix is not within the suffixes of the any backend with the given feature,

diff --git a/deepmd/calculator.py b/deepmd/calculator.py
@@ -32,7 +32,7 @@
 class DP(Calculator):
     """Implementation of ASE deepmd calculator.
 
-    Implemented propertie are `energy`, `forces` and `stress`
+    Implemented properties are `energy`, `forces` and `stress`
 
     Parameters
     ----------
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		FROM mcr.microsoft.com/devcontainers/cpp:1-ubuntu-24.04

		COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/