From d224fdd5a44c8227d09feeee88083c3b122f02a1 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 20 Sep 2024 23:34:45 -0400 Subject: [PATCH 01/39] fix: fix `DPH5Path.glob` for new keys (#4152) Fix #4151. ## Summary by CodeRabbit - **New Features** - Enhanced path filtering logic to include a broader range of keys when generating subpaths. - **Bug Fixes** - Improved the accuracy of path results returned by the `glob` method. Signed-off-by: Jinzhe Zeng --- deepmd/utils/path.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/deepmd/utils/path.py b/deepmd/utils/path.py index 377953cc35..e794a36cab 100644 --- a/deepmd/utils/path.py +++ b/deepmd/utils/path.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import itertools import os from abc import ( ABC, @@ -373,7 +374,11 @@ def glob(self, pattern: str) -> List["DPPath"]: list of paths """ # got paths starts with current path first, which is faster - subpaths = [ii for ii in self._keys if ii.startswith(self._name)] + subpaths = [ + ii + for ii in itertools.chain(self._keys, self._new_keys) + if ii.startswith(self._name) + ] return [ type(self)(f"{self.root_path}#{pp}", mode=self.mode) for pp in globfilter(subpaths, self._connect_path(pattern)) From 532e30952311a64bc830f3db65836c3481b72886 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Sat, 21 Sep 2024 11:59:31 +0800 Subject: [PATCH 02/39] fix(pt): make `state_dict` safe for `weights_only` (#4148) See #4147 and #4143. We can first make `state_dict` safe for `weights_only`, then make a breaking change when loading `state_dict` in the future. ## Summary by CodeRabbit - **New Features** - Enhanced model saving functionality by ensuring learning rates are consistently stored as floats, improving type consistency. - **Bug Fixes** - Updated model loading behavior in tests to focus solely on model weights, which may resolve issues related to state dictionary loading. 
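As a sketch of the constraint this satisfies (the file name and checkpoint keys below are illustrative, not the trainer's actual ones): `torch.load(..., weights_only=True)` uses a restricted unpickler that accepts tensors and builtin Python types, so non-builtin scalars stored alongside the weights, such as a NumPy learning rate, should be cast to `float` before saving.

```python
import numpy as np
import torch

model = torch.nn.Linear(2, 2)
lr = np.float64(1e-3)  # e.g. a NumPy scalar produced by an LR schedule

# Casting to a builtin float keeps the checkpoint loadable under the
# restricted unpickler; a raw np.float64 in the dict can be rejected.
torch.save({"model": model.state_dict(), "lr": float(lr)}, "model.ckpt")
state = torch.load("model.ckpt", map_location="cpu", weights_only=True)
assert isinstance(state["lr"], float)
```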
--- deepmd/pt/train/training.py | 7 +++++-- source/tests/pt/test_change_bias.py | 10 +++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index a7b9e25b4e..c3d603dadd 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -1030,10 +1030,13 @@ def save_model(self, save_path, lr=0.0, step=0): if dist.is_available() and dist.is_initialized() else self.wrapper ) - module.train_infos["lr"] = lr + module.train_infos["lr"] = float(lr) module.train_infos["step"] = step + optim_state_dict = deepcopy(self.optimizer.state_dict()) + for item in optim_state_dict["param_groups"]: + item["lr"] = float(item["lr"]) torch.save( - {"model": module.state_dict(), "optimizer": self.optimizer.state_dict()}, + {"model": module.state_dict(), "optimizer": optim_state_dict}, save_path, ) checkpoint_dir = save_path.parent diff --git a/source/tests/pt/test_change_bias.py b/source/tests/pt/test_change_bias.py index f76be40b3f..febc439f50 100644 --- a/source/tests/pt/test_change_bias.py +++ b/source/tests/pt/test_change_bias.py @@ -92,7 +92,9 @@ def test_change_bias_with_data(self): run_dp( f"dp --pt change-bias {self.model_path!s} -s {self.data_file[0]} -o {self.model_path_data_bias!s}" ) - state_dict = torch.load(str(self.model_path_data_bias), map_location=DEVICE) + state_dict = torch.load( + str(self.model_path_data_bias), map_location=DEVICE, weights_only=True + ) model_params = state_dict["model"]["_extra_state"]["model_params"] model_for_wrapper = get_model_for_wrapper(model_params) wrapper = ModelWrapper(model_for_wrapper) @@ -114,7 +116,7 @@ def test_change_bias_with_data_sys_file(self): f"dp --pt change-bias {self.model_path!s} -f {tmp_file.name} -o {self.model_path_data_file_bias!s}" ) state_dict = torch.load( - str(self.model_path_data_file_bias), map_location=DEVICE + str(self.model_path_data_file_bias), map_location=DEVICE, weights_only=True ) model_params = state_dict["model"]["_extra_state"]["model_params"] model_for_wrapper = get_model_for_wrapper(model_params) @@ -134,7 +136,9 @@ def test_change_bias_with_user_defined(self): run_dp( f"dp --pt change-bias {self.model_path!s} -b {' '.join([str(_) for _ in user_bias])} -o {self.model_path_user_bias!s}" ) - state_dict = torch.load(str(self.model_path_user_bias), map_location=DEVICE) + state_dict = torch.load( + str(self.model_path_user_bias), map_location=DEVICE, weights_only=True + ) model_params = state_dict["model"]["_extra_state"]["model_params"] model_for_wrapper = get_model_for_wrapper(model_params) wrapper = ModelWrapper(model_for_wrapper) From 6010c7305c551f78c7e9b6ab55984b699578a920 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 21 Sep 2024 10:44:01 -0400 Subject: [PATCH 03/39] chore(pt): move `deepmd.pt.infer.deep_eval.eval_model` to tests (#4153) Per discussion in https://github.com/deepmodeling/deepmd-kit/pull/4142#issuecomment-2359848991. It should not be a public API as it lacks maintainance. ## Summary by CodeRabbit - **New Features** - Introduced a new `eval_model` function in the testing module to enhance model evaluation capabilities with various input configurations. - **Bug Fixes** - Removed the old `eval_model` function from the main module to streamline functionality and improve code organization. - **Refactor** - Consolidated the import of `eval_model` to a common module across multiple test files for better organization and reduced dependencies. 
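The mechanical change in each affected test module is the import swap sketched below (shown generically; the concrete files appear in the diffstat that follows):

```python
# Before: importing a test-only helper from the public package made it
# look like a supported API.
# from deepmd.pt.infer.deep_eval import eval_model

# After: the helper lives beside the tests under source/tests/pt/ and is
# imported relatively.
from ..common import eval_model
```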
--------- Signed-off-by: Jinzhe Zeng Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> --- deepmd/pt/infer/deep_eval.py | 227 ----------------- source/tests/pt/common.py | 239 ++++++++++++++++++ source/tests/pt/model/test_autodiff.py | 4 +- source/tests/pt/model/test_forward_lower.py | 6 +- source/tests/pt/model/test_null_input.py | 6 +- source/tests/pt/model/test_permutation.py | 6 +- .../pt/model/test_permutation_denoise.py | 6 +- source/tests/pt/model/test_rot.py | 6 +- source/tests/pt/model/test_rot_denoise.py | 6 +- source/tests/pt/model/test_smooth.py | 6 +- source/tests/pt/model/test_smooth_denoise.py | 6 +- source/tests/pt/model/test_trans.py | 6 +- source/tests/pt/model/test_trans_denoise.py | 6 +- source/tests/pt/model/test_unused_params.py | 6 +- 14 files changed, 275 insertions(+), 261 deletions(-) diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py index 353109d650..d5eae71731 100644 --- a/deepmd/pt/infer/deep_eval.py +++ b/deepmd/pt/infer/deep_eval.py @@ -602,230 +602,3 @@ def eval_typeebd(self) -> np.ndarray: def get_model_def_script(self) -> str: """Get model defination script.""" return self.model_def_script - - -# For tests only -def eval_model( - model, - coords: Union[np.ndarray, torch.Tensor], - cells: Optional[Union[np.ndarray, torch.Tensor]], - atom_types: Union[np.ndarray, torch.Tensor, List[int]], - spins: Optional[Union[np.ndarray, torch.Tensor]] = None, - atomic: bool = False, - infer_batch_size: int = 2, - denoise: bool = False, -): - model = model.to(DEVICE) - energy_out = [] - atomic_energy_out = [] - force_out = [] - force_mag_out = [] - virial_out = [] - atomic_virial_out = [] - updated_coord_out = [] - logits_out = [] - err_msg = ( - f"All inputs should be the same format, " - f"but found {type(coords)}, {type(cells)}, {type(atom_types)} instead! 
" - ) - return_tensor = True - if isinstance(coords, torch.Tensor): - if cells is not None: - assert isinstance(cells, torch.Tensor), err_msg - if spins is not None: - assert isinstance(spins, torch.Tensor), err_msg - assert isinstance(atom_types, torch.Tensor) or isinstance(atom_types, list) - atom_types = torch.tensor(atom_types, dtype=torch.long, device=DEVICE) - elif isinstance(coords, np.ndarray): - if cells is not None: - assert isinstance(cells, np.ndarray), err_msg - if spins is not None: - assert isinstance(spins, np.ndarray), err_msg - assert isinstance(atom_types, np.ndarray) or isinstance(atom_types, list) - atom_types = np.array(atom_types, dtype=np.int32) - return_tensor = False - - nframes = coords.shape[0] - if len(atom_types.shape) == 1: - natoms = len(atom_types) - if isinstance(atom_types, torch.Tensor): - atom_types = torch.tile(atom_types.unsqueeze(0), [nframes, 1]).reshape( - nframes, -1 - ) - else: - atom_types = np.tile(atom_types, nframes).reshape(nframes, -1) - else: - natoms = len(atom_types[0]) - - coord_input = torch.tensor( - coords.reshape([-1, natoms, 3]), dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE - ) - spin_input = None - if spins is not None: - spin_input = torch.tensor( - spins.reshape([-1, natoms, 3]), - dtype=GLOBAL_PT_FLOAT_PRECISION, - device=DEVICE, - ) - has_spin = getattr(model, "has_spin", False) - if callable(has_spin): - has_spin = has_spin() - type_input = torch.tensor(atom_types, dtype=torch.long, device=DEVICE) - box_input = None - if cells is None: - pbc = False - else: - pbc = True - box_input = torch.tensor( - cells.reshape([-1, 3, 3]), dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE - ) - num_iter = int((nframes + infer_batch_size - 1) / infer_batch_size) - - for ii in range(num_iter): - batch_coord = coord_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] - batch_atype = type_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] - batch_box = None - batch_spin = None - if spin_input is not None: - batch_spin = spin_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] - if pbc: - batch_box = box_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] - input_dict = { - "coord": batch_coord, - "atype": batch_atype, - "box": batch_box, - "do_atomic_virial": atomic, - } - if has_spin: - input_dict["spin"] = batch_spin - batch_output = model(**input_dict) - if isinstance(batch_output, tuple): - batch_output = batch_output[0] - if not return_tensor: - if "energy" in batch_output: - energy_out.append(batch_output["energy"].detach().cpu().numpy()) - if "atom_energy" in batch_output: - atomic_energy_out.append( - batch_output["atom_energy"].detach().cpu().numpy() - ) - if "force" in batch_output: - force_out.append(batch_output["force"].detach().cpu().numpy()) - if "force_mag" in batch_output: - force_mag_out.append(batch_output["force_mag"].detach().cpu().numpy()) - if "virial" in batch_output: - virial_out.append(batch_output["virial"].detach().cpu().numpy()) - if "atom_virial" in batch_output: - atomic_virial_out.append( - batch_output["atom_virial"].detach().cpu().numpy() - ) - if "updated_coord" in batch_output: - updated_coord_out.append( - batch_output["updated_coord"].detach().cpu().numpy() - ) - if "logits" in batch_output: - logits_out.append(batch_output["logits"].detach().cpu().numpy()) - else: - if "energy" in batch_output: - energy_out.append(batch_output["energy"]) - if "atom_energy" in batch_output: - atomic_energy_out.append(batch_output["atom_energy"]) - if "force" in batch_output: - 
force_out.append(batch_output["force"]) - if "force_mag" in batch_output: - force_mag_out.append(batch_output["force_mag"]) - if "virial" in batch_output: - virial_out.append(batch_output["virial"]) - if "atom_virial" in batch_output: - atomic_virial_out.append(batch_output["atom_virial"]) - if "updated_coord" in batch_output: - updated_coord_out.append(batch_output["updated_coord"]) - if "logits" in batch_output: - logits_out.append(batch_output["logits"]) - if not return_tensor: - energy_out = ( - np.concatenate(energy_out) if energy_out else np.zeros([nframes, 1]) # pylint: disable=no-explicit-dtype - ) - atomic_energy_out = ( - np.concatenate(atomic_energy_out) - if atomic_energy_out - else np.zeros([nframes, natoms, 1]) # pylint: disable=no-explicit-dtype - ) - force_out = ( - np.concatenate(force_out) if force_out else np.zeros([nframes, natoms, 3]) # pylint: disable=no-explicit-dtype - ) - force_mag_out = ( - np.concatenate(force_mag_out) - if force_mag_out - else np.zeros([nframes, natoms, 3]) # pylint: disable=no-explicit-dtype - ) - virial_out = ( - np.concatenate(virial_out) if virial_out else np.zeros([nframes, 3, 3]) # pylint: disable=no-explicit-dtype - ) - atomic_virial_out = ( - np.concatenate(atomic_virial_out) - if atomic_virial_out - else np.zeros([nframes, natoms, 3, 3]) # pylint: disable=no-explicit-dtype - ) - updated_coord_out = ( - np.concatenate(updated_coord_out) if updated_coord_out else None - ) - logits_out = np.concatenate(logits_out) if logits_out else None - else: - energy_out = ( - torch.cat(energy_out) - if energy_out - else torch.zeros( - [nframes, 1], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE - ) - ) - atomic_energy_out = ( - torch.cat(atomic_energy_out) - if atomic_energy_out - else torch.zeros( - [nframes, natoms, 1], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE - ) - ) - force_out = ( - torch.cat(force_out) - if force_out - else torch.zeros( - [nframes, natoms, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE - ) - ) - force_mag_out = ( - torch.cat(force_mag_out) - if force_mag_out - else torch.zeros( - [nframes, natoms, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE - ) - ) - virial_out = ( - torch.cat(virial_out) - if virial_out - else torch.zeros( - [nframes, 3, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE - ) - ) - atomic_virial_out = ( - torch.cat(atomic_virial_out) - if atomic_virial_out - else torch.zeros( - [nframes, natoms, 3, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE - ) - ) - updated_coord_out = torch.cat(updated_coord_out) if updated_coord_out else None - logits_out = torch.cat(logits_out) if logits_out else None - if denoise: - return updated_coord_out, logits_out - else: - results_dict = { - "energy": energy_out, - "force": force_out, - "virial": virial_out, - } - if has_spin: - results_dict["force_mag"] = force_mag_out - if atomic: - results_dict["atom_energy"] = atomic_energy_out - results_dict["atom_virial"] = atomic_virial_out - return results_dict diff --git a/source/tests/pt/common.py b/source/tests/pt/common.py index 8886522360..16b343be8a 100644 --- a/source/tests/pt/common.py +++ b/source/tests/pt/common.py @@ -1,7 +1,20 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, + Optional, + Union, +) + +import numpy as np +import torch + from deepmd.main import ( main, ) +from deepmd.pt.utils.env import ( + DEVICE, + GLOBAL_PT_FLOAT_PRECISION, +) def run_dp(cmd: str) -> int: @@ -27,3 +40,229 @@ def run_dp(cmd: str) -> int: main(cmds) return 0 + + +def eval_model( + model, + 
coords: Union[np.ndarray, torch.Tensor], + cells: Optional[Union[np.ndarray, torch.Tensor]], + atom_types: Union[np.ndarray, torch.Tensor, List[int]], + spins: Optional[Union[np.ndarray, torch.Tensor]] = None, + atomic: bool = False, + infer_batch_size: int = 2, + denoise: bool = False, +): + model = model.to(DEVICE) + energy_out = [] + atomic_energy_out = [] + force_out = [] + force_mag_out = [] + virial_out = [] + atomic_virial_out = [] + updated_coord_out = [] + logits_out = [] + err_msg = ( + f"All inputs should be the same format, " + f"but found {type(coords)}, {type(cells)}, {type(atom_types)} instead! " + ) + return_tensor = True + if isinstance(coords, torch.Tensor): + if cells is not None: + assert isinstance(cells, torch.Tensor), err_msg + if spins is not None: + assert isinstance(spins, torch.Tensor), err_msg + assert isinstance(atom_types, torch.Tensor) or isinstance(atom_types, list) + atom_types = torch.tensor(atom_types, dtype=torch.int32, device=DEVICE) + elif isinstance(coords, np.ndarray): + if cells is not None: + assert isinstance(cells, np.ndarray), err_msg + if spins is not None: + assert isinstance(spins, np.ndarray), err_msg + assert isinstance(atom_types, np.ndarray) or isinstance(atom_types, list) + atom_types = np.array(atom_types, dtype=np.int32) + return_tensor = False + + nframes = coords.shape[0] + if len(atom_types.shape) == 1: + natoms = len(atom_types) + if isinstance(atom_types, torch.Tensor): + atom_types = torch.tile(atom_types.unsqueeze(0), [nframes, 1]).reshape( + nframes, -1 + ) + else: + atom_types = np.tile(atom_types, nframes).reshape(nframes, -1) + else: + natoms = len(atom_types[0]) + + coord_input = torch.tensor( + coords.reshape([-1, natoms, 3]), dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + spin_input = None + if spins is not None: + spin_input = torch.tensor( + spins.reshape([-1, natoms, 3]), + dtype=GLOBAL_PT_FLOAT_PRECISION, + device=DEVICE, + ) + has_spin = getattr(model, "has_spin", False) + if callable(has_spin): + has_spin = has_spin() + type_input = torch.tensor(atom_types, dtype=torch.long, device=DEVICE) + box_input = None + if cells is None: + pbc = False + else: + pbc = True + box_input = torch.tensor( + cells.reshape([-1, 3, 3]), dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + num_iter = int((nframes + infer_batch_size - 1) / infer_batch_size) + + for ii in range(num_iter): + batch_coord = coord_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] + batch_atype = type_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] + batch_box = None + batch_spin = None + if spin_input is not None: + batch_spin = spin_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] + if pbc: + batch_box = box_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] + input_dict = { + "coord": batch_coord, + "atype": batch_atype, + "box": batch_box, + "do_atomic_virial": atomic, + } + if has_spin: + input_dict["spin"] = batch_spin + batch_output = model(**input_dict) + if isinstance(batch_output, tuple): + batch_output = batch_output[0] + if not return_tensor: + if "energy" in batch_output: + energy_out.append(batch_output["energy"].detach().cpu().numpy()) + if "atom_energy" in batch_output: + atomic_energy_out.append( + batch_output["atom_energy"].detach().cpu().numpy() + ) + if "force" in batch_output: + force_out.append(batch_output["force"].detach().cpu().numpy()) + if "force_mag" in batch_output: + force_mag_out.append(batch_output["force_mag"].detach().cpu().numpy()) + if "virial" in batch_output: + 
virial_out.append(batch_output["virial"].detach().cpu().numpy()) + if "atom_virial" in batch_output: + atomic_virial_out.append( + batch_output["atom_virial"].detach().cpu().numpy() + ) + if "updated_coord" in batch_output: + updated_coord_out.append( + batch_output["updated_coord"].detach().cpu().numpy() + ) + if "logits" in batch_output: + logits_out.append(batch_output["logits"].detach().cpu().numpy()) + else: + if "energy" in batch_output: + energy_out.append(batch_output["energy"]) + if "atom_energy" in batch_output: + atomic_energy_out.append(batch_output["atom_energy"]) + if "force" in batch_output: + force_out.append(batch_output["force"]) + if "force_mag" in batch_output: + force_mag_out.append(batch_output["force_mag"]) + if "virial" in batch_output: + virial_out.append(batch_output["virial"]) + if "atom_virial" in batch_output: + atomic_virial_out.append(batch_output["atom_virial"]) + if "updated_coord" in batch_output: + updated_coord_out.append(batch_output["updated_coord"]) + if "logits" in batch_output: + logits_out.append(batch_output["logits"]) + if not return_tensor: + energy_out = ( + np.concatenate(energy_out) if energy_out else np.zeros([nframes, 1]) # pylint: disable=no-explicit-dtype + ) + atomic_energy_out = ( + np.concatenate(atomic_energy_out) + if atomic_energy_out + else np.zeros([nframes, natoms, 1]) # pylint: disable=no-explicit-dtype + ) + force_out = ( + np.concatenate(force_out) if force_out else np.zeros([nframes, natoms, 3]) # pylint: disable=no-explicit-dtype + ) + force_mag_out = ( + np.concatenate(force_mag_out) + if force_mag_out + else np.zeros([nframes, natoms, 3]) # pylint: disable=no-explicit-dtype + ) + virial_out = ( + np.concatenate(virial_out) if virial_out else np.zeros([nframes, 3, 3]) # pylint: disable=no-explicit-dtype + ) + atomic_virial_out = ( + np.concatenate(atomic_virial_out) + if atomic_virial_out + else np.zeros([nframes, natoms, 3, 3]) # pylint: disable=no-explicit-dtype + ) + updated_coord_out = ( + np.concatenate(updated_coord_out) if updated_coord_out else None + ) + logits_out = np.concatenate(logits_out) if logits_out else None + else: + energy_out = ( + torch.cat(energy_out) + if energy_out + else torch.zeros( + [nframes, 1], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + ) + atomic_energy_out = ( + torch.cat(atomic_energy_out) + if atomic_energy_out + else torch.zeros( + [nframes, natoms, 1], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + ) + force_out = ( + torch.cat(force_out) + if force_out + else torch.zeros( + [nframes, natoms, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + ) + force_mag_out = ( + torch.cat(force_mag_out) + if force_mag_out + else torch.zeros( + [nframes, natoms, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + ) + virial_out = ( + torch.cat(virial_out) + if virial_out + else torch.zeros( + [nframes, 3, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + ) + atomic_virial_out = ( + torch.cat(atomic_virial_out) + if atomic_virial_out + else torch.zeros( + [nframes, natoms, 3, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + ) + updated_coord_out = torch.cat(updated_coord_out) if updated_coord_out else None + logits_out = torch.cat(logits_out) if logits_out else None + if denoise: + return updated_coord_out, logits_out + else: + results_dict = { + "energy": energy_out, + "force": force_out, + "virial": virial_out, + } + if has_spin: + results_dict["force_mag"] = force_mag_out + if atomic: + results_dict["atom_energy"] = atomic_energy_out + 
results_dict["atom_virial"] = atomic_virial_out + return results_dict diff --git a/source/tests/pt/model/test_autodiff.py b/source/tests/pt/model/test_autodiff.py index d891583491..1adcff55fc 100644 --- a/source/tests/pt/model/test_autodiff.py +++ b/source/tests/pt/model/test_autodiff.py @@ -21,8 +21,10 @@ dtype = torch.float64 -from .test_permutation import ( +from ..common import ( eval_model, +) +from .test_permutation import ( model_dpa1, model_dpa2, model_hybrid, diff --git a/source/tests/pt/model/test_forward_lower.py b/source/tests/pt/model/test_forward_lower.py index c9857a6343..87a3f5b06e 100644 --- a/source/tests/pt/model/test_forward_lower.py +++ b/source/tests/pt/model/test_forward_lower.py @@ -4,9 +4,6 @@ import torch -from deepmd.pt.infer.deep_eval import ( - eval_model, -) from deepmd.pt.model.model import ( get_model, ) @@ -20,6 +17,9 @@ from ...seed import ( GLOBAL_SEED, ) +from ..common import ( + eval_model, +) from .test_permutation import ( # model_dpau, model_dpa1, model_dpa2, diff --git a/source/tests/pt/model/test_null_input.py b/source/tests/pt/model/test_null_input.py index 1dca7ee119..a2e0fa66db 100644 --- a/source/tests/pt/model/test_null_input.py +++ b/source/tests/pt/model/test_null_input.py @@ -5,9 +5,6 @@ import numpy as np import torch -from deepmd.pt.infer.deep_eval import ( - eval_model, -) from deepmd.pt.model.model import ( get_model, get_zbl_model, @@ -22,6 +19,9 @@ from ...seed import ( GLOBAL_SEED, ) +from ..common import ( + eval_model, +) from .test_permutation import ( model_dpa1, model_dpa2, diff --git a/source/tests/pt/model/test_permutation.py b/source/tests/pt/model/test_permutation.py index f5edc6ef64..2fbc5fde3c 100644 --- a/source/tests/pt/model/test_permutation.py +++ b/source/tests/pt/model/test_permutation.py @@ -5,9 +5,6 @@ import torch -from deepmd.pt.infer.deep_eval import ( - eval_model, -) from deepmd.pt.model.model import ( get_model, ) @@ -18,6 +15,9 @@ from ...seed import ( GLOBAL_SEED, ) +from ..common import ( + eval_model, +) CUR_DIR = os.path.dirname(__file__) diff --git a/source/tests/pt/model/test_permutation_denoise.py b/source/tests/pt/model/test_permutation_denoise.py index 133c48f551..53bf55fb0f 100644 --- a/source/tests/pt/model/test_permutation_denoise.py +++ b/source/tests/pt/model/test_permutation_denoise.py @@ -4,9 +4,6 @@ import torch -from deepmd.pt.infer.deep_eval import ( - eval_model, -) from deepmd.pt.model.model import ( get_model, ) @@ -17,6 +14,9 @@ from ...seed import ( GLOBAL_SEED, ) +from ..common import ( + eval_model, +) from .test_permutation import ( # model_dpau, model_dpa1, model_dpa2, diff --git a/source/tests/pt/model/test_rot.py b/source/tests/pt/model/test_rot.py index 23bdede923..ca6a6375c8 100644 --- a/source/tests/pt/model/test_rot.py +++ b/source/tests/pt/model/test_rot.py @@ -4,9 +4,6 @@ import torch -from deepmd.pt.infer.deep_eval import ( - eval_model, -) from deepmd.pt.model.model import ( get_model, ) @@ -17,6 +14,9 @@ from ...seed import ( GLOBAL_SEED, ) +from ..common import ( + eval_model, +) from .test_permutation import ( # model_dpau, model_dos, model_dpa1, diff --git a/source/tests/pt/model/test_rot_denoise.py b/source/tests/pt/model/test_rot_denoise.py index 5fe99a0d7a..9828ba5225 100644 --- a/source/tests/pt/model/test_rot_denoise.py +++ b/source/tests/pt/model/test_rot_denoise.py @@ -4,9 +4,6 @@ import torch -from deepmd.pt.infer.deep_eval import ( - eval_model, -) from deepmd.pt.model.model import ( get_model, ) @@ -17,6 +14,9 @@ from ...seed import ( GLOBAL_SEED, ) +from 
..common import ( + eval_model, +) from .test_permutation_denoise import ( model_dpa1, model_dpa2, diff --git a/source/tests/pt/model/test_smooth.py b/source/tests/pt/model/test_smooth.py index c33dddfab5..9a7040f9cc 100644 --- a/source/tests/pt/model/test_smooth.py +++ b/source/tests/pt/model/test_smooth.py @@ -4,9 +4,6 @@ import torch -from deepmd.pt.infer.deep_eval import ( - eval_model, -) from deepmd.pt.model.model import ( get_model, ) @@ -17,6 +14,9 @@ from ...seed import ( GLOBAL_SEED, ) +from ..common import ( + eval_model, +) from .test_permutation import ( # model_dpau, model_dos, model_dpa1, diff --git a/source/tests/pt/model/test_smooth_denoise.py b/source/tests/pt/model/test_smooth_denoise.py index 069c578d52..faa892c5d0 100644 --- a/source/tests/pt/model/test_smooth_denoise.py +++ b/source/tests/pt/model/test_smooth_denoise.py @@ -4,9 +4,6 @@ import torch -from deepmd.pt.infer.deep_eval import ( - eval_model, -) from deepmd.pt.model.model import ( get_model, ) @@ -17,6 +14,9 @@ from ...seed import ( GLOBAL_SEED, ) +from ..common import ( + eval_model, +) from .test_permutation_denoise import ( model_dpa2, ) diff --git a/source/tests/pt/model/test_trans.py b/source/tests/pt/model/test_trans.py index afd70f8995..b62fac1312 100644 --- a/source/tests/pt/model/test_trans.py +++ b/source/tests/pt/model/test_trans.py @@ -4,9 +4,6 @@ import torch -from deepmd.pt.infer.deep_eval import ( - eval_model, -) from deepmd.pt.model.model import ( get_model, ) @@ -17,6 +14,9 @@ from ...seed import ( GLOBAL_SEED, ) +from ..common import ( + eval_model, +) from .test_permutation import ( # model_dpau, model_dos, model_dpa1, diff --git a/source/tests/pt/model/test_trans_denoise.py b/source/tests/pt/model/test_trans_denoise.py index 2d31d5de50..84ec21929c 100644 --- a/source/tests/pt/model/test_trans_denoise.py +++ b/source/tests/pt/model/test_trans_denoise.py @@ -4,9 +4,6 @@ import torch -from deepmd.pt.infer.deep_eval import ( - eval_model, -) from deepmd.pt.model.model import ( get_model, ) @@ -17,6 +14,9 @@ from ...seed import ( GLOBAL_SEED, ) +from ..common import ( + eval_model, +) from .test_permutation_denoise import ( model_dpa1, model_dpa2, diff --git a/source/tests/pt/model/test_unused_params.py b/source/tests/pt/model/test_unused_params.py index e225719e7f..3f068d5e5b 100644 --- a/source/tests/pt/model/test_unused_params.py +++ b/source/tests/pt/model/test_unused_params.py @@ -4,9 +4,6 @@ import torch -from deepmd.pt.infer.deep_eval import ( - eval_model, -) from deepmd.pt.model.model import ( get_model, ) @@ -17,6 +14,9 @@ from ...seed import ( GLOBAL_SEED, ) +from ..common import ( + eval_model, +) from .test_permutation import ( model_dpa2, ) From f5cfeab050b818a87a1ff51363d439c3909a7bea Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 22 Sep 2024 21:23:22 -0400 Subject: [PATCH 04/39] fix(pt): fix `compute_output_stats_global` when `atomic_output` is `None` (#4155) ## Summary by CodeRabbit - **Bug Fixes** - Improved error handling by ensuring that the output data is not `None` before processing, preventing potential runtime errors. 
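The failure mode is a plain attribute access on an optional argument; a minimal, self-contained sketch of the guard (the output-definition object here is a stand-in for the real one):

```python
from typing import Any, Optional


def is_intensive(atomic_output: Optional[Any], kk: str) -> bool:
    # `is not None` short-circuits the `and`, so `.get_data()` is never
    # reached when no atomic output definition was passed in.
    return atomic_output is not None and atomic_output.get_data()[kk].intensive


print(is_intensive(None, "energy"))  # False, instead of an AttributeError
```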
--- deepmd/pt/utils/stat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 6de70eb175..58e02f436d 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -478,7 +478,7 @@ def compute_output_stats_global( std_atom_e = {} for kk in keys: if kk in stats_input: - if atomic_output.get_data()[kk].intensive: + if atomic_output is not None and atomic_output.get_data()[kk].intensive: task_dim = stats_input[kk].shape[1] assert merged_natoms[kk].shape == (nf[kk], ntypes) stats_input[kk] = ( From 0b72dae39d269963740b27af0d163510d269de4b Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 23 Sep 2024 03:26:25 -0400 Subject: [PATCH 05/39] feat(jax): support neural networks (#4156) ## Summary by CodeRabbit ## Release Notes - **New Features** - Introduced JAX support, enhancing functionality and compatibility with JAX library. - Added new `JAXBackend` class for backend integration with JAX. - New functions for converting between NumPy and JAX arrays. - **Bug Fixes** - Improved compatibility of neural network layers with array API standards. - **Tests** - Added tests for JAX functionality and consistency checks against reference outputs. - Enhanced testing framework for activation functions and type embeddings. - **Chores** - Updated dependency requirements to include JAX library. --------- Signed-off-by: Jinzhe Zeng --- .github/workflows/test_cuda.yml | 2 +- .github/workflows/test_python.yml | 2 +- deepmd/backend/jax.py | 110 ++++++++++++++++++ deepmd/dpmodel/common.py | 22 ++++ deepmd/dpmodel/utils/network.py | 50 ++++++-- deepmd/dpmodel/utils/type_embed.py | 14 ++- deepmd/jax/__init__.py | 2 + deepmd/jax/common.py | 37 ++++++ deepmd/jax/env.py | 14 +++ deepmd/jax/utils/__init__.py | 1 + deepmd/jax/utils/network.py | 29 +++++ deepmd/jax/utils/type_embed.py | 21 ++++ pyproject.toml | 3 + .../array_api/test_activation_functions.py | 1 + source/tests/consistent/common.py | 59 ++++++++++ source/tests/consistent/test_activation.py | 26 +++++ .../tests/consistent/test_type_embedding.py | 18 +++ 17 files changed, 393 insertions(+), 18 deletions(-) create mode 100644 deepmd/backend/jax.py create mode 100644 deepmd/jax/__init__.py create mode 100644 deepmd/jax/common.py create mode 100644 deepmd/jax/env.py create mode 100644 deepmd/jax/utils/__init__.py create mode 100644 deepmd/jax/utils/network.py create mode 100644 deepmd/jax/utils/type_embed.py create mode 100644 source/tests/common/dpmodel/array_api/test_activation_functions.py diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index 2883f01b5a..d60a9c909a 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -51,7 +51,7 @@ jobs: - run: | export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])') export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') - source/install/uv_with_retry.sh pip install --system -v -e .[gpu,test,lmp,cu12,torch] mpi4py + source/install/uv_with_retry.sh pip install --system -v -e .[gpu,test,lmp,cu12,torch,jax] mpi4py env: DP_VARIANT: cuda DP_ENABLE_NATIVE_OPTIMIZATION: 1 diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index 36f9bd78b8..8274921909 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -28,7 +28,7 @@ jobs: source/install/uv_with_retry.sh pip install --system mpich source/install/uv_with_retry.sh pip install 
--system "torch==2.3.0+cpu.cxx11.abi" -i https://download.pytorch.org/whl/ export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])') - source/install/uv_with_retry.sh pip install --system --only-binary=horovod -e .[cpu,test] horovod[tensorflow-cpu] mpi4py + source/install/uv_with_retry.sh pip install --system --only-binary=horovod -e .[cpu,test,jax] horovod[tensorflow-cpu] mpi4py env: # Please note that uv has some issues with finding # existing TensorFlow package. Currently, it uses diff --git a/deepmd/backend/jax.py b/deepmd/backend/jax.py new file mode 100644 index 0000000000..ece0761772 --- /dev/null +++ b/deepmd/backend/jax.py @@ -0,0 +1,110 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from importlib.util import ( + find_spec, +) +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, + List, + Type, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("jax") +class JAXBackend(Backend): + """JAX backend.""" + + name = "JAX" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature(0) + # Backend.Feature.ENTRY_POINT + # | Backend.Feature.DEEP_EVAL + # | Backend.Feature.NEIGHBOR_STAT + # | Backend.Feature.IO + ) + """The features of the backend.""" + suffixes: ClassVar[List[str]] = [] + """The suffixes of the backend.""" + + def is_available(self) -> bool: + """Check if the backend is available. + + Returns + ------- + bool + Whether the backend is available. + """ + return find_spec("jax") is not None + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + raise NotImplementedError + + @property + def deep_eval(self) -> Type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + raise NotImplementedError + + @property + def neighbor_stat(self) -> Type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + raise NotImplementedError + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + raise NotImplementedError + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. + """ + raise NotImplementedError diff --git a/deepmd/dpmodel/common.py b/deepmd/dpmodel/common.py index 56cb8ec1e9..d9d57d2d6c 100644 --- a/deepmd/dpmodel/common.py +++ b/deepmd/dpmodel/common.py @@ -3,6 +3,10 @@ ABC, abstractmethod, ) +from typing import ( + Any, + Optional, +) import ml_dtypes import numpy as np @@ -59,6 +63,24 @@ def __call__(self, *args, **kwargs): return self.call(*args, **kwargs) +def to_numpy_array(x: Any) -> Optional[np.ndarray]: + """Convert an array to a NumPy array. + + Parameters + ---------- + x : Any + The array to be converted. + + Returns + ------- + Optional[np.ndarray] + The NumPy array. 
+ """ + if x is None: + return None + return np.asarray(x) + + __all__ = [ "GLOBAL_NP_FLOAT_PRECISION", "GLOBAL_ENER_FLOAT_PRECISION", diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py index 941e2cfc86..22e85c9890 100644 --- a/deepmd/dpmodel/utils/network.py +++ b/deepmd/dpmodel/utils/network.py @@ -15,6 +15,7 @@ Union, ) +import array_api_compat import numpy as np from deepmd.dpmodel import ( @@ -22,6 +23,12 @@ PRECISION_DICT, NativeOP, ) +from deepmd.dpmodel.array_api import ( + support_array_api, +) +from deepmd.dpmodel.common import ( + to_numpy_array, +) from deepmd.dpmodel.utils.seed import ( child_seed, ) @@ -105,9 +112,9 @@ def serialize(self) -> dict: The serialized layer. """ data = { - "w": self.w, - "b": self.b, - "idt": self.idt, + "w": to_numpy_array(self.w), + "b": to_numpy_array(self.b), + "idt": to_numpy_array(self.idt), } return { "@class": "Layer", @@ -215,6 +222,7 @@ def dim_in(self) -> int: def dim_out(self) -> int: return self.w.shape[1] + @support_array_api(version="2022.12") def call(self, x: np.ndarray) -> np.ndarray: """Forward pass. @@ -230,11 +238,12 @@ def call(self, x: np.ndarray) -> np.ndarray: """ if self.w is None or self.activation_function is None: raise ValueError("w, b, and activation_function must be set") + xp = array_api_compat.array_namespace(x) fn = get_activation_fn(self.activation_function) y = ( - np.matmul(x, self.w) + self.b + xp.matmul(x, self.w) + self.b if self.b is not None - else np.matmul(x, self.w) + else xp.matmul(x, self.w) ) y = fn(y) if self.idt is not None: @@ -242,47 +251,64 @@ def call(self, x: np.ndarray) -> np.ndarray: if self.resnet and self.w.shape[1] == self.w.shape[0]: y += x elif self.resnet and self.w.shape[1] == 2 * self.w.shape[0]: - y += np.concatenate([x, x], axis=-1) + y += xp.concatenate([x, x], axis=-1) return y +@support_array_api(version="2022.12") def get_activation_fn(activation_function: str) -> Callable[[np.ndarray], np.ndarray]: activation_function = activation_function.lower() if activation_function == "tanh": - return np.tanh + + def fn(x): + xp = array_api_compat.array_namespace(x) + return xp.tanh(x) + + return fn elif activation_function == "relu": def fn(x): + xp = array_api_compat.array_namespace(x) # https://stackoverflow.com/a/47936476/9567349 - return x * (x > 0) + return x * xp.astype(x > 0, x.dtype) return fn elif activation_function in ("gelu", "gelu_tf"): def fn(x): + xp = array_api_compat.array_namespace(x) # generated by GitHub Copilot - return 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3))) + return ( + 0.5 + * x + * (1 + xp.tanh(xp.sqrt(xp.asarray(2 / xp.pi)) * (x + 0.044715 * x**3))) + ) return fn elif activation_function == "relu6": def fn(x): + xp = array_api_compat.array_namespace(x) # generated by GitHub Copilot - return np.minimum(np.maximum(x, 0), 6) + return xp.where( + x < 0, xp.full_like(x, 0), xp.where(x > 6, xp.full_like(x, 6), x) + ) return fn elif activation_function == "softplus": def fn(x): + xp = array_api_compat.array_namespace(x) # generated by GitHub Copilot - return np.log(1 + np.exp(x)) + return xp.log(1 + xp.exp(x)) return fn elif activation_function == "sigmoid": def fn(x): + xp = array_api_compat.array_namespace(x) # generated by GitHub Copilot - return 1 / (1 + np.exp(-x)) + return 1 / (1 + xp.exp(-x)) return fn elif activation_function.lower() in ("none", "linear"): diff --git a/deepmd/dpmodel/utils/type_embed.py b/deepmd/dpmodel/utils/type_embed.py index 2e695171d6..e11c415cfd 100644 --- 
a/deepmd/dpmodel/utils/type_embed.py +++ b/deepmd/dpmodel/utils/type_embed.py @@ -5,8 +5,12 @@ Union, ) +import array_api_compat import numpy as np +from deepmd.dpmodel.array_api import ( + support_array_api, +) from deepmd.dpmodel.common import ( PRECISION_DICT, NativeOP, @@ -92,16 +96,18 @@ def __init__( bias=self.use_tebd_bias, ) + @support_array_api(version="2022.12") def call(self) -> np.ndarray: """Compute the type embedding network.""" + sample_array = self.embedding_net[0]["w"] + xp = array_api_compat.array_namespace(sample_array) if not self.use_econf_tebd: - embed = self.embedding_net( - np.eye(self.ntypes, dtype=PRECISION_DICT[self.precision]) - ) + embed = self.embedding_net(xp.eye(self.ntypes, dtype=sample_array.dtype)) else: embed = self.embedding_net(self.econf_tebd) if self.padding: - embed = np.pad(embed, ((0, 1), (0, 0)), mode="constant") + embed_pad = xp.zeros((1, embed.shape[-1]), dtype=embed.dtype) + embed = xp.concatenate([embed, embed_pad], axis=0) return embed @classmethod diff --git a/deepmd/jax/__init__.py b/deepmd/jax/__init__.py new file mode 100644 index 0000000000..2ff078e797 --- /dev/null +++ b/deepmd/jax/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""JAX backend.""" diff --git a/deepmd/jax/common.py b/deepmd/jax/common.py new file mode 100644 index 0000000000..550b168b29 --- /dev/null +++ b/deepmd/jax/common.py @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Union, + overload, +) + +import numpy as np + +from deepmd.jax.env import ( + jnp, +) + + +@overload +def to_jax_array(array: np.ndarray) -> jnp.ndarray: ... + + +@overload +def to_jax_array(array: None) -> None: ... + + +def to_jax_array(array: Union[np.ndarray]) -> Union[jnp.ndarray]: + """Convert a numpy array to a JAX array. + + Parameters + ---------- + array : np.ndarray + The numpy array to convert. + + Returns + ------- + jnp.ndarray + The JAX tensor. 
+ """ + if array is None: + return None + return jnp.array(array) diff --git a/deepmd/jax/env.py b/deepmd/jax/env.py new file mode 100644 index 0000000000..34e4aa6240 --- /dev/null +++ b/deepmd/jax/env.py @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import os + +os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"] = "false" + +import jax +import jax.numpy as jnp + +jax.config.update("jax_enable_x64", True) + +__all__ = [ + "jax", + "jnp", +] diff --git a/deepmd/jax/utils/__init__.py b/deepmd/jax/utils/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/deepmd/jax/utils/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/jax/utils/network.py b/deepmd/jax/utils/network.py new file mode 100644 index 0000000000..629b51b8cd --- /dev/null +++ b/deepmd/jax/utils/network.py @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Any, +) + +from deepmd.dpmodel.common import ( + NativeOP, +) +from deepmd.dpmodel.utils.network import NativeLayer as NativeLayerDP +from deepmd.dpmodel.utils.network import ( + make_embedding_network, + make_fitting_network, + make_multilayer_network, +) +from deepmd.jax.common import ( + to_jax_array, +) + + +class NativeLayer(NativeLayerDP): + def __setattr__(self, name: str, value: Any) -> None: + if name in {"w", "b", "idt"}: + value = to_jax_array(value) + return super().__setattr__(name, value) + + +NativeNet = make_multilayer_network(NativeLayer, NativeOP) +EmbeddingNet = make_embedding_network(NativeNet, NativeLayer) +FittingNet = make_fitting_network(EmbeddingNet, NativeNet, NativeLayer) diff --git a/deepmd/jax/utils/type_embed.py b/deepmd/jax/utils/type_embed.py new file mode 100644 index 0000000000..bc7c469524 --- /dev/null +++ b/deepmd/jax/utils/type_embed.py @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Any, +) + +from deepmd.dpmodel.utils.type_embed import TypeEmbedNet as TypeEmbedNetDP +from deepmd.jax.common import ( + to_jax_array, +) +from deepmd.jax.utils.network import ( + EmbeddingNet, +) + + +class TypeEmbedNet(TypeEmbedNetDP): + def __setattr__(self, name: str, value: Any) -> None: + if name in {"econf_tebd"}: + value = to_jax_array(value) + if name in {"embedding_net"}: + value = EmbeddingNet.deserialize(value.serialize()) + return super().__setattr__(name, value) diff --git a/pyproject.toml b/pyproject.toml index f181b616a3..28fe114e01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -132,6 +132,9 @@ cu12 = [ "nvidia-cudnn-cu12<9", "nvidia-cuda-nvcc-cu12", ] +jax = [ + 'jax>=0.4.33;python_version>="3.10"', +] [tool.deepmd_build_backend.scripts] dp = "deepmd.main:main" diff --git a/source/tests/common/dpmodel/array_api/test_activation_functions.py b/source/tests/common/dpmodel/array_api/test_activation_functions.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/source/tests/common/dpmodel/array_api/test_activation_functions.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/source/tests/consistent/common.py b/source/tests/consistent/common.py index edafc7c02e..e8873e528a 100644 --- a/source/tests/consistent/common.py +++ b/source/tests/consistent/common.py @@ -35,6 +35,7 @@ INSTALLED_TF = Backend.get_backend("tensorflow")().is_available() INSTALLED_PT = Backend.get_backend("pytorch")().is_available() +INSTALLED_JAX = Backend.get_backend("jax")().is_available() if os.environ.get("CI") and not (INSTALLED_TF and INSTALLED_PT): raise 
ImportError("TensorFlow or PyTorch should be tested in the CI") @@ -57,6 +58,7 @@ "CommonTest", "INSTALLED_TF", "INSTALLED_PT", + "INSTALLED_JAX", ] @@ -71,6 +73,8 @@ class CommonTest(ABC): """Native DP model class.""" pt_class: ClassVar[Optional[type]] """PyTorch model class.""" + jax_class: ClassVar[Optional[type]] + """JAX model class.""" args: ClassVar[Optional[Union[Argument, List[Argument]]]] """Arguments that maps to the `data`.""" skip_dp: ClassVar[bool] = False @@ -79,6 +83,9 @@ class CommonTest(ABC): """Whether to skip the TensorFlow model.""" skip_pt: ClassVar[bool] = not INSTALLED_PT """Whether to skip the PyTorch model.""" + # we may usually skip jax before jax is fully supported + skip_jax: ClassVar[bool] = True + """Whether to skip the JAX model.""" rtol = 1e-10 """Relative tolerance for comparing the return value. Override for float32.""" atol = 1e-10 @@ -149,12 +156,23 @@ def eval_pt(self, pt_obj: Any) -> Any: The object of PT """ + def eval_jax(self, jax_obj: Any) -> Any: + """Evaluate the return value of JAX. + + Parameters + ---------- + jax_obj : Any + The object of JAX + """ + raise NotImplementedError("Not implemented") + class RefBackend(Enum): """Reference backend.""" TF = 1 DP = 2 PT = 3 + JAX = 5 @abstractmethod def extract_ret(self, ret: Any, backend: RefBackend) -> Tuple[np.ndarray, ...]: @@ -215,6 +233,11 @@ def get_dp_ret_serialization_from_cls(self, obj): data = obj.serialize() return ret, data + def get_jax_ret_serialization_from_cls(self, obj): + ret = self.eval_jax(obj) + data = obj.serialize() + return ret, data + def get_reference_backend(self): """Get the reference backend. @@ -226,6 +249,8 @@ def get_reference_backend(self): return self.RefBackend.TF if not self.skip_pt: return self.RefBackend.PT + if not self.skip_jax: + return self.RefBackend.JAX raise ValueError("No available reference") def get_reference_ret_serialization(self, ref: RefBackend): @@ -359,6 +384,40 @@ def test_pt_self_consistent(self): else: self.assertEqual(rr1, rr2) + def test_jax_consistent_with_ref(self): + """Test whether JAX and reference are consistent.""" + if self.skip_jax: + self.skipTest("Unsupported backend") + ref_backend = self.get_reference_backend() + if ref_backend == self.RefBackend.JAX: + self.skipTest("Reference is self") + ret1, data1 = self.get_reference_ret_serialization(ref_backend) + ret1 = self.extract_ret(ret1, ref_backend) + jax_obj = self.jax_class.deserialize(data1) + ret2 = self.eval_jax(jax_obj) + ret2 = self.extract_ret(ret2, self.RefBackend.JAX) + data2 = jax_obj.serialize() + np.testing.assert_equal(data1, data2) + for rr1, rr2 in zip(ret1, ret2): + np.testing.assert_allclose(rr1, rr2, rtol=self.rtol, atol=self.atol) + assert rr1.dtype == rr2.dtype, f"{rr1.dtype} != {rr2.dtype}" + + def test_jax_self_consistent(self): + """Test whether JAX is self consistent.""" + if self.skip_jax: + self.skipTest("Unsupported backend") + obj1 = self.init_backend_cls(self.jax_class) + ret1, data1 = self.get_jax_ret_serialization_from_cls(obj1) + obj1 = self.jax_class.deserialize(data1) + ret2, data2 = self.get_jax_ret_serialization_from_cls(obj1) + np.testing.assert_equal(data1, data2) + for rr1, rr2 in zip(ret1, ret2): + if isinstance(rr1, np.ndarray) and isinstance(rr2, np.ndarray): + np.testing.assert_allclose(rr1, rr2, rtol=self.rtol, atol=self.atol) + assert rr1.dtype == rr2.dtype, f"{rr1.dtype} != {rr2.dtype}" + else: + self.assertEqual(rr1, rr2) + def tearDown(self) -> None: """Clear the TF session.""" if not self.skip_tf: diff --git 
a/source/tests/consistent/test_activation.py b/source/tests/consistent/test_activation.py index 3fcb9b2fa5..5630e913a8 100644 --- a/source/tests/consistent/test_activation.py +++ b/source/tests/consistent/test_activation.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import sys import unittest import numpy as np @@ -12,6 +13,7 @@ GLOBAL_SEED, ) from .common import ( + INSTALLED_JAX, INSTALLED_PT, INSTALLED_TF, parameterized, @@ -28,6 +30,10 @@ from deepmd.tf.env import ( tf, ) +if INSTALLED_JAX: + from deepmd.jax.env import ( + jnp, + ) @parameterized( @@ -57,3 +63,23 @@ def test_pt_consistent_with_ref(self): ActivationFn_pt(self.activation)(to_torch_tensor(self.random_input)) ) np.testing.assert_allclose(self.ref, test, atol=1e-10) + + @unittest.skipUnless( + sys.version_info >= (3, 9), "array_api_strict doesn't support Python<=3.8" + ) + def test_arary_api_strict(self): + import array_api_strict as xp + + xp.set_array_api_strict_flags( + api_version=get_activation_fn_dp.array_api_version + ) + input = xp.asarray(self.random_input) + test = get_activation_fn_dp(self.activation)(input) + np.testing.assert_allclose(self.ref, np.array(test), atol=1e-10) + + @unittest.skipUnless(INSTALLED_JAX, "JAX is not installed") + def test_jax_consistent_with_ref(self): + input = jnp.from_dlpack(self.random_input) + test = get_activation_fn_dp(self.activation)(input) + self.assertTrue(isinstance(test, jnp.ndarray)) + np.testing.assert_allclose(self.ref, np.from_dlpack(test), atol=1e-10) diff --git a/source/tests/consistent/test_type_embedding.py b/source/tests/consistent/test_type_embedding.py index 6583dddb5f..c66ef0fbaa 100644 --- a/source/tests/consistent/test_type_embedding.py +++ b/source/tests/consistent/test_type_embedding.py @@ -13,6 +13,7 @@ ) from .common import ( + INSTALLED_JAX, INSTALLED_PT, INSTALLED_TF, CommonTest, @@ -30,6 +31,13 @@ from deepmd.tf.utils.type_embed import TypeEmbedNet as TypeEmbedNetTF else: TypeEmbedNetTF = object +if INSTALLED_JAX: + from deepmd.jax.env import ( + jnp, + ) + from deepmd.jax.utils.type_embed import TypeEmbedNet as TypeEmbedNetJAX +else: + TypeEmbedNetJAX = object @parameterized( @@ -63,7 +71,9 @@ def data(self) -> dict: tf_class = TypeEmbedNetTF dp_class = TypeEmbedNetDP pt_class = TypeEmbedNetPT + jax_class = TypeEmbedNetJAX args = type_embedding_args() + skip_jax = not INSTALLED_JAX @property def addtional_data(self) -> dict: @@ -103,6 +113,14 @@ def eval_pt(self, pt_obj: Any) -> Any: for x in (pt_obj(device=PT_DEVICE),) ] + def eval_jax(self, jax_obj: Any) -> Any: + out = jax_obj() + # ensure output is not numpy array + for x in (out,): + if isinstance(x, np.ndarray): + raise ValueError("Output is numpy array") + return [np.array(x) if isinstance(x, jnp.ndarray) else x for x in (out,)] + def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: return (ret[0],) From 508759c31c27972c12901f7df8ffd7a6f6bfcf25 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:54:15 +0800 Subject: [PATCH 06/39] fix(pt ut): make separated uts deterministic (#4162) Fix failed uts in #4145 . ## Summary by CodeRabbit - **New Features** - Added a `"seed"` property to multiple JSON configuration files, enhancing control over randomness in model training and evaluation. - Introduced a global seed parameter in various test functions to improve reproducibility across test runs. - **Bug Fixes** - Ensured consistent random number generation in tests by integrating a global seed parameter. 
- **Documentation** - Updated configuration files and test methods to reflect the addition of the seed parameter for clarity and consistency. --- source/tests/pt/model/models/dpa1.json | 3 ++- source/tests/pt/model/models/dpa2.json | 1 + source/tests/pt/model/test_descriptor_se_r.py | 3 +++ source/tests/pt/model/test_dipole_fitting.py | 6 ++++++ source/tests/pt/model/test_dpa1.py | 3 +++ source/tests/pt/model/test_dpa2.py | 6 ++++++ source/tests/pt/model/test_embedding_net.py | 5 ++++- source/tests/pt/model/test_ener_fitting.py | 3 +++ source/tests/pt/model/test_permutation.py | 6 ++++++ source/tests/pt/model/test_polarizability_fitting.py | 6 ++++++ source/tests/pt/model/test_property_fitting.py | 10 ++++++++++ source/tests/pt/model/test_se_atten_v2.py | 5 +++++ source/tests/pt/model/test_se_e2_a.py | 3 +++ source/tests/pt/model/test_se_t.py | 2 ++ source/tests/pt/model/water/se_atten.json | 3 ++- 15 files changed, 62 insertions(+), 3 deletions(-) diff --git a/source/tests/pt/model/models/dpa1.json b/source/tests/pt/model/models/dpa1.json index 1321acbd53..a969c290ae 100644 --- a/source/tests/pt/model/models/dpa1.json +++ b/source/tests/pt/model/models/dpa1.json @@ -21,7 +21,8 @@ "activation_function": "tanh", "scaling_factor": 1.0, "normalize": true, - "temperature": 1.0 + "temperature": 1.0, + "seed": 1 }, "fitting_net": { "neuron": [ diff --git a/source/tests/pt/model/models/dpa2.json b/source/tests/pt/model/models/dpa2.json index 7495f5d78a..f83e319de3 100644 --- a/source/tests/pt/model/models/dpa2.json +++ b/source/tests/pt/model/models/dpa2.json @@ -42,6 +42,7 @@ "g1_out_conv": false, "g1_out_mlp": false }, + "seed": 1, "add_tebd_to_repinit_out": false }, "fitting_net": { diff --git a/source/tests/pt/model/test_descriptor_se_r.py b/source/tests/pt/model/test_descriptor_se_r.py index a2b9754714..f3692101c5 100644 --- a/source/tests/pt/model/test_descriptor_se_r.py +++ b/source/tests/pt/model/test_descriptor_se_r.py @@ -63,6 +63,7 @@ def test_consistency( resnet_dt=idt, old_impl=False, exclude_mask=em, + seed=GLOBAL_SEED, ).to(env.DEVICE) dd0.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) dd0.dstd = torch.tensor(dstd, dtype=dtype, device=env.DEVICE) @@ -130,6 +131,7 @@ def test_load_stat(self): precision=prec, resnet_dt=idt, old_impl=False, + seed=GLOBAL_SEED, ) dd0.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) dd0.dstd = torch.tensor(dstd, dtype=dtype, device=env.DEVICE) @@ -180,6 +182,7 @@ def test_jit( precision=prec, resnet_dt=idt, old_impl=False, + seed=GLOBAL_SEED, ) dd0.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) dd0.dstd = torch.tensor(dstd, dtype=dtype, device=env.DEVICE) diff --git a/source/tests/pt/model/test_dipole_fitting.py b/source/tests/pt/model/test_dipole_fitting.py index cd3a032ecc..71da2781ac 100644 --- a/source/tests/pt/model/test_dipole_fitting.py +++ b/source/tests/pt/model/test_dipole_fitting.py @@ -87,6 +87,7 @@ def test_consistency( numb_fparam=nfp, numb_aparam=nap, mixed_types=self.dd0.mixed_types(), + seed=GLOBAL_SEED, ).to(env.DEVICE) ft1 = DPDipoleFitting.deserialize(ft0.serialize()) ft2 = DipoleFittingNet.deserialize(ft1.serialize()) @@ -139,6 +140,7 @@ def test_jit( numb_fparam=nfp, numb_aparam=nap, mixed_types=mixed_types, + seed=GLOBAL_SEED, ).to(env.DEVICE) torch.jit.script(ft0) @@ -180,6 +182,7 @@ def test_rot(self): numb_fparam=nfp, numb_aparam=nap, mixed_types=self.dd0.mixed_types(), + seed=GLOBAL_SEED, ).to(env.DEVICE) if nfp > 0: ifp = torch.tensor( @@ -234,6 +237,7 @@ def test_permu(self): numb_fparam=0, 
numb_aparam=0, mixed_types=self.dd0.mixed_types(), + seed=GLOBAL_SEED, ).to(env.DEVICE) res = [] for idx_perm in [[0, 1, 2, 3, 4], [1, 0, 4, 3, 2]]: @@ -280,6 +284,7 @@ def test_trans(self): numb_fparam=0, numb_aparam=0, mixed_types=self.dd0.mixed_types(), + seed=GLOBAL_SEED, ).to(env.DEVICE) res = [] for xyz in [self.coord, coord_s]: @@ -327,6 +332,7 @@ def setUp(self): numb_fparam=0, numb_aparam=0, mixed_types=self.dd0.mixed_types(), + seed=GLOBAL_SEED, ).to(env.DEVICE) self.type_mapping = ["O", "H", "B"] self.model = DipoleModel(self.dd0, self.ft0, self.type_mapping) diff --git a/source/tests/pt/model/test_dpa1.py b/source/tests/pt/model/test_dpa1.py index f1994504fc..b825885311 100644 --- a/source/tests/pt/model/test_dpa1.py +++ b/source/tests/pt/model/test_dpa1.py @@ -71,6 +71,7 @@ def test_consistency( use_econf_tebd=ect, type_map=["O", "H"] if ect else None, old_impl=False, + seed=GLOBAL_SEED, ).to(env.DEVICE) dd0.se_atten.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) dd0.se_atten.stddev = torch.tensor(dstd, dtype=dtype, device=env.DEVICE) @@ -125,6 +126,7 @@ def test_consistency( resnet_dt=idt, smooth_type_embedding=sm, old_impl=True, + seed=GLOBAL_SEED, ).to(env.DEVICE) dd0_state_dict = dd0.se_atten.state_dict() dd3_state_dict = dd3.se_atten.state_dict() @@ -210,6 +212,7 @@ def test_jit( use_econf_tebd=ect, type_map=["O", "H"] if ect else None, old_impl=False, + seed=GLOBAL_SEED, ) dd0.se_atten.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) dd0.se_atten.dstd = torch.tensor(dstd, dtype=dtype, device=env.DEVICE) diff --git a/source/tests/pt/model/test_dpa2.py b/source/tests/pt/model/test_dpa2.py index f11be532cb..0beb34c031 100644 --- a/source/tests/pt/model/test_dpa2.py +++ b/source/tests/pt/model/test_dpa2.py @@ -20,6 +20,9 @@ PRECISION_DICT, ) +from ...seed import ( + GLOBAL_SEED, +) from .test_env_mat import ( TestCaseSingleFrameWithNlist, ) @@ -152,6 +155,7 @@ def test_consistency( use_econf_tebd=ect, type_map=["O", "H"] if ect else None, old_impl=False, + seed=GLOBAL_SEED, ).to(env.DEVICE) dd0.repinit.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) @@ -201,6 +205,7 @@ def test_consistency( add_tebd_to_repinit_out=False, precision=prec, old_impl=True, + seed=GLOBAL_SEED, ).to(env.DEVICE) dd0_state_dict = dd0.state_dict() dd3_state_dict = dd3.state_dict() @@ -346,6 +351,7 @@ def test_jit( use_econf_tebd=ect, type_map=["O", "H"] if ect else None, old_impl=False, + seed=GLOBAL_SEED, ).to(env.DEVICE) dd0.repinit.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) diff --git a/source/tests/pt/model/test_embedding_net.py b/source/tests/pt/model/test_embedding_net.py index 77d14db2a4..3605316437 100644 --- a/source/tests/pt/model/test_embedding_net.py +++ b/source/tests/pt/model/test_embedding_net.py @@ -39,6 +39,9 @@ ) from deepmd.tf.descriptor import DescrptSeA as DescrptSeA_tf +from ...seed import ( + GLOBAL_SEED, +) from ..test_finetune import ( energy_data_requirement, ) @@ -153,7 +156,7 @@ def test_consistency(self): sel=self.sel, neuron=self.filter_neuron, axis_neuron=self.axis_neuron, - seed=1, + seed=GLOBAL_SEED, ) dp_embedding, dp_force, dp_vars = base_se_a( descriptor=dp_d, diff --git a/source/tests/pt/model/test_ener_fitting.py b/source/tests/pt/model/test_ener_fitting.py index 07c0d19935..3255db2784 100644 --- a/source/tests/pt/model/test_ener_fitting.py +++ b/source/tests/pt/model/test_ener_fitting.py @@ -65,6 +65,7 @@ def test_consistency( mixed_types=mixed_types, exclude_types=et, neuron=nn, + seed=GLOBAL_SEED, ).to(env.DEVICE) 
ft1 = DPInvarFitting.deserialize(ft0.serialize()) ft2 = InvarFitting.deserialize(ft0.serialize()) @@ -168,6 +169,7 @@ def test_jit( numb_aparam=nap, mixed_types=mixed_types, exclude_types=et, + seed=GLOBAL_SEED, ).to(env.DEVICE) torch.jit.script(ft0) @@ -177,6 +179,7 @@ def test_get_set(self): self.nt, 3, 1, + seed=GLOBAL_SEED, ) rng = np.random.default_rng(GLOBAL_SEED) foo = rng.normal([3, 4]) diff --git a/source/tests/pt/model/test_permutation.py b/source/tests/pt/model/test_permutation.py index 2fbc5fde3c..6aec895041 100644 --- a/source/tests/pt/model/test_permutation.py +++ b/source/tests/pt/model/test_permutation.py @@ -88,6 +88,7 @@ "temperature": 1.0, "set_davg_zero": True, "type_one_side": True, + "seed": 1, }, "fitting_net": { "neuron": [24, 24, 24], @@ -155,6 +156,7 @@ "update_g2_has_attn": True, "attn2_has_gate": True, }, + "seed": 1, "add_tebd_to_repinit_out": False, }, "fitting_net": { @@ -207,6 +209,7 @@ "g1_out_conv": True, "g1_out_mlp": True, }, + "seed": 1, "add_tebd_to_repinit_out": False, }, "fitting_net": { @@ -235,6 +238,7 @@ "temperature": 1.0, "set_davg_zero": True, "type_one_side": True, + "seed": 1, }, "fitting_net": { "neuron": [24, 24, 24], @@ -264,6 +268,7 @@ "scaling_factor": 1.0, "normalize": True, "temperature": 1.0, + "seed": 1, }, { "type": "dpa2", @@ -296,6 +301,7 @@ "update_g2_has_attn": True, "attn2_has_gate": True, }, + "seed": 1, "add_tebd_to_repinit_out": False, }, ], diff --git a/source/tests/pt/model/test_polarizability_fitting.py b/source/tests/pt/model/test_polarizability_fitting.py index ba1bf2ea29..1ca563a8c2 100644 --- a/source/tests/pt/model/test_polarizability_fitting.py +++ b/source/tests/pt/model/test_polarizability_fitting.py @@ -77,6 +77,7 @@ def test_consistency( mixed_types=self.dd0.mixed_types(), fit_diag=fit_diag, scale=scale, + seed=GLOBAL_SEED, ).to(env.DEVICE) ft1 = DPPolarFitting.deserialize(ft0.serialize()) ft2 = PolarFittingNet.deserialize(ft0.serialize()) @@ -143,6 +144,7 @@ def test_jit( numb_aparam=nap, mixed_types=mixed_types, fit_diag=fit_diag, + seed=GLOBAL_SEED, ).to(env.DEVICE) torch.jit.script(ft0) @@ -186,6 +188,7 @@ def test_rot(self): mixed_types=self.dd0.mixed_types(), fit_diag=fit_diag, scale=scale, + seed=GLOBAL_SEED, ).to(env.DEVICE) if nfp > 0: ifp = torch.tensor( @@ -248,6 +251,7 @@ def test_permu(self): mixed_types=self.dd0.mixed_types(), fit_diag=fit_diag, scale=scale, + seed=GLOBAL_SEED, ).to(env.DEVICE) res = [] for idx_perm in [[0, 1, 2, 3, 4], [1, 0, 4, 3, 2]]: @@ -298,6 +302,7 @@ def test_trans(self): mixed_types=self.dd0.mixed_types(), fit_diag=fit_diag, scale=scale, + seed=GLOBAL_SEED, ).to(env.DEVICE) res = [] for xyz in [self.coord, coord_s]: @@ -347,6 +352,7 @@ def setUp(self): numb_fparam=0, numb_aparam=0, mixed_types=self.dd0.mixed_types(), + seed=GLOBAL_SEED, ).to(env.DEVICE) self.type_mapping = ["O", "H", "B"] self.model = PolarModel(self.dd0, self.ft0, self.type_mapping) diff --git a/source/tests/pt/model/test_property_fitting.py b/source/tests/pt/model/test_property_fitting.py index 59a5b1b172..dfe2725f3b 100644 --- a/source/tests/pt/model/test_property_fitting.py +++ b/source/tests/pt/model/test_property_fitting.py @@ -32,6 +32,9 @@ to_numpy_array, ) +from ...seed import ( + GLOBAL_SEED, +) from .test_env_mat import ( TestCaseSingleFrameWithNlist, ) @@ -78,6 +81,7 @@ def test_consistency( bias_atom_p=bias_atom_p, intensive=intensive, bias_method=bias_method, + seed=GLOBAL_SEED, ).to(env.DEVICE) ft1 = DPProperFittingNet.deserialize(ft0.serialize()) @@ -146,6 +150,7 @@ def test_jit( 
mixed_types=self.dd0.mixed_types(), intensive=intensive, bias_method=bias_method, + seed=GLOBAL_SEED, ).to(env.DEVICE) torch.jit.script(ft0) @@ -199,6 +204,7 @@ def test_trans(self): numb_fparam=0, numb_aparam=0, mixed_types=self.dd0.mixed_types(), + seed=GLOBAL_SEED, ).to(env.DEVICE) res = [] for xyz in [self.coord, coord_s]: @@ -266,6 +272,7 @@ def test_rot(self): mixed_types=self.dd0.mixed_types(), intensive=intensive, bias_method=bias_method, + seed=GLOBAL_SEED, ).to(env.DEVICE) if nfp > 0: ifp = torch.tensor( @@ -320,6 +327,7 @@ def test_permu(self): numb_fparam=0, numb_aparam=0, mixed_types=self.dd0.mixed_types(), + seed=GLOBAL_SEED, ).to(env.DEVICE) res = [] for idx_perm in [[0, 1, 2, 3, 4], [1, 0, 4, 3, 2]]: @@ -367,6 +375,7 @@ def test_trans(self): numb_fparam=0, numb_aparam=0, mixed_types=self.dd0.mixed_types(), + seed=GLOBAL_SEED, ).to(env.DEVICE) res = [] for xyz in [self.coord, coord_s]: @@ -417,6 +426,7 @@ def setUp(self): numb_aparam=0, mixed_types=self.dd0.mixed_types(), intensive=True, + seed=GLOBAL_SEED, ).to(env.DEVICE) self.type_mapping = ["O", "H", "B"] self.model = PropertyModel(self.dd0, self.ft0, self.type_mapping) diff --git a/source/tests/pt/model/test_se_atten_v2.py b/source/tests/pt/model/test_se_atten_v2.py index caecd0a118..f9857fc728 100644 --- a/source/tests/pt/model/test_se_atten_v2.py +++ b/source/tests/pt/model/test_se_atten_v2.py @@ -16,6 +16,9 @@ PRECISION_DICT, ) +from ...seed import ( + GLOBAL_SEED, +) from .test_env_mat import ( TestCaseSingleFrameWithNlist, ) @@ -64,6 +67,7 @@ def test_consistency( use_econf_tebd=ect, type_map=["O", "H"] if ect else None, old_impl=False, + seed=GLOBAL_SEED, ).to(env.DEVICE) dd0.se_atten.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) dd0.se_atten.stddev = torch.tensor(dstd, dtype=dtype, device=env.DEVICE) @@ -135,6 +139,7 @@ def test_jit( use_econf_tebd=ect, type_map=["O", "H"] if ect else None, old_impl=False, + seed=GLOBAL_SEED, ) dd0.se_atten.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) dd0.se_atten.dstd = torch.tensor(dstd, dtype=dtype, device=env.DEVICE) diff --git a/source/tests/pt/model/test_se_e2_a.py b/source/tests/pt/model/test_se_e2_a.py index 75d47c9054..abe13ce86e 100644 --- a/source/tests/pt/model/test_se_e2_a.py +++ b/source/tests/pt/model/test_se_e2_a.py @@ -60,6 +60,7 @@ def test_consistency( resnet_dt=idt, old_impl=False, exclude_types=em, + seed=GLOBAL_SEED, ).to(env.DEVICE) dd0.sea.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) dd0.sea.dstd = torch.tensor(dstd, dtype=dtype, device=env.DEVICE) @@ -113,6 +114,7 @@ def test_consistency( precision=prec, resnet_dt=idt, old_impl=True, + seed=GLOBAL_SEED, ).to(env.DEVICE) dd0_state_dict = dd0.sea.state_dict() dd3_state_dict = dd3.sea.state_dict() @@ -168,6 +170,7 @@ def test_jit( precision=prec, resnet_dt=idt, old_impl=False, + seed=GLOBAL_SEED, ) dd0.sea.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) dd0.sea.dstd = torch.tensor(dstd, dtype=dtype, device=env.DEVICE) diff --git a/source/tests/pt/model/test_se_t.py b/source/tests/pt/model/test_se_t.py index 0d6c87ba8d..d3968d7f03 100644 --- a/source/tests/pt/model/test_se_t.py +++ b/source/tests/pt/model/test_se_t.py @@ -63,6 +63,7 @@ def test_consistency( precision=prec, resnet_dt=idt, exclude_types=em, + seed=GLOBAL_SEED, ).to(env.DEVICE) dd0.seat.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) dd0.seat.dstd = torch.tensor(dstd, dtype=dtype, device=env.DEVICE) @@ -131,6 +132,7 @@ def test_jit( self.sel, precision=prec, resnet_dt=idt, + 
seed=GLOBAL_SEED, ) dd0.seat.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) dd0.seat.dstd = torch.tensor(dstd, dtype=dtype, device=env.DEVICE) diff --git a/source/tests/pt/model/water/se_atten.json b/source/tests/pt/model/water/se_atten.json index 71cee94d8b..4b4c54e0d2 100644 --- a/source/tests/pt/model/water/se_atten.json +++ b/source/tests/pt/model/water/se_atten.json @@ -24,7 +24,8 @@ "activation_function": "tanh", "scaling_factor": 1.0, "normalize": false, - "temperature": 1.0 + "temperature": 1.0, + "seed": 1 }, "fitting_net": { "neuron": [ From 0b3f860424550dcec0cdda012138fb9eafcaba92 Mon Sep 17 00:00:00 2001 From: Chenqqian Zhang <100290172+Chengqian-Zhang@users.noreply.github.com> Date: Wed, 25 Sep 2024 13:41:32 +0800 Subject: [PATCH 07/39] fix(pt): finetuning property/dipole/polar/dos fitting with multi-dimensional data causes error (#4145) Fix issue #4108. If a pretrained model is labeled with energy, its `out_bias` is one-dimensional. If we then finetune a dos/polar/dipole/property model from this pretrained model, the `out_bias` of the finetuning model is multi-dimensional (example: numb_dos = 250), and an error occurs: `RuntimeError: Error(s) in loading state_dict for ModelWrapper:` ` size mismatch for model.Default.atomic_model.out_bias: copying a param with shape torch.Size([1, 118, 1]) from checkpoint, the shape in current model is torch.Size([1, 118, 250]).` ` size mismatch for model.Default.atomic_model.out_std: copying a param with shape torch.Size([1, 118, 1]) from checkpoint, the shape in current model is torch.Size([1, 118, 250]).` When a new fitting net is used, the old `out_bias` is useless because the new bias is recomputed later in the code, so we do not need to load the old `out_bias` when finetuning with a new fitting net (a minimal sketch of the resulting key-filtering rule appears after this patch). ## Summary by CodeRabbit - **New Features** - Enhanced parameter collection for fine-tuning, refining criteria for parameter retention. - Introduced a model checkpoint file for saving and resuming training states, facilitating iterative development. - **Tests** - Added a new test class to validate training and fine-tuning processes, ensuring model performance consistency across configurations. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- deepmd/pt/train/training.py | 2 +- source/tests/pt/test_training.py | 68 ++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index c3d603dadd..9bdc80195f 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -484,7 +484,7 @@ def collect_single_finetune_params( if i != "_extra_state" and f".{_model_key}." in i ] for item_key in target_keys: - if _new_fitting and ".fitting_net." in item_key: + if _new_fitting and (".descriptor."
not in item_key): # print(f'Keep {item_key} in old model!') _new_state_dict[item_key] = ( _random_state_dict[item_key].clone().detach() ) diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py index 0833200d47..fa9e5c138a 100644 --- a/source/tests/pt/test_training.py +++ b/source/tests/pt/test_training.py @@ -448,5 +448,73 @@ def tearDown(self) -> None: DPTrainTest.tearDown(self) +class TestPropFintuFromEnerModel(unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_dpa1) + self.config["model"]["type_map"] = ["H", "C", "N", "O"] + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + property_input = str(Path(__file__).parent / "property/input.json") + with open(property_input) as f: + self.config_property = json.load(f) + prop_data_file = [str(Path(__file__).parent / "property/single")] + self.config_property["training"]["training_data"]["systems"] = prop_data_file + self.config_property["training"]["validation_data"]["systems"] = prop_data_file + self.config_property["model"]["descriptor"] = deepcopy(model_dpa1["descriptor"]) + self.config_property["training"]["numb_steps"] = 1 + self.config_property["training"]["save_freq"] = 1 + + def test_dp_train(self): + # test training from scratch + trainer = get_trainer(deepcopy(self.config)) + trainer.run() + state_dict_trained = trainer.wrapper.model.state_dict() + + # test fine-tuning using different fitting_net, here using property fitting + finetune_model = self.config["training"].get("save_ckpt", "model.ckpt") + ".pt" + self.config_property["model"], finetune_links = get_finetune_rules( + finetune_model, + self.config_property["model"], + model_branch="RANDOM", + ) + trainer_finetune = get_trainer( + deepcopy(self.config_property), + finetune_model=finetune_model, + finetune_links=finetune_links, + ) + + # check parameters + state_dict_finetuned = trainer_finetune.wrapper.model.state_dict() + for state_key in state_dict_finetuned: + if ( + "out_bias" not in state_key + and "out_std" not in state_key + and "fitting" not in state_key + ): + torch.testing.assert_close( + state_dict_trained[state_key], + state_dict_finetuned[state_key], + ) + + # check running + trainer_finetune.run() + + def tearDown(self): + for f in os.listdir("."): + if f.startswith("model") and f.endswith(".pt"): + os.remove(f) + if f in ["lcurve.out"]: + os.remove(f) + if f in ["stat_files"]: + shutil.rmtree(f) + + if __name__ == "__main__": unittest.main() From ab48d90bb9076c7e2278f8414129820342d3c31b Mon Sep 17 00:00:00 2001 From: Chun Cai Date: Fri, 27 Sep 2024 08:38:11 +0800 Subject: [PATCH 08/39] Feat: output logs on freezing models (#4165) `dp --pt freeze` does not report whether the operation finished successfully. This PR adds a log message for it.
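To make the key-filtering rule from PATCH 07 above concrete, here is a minimal sketch, assuming hypothetical state-dict keys. The helper `collect_finetune_keys` and the key strings are illustrative stand-ins, not the actual `collect_single_finetune_params` implementation in `deepmd/pt/train/training.py`.

```python
# Minimal sketch (hypothetical names) of the PATCH 07 rule: when the
# fine-tuned model uses a new fitting net, only descriptor parameters are
# carried over from the pretrained checkpoint; everything else (including
# the shape-mismatched out_bias/out_std) keeps its freshly initialized
# value and is recomputed from the new training data.


def collect_finetune_keys(pretrained_keys, new_fitting):
    kept = []
    for key in pretrained_keys:
        if new_fitting and ".descriptor." not in key:
            # Skip: fitting-net weights and out_bias/out_std are not
            # transferable across output dimensions.
            continue
        kept.append(key)
    return kept


keys = [
    "model.Default.descriptor.layer0.weight",  # transferable
    "model.Default.fitting_net.layer0.weight",  # belongs to the new fitting net
    "model.Default.atomic_model.out_bias",  # shape depends on numb_dos
]
print(collect_finetune_keys(keys, new_fitting=True))
# ['model.Default.descriptor.layer0.weight']
```

With this rule, the size-mismatch error quoted in the commit message cannot occur, because the multi-dimensional `out_bias` of the new fitting head is never loaded from the one-dimensional checkpoint value.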
--- deepmd/pt/entrypoints/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py index 9133575ec8..3df05cbb47 100644 --- a/deepmd/pt/entrypoints/main.py +++ b/deepmd/pt/entrypoints/main.py @@ -354,6 +354,7 @@ def freeze(FLAGS): FLAGS.output, extra_files, ) + log.info(f"Saved frozen model to {FLAGS.output}") def change_bias(FLAGS): From 94fe957ca1ccb8dc7e1c73ca3c3c76ff9411d307 Mon Sep 17 00:00:00 2001 From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> Date: Fri, 27 Sep 2024 17:27:56 +0800 Subject: [PATCH 09/39] chore: change econf embed to spin representation (#4166) ## Summary by CodeRabbit - **New Features** - Introduced a function to transform electronic configuration data into a spin representation, enhancing how electronic states are represented. - Updated the electronic configuration embedding to include negative values, reflecting a new encoding scheme. - Added a normalization function for electronic configuration vectors to improve data consistency. - **Tests** - Added a new test for the spin representation functionality to ensure accuracy of the transformation for iron (Fe). - Updated existing tests to align with the new expected output format. --------- Co-authored-by: Han Wang Co-authored-by: Duo <50307526+iProzd@users.noreply.github.com> --- deepmd/dpmodel/utils/type_embed.py | 5 +- deepmd/utils/econf_embd.py | 288 ++++++++++++++----------- source/tests/common/test_econf_embd.py | 16 +- 3 files changed, 186 insertions(+), 123 deletions(-) diff --git a/deepmd/dpmodel/utils/type_embed.py b/deepmd/dpmodel/utils/type_embed.py index e11c415cfd..04c05b6a39 100644 --- a/deepmd/dpmodel/utils/type_embed.py +++ b/deepmd/dpmodel/utils/type_embed.py @@ -222,7 +222,9 @@ def change_type_map( def get_econf_tebd(type_map, precision: str = "default"): from deepmd.utils.econf_embd import ( ECONF_DIM, - electronic_configuration_embedding, + ) + from deepmd.utils.econf_embd import ( + normalized_electronic_configuration_embedding as electronic_configuration_embedding, ) from deepmd.utils.econf_embd import type_map as periodic_table @@ -240,6 +242,5 @@ def get_econf_tebd(type_map, precision: str = "default"): [electronic_configuration_embedding[kk] for kk in type_map], dtype=PRECISION_DICT[precision], ) - econf_tebd /= econf_tebd.sum(-1, keepdims=True) # do normalization embed_input_dim = ECONF_DIM return econf_tebd, embed_input_dim diff --git a/deepmd/utils/econf_embd.py b/deepmd/utils/econf_embd.py index cdd9525f6b..7f12206ae3 100644 --- a/deepmd/utils/econf_embd.py +++ b/deepmd/utils/econf_embd.py @@ -1,135 +1,148 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Dict, + List, +) + import numpy as np from mendeleev import ( element, ) +__all__ = [ + "electronic_configuration_embedding", + "normalized_electronic_configuration_embedding", + "make_econf_embedding", + "transform_to_spin_rep", +] + ### # made by command # ret = make_econf_embedding(type_map, flatten=True) +# ret = transform_to_spin_rep(ret) # print_econf_embedding(ret) ### # fmt: off electronic_configuration_embedding = \ { kk: np.array(vv, dtype=np.int32) for kk,vv in { - "H" : [1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "He" : [2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Li" : 
[2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Be" : [2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "B" : [2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "C" : [2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "N" : [2,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "O" : [2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "F" : [2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Ne" : [2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Na" : [2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Mg" : [2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Al" : [2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Si" : [2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "P" : [2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "S" : [2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Cl" : [2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Ar" : [2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "K" : [2,2,2,2,2,2,2,2,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Ca" : [2,2,2,2,2,2,2,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Sc" : [2,2,2,2,2,2,2,2,2,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Ti" : [2,2,2,2,2,2,2,2,2,1,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "V" : [2,2,2,2,2,2,2,2,2,1,1,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Cr" : [2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Mn" : [2,2,2,2,2,2,2,2,2,1,1,1,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Fe" : [2,2,2,2,2,2,2,2,2,2,1,1,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Co" : [2,2,2,2,2,2,2,2,2,2,2,1,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Ni" : [2,2,2,2,2,2,2,2,2,2,2,2,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Cu" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Zn" : 
[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Ga" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Ge" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "As" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Se" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Br" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Kr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Rb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Sr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Y" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Zr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Nb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Mo" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Tc" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Ru" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Rh" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Pd" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Ag" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Cd" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "In" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Sn" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Sb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Te" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "I" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Xe" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], - "Cs" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0], - "Ba" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "La" : 
[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Ce" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Pr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Nd" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Pm" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Sm" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Eu" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Gd" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Tb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Dy" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Ho" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Er" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Tm" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Yb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Lu" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Hf" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Ta" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "W" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Re" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Os" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Ir" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Pt" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0], - "Au" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0], - "Hg" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0], - "Tl" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0], - "Pb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,1,0,0,0,0,0,0,0,0,0,0], - "Bi" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,1,1,0,0,0,0,0,0,0,0,0], - "Po" : 
[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,0,0,0], - "At" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,1,0,0,0,0,0,0,0,0,0], - "Rn" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0], - "Fr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,1,0,0,0], - "Ra" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,2,0,0,0], - "Ac" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,1,0,0,0,0,2,0,0,0], - "Th" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,1,1,0,0,0,2,0,0,0], - "Pa" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,2,2,2,2,1,0,0,0,0,2,0,0,0], - "U" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,2,2,2,2,1,0,0,0,0,2,0,0,0], - "Np" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,2,2,2,2,1,0,0,0,0,2,0,0,0], - "Pu" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,0,2,2,2,2,0,0,0,0,0,2,0,0,0], - "Am" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,2,2,2,2,0,0,0,0,0,2,0,0,0], - "Cm" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,2,2,2,2,1,0,0,0,0,2,0,0,0], - "Bk" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,2,2,2,2,0,0,0,0,0,2,0,0,0], - "Cf" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,2,2,2,2,0,0,0,0,0,2,0,0,0], - "Es" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,2,2,2,2,0,0,0,0,0,2,0,0,0], - "Fm" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,2,2,2,2,0,0,0,0,0,2,0,0,0], - "Md" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,0,0,0,0,0,2,0,0,0], - "No" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,2,0,0,0], - "Lr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,2,0,0,0], - "Rf" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,2,0,0,0], - "Db" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,2,0,0,0], - "Sg" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,2,0,0,0], - "Bh" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,2,0,0,0], - "Hs" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,2,0,0,0], - "Mt" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,2,0,0,0], - "Ds" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0], - "Rg" : 
[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0], - "Cn" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0], - "Nh" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0], - "Fl" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0], - "Mc" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1], - "Lv" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1], - "Ts" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1], - "Og" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2], + "H" : [-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "He" : [ 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Li" : [ 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Be" : [ 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "B" : [ 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "C" : [ 1, 1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "N" : [ 1, 1, 1, 1,-1, 1,-1, 1,-1, 
1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "O" : [ 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "F" : [ 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Ne" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Na" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Mg" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Al" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Si" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "P" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "S" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 
1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Cl" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Ar" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "K" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Ca" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Sc" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Ti" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "V" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Cr" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Mn" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1, 1, 
1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Fe" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Co" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Ni" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Cu" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Zn" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Ga" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Ge" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "As" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Se" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 
1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Br" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Kr" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Rb" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Sr" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Y" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Zr" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Nb" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Mo" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Tc" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Ru" : [ 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Rh" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Pd" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Ag" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Cd" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "In" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Sn" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Sb" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Te" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "I" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1,-1, 
1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Xe" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Cs" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Ba" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "La" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Ce" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Pr" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Nd" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Pm" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Sm" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Eu" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Gd" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Tb" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Dy" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Ho" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Er" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Tm" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Yb" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Lu" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Hf" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + 
"Ta" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "W" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Re" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Os" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Ir" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Pt" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Au" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Hg" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Tl" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Pb" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Bi" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Po" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "At" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Rn" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], + "Fr" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1,-1,-1,-1,-1,-1,-1], + "Ra" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Ac" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Th" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Pa" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "U" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Np" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Pu" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,-1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Am" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Cm" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Bk" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Cf" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Es" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Fm" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Md" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "No" : [ 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Lr" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Rf" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Db" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1,-1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Sg" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,-1, 1, 1,-1,-1,-1,-1,-1,-1], + "Bh" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1, 1, 1,-1,-1,-1,-1,-1,-1], + "Hs" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1,-1, 1, 1, 1,-1,-1,-1,-1,-1,-1], + "Mt" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1, 1, 1,-1,-1,-1,-1,-1,-1], + "Ds" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1,-1,-1,-1,-1,-1], + "Rg" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1,-1,-1], + "Cn" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1,-1], + "Nh" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1,-1,-1,-1], + "Fl" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1,-1], + "Mc" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1,-1, 1], + "Lv" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1,-1, 1], + "Ts" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1, 1], + "Og" : [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], }.items()} # fmt: on @@ -172,6 +185,16 @@ ECONF_DIM = electronic_configuration_embedding[type_map[0]].shape[0] +def normalize_vec_length(res): + scale = 1.0 / np.sqrt(ECONF_DIM) + return {kk: scale * vv for kk, vv in res.items()} + + +normalized_electronic_configuration_embedding = normalize_vec_length( + electronic_configuration_embedding +) + + def make_empty_list_vec(): ret = {} for kk in conf_keys: @@ -204,7 +227,10 @@ def make_element_embedding_list_vec( return ret -def make_econf_embedding(types, flatten=True): +def make_econf_embedding( + types: List[str], flatten: bool = True +) -> Dict[str, np.ndarray]: + """Make the electronic configuration embedding.""" all_ret = {} for ii in types: ir = make_element_embedding_list_vec(ii) @@ -214,7 +240,29 @@ def make_econf_embedding(types, flatten=True): return all_ret -def print_econf_embedding(res): +def transform_to_spin_rep(res: Dict[str, np.ndarray]) -> 
Dict[str, np.ndarray]: + """Transform electron occupation of 0/1/2 to -1,-1/-1,1/1,1.""" + ret = {} + + def transform(ii): + if ii == 0: + return [-1, -1] + elif ii == 1: + return [-1, 1] + elif ii == 2: + return [1, 1] + else: + raise ValueError(f"wrong input value {ii}") + + for kk, vv in res.items(): + transformed_list = [spin for ii in vv for spin in transform(ii)] + new_vv = np.array(transformed_list, dtype=np.int32) + ret[kk] = new_vv + return ret + + +def print_econf_embedding(res: Dict[str, np.ndarray]): + """Print electron configuration embedding.""" for kk, vv in res.items(): vvstr = ",".join([str(ii) for ii in vv]) space = " " * (2 - len(kk)) diff --git a/source/tests/common/test_econf_embd.py b/source/tests/common/test_econf_embd.py index d24115c860..242ea9ca65 100644 --- a/source/tests/common/test_econf_embd.py +++ b/source/tests/common/test_econf_embd.py @@ -4,6 +4,8 @@ from deepmd.utils.econf_embd import ( electronic_configuration_embedding, make_econf_embedding, + normalized_electronic_configuration_embedding, + transform_to_spin_rep, ) @@ -40,9 +42,21 @@ def test_fe_flatten(self): # fmt: on self.assertEqual(list(res), expected_res) + def test_fe_spin(self): + res = make_econf_embedding(["Fe"], flatten=True) + res = transform_to_spin_rep(res)["Fe"] + # fmt: off + expected_res = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,-1,1,-1,1,-1,1,-1,1,1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1] + # fmt: on + self.assertEqual(list(res), expected_res) + def test_dict(self): res = electronic_configuration_embedding["Fe"] # fmt: off - expected_res = [2,2,2,2,2,2,2,2,2,2,1,1,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] + expected_res = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,-1,1,-1,1,-1,1,-1,1,1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1] # fmt: on self.assertEqual(list(res), expected_res) + res = normalized_electronic_configuration_embedding["Fe"] + self.assertEqual( + list(res), [ii / len(expected_res) ** 0.5 for ii in expected_res] + ) From ad8bebe8b8af6e14ce466fa60be61caa4fc7c9db Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 27 Sep 2024 21:56:18 +0000 Subject: [PATCH 10/39] [pre-commit.ci] pre-commit autoupdate (#4159) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.6.5 → v0.6.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.5...v0.6.7) - https://github.com/pylint-dev/pylint/: v3.2.7 → v3.3.0 --------- Signed-off-by: Jinzhe Zeng Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> Co-authored-by: Jinzhe Zeng --- .pre-commit-config.yaml | 4 ++-- pyproject.toml | 2 +- source/checker/README.md | 4 ++-- source/checker/deepmd_checker.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d2fc1d0ab8..5d34f39752 100644 ---
a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: exclude: ^source/3rdparty - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.6.5 + rev: v0.6.7 hooks: - id: ruff args: ["--fix"] @@ -146,7 +146,7 @@ repos: exclude: .pre-commit-config.yaml|source/lmp # customized pylint rules - repo: https://github.com/pylint-dev/pylint/ - rev: v3.2.7 + rev: v3.3.0 hooks: - id: pylint entry: env PYTHONPATH=source/checker pylint diff --git a/pyproject.toml b/pyproject.toml index 28fe114e01..a1829016cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -414,4 +414,4 @@ plugins = ["source.3rdparty.coverage_plugins.jit_plugin"] [tool.pylint.'MESSAGES CONTROL'] load-plugins = "deepmd_checker" disable = "all" -enable = "EDP01,EDP02" +enable = "E8001,E8002" diff --git a/source/checker/README.md b/source/checker/README.md index 2a905f93a5..368852034c 100644 --- a/source/checker/README.md +++ b/source/checker/README.md @@ -1,4 +1,4 @@ # DeePMD-kit customized Pylint plugin -- EDP01: Require explicit device when initializing a PyTorch tensor. -- EDP02: Require explicit dtype when initializing a NumPy array, a TensorFlow tensor, or a PyTorch tensor. +- E8001: Require explicit device when initializing a PyTorch tensor. +- E8002: Require explicit dtype when initializing a NumPy array, a TensorFlow tensor, or a PyTorch tensor. diff --git a/source/checker/deepmd_checker.py b/source/checker/deepmd_checker.py index 052d011c47..d763835fdc 100644 --- a/source/checker/deepmd_checker.py +++ b/source/checker/deepmd_checker.py @@ -21,12 +21,12 @@ class DPChecker(BaseChecker): name = "deepmd-checker" msgs: ClassVar[dict] = { - "EDP01": ( + "E8001": ( "No explicit device.", "no-explicit-device", "Require explicit device when initializing a PyTorch tensor.", ), - "EDP02": ( + "E8002": ( "No explicit dtype.", "no-explicit-dtype", "Require explicit dtype when initializing a NumPy array, a TensorFlow tensor, or a PyTorch tensor.", From a9ffccbec1c014d0549fe89545906f436060bfc6 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 2 Oct 2024 06:06:30 -0400 Subject: [PATCH 11/39] fix: unpin h5py on aarch64 (#4176) Fix #3864. ## Summary by CodeRabbit - **New Features** - Enhanced logic for detecting TensorFlow installation paths and requirements. - Improved error handling for TensorFlow version detection. - **Bug Fixes** - Adjusted compatibility specifications for the `h5py` dependency on aarch64 architecture. - **Documentation** - Updated comments to clarify changes in TensorFlow detection and compatibility issues. Signed-off-by: Jinzhe Zeng --- backend/find_tensorflow.py | 9 +++------ pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index 514490a926..ea11aed1b6 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -156,18 +156,15 @@ def get_tf_requirement(tf_version: str = "") -> dict: "tensorflow; platform_machine=='aarch64' or (platform_machine=='arm64' and platform_system == 'Darwin')", # https://github.com/tensorflow/tensorflow/issues/61830 "tensorflow-cpu!=2.15.*; platform_system=='Windows'", - # TODO: build(wheel): unpin h5py on aarch64 - # Revert after https://github.com/h5py/h5py/issues/2408 is fixed; - # or set UV_PREFER_BINARY when https://github.com/astral-sh/uv/issues/1794 is resolved. - # 3.6.0 is the first version to have aarch64 wheels. 
- "h5py>=3.6.0,<3.11.0; platform_system=='Linux' and platform_machine=='aarch64'", + # https://github.com/h5py/h5py/issues/2408 + "h5py>=3.6.0,!=3.11.0; platform_system=='Linux' and platform_machine=='aarch64'", *extra_requires, ], "gpu": [ "tensorflow", "tensorflow-metal; platform_machine=='arm64' and platform_system == 'Darwin'", # See above. - "h5py>=3.6.0,<3.11.0; platform_system=='Linux' and platform_machine=='aarch64'", + "h5py>=3.6.0,!=3.11.0; platform_system=='Linux' and platform_machine=='aarch64'", *extra_requires, ], **extra_select, diff --git a/pyproject.toml b/pyproject.toml index a1829016cb..47cf8e018b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ dependencies = [ 'typing_extensions; python_version < "3.8"', 'importlib_metadata>=1.4; python_version < "3.8"', 'h5py', - "h5py>=3.6.0,<3.11.0; platform_system=='Linux' and platform_machine=='aarch64'", + "h5py>=3.6.0,!=3.11.0; platform_system=='Linux' and platform_machine=='aarch64'", 'wcmatch', 'packaging', 'ml_dtypes', From 7ce5b032503d92abd104627f9d6b9b54967444cf Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 2 Oct 2024 07:13:01 -0400 Subject: [PATCH 12/39] chore: bump LAMMPS to stable_29Aug2024_update1 (#4179) ## Summary by CodeRabbit ## Release Notes - **Documentation** - Updated installation instructions to reference the new LAMMPS version `stable_29Aug2024_update1`. - **New Features** - Enhanced build scripts to support the updated LAMMPS version across various installation methods. - **Chores** - Adjusted dependency management settings to reflect the new LAMMPS version in configuration files. --- doc/install/install-lammps.md | 22 +++++++++++----------- pyproject.toml | 6 +++--- source/install/build_cc.sh | 2 +- source/install/build_from_c.sh | 2 +- source/install/build_lammps.sh | 2 +- source/install/test_cc.sh | 2 +- source/install/test_cc_local.sh | 2 +- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/doc/install/install-lammps.md b/doc/install/install-lammps.md index b43f9998a3..00b887e9c3 100644 --- a/doc/install/install-lammps.md +++ b/doc/install/install-lammps.md @@ -17,11 +17,11 @@ DeePMD-kit will generate a module called `USER-DEEPMD` in the `build` directory, ```bash cd /some/workspace -wget https://github.com/lammps/lammps/archive/stable_29Aug2024.tar.gz -tar xf stable_29Aug2024.tar.gz +wget https://github.com/lammps/lammps/archive/stable_29Aug2024_update1.tar.gz +tar xf stable_29Aug2024_update1.tar.gz ``` -The source code of LAMMPS is stored in the directory `lammps-stable_29Aug2024`. +The source code of LAMMPS is stored in the directory `lammps-stable_29Aug2024_update1`. Then, you can [build LAMMPS](https://docs.lammps.org/Build.html) with either make or CMake. @@ -30,7 +30,7 @@ Then, you can [build LAMMPS](https://docs.lammps.org/Build.html) with either mak Now go into the LAMMPS code and copy the DeePMD-kit module like this ```bash -cd lammps-stable_29Aug2024/src/ +cd lammps-stable_29Aug2024_update1/src/ cp -r $deepmd_source_dir/source/build/USER-DEEPMD . 
make yes-kspace make yes-extra-fix @@ -60,8 +60,8 @@ make no-user-deepmd Now go into the LAMMPS directory and create a directory called `build`: ```bash -mkdir -p lammps-stable_29Aug2024/build/ -cd lammps-stable_29Aug2024/build/ +mkdir -p lammps-stable_29Aug2024_update1/build/ +cd lammps-stable_29Aug2024_update1/build/ ``` Patch the LAMMPS `CMakeLists.txt` file: @@ -94,15 +94,15 @@ Now download the LAMMPS code (`8Apr2021` or later), and uncompress it: ```bash cd /some/workspace -wget https://github.com/lammps/lammps/archive/stable_29Aug2024.tar.gz -tar xf stable_29Aug2024.tar.gz +wget https://github.com/lammps/lammps/archive/stable_29Aug2024_update1.tar.gz +tar xf stable_29Aug2024_update1.tar.gz ``` -The source code of LAMMPS is stored in the directory `lammps-stable_29Aug2024`. The directory of the source code should be specified as the CMAKE argument `LAMMPS_SOURCE_ROOT` during installation of the DeePMD-kit C++ interface. Now go into the LAMMPS directory and create a directory called `build` +The source code of LAMMPS is stored in the directory `lammps-stable_29Aug2024_update1`. The directory of the source code should be specified as the CMAKE argument `LAMMPS_SOURCE_ROOT` during installation of the DeePMD-kit C++ interface. Now go into the LAMMPS directory and create a directory called `build` ```bash -mkdir -p lammps-stable_29Aug2024/build/ -cd lammps-stable_29Aug2024/build/ +mkdir -p lammps-stable_29Aug2024_update1/build/ +cd lammps-stable_29Aug2024_update1/build/ ``` Now build LAMMPS. Note that `PLUGIN` must be enabled, and `BUILD_SHARED_LIBS` must be set to `yes`. You can install any other package you want. diff --git a/pyproject.toml b/pyproject.toml index 47cf8e018b..1b825ef441 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,7 +104,7 @@ docs = [ "sphinxcontrib-moderncmakedomain", ] lmp = [ - "lammps~=2024.8.29.0.0", + "lammps~=2024.8.29.1.0", ] ipi = [ "ipi", @@ -225,7 +225,7 @@ repair-wheel-command = """delocate-wheel --require-archs {delocate_archs} -w {de [tool.cibuildwheel.macos.environment] PIP_PREFER_BINARY = "1" -DP_LAMMPS_VERSION = "stable_29Aug2024" +DP_LAMMPS_VERSION = "stable_29Aug2024_update1" DP_ENABLE_IPI = "1" DP_ENABLE_PYTORCH = "1" # for unclear reason, when enabling PyTorch, OpenMP is found accidentally @@ -261,7 +261,7 @@ before-build = [ ] [tool.cibuildwheel.linux.environment] PIP_PREFER_BINARY = "1" -DP_LAMMPS_VERSION = "stable_29Aug2024" +DP_LAMMPS_VERSION = "stable_29Aug2024_update1" DP_ENABLE_IPI = "1" DP_ENABLE_PYTORCH = "1" MPI_HOME = "/usr/lib64/mpich" diff --git a/source/install/build_cc.sh b/source/install/build_cc.sh index 60101eb9a8..17b5ed0de4 100755 --- a/source/install/build_cc.sh +++ b/source/install/build_cc.sh @@ -25,7 +25,7 @@ cmake -D ENABLE_TENSORFLOW=ON \ -D CMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ -D USE_TF_PYTHON_LIBS=TRUE \ ${CUDA_ARGS} \ - -D LAMMPS_VERSION=stable_29Aug2024 \ + -D LAMMPS_VERSION=stable_29Aug2024_update1 \ .. cmake --build . -j${NPROC} cmake --install . diff --git a/source/install/build_from_c.sh b/source/install/build_from_c.sh index ff9268f649..22739ec531 100755 --- a/source/install/build_from_c.sh +++ b/source/install/build_from_c.sh @@ -13,7 +13,7 @@ NPROC=$(nproc --all) BUILD_TMP_DIR=${SCRIPT_PATH}/../build mkdir -p ${BUILD_TMP_DIR} cd ${BUILD_TMP_DIR} -cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DDEEPMD_C_ROOT=${DEEPMD_C_ROOT} -DLAMMPS_VERSION=stable_29Aug2024 .. +cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DDEEPMD_C_ROOT=${DEEPMD_C_ROOT} -DLAMMPS_VERSION=stable_29Aug2024_update1 .. 
cmake --build . -j${NPROC} cmake --install . cmake --build . --target=lammps diff --git a/source/install/build_lammps.sh b/source/install/build_lammps.sh index a1e62691ca..add1194151 100755 --- a/source/install/build_lammps.sh +++ b/source/install/build_lammps.sh @@ -14,7 +14,7 @@ BUILD_TMP_DIR=${SCRIPT_PATH}/../build_lammps mkdir -p ${BUILD_TMP_DIR} cd ${BUILD_TMP_DIR} # download LAMMPS -LAMMPS_VERSION=stable_29Aug2024 +LAMMPS_VERSION=stable_29Aug2024_update1 if [ ! -d "lammps-${LAMMPS_VERSION}" ]; then curl -L -o lammps.tar.gz https://github.com/lammps/lammps/archive/refs/tags/${LAMMPS_VERSION}.tar.gz tar vxzf lammps.tar.gz diff --git a/source/install/test_cc.sh b/source/install/test_cc.sh index ccdaf124cd..1626f36193 100755 --- a/source/install/test_cc.sh +++ b/source/install/test_cc.sh @@ -17,7 +17,7 @@ INSTALL_PREFIX=${SCRIPT_PATH}/../../dp_test BUILD_TMP_DIR=${SCRIPT_PATH}/../build_tests mkdir -p ${BUILD_TMP_DIR} cd ${BUILD_TMP_DIR} -cmake -DINSTALL_TENSORFLOW=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DTENSORFLOW_ROOT=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_29Aug2024 ${CUDA_ARGS} .. +cmake -DINSTALL_TENSORFLOW=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DTENSORFLOW_ROOT=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_29Aug2024_update1 ${CUDA_ARGS} .. cmake --build . -j${NPROC} cmake --install . ctest --output-on-failure diff --git a/source/install/test_cc_local.sh b/source/install/test_cc_local.sh index fdb2396a28..8ce4de4b21 100755 --- a/source/install/test_cc_local.sh +++ b/source/install/test_cc_local.sh @@ -25,7 +25,7 @@ cmake \ -D USE_TF_PYTHON_LIBS=TRUE \ -D CMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ -D BUILD_TESTING:BOOL=TRUE \ - -D LAMMPS_VERSION=stable_29Aug2024 \ + -D LAMMPS_VERSION=stable_29Aug2024_update1 \ ${CUDA_ARGS} .. cmake --build . -j${NPROC} cmake --install . From 192a97a47af249d40ad75206d5de28a9048d492b Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 6 Oct 2024 13:03:28 -0400 Subject: [PATCH 13/39] breaking: drop Python 3.8 support (#4185) ## Summary by CodeRabbit ## Release Notes - **New Features** - Introduced new functionality for handling input data conversion between different versions. - Added `EnvMatStatSe` class for enhanced environmental matrix statistics calculations. - Implemented a mechanism to track the status of atoms (real vs. virtual) in `BaseAtomicModel`. - **Bug Fixes** - Updated Python version requirements across documentation and configuration files to Python 3.9 or above. - **Documentation** - Updated installation guides to reflect the new Python version requirement and clarified virtual environment setup instructions. - **Chores** - Refined dependency management in `pyproject.toml` to support newer Python versions and improve version control.
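
Most of the files in this patch change in one mechanical way: `typing.List`, `typing.Dict`, `typing.Tuple`, `typing.Set`, and `typing.Type` annotations are replaced by the builtin generics that PEP 585 made subscriptable, which is what raises the minimum supported version to Python 3.9. A minimal sketch of the pattern (the `find_library` name below is illustrative, loosely echoing `find_tensorflow`; it is not code taken from this diff):

```python
# Python 3.8 style (removed by this patch): aliases imported from typing
from typing import List, Optional, Tuple

def find_library_py38(name: str) -> Tuple[Optional[str], List[str]]: ...

# Python 3.9+ style (adopted by this patch): builtin generics per PEP 585.
# Optional and Union still come from typing; the "X | Y" form needs 3.10.
from typing import Optional

def find_library(name: str) -> tuple[Optional[str], list[str]]:
    """Return a hypothetical library root (or None) and extra requirements."""
    requires: list[str] = []
    if name == "tensorflow":
        requires.append("tensorflow>=2.16")
    return None, requires
```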
--------- Signed-off-by: Jinzhe Zeng Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/workflows/test_python.yml | 6 +- backend/dp_backend.py | 10 +- backend/dynamic_metadata.py | 6 +- backend/find_pytorch.py | 4 +- backend/find_tensorflow.py | 4 +- backend/read_env.py | 5 +- deepmd/backend/backend.py | 17 +-- deepmd/backend/dpmodel.py | 8 +- deepmd/backend/jax.py | 8 +- deepmd/backend/pytorch.py | 8 +- deepmd/backend/suffix.py | 3 +- deepmd/backend/tensorflow.py | 8 +- deepmd/calculator.py | 16 +-- deepmd/common.py | 21 ++- .../dpmodel/atomic_model/base_atomic_model.py | 35 +++-- .../dpmodel/atomic_model/dp_atomic_model.py | 12 +- .../atomic_model/linear_atomic_model.py | 43 +++--- .../atomic_model/make_base_atomic_model.py | 16 +-- .../atomic_model/pairtab_atomic_model.py | 18 ++- deepmd/dpmodel/descriptor/descriptor.py | 16 +-- deepmd/dpmodel/descriptor/dpa1.py | 50 ++++--- deepmd/dpmodel/descriptor/dpa2.py | 34 +++-- deepmd/dpmodel/descriptor/hybrid.py | 31 ++-- .../descriptor/make_base_descriptor.py | 14 +- deepmd/dpmodel/descriptor/repformers.py | 42 +++--- deepmd/dpmodel/descriptor/se_atten_v2.py | 12 +- deepmd/dpmodel/descriptor/se_e2_a.py | 30 ++-- deepmd/dpmodel/descriptor/se_r.py | 28 ++-- deepmd/dpmodel/descriptor/se_t.py | 30 ++-- deepmd/dpmodel/descriptor/se_t_tebd.py | 42 +++--- deepmd/dpmodel/fitting/dipole_fitting.py | 18 ++- deepmd/dpmodel/fitting/dos_fitting.py | 11 +- deepmd/dpmodel/fitting/ener_fitting.py | 15 +- deepmd/dpmodel/fitting/general_fitting.py | 34 +++-- deepmd/dpmodel/fitting/invar_fitting.py | 22 ++- deepmd/dpmodel/fitting/make_base_fitting.py | 8 +- .../dpmodel/fitting/polarizability_fitting.py | 24 ++-- deepmd/dpmodel/fitting/property_fitting.py | 13 +- deepmd/dpmodel/infer/deep_eval.py | 18 +-- deepmd/dpmodel/model/base_model.py | 15 +- deepmd/dpmodel/model/dp_model.py | 6 +- deepmd/dpmodel/model/make_model.py | 28 ++-- deepmd/dpmodel/model/spin_model.py | 12 +- deepmd/dpmodel/model/transform_output.py | 11 +- deepmd/dpmodel/output_def.py | 43 +++--- deepmd/dpmodel/utils/exclude_mask.py | 8 +- deepmd/dpmodel/utils/neighbor_stat.py | 9 +- deepmd/dpmodel/utils/network.py | 20 ++- deepmd/dpmodel/utils/nlist.py | 22 ++- deepmd/dpmodel/utils/seed.py | 7 +- deepmd/dpmodel/utils/type_embed.py | 11 +- deepmd/dpmodel/utils/update_sel.py | 5 +- deepmd/entrypoints/main.py | 2 +- deepmd/entrypoints/neighbor_stat.py | 3 +- deepmd/entrypoints/show.py | 5 +- deepmd/entrypoints/test.py | 33 ++--- deepmd/env.py | 12 +- deepmd/infer/deep_dos.py | 10 +- deepmd/infer/deep_eval.py | 18 +-- deepmd/infer/deep_polar.py | 3 +- deepmd/infer/deep_pot.py | 22 ++- deepmd/infer/deep_property.py | 13 +- deepmd/infer/deep_tensor.py | 8 +- deepmd/infer/model_devi.py | 11 +- deepmd/loggers/training.py | 3 +- deepmd/main.py | 17 +-- deepmd/pt/entrypoints/main.py | 3 +- deepmd/pt/infer/deep_eval.py | 20 ++- deepmd/pt/loss/dos.py | 5 +- deepmd/pt/loss/ener.py | 3 +- deepmd/pt/loss/ener_spin.py | 5 +- deepmd/pt/loss/loss.py | 5 +- deepmd/pt/loss/property.py | 5 +- deepmd/pt/loss/tensor.py | 5 +- .../model/atomic_model/base_atomic_model.py | 71 +++++----- .../model/atomic_model/dipole_atomic_model.py | 5 +- .../pt/model/atomic_model/dp_atomic_model.py | 14 +- .../model/atomic_model/linear_atomic_model.py | 57 ++++---- .../atomic_model/pairtab_atomic_model.py | 30 ++-- .../model/atomic_model/polar_atomic_model.py | 5 +- .../atomic_model/property_atomic_model.py | 5 +- deepmd/pt/model/descriptor/descriptor.py | 16 +-- 
deepmd/pt/model/descriptor/dpa1.py | 37 +++-- deepmd/pt/model/descriptor/dpa2.py | 39 +++-- deepmd/pt/model/descriptor/gaussian_lcc.py | 3 +- deepmd/pt/model/descriptor/hybrid.py | 37 +++-- deepmd/pt/model/descriptor/repformer_layer.py | 29 ++-- .../descriptor/repformer_layer_old_impl.py | 15 +- deepmd/pt/model/descriptor/repformers.py | 25 ++-- deepmd/pt/model/descriptor/se_a.py | 51 ++++--- deepmd/pt/model/descriptor/se_atten.py | 31 ++-- deepmd/pt/model/descriptor/se_atten_v2.py | 14 +- deepmd/pt/model/descriptor/se_r.py | 35 +++-- deepmd/pt/model/descriptor/se_t.py | 65 ++++----- deepmd/pt/model/descriptor/se_t_tebd.py | 59 ++++---- deepmd/pt/model/model/dipole_model.py | 3 +- deepmd/pt/model/model/dos_model.py | 3 +- deepmd/pt/model/model/dp_model.py | 6 +- deepmd/pt/model/model/dp_zbl_model.py | 9 +- deepmd/pt/model/model/ener_model.py | 5 +- deepmd/pt/model/model/frozen.py | 15 +- deepmd/pt/model/model/make_hessian_model.py | 12 +- deepmd/pt/model/model/make_model.py | 40 +++--- deepmd/pt/model/model/polar_model.py | 3 +- deepmd/pt/model/model/property_model.py | 5 +- deepmd/pt/model/model/spin_model.py | 12 +- deepmd/pt/model/model/transform_output.py | 14 +- deepmd/pt/model/network/layernorm.py | 3 +- deepmd/pt/model/network/mlp.py | 6 +- deepmd/pt/model/network/network.py | 15 +- deepmd/pt/model/task/dipole.py | 23 ++- deepmd/pt/model/task/dos.py | 13 +- deepmd/pt/model/task/ener.py | 16 +-- deepmd/pt/model/task/fitting.py | 37 +++-- deepmd/pt/model/task/invar_fitting.py | 21 ++- deepmd/pt/model/task/polarizability.py | 21 ++- deepmd/pt/model/task/property.py | 7 +- deepmd/pt/train/training.py | 3 +- deepmd/pt/train/wrapper.py | 9 +- deepmd/pt/utils/dataloader.py | 11 +- deepmd/pt/utils/dataset.py | 5 +- deepmd/pt/utils/env_mat_stat.py | 19 ++- deepmd/pt/utils/exclude_mask.py | 15 +- deepmd/pt/utils/neighbor_stat.py | 9 +- deepmd/pt/utils/nlist.py | 28 ++-- deepmd/pt/utils/stat.py | 38 +++-- deepmd/pt/utils/update_sel.py | 5 +- deepmd/pt/utils/utils.py | 7 +- deepmd/tf/cluster/__init__.py | 6 +- deepmd/tf/cluster/local.py | 8 +- deepmd/tf/descriptor/descriptor.py | 40 +++--- deepmd/tf/descriptor/hybrid.py | 21 ++- deepmd/tf/descriptor/loc_frame.py | 16 +-- deepmd/tf/descriptor/se.py | 19 ++- deepmd/tf/descriptor/se_a.py | 18 ++- deepmd/tf/descriptor/se_a_ebd.py | 11 +- deepmd/tf/descriptor/se_a_ebd_v2.py | 7 +- deepmd/tf/descriptor/se_a_ef.py | 22 ++- deepmd/tf/descriptor/se_a_mask.py | 19 ++- deepmd/tf/descriptor/se_atten.py | 43 +++--- deepmd/tf/descriptor/se_atten_v2.py | 7 +- deepmd/tf/descriptor/se_r.py | 16 +-- deepmd/tf/descriptor/se_t.py | 25 ++-- deepmd/tf/entrypoints/freeze.py | 7 +- deepmd/tf/entrypoints/ipi.py | 5 +- deepmd/tf/entrypoints/main.py | 5 +- deepmd/tf/entrypoints/train.py | 5 +- deepmd/tf/entrypoints/transfer.py | 9 +- deepmd/tf/fit/dipole.py | 13 +- deepmd/tf/fit/dos.py | 13 +- deepmd/tf/fit/ener.py | 19 ++- deepmd/tf/fit/fitting.py | 7 +- deepmd/tf/fit/polar.py | 39 +++-- deepmd/tf/infer/data_modifier.py | 10 +- deepmd/tf/infer/deep_eval.py | 36 +++-- deepmd/tf/infer/deep_tensor.py | 15 +- deepmd/tf/infer/ewald_recp.py | 5 +- deepmd/tf/lmp.py | 5 +- deepmd/tf/loss/dos.py | 5 +- deepmd/tf/loss/ener.py | 7 +- deepmd/tf/loss/loss.py | 15 +- deepmd/tf/loss/tensor.py | 5 +- deepmd/tf/model/dos.py | 3 +- deepmd/tf/model/ener.py | 3 +- deepmd/tf/model/frozen.py | 8 +- deepmd/tf/model/linear.py | 10 +- deepmd/tf/model/model.py | 25 ++-- deepmd/tf/model/pairtab.py | 10 +- deepmd/tf/model/pairwise_dprc.py | 13 +- deepmd/tf/model/tensor.py | 3 +- 
deepmd/tf/nvnmd/utils/fio.py | 5 +- deepmd/tf/train/run_options.py | 9 +- deepmd/tf/train/trainer.py | 18 +-- deepmd/tf/utils/finetune.py | 5 +- deepmd/tf/utils/graph.py | 44 +++--- deepmd/tf/utils/neighbor_stat.py | 11 +- deepmd/tf/utils/parallel_op.py | 14 +- deepmd/tf/utils/spin.py | 13 +- deepmd/tf/utils/tabulate.py | 11 +- deepmd/tf/utils/type_embed.py | 7 +- deepmd/tf/utils/update_sel.py | 5 +- deepmd/utils/argcheck.py | 134 +++++++++--------- deepmd/utils/batch_size.py | 7 +- deepmd/utils/compat.py | 83 +++++------ deepmd/utils/data.py | 11 +- deepmd/utils/data_system.py | 38 +++-- deepmd/utils/econf_embd.py | 12 +- deepmd/utils/env_mat_stat.py | 28 ++-- deepmd/utils/finetune.py | 38 +++-- deepmd/utils/hostlist.py | 8 +- deepmd/utils/neighbor_stat.py | 7 +- deepmd/utils/out_stat.py | 5 +- deepmd/utils/pair_tab.py | 3 +- deepmd/utils/path.py | 32 ++--- deepmd/utils/plugin.py | 10 +- deepmd/utils/random.py | 3 +- deepmd/utils/spin.py | 24 ++-- deepmd/utils/update_sel.py | 19 ++- deepmd/utils/weight_avg.py | 9 +- doc/development/coding-conventions.rst | 2 +- doc/development/create-a-model-pt.md | 4 +- doc/development/create-a-model-tf.md | 2 +- doc/getting-started/quick_start.ipynb | 2 +- doc/install/easy-install.md | 2 +- doc/install/install-from-source.md | 4 +- pyproject.toml | 7 +- source/install/build_tf.py | 40 +++--- .../common/dpmodel/array_api/test_env_mat.py | 6 +- .../tests/common/dpmodel/test_output_def.py | 5 +- source/tests/common/test_argument_parser.py | 11 +- source/tests/common/test_auto_batch_size.py | 8 +- source/tests/consistent/common.py | 15 +- .../tests/consistent/descriptor/test_dpa1.py | 5 +- .../tests/consistent/descriptor/test_dpa2.py | 5 +- .../consistent/descriptor/test_hybrid.py | 5 +- .../consistent/descriptor/test_se_atten_v2.py | 5 +- .../consistent/descriptor/test_se_e2_a.py | 5 +- .../tests/consistent/descriptor/test_se_r.py | 5 +- .../tests/consistent/descriptor/test_se_t.py | 5 +- .../consistent/descriptor/test_se_t_tebd.py | 5 +- .../tests/consistent/fitting/test_dipole.py | 5 +- source/tests/consistent/fitting/test_dos.py | 5 +- source/tests/consistent/fitting/test_ener.py | 5 +- source/tests/consistent/fitting/test_polar.py | 5 +- .../tests/consistent/fitting/test_property.py | 5 +- source/tests/consistent/model/test_ener.py | 5 +- source/tests/consistent/model/test_frozen.py | 5 +- .../tests/consistent/test_type_embedding.py | 5 +- source/tests/infer/case.py | 5 +- source/tests/pt/common.py | 3 +- .../pt/model/test_atomic_model_atomic_stat.py | 5 +- .../pt/model/test_atomic_model_global_stat.py | 5 +- source/tests/pt/model/test_force_grad.py | 3 +- .../pt/model/test_linear_atomic_model_stat.py | 9 +- source/tests/pt/model/test_rotation.py | 3 +- .../common/cases/atomic_model/utils.py | 18 ++- .../universal/common/cases/model/utils.py | 18 ++- 237 files changed, 1612 insertions(+), 2065 deletions(-) diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index 8274921909..87d7266e03 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: group: [1, 2, 3, 4, 5, 6] - python: ["3.8", "3.12"] + python: ["3.9", "3.12"] steps: - uses: actions/checkout@v4 @@ -34,7 +34,7 @@ jobs: # existing TensorFlow package. Currently, it uses # TensorFlow in the build dependency, but if it # changes, setting `TENSORFLOW_ROOT`. 
- TENSORFLOW_VERSION: ${{ matrix.python == '3.8' && '2.13.1' || '2.16.1' }} + TENSORFLOW_VERSION: 2.16.1 DP_ENABLE_PYTORCH: 1 DP_BUILD_TESTING: 1 UV_EXTRA_INDEX_URL: "https://pypi.anaconda.org/njzjz/simple https://pypi.anaconda.org/mpi4py/simple" @@ -69,7 +69,7 @@ jobs: strategy: fail-fast: false matrix: - python: ["3.8", "3.12"] + python: ["3.9", "3.12"] needs: testpython steps: - name: Get durations from cache diff --git a/backend/dp_backend.py b/backend/dp_backend.py index dbd2d2a52b..81c3f20f19 100644 --- a/backend/dp_backend.py +++ b/backend/dp_backend.py @@ -1,10 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """A PEP-517 backend to find TensorFlow.""" -from typing import ( - List, -) - from scikit_build_core import build as _orig from .find_pytorch import ( @@ -26,7 +22,7 @@ ] -def __dir__() -> List[str]: +def __dir__() -> list[str]: return __all__ @@ -42,7 +38,7 @@ def __dir__() -> List[str]: def get_requires_for_build_wheel( config_settings: dict, -) -> List[str]: +) -> list[str]: return ( _orig.get_requires_for_build_wheel(config_settings) + find_tensorflow()[1] @@ -52,7 +48,7 @@ def get_requires_for_build_wheel( def get_requires_for_build_editable( config_settings: dict, -) -> List[str]: +) -> list[str]: return ( _orig.get_requires_for_build_editable(config_settings) + find_tensorflow()[1] diff --git a/backend/dynamic_metadata.py b/backend/dynamic_metadata.py index 83123e6e41..a66e9a2759 100644 --- a/backend/dynamic_metadata.py +++ b/backend/dynamic_metadata.py @@ -4,8 +4,6 @@ Path, ) from typing import ( - Dict, - List, Optional, ) @@ -27,13 +25,13 @@ __all__ = ["dynamic_metadata"] -def __dir__() -> List[str]: +def __dir__() -> list[str]: return __all__ def dynamic_metadata( field: str, - settings: Optional[Dict[str, object]] = None, + settings: Optional[dict[str, object]] = None, ): assert field in ["optional-dependencies", "entry-points", "scripts"] _, _, find_libpython_requires, extra_scripts, tf_version, pt_version = ( diff --git a/backend/find_pytorch.py b/backend/find_pytorch.py index 04f297a963..e01f4e84fe 100644 --- a/backend/find_pytorch.py +++ b/backend/find_pytorch.py @@ -18,9 +18,7 @@ get_path, ) from typing import ( - List, Optional, - Tuple, Union, ) @@ -30,7 +28,7 @@ @lru_cache -def find_pytorch() -> Tuple[Optional[str], List[str]]: +def find_pytorch() -> tuple[Optional[str], list[str]]: """Find PyTorch library. Tries to find PyTorch in the order of: diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index ea11aed1b6..5b0de0b2dd 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -17,9 +17,7 @@ get_path, ) from typing import ( - List, Optional, - Tuple, Union, ) @@ -29,7 +27,7 @@ @lru_cache -def find_tensorflow() -> Tuple[Optional[str], List[str]]: +def find_tensorflow() -> tuple[Optional[str], list[str]]: """Find TensorFlow library. Tries to find TensorFlow in the order of: diff --git a/backend/read_env.py b/backend/read_env.py index ae82778f4e..edc3600115 100644 --- a/backend/read_env.py +++ b/backend/read_env.py @@ -5,9 +5,6 @@ from functools import ( lru_cache, ) -from typing import ( - Tuple, -) from packaging.version import ( Version, @@ -24,7 +21,7 @@ @lru_cache -def get_argument_from_env() -> Tuple[str, list, list, dict, str, str]: +def get_argument_from_env() -> tuple[str, list, list, dict, str, str]: """Get the arguments from environment variables. The environment variables are assumed to be not changed during the build. 
diff --git a/deepmd/backend/backend.py b/deepmd/backend/backend.py index 8f7bca319e..3263169f6f 100644 --- a/deepmd/backend/backend.py +++ b/deepmd/backend/backend.py @@ -10,9 +10,6 @@ TYPE_CHECKING, Callable, ClassVar, - Dict, - List, - Type, ) from deepmd.utils.plugin import ( @@ -45,7 +42,7 @@ class Backend(PluginVariant, make_plugin_registry("backend")): """ @staticmethod - def get_backend(key: str) -> Type["Backend"]: + def get_backend(key: str) -> type["Backend"]: """Get the backend by key. Parameters @@ -61,7 +58,7 @@ def get_backend(key: str) -> Type["Backend"]: return Backend.get_class_by_type(key) @staticmethod - def get_backends() -> Dict[str, Type["Backend"]]: + def get_backends() -> dict[str, type["Backend"]]: """Get all the registered backend names. Returns @@ -74,7 +71,7 @@ def get_backends() -> Dict[str, Type["Backend"]]: @staticmethod def get_backends_by_feature( feature: "Backend.Feature", - ) -> Dict[str, Type["Backend"]]: + ) -> dict[str, type["Backend"]]: """Get all the registered backend names with a specific feature. Parameters @@ -94,7 +91,7 @@ def get_backends_by_feature( } @staticmethod - def detect_backend_by_model(filename: str) -> Type["Backend"]: + def detect_backend_by_model(filename: str) -> type["Backend"]: """Detect the backend of the given model file. Parameters @@ -128,7 +125,7 @@ class Feature(Flag): features: ClassVar[Feature] = Feature(0) """The features of the backend.""" - suffixes: ClassVar[List[str]] = [] + suffixes: ClassVar[list[str]] = [] """The supported suffixes of the saved model. The first element is considered as the default suffix.""" @@ -157,7 +154,7 @@ def entry_point_hook(self) -> Callable[["Namespace"], None]: @property @abstractmethod - def deep_eval(self) -> Type["DeepEvalBackend"]: + def deep_eval(self) -> type["DeepEvalBackend"]: """The Deep Eval backend of the backend. Returns @@ -169,7 +166,7 @@ def deep_eval(self) -> Type["DeepEvalBackend"]: @property @abstractmethod - def neighbor_stat(self) -> Type["NeighborStat"]: + def neighbor_stat(self) -> type["NeighborStat"]: """The neighbor statistics of the backend. Returns diff --git a/deepmd/backend/dpmodel.py b/deepmd/backend/dpmodel.py index c51d097d5a..7c21b256ae 100644 --- a/deepmd/backend/dpmodel.py +++ b/deepmd/backend/dpmodel.py @@ -3,8 +3,6 @@ TYPE_CHECKING, Callable, ClassVar, - List, - Type, ) from deepmd.backend.backend import ( @@ -37,7 +35,7 @@ class DPModelBackend(Backend): Backend.Feature.DEEP_EVAL | Backend.Feature.NEIGHBOR_STAT | Backend.Feature.IO ) """The features of the backend.""" - suffixes: ClassVar[List[str]] = [".dp", ".yaml", ".yml"] + suffixes: ClassVar[list[str]] = [".dp", ".yaml", ".yml"] """The suffixes of the backend.""" def is_available(self) -> bool: @@ -62,7 +60,7 @@ def entry_point_hook(self) -> Callable[["Namespace"], None]: raise NotImplementedError(f"Unsupported backend: {self.name}") @property - def deep_eval(self) -> Type["DeepEvalBackend"]: + def deep_eval(self) -> type["DeepEvalBackend"]: """The Deep Eval backend of the backend. Returns @@ -77,7 +75,7 @@ def deep_eval(self) -> Type["DeepEvalBackend"]: return DeepEval @property - def neighbor_stat(self) -> Type["NeighborStat"]: + def neighbor_stat(self) -> type["NeighborStat"]: """The neighbor statistics of the backend. 
Returns diff --git a/deepmd/backend/jax.py b/deepmd/backend/jax.py index ece0761772..db92d6bed1 100644 --- a/deepmd/backend/jax.py +++ b/deepmd/backend/jax.py @@ -6,8 +6,6 @@ TYPE_CHECKING, Callable, ClassVar, - List, - Type, ) from deepmd.backend.backend import ( @@ -41,7 +39,7 @@ class JAXBackend(Backend): # | Backend.Feature.IO ) """The features of the backend.""" - suffixes: ClassVar[List[str]] = [] + suffixes: ClassVar[list[str]] = [] """The suffixes of the backend.""" def is_available(self) -> bool: @@ -66,7 +64,7 @@ def entry_point_hook(self) -> Callable[["Namespace"], None]: raise NotImplementedError @property - def deep_eval(self) -> Type["DeepEvalBackend"]: + def deep_eval(self) -> type["DeepEvalBackend"]: """The Deep Eval backend of the backend. Returns @@ -77,7 +75,7 @@ def deep_eval(self) -> Type["DeepEvalBackend"]: raise NotImplementedError @property - def neighbor_stat(self) -> Type["NeighborStat"]: + def neighbor_stat(self) -> type["NeighborStat"]: """The neighbor statistics of the backend. Returns diff --git a/deepmd/backend/pytorch.py b/deepmd/backend/pytorch.py index fb7d30e994..f5b0dd92b2 100644 --- a/deepmd/backend/pytorch.py +++ b/deepmd/backend/pytorch.py @@ -6,8 +6,6 @@ TYPE_CHECKING, Callable, ClassVar, - List, - Type, ) from deepmd.backend.backend import ( @@ -41,7 +39,7 @@ class PyTorchBackend(Backend): | Backend.Feature.IO ) """The features of the backend.""" - suffixes: ClassVar[List[str]] = [".pth", ".pt"] + suffixes: ClassVar[list[str]] = [".pth", ".pt"] """The suffixes of the backend.""" def is_available(self) -> bool: @@ -68,7 +66,7 @@ def entry_point_hook(self) -> Callable[["Namespace"], None]: return deepmd_main @property - def deep_eval(self) -> Type["DeepEvalBackend"]: + def deep_eval(self) -> type["DeepEvalBackend"]: """The Deep Eval backend of the backend. Returns @@ -81,7 +79,7 @@ def deep_eval(self) -> Type["DeepEvalBackend"]: return DeepEvalPT @property - def neighbor_stat(self) -> Type["NeighborStat"]: + def neighbor_stat(self) -> type["NeighborStat"]: """The neighbor statistics of the backend. Returns diff --git a/deepmd/backend/suffix.py b/deepmd/backend/suffix.py index 273fbc0951..d694b43488 100644 --- a/deepmd/backend/suffix.py +++ b/deepmd/backend/suffix.py @@ -6,7 +6,6 @@ ) from typing import ( Optional, - Type, Union, ) @@ -18,7 +17,7 @@ def format_model_suffix( filename: str, feature: Optional[Backend.Feature] = None, - preferred_backend: Optional[Union[str, Type["Backend"]]] = None, + preferred_backend: Optional[Union[str, type["Backend"]]] = None, strict_prefer: Optional[bool] = None, ) -> str: """Check and format the suffixes of a filename. diff --git a/deepmd/backend/tensorflow.py b/deepmd/backend/tensorflow.py index 15b03ee7c8..6b73d7c469 100644 --- a/deepmd/backend/tensorflow.py +++ b/deepmd/backend/tensorflow.py @@ -6,8 +6,6 @@ TYPE_CHECKING, Callable, ClassVar, - List, - Type, ) from deepmd.backend.backend import ( @@ -41,7 +39,7 @@ class TensorFlowBackend(Backend): | Backend.Feature.IO ) """The features of the backend.""" - suffixes: ClassVar[List[str]] = [".pb"] + suffixes: ClassVar[list[str]] = [".pb"] """The suffixes of the backend.""" def is_available(self) -> bool: @@ -77,7 +75,7 @@ def entry_point_hook(self) -> Callable[["Namespace"], None]: return deepmd_main @property - def deep_eval(self) -> Type["DeepEvalBackend"]: + def deep_eval(self) -> type["DeepEvalBackend"]: """The Deep Eval backend of the backend. 
Returns @@ -90,7 +88,7 @@ def deep_eval(self) -> Type["DeepEvalBackend"]: return DeepEvalTF @property - def neighbor_stat(self) -> Type["NeighborStat"]: + def neighbor_stat(self) -> type["NeighborStat"]: """The neighbor statistics of the backend. Returns diff --git a/deepmd/calculator.py b/deepmd/calculator.py index 2d3e7ce831..032fa2bcfa 100644 --- a/deepmd/calculator.py +++ b/deepmd/calculator.py @@ -7,8 +7,6 @@ from typing import ( TYPE_CHECKING, ClassVar, - Dict, - List, Optional, Union, ) @@ -42,7 +40,7 @@ class DP(Calculator): path to the model label : str, optional calculator label, by default "DP" - type_dict : Dict[str, int], optional + type_dict : dict[str, int], optional mapping of element types and their numbers, best left None and the calculator will infer this information from model, by default None neighbor_list : ase.neighborlist.NeighborList, optional @@ -72,7 +70,7 @@ class DP(Calculator): """ name = "DP" - implemented_properties: ClassVar[List[str]] = [ + implemented_properties: ClassVar[list[str]] = [ "energy", "free_energy", "forces", @@ -84,7 +82,7 @@ def __init__( self, model: Union[str, "Path"], label: str = "DP", - type_dict: Optional[Dict[str, int]] = None, + type_dict: Optional[dict[str, int]] = None, neighbor_list=None, **kwargs, ) -> None: @@ -100,8 +98,8 @@ def __init__( def calculate( self, atoms: Optional["Atoms"] = None, - properties: List[str] = ["energy", "forces", "virial"], - system_changes: List[str] = all_changes, + properties: list[str] = ["energy", "forces", "virial"], + system_changes: list[str] = all_changes, ): """Run calculation with deepmd model. @@ -109,10 +107,10 @@ def calculate( ---------- atoms : Optional[Atoms], optional atoms object to run the calculation on, by default None - properties : List[str], optional + properties : list[str], optional unused, only for function signature compatibility, by default ["energy", "forces", "stress"] - system_changes : List[str], optional + system_changes : list[str], optional unused, only for function signature compatibility, by default all_changes """ if atoms is not None: diff --git a/deepmd/common.py b/deepmd/common.py index f58634f224..fdfeef0e6d 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -14,9 +14,6 @@ from typing import ( TYPE_CHECKING, Any, - Dict, - List, - Set, TypeVar, Union, get_args, @@ -60,8 +57,8 @@ "linear", ] # get_args is new in py38 -VALID_PRECISION: Set[_PRECISION] = set(get_args(_PRECISION)) -VALID_ACTIVATION: Set[_ACTIVATION] = set(get_args(_ACTIVATION)) +VALID_PRECISION: set[_PRECISION] = set(get_args(_PRECISION)) +VALID_ACTIVATION: set[_ACTIVATION] = set(get_args(_ACTIVATION)) if TYPE_CHECKING: _DICT_VAL = TypeVar("_DICT_VAL") @@ -127,17 +124,17 @@ def make_default_mesh(pbc: bool, mixed_type: bool) -> np.ndarray: def j_deprecated( - jdata: Dict[str, "_DICT_VAL"], key: str, deprecated_key: List[str] = [] + jdata: dict[str, "_DICT_VAL"], key: str, deprecated_key: list[str] = [] ) -> "_DICT_VAL": """Assert that supplied dictionary conaines specified key. Parameters ---------- - jdata : Dict[str, _DICT_VAL] + jdata : dict[str, _DICT_VAL] dictionary to check key : str key to check - deprecated_key : List[str], optional + deprecated_key : list[str], optional list of deprecated keys, by default [] Returns @@ -161,7 +158,7 @@ def j_deprecated( return jdata[key] -def j_loader(filename: Union[str, Path]) -> Dict[str, Any]: +def j_loader(filename: Union[str, Path]) -> dict[str, Any]: """Load yaml or json settings file. 
Parameters @@ -171,7 +168,7 @@ def j_loader(filename: Union[str, Path]) -> Dict[str, Any]: Returns ------- - Dict[str, Any] + dict[str, Any] loaded dictionary Raises @@ -190,7 +187,7 @@ def j_loader(filename: Union[str, Path]) -> Dict[str, Any]: raise TypeError("config file must be json, or yaml/yml") -def expand_sys_str(root_dir: Union[str, Path]) -> List[str]: +def expand_sys_str(root_dir: Union[str, Path]) -> list[str]: """Recursively iterate over directories taking those that contain `type.raw` file. Parameters @@ -200,7 +197,7 @@ def expand_sys_str(root_dir: Union[str, Path]) -> List[str]: Returns ------- - List[str] + list[str] list of string pointing to system directories """ root_dir = DPPath(root_dir) diff --git a/deepmd/dpmodel/atomic_model/base_atomic_model.py b/deepmd/dpmodel/atomic_model/base_atomic_model.py index 5ea65a9d73..c29a76b3f1 100644 --- a/deepmd/dpmodel/atomic_model/base_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/base_atomic_model.py @@ -1,10 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import copy from typing import ( - Dict, - List, Optional, - Tuple, ) import numpy as np @@ -36,11 +33,11 @@ class BaseAtomicModel(BaseAtomicModel_, NativeOP): def __init__( self, - type_map: List[str], - atom_exclude_types: List[int] = [], - pair_exclude_types: List[Tuple[int, int]] = [], + type_map: list[str], + atom_exclude_types: list[int] = [], + pair_exclude_types: list[tuple[int, int]] = [], rcond: Optional[float] = None, - preset_out_bias: Optional[Dict[str, np.ndarray]] = None, + preset_out_bias: Optional[dict[str, np.ndarray]] = None, ): super().__init__() self.type_map = type_map @@ -52,7 +49,7 @@ def __init__( def init_out_stat(self): """Initialize the output bias.""" ntypes = self.get_ntypes() - self.bias_keys: List[str] = list(self.fitting_output_def().keys()) + self.bias_keys: list[str] = list(self.fitting_output_def().keys()) self.max_out_size = max( [self.atomic_output_def()[kk].size for kk in self.bias_keys] ) @@ -78,13 +75,13 @@ def __getitem__(self, key): else: raise KeyError(key) - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map.""" return self.type_map def reinit_atom_exclude( self, - exclude_types: List[int] = [], + exclude_types: list[int] = [], ): self.atom_exclude_types = exclude_types if exclude_types == []: @@ -94,7 +91,7 @@ def reinit_atom_exclude( def reinit_pair_exclude( self, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): self.pair_exclude_types = exclude_types if exclude_types == []: @@ -119,7 +116,7 @@ def atomic_output_def(self) -> FittingOutputDef: ) def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -143,7 +140,7 @@ def forward_common_atomic( mapping: Optional[np.ndarray] = None, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """Common interface for atomic inference. 
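Returning to the `deepmd.common` helpers retyped above, their contracts read directly off the new annotations; the paths below are placeholders:

```python
from deepmd.common import expand_sys_str, j_loader

jdata = j_loader("input.json")     # dict[str, Any] parsed from JSON or YAML/YML
systems = expand_sys_str("data/")  # list[str]: subdirectories containing type.raw
```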
This method accept extended coordinates, extended atom typs, neighbor list, @@ -217,7 +214,7 @@ def call( mapping: Optional[np.ndarray] = None, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: return self.forward_common_atomic( extended_coord, extended_atype, @@ -251,7 +248,7 @@ def deserialize(cls, data: dict) -> "BaseAtomicModel": def apply_out_stat( self, - ret: Dict[str, np.ndarray], + ret: dict[str, np.ndarray], atype: np.ndarray, ): """Apply the stat to each atomic output. @@ -274,7 +271,7 @@ def apply_out_stat( def _varsize( self, - shape: List[int], + shape: list[int], ) -> int: output_size = 1 len_shape = len(shape) @@ -286,7 +283,7 @@ def _get_bias_index( self, kk: str, ) -> int: - res: List[int] = [] + res: list[int] = [] for i, e in enumerate(self.bias_keys): if e == kk: res.append(i) @@ -295,8 +292,8 @@ def _get_bias_index( def _fetch_out_stat( self, - keys: List[str], - ) -> Tuple[Dict[str, np.ndarray], Dict[str, np.ndarray]]: + keys: list[str], + ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]: ret_bias = {} ret_std = {} ntypes = self.get_ntypes() diff --git a/deepmd/dpmodel/atomic_model/dp_atomic_model.py b/deepmd/dpmodel/atomic_model/dp_atomic_model.py index a446bde06f..7e576eb484 100644 --- a/deepmd/dpmodel/atomic_model/dp_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/dp_atomic_model.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import copy from typing import ( - Dict, - List, Optional, ) @@ -46,7 +44,7 @@ def __init__( self, descriptor, fitting, - type_map: List[str], + type_map: list[str], **kwargs, ): super().__init__(type_map, **kwargs) @@ -64,7 +62,7 @@ def get_rcut(self) -> float: """Get the cut-off radius.""" return self.descriptor.get_rcut() - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Get the neighbor selection.""" return self.descriptor.get_sel() @@ -96,7 +94,7 @@ def forward_atomic( mapping: Optional[np.ndarray] = None, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """Models' atomic predictions. Parameters @@ -140,7 +138,7 @@ def forward_atomic( return ret def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -192,7 +190,7 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return self.fitting.get_dim_aparam() - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. 
Only atoms with selected atom types have atomic contribution diff --git a/deepmd/dpmodel/atomic_model/linear_atomic_model.py b/deepmd/dpmodel/atomic_model/linear_atomic_model.py index d522347f41..79a51635d2 100644 --- a/deepmd/dpmodel/atomic_model/linear_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/linear_atomic_model.py @@ -1,10 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import copy from typing import ( - Dict, - List, Optional, - Tuple, Union, ) @@ -48,8 +45,8 @@ class LinearEnergyAtomicModel(BaseAtomicModel): def __init__( self, - models: List[BaseAtomicModel], - type_map: List[str], + models: list[BaseAtomicModel], + type_map: list[str], **kwargs, ): super().__init__(type_map, **kwargs) @@ -104,12 +101,12 @@ def get_rcut(self) -> float: """Get the cut-off radius.""" return max(self.get_model_rcuts()) - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map.""" return self.type_map def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -125,22 +122,22 @@ def change_type_map( else None, ) - def get_model_rcuts(self) -> List[float]: + def get_model_rcuts(self) -> list[float]: """Get the cut-off radius for each individual models.""" return [model.get_rcut() for model in self.models] - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: return [max([model.get_nsel() for model in self.models])] - def get_model_nsels(self) -> List[int]: + def get_model_nsels(self) -> list[int]: """Get the processed sels for each individual models. Not distinguishing types.""" return [model.get_nsel() for model in self.models] - def get_model_sels(self) -> List[Union[int, List[int]]]: + def get_model_sels(self) -> list[Union[int, list[int]]]: """Get the sels for each individual models.""" return [model.get_sel() for model in self.models] - def _sort_rcuts_sels(self) -> Tuple[List[float], List[int]]: + def _sort_rcuts_sels(self) -> tuple[list[float], list[int]]: # sort the pair of rcut and sels in ascending order, first based on sel, then on rcut. zipped = sorted( zip(self.get_model_rcuts(), self.get_model_nsels()), @@ -156,7 +153,7 @@ def forward_atomic( mapping: Optional[np.ndarray] = None, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """Return atomic prediction. Parameters @@ -219,16 +216,16 @@ def forward_atomic( return fit_ret @staticmethod - def remap_atype(ori_map: List[str], new_map: List[str]) -> np.ndarray: + def remap_atype(ori_map: list[str], new_map: list[str]) -> np.ndarray: """ This method is used to map the atype from the common type_map to the original type_map of indivial AtomicModels. Parameters ---------- - ori_map : List[str] + ori_map : list[str] The original type map of an AtomicModel. - new_map : List[str] + new_map : list[str] The common type map of the DPZBLLinearEnergyAtomicModel, created by the `get_type_map` method, must be a subset of the ori_map. 
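A toy rendering of what `remap_atype` computes, per the docstring above: an index array translating atom types expressed in the common type map into a sub-model's own type map (the element names here are illustrative):

```python
import numpy as np

ori_map = ["O", "H"]  # a sub-model's original type map
new_map = ["H", "O"]  # the common type map; must be a subset of ori_map
mapping = np.array([ori_map.index(t) for t in new_map])
# mapping == [1, 0]: common type 0 ("H") is type 1 inside the sub-model
```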
@@ -284,8 +281,8 @@ def _compute_weight( self, extended_coord: np.ndarray, extended_atype: np.ndarray, - nlists_: List[np.ndarray], - ) -> List[np.ndarray]: + nlists_: list[np.ndarray], + ) -> list[np.ndarray]: """This should be a list of user defined weights that matches the number of models to be combined.""" nmodels = len(self.models) nframes, nloc, _ = nlists_[0].shape @@ -300,7 +297,7 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return max([model.get_dim_aparam() for model in self.models]) - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. Only atoms with selected atom types have atomic contribution @@ -347,7 +344,7 @@ def __init__( zbl_model: PairTabAtomicModel, sw_rmin: float, sw_rmax: float, - type_map: List[str], + type_map: list[str], smin_alpha: Optional[float] = 0.1, **kwargs, ): @@ -391,13 +388,13 @@ def _compute_weight( self, extended_coord: np.ndarray, extended_atype: np.ndarray, - nlists_: List[np.ndarray], - ) -> List[np.ndarray]: + nlists_: list[np.ndarray], + ) -> list[np.ndarray]: """ZBL weight. Returns ------- - List[np.ndarray] + list[np.ndarray] the atomic ZBL weight for interpolation. (nframes, nloc, 1) """ assert ( diff --git a/deepmd/dpmodel/atomic_model/make_base_atomic_model.py b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py index bf345eaa12..6c0fc88e2c 100644 --- a/deepmd/dpmodel/atomic_model/make_base_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py @@ -4,8 +4,6 @@ abstractmethod, ) from typing import ( - Dict, - List, Optional, ) @@ -57,7 +55,7 @@ def get_rcut(self) -> float: pass @abstractmethod - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map.""" pass @@ -66,7 +64,7 @@ def get_ntypes(self) -> int: return len(self.get_type_map()) @abstractmethod - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" pass @@ -87,7 +85,7 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" @abstractmethod - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. 
Only atoms with selected atom types have atomic contribution @@ -132,7 +130,7 @@ def fwd( mapping: Optional[t_tensor] = None, fparam: Optional[t_tensor] = None, aparam: Optional[t_tensor] = None, - ) -> Dict[str, t_tensor]: + ) -> dict[str, t_tensor]: pass @abstractmethod @@ -146,7 +144,7 @@ def deserialize(cls, data: dict): @abstractmethod def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: pass @@ -182,7 +180,7 @@ def do_grad_r( """ odef = self.fitting_output_def() if var_name is None: - require: List[bool] = [] + require: list[bool] = [] for vv in odef.keys(): require.append(self.do_grad_(vv, "r")) return any(require) @@ -199,7 +197,7 @@ def do_grad_c( """ odef = self.fitting_output_def() if var_name is None: - require: List[bool] = [] + require: list[bool] = [] for vv in odef.keys(): require.append(self.do_grad_(vv, "c")) return any(require) diff --git a/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py b/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py index 4218c24e3e..22471d3f32 100644 --- a/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import copy from typing import ( - Dict, - List, Optional, Union, ) @@ -57,10 +55,10 @@ def __init__( self, tab_file: str, rcut: float, - sel: Union[int, List[int]], - type_map: List[str], + sel: Union[int, list[int]], + type_map: list[str], rcond: Optional[float] = None, - atom_ener: Optional[List[float]] = None, + atom_ener: Optional[list[float]] = None, **kwargs, ): super().__init__(type_map, **kwargs) @@ -109,10 +107,10 @@ def fitting_output_def(self) -> FittingOutputDef: def get_rcut(self) -> float: return self.rcut - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: return self.type_map - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: return [self.sel] def get_nsel(self) -> int: @@ -140,7 +138,7 @@ def need_sorted_nlist_for_lower(self) -> bool: return False def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -190,7 +188,7 @@ def forward_atomic( mapping: Optional[np.ndarray] = None, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: nframes, nloc, nnei = nlist.shape extended_coord = extended_coord.reshape(nframes, -1, 3) @@ -394,7 +392,7 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return 0 - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. 
Only atoms with selected atom types have atomic contribution diff --git a/deepmd/dpmodel/descriptor/descriptor.py b/deepmd/dpmodel/descriptor/descriptor.py index e48479cca8..6d0644f856 100644 --- a/deepmd/dpmodel/descriptor/descriptor.py +++ b/deepmd/dpmodel/descriptor/descriptor.py @@ -6,8 +6,6 @@ ) from typing import ( Callable, - Dict, - List, Optional, Union, ) @@ -57,7 +55,7 @@ def get_nsel(self) -> int: pass @abstractmethod - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" pass @@ -83,7 +81,7 @@ def get_dim_emb(self) -> int: def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """ @@ -91,11 +89,11 @@ def compute_input_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. path : Optional[DPPath] @@ -104,7 +102,7 @@ def compute_input_stats( """ raise NotImplementedError - def get_stats(self) -> Dict[str, StatItem]: + def get_stats(self) -> dict[str, StatItem]: """Get the statistics of the descriptor.""" raise NotImplementedError @@ -152,7 +150,7 @@ def extend_descrpt_stat(des, type_map, des_with_stat=None): ---------- des : DescriptorBlock The descriptor block to be extended. - type_map : List[str] + type_map : list[str] The name of each type of atoms to be extended. des_with_stat : DescriptorBlock, Optional The descriptor block has additional statistics of types from newly provided `type_map`. diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py index 70cb818eef..5ba3fc11b2 100644 --- a/deepmd/dpmodel/descriptor/dpa1.py +++ b/deepmd/dpmodel/descriptor/dpa1.py @@ -2,9 +2,7 @@ from typing import ( Any, Callable, - List, Optional, - Tuple, Union, ) @@ -171,7 +169,7 @@ class DescrptDPA1(NativeOP, BaseDescriptor): (Only support False to keep consistent with other backend references.) (Not used in this version. True option is not implemented.) If mask the diagonal of attention weights - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. env_protection: float @@ -203,7 +201,7 @@ class DescrptDPA1(NativeOP, BaseDescriptor): Whether to use electronic configuration type embedding. use_tebd_bias : bool, Optional Whether to use bias in the type embedding layer. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. spin (Only support None to keep consistent with other backend references.) 
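On the `merged` argument documented in `compute_input_stats` above: it may be the sample list itself or a zero-argument callable producing it, so that expensive sampling runs only when statistics are actually needed. A sketch with assumed sample keys (`coord` and `atype` are illustrative, not a guaranteed schema):

```python
import numpy as np


def lazy_samples() -> list[dict]:
    # Pretend this sampling were slow and memory-hungry; the callable
    # defers it until compute_input_stats actually asks for data.
    return [{"coord": np.zeros((1, 3, 3)), "atype": np.zeros((1, 3), dtype=int)}]


# descriptor_block.compute_input_stats(lazy_samples)  # hypothetical call site
```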
@@ -227,9 +225,9 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: Union[List[int], int], + sel: Union[list[int], int], ntypes: int, - neuron: List[int] = [25, 50, 100], + neuron: list[int] = [25, 50, 100], axis_neuron: int = 8, tebd_dim: int = 8, tebd_input_mode: str = "concat", @@ -240,7 +238,7 @@ def __init__( attn_layer: int = 2, attn_dotr: bool = True, attn_mask: bool = False, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, set_davg_zero: bool = False, activation_function: str = "tanh", @@ -256,9 +254,9 @@ def __init__( stripped_type_embedding: Optional[bool] = None, use_econf_tebd: bool = False, use_tebd_bias: bool = False, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, # consistent with argcheck, not used though - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ) -> None: ## seed, uniform_seed, not included. # Ensure compatibility with the deprecated stripped_type_embedding option. @@ -333,7 +331,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return self.se_atten.get_nsel() - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.se_atten.get_sel() @@ -341,7 +339,7 @@ def get_ntypes(self) -> int: """Returns the number of element types.""" return self.se_atten.get_ntypes() - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map @@ -395,7 +393,7 @@ def dim_out(self): def dim_emb(self): return self.get_dim_emb() - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats(self, merged: list[dict], path: Optional[DPPath] = None): """Update mean and stddev for descriptor elements.""" raise NotImplementedError @@ -408,12 +406,12 @@ def set_stat_mean_and_stddev( self.se_atten.mean = mean self.se_atten.stddev = stddev - def get_stat_mean_and_stddev(self) -> Tuple[np.ndarray, np.ndarray]: + def get_stat_mean_and_stddev(self) -> tuple[np.ndarray, np.ndarray]: """Get mean and stddev for descriptor.""" return self.se_atten.mean, self.se_atten.stddev def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -588,9 +586,9 @@ def deserialize(cls, data: dict) -> "DescrptDPA1": def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. 
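A minimal construction sketch for `DescrptDPA1` against the signature above; the hyperparameter values are illustrative, not recommendations:

```python
from deepmd.dpmodel.descriptor.dpa1 import DescrptDPA1

descrpt = DescrptDPA1(
    rcut=6.0,
    rcut_smth=0.5,
    sel=120,  # int: total neighbor cap; a list[int] would cap per type
    ntypes=2,
    neuron=[25, 50, 100],
    axis_neuron=8,
)
```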
Parameters @@ -623,9 +621,9 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: Union[List[int], int], + sel: Union[list[int], int], ntypes: int, - neuron: List[int] = [25, 50, 100], + neuron: list[int] = [25, 50, 100], axis_neuron: int = 8, tebd_dim: int = 8, tebd_input_mode: str = "concat", @@ -635,7 +633,7 @@ def __init__( attn_layer: int = 2, attn_dotr: bool = True, attn_mask: bool = False, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, set_davg_zero: bool = False, activation_function: str = "tanh", @@ -646,7 +644,7 @@ def __init__( trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, smooth: bool = True, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ) -> None: self.rcut = rcut self.rcut_smth = rcut_smth @@ -748,7 +746,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return sum(self.sel) - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.sel @@ -817,7 +815,7 @@ def dim_emb(self): def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data.""" @@ -829,7 +827,7 @@ def get_stats(self): def reinit_exclude( self, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): self.exclude_types = exclude_types self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) @@ -981,7 +979,7 @@ def __init__( ln_eps: float = 1e-5, smooth: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): """Construct a neighbor-wise attention net.""" super().__init__() @@ -1109,7 +1107,7 @@ def __init__( ln_eps: float = 1e-5, smooth: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): """Construct a neighbor-wise attention layer.""" super().__init__() @@ -1215,7 +1213,7 @@ def __init__( bias: bool = True, smooth: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): """Construct a multi-head neighbor-wise attention net.""" super().__init__() diff --git a/deepmd/dpmodel/descriptor/dpa2.py b/deepmd/dpmodel/descriptor/dpa2.py index 43c57f443f..285dc724a7 100644 --- a/deepmd/dpmodel/descriptor/dpa2.py +++ b/deepmd/dpmodel/descriptor/dpa2.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, - Tuple, Union, ) @@ -70,7 +68,7 @@ def __init__( rcut: float, rcut_smth: float, nsel: int, - neuron: List[int] = [25, 50, 100], + neuron: list[int] = [25, 50, 100], axis_neuron: int = 16, tebd_dim: int = 8, tebd_input_mode: str = "concat", @@ -79,7 +77,7 @@ def __init__( resnet_dt: bool = False, type_one_side: bool = False, use_three_body: bool = False, - three_body_neuron: List[int] = [2, 4, 8], + three_body_neuron: list[int] = [2, 4, 8], three_body_sel: int = 40, three_body_rcut: float = 4.0, three_body_rcut_smth: float = 0.5, @@ -371,14 +369,14 @@ def __init__( concat_output_tebd: bool = True, precision: str = "float64", smooth: bool = True, - exclude_types: List[Tuple[int, int]] = [], + 
exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, trainable: bool = True, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, add_tebd_to_repinit_out: bool = False, use_econf_tebd: bool = False, use_tebd_bias: bool = False, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, ): r"""The DPA-2 descriptor. see https://arxiv.org/abs/2312.15492. @@ -394,7 +392,7 @@ def __init__( The precision of the embedding net parameters. smooth : bool, optional Whether to use smoothness in processes such as attention weights calculation. - exclude_types : List[List[int]], optional + exclude_types : list[list[int]], optional The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. env_protection : float, optional @@ -410,7 +408,7 @@ def __init__( Whether to use electronic configuration type embedding. use_tebd_bias : bool, Optional Whether to use bias in the type embedding layer. - type_map : List[str], Optional + type_map : list[str], Optional A list of strings. Give the name to each type of atoms. Returns @@ -602,7 +600,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return sum(self.sel) - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.sel @@ -610,7 +608,7 @@ def get_ntypes(self) -> int: """Returns the number of element types.""" return self.ntypes - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map @@ -660,7 +658,7 @@ def share_params(self, base_class, shared_level, resume=False): raise NotImplementedError def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -723,14 +721,14 @@ def dim_emb(self): """Returns the embedding dimension g2.""" return self.get_dim_emb() - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats(self, merged: list[dict], path: Optional[DPPath] = None): """Update mean and stddev for descriptor elements.""" raise NotImplementedError def set_stat_mean_and_stddev( self, - mean: List[np.ndarray], - stddev: List[np.ndarray], + mean: list[np.ndarray], + stddev: list[np.ndarray], ) -> None: """Update mean and stddev for descriptor.""" descrpt_list = [self.repinit, self.repformers] @@ -740,7 +738,7 @@ def set_stat_mean_and_stddev( descrpt.mean = mean[ii] descrpt.stddev = stddev[ii] - def get_stat_mean_and_stddev(self) -> Tuple[List[np.ndarray], List[np.ndarray]]: + def get_stat_mean_and_stddev(self) -> tuple[list[np.ndarray], list[np.ndarray]]: """Get mean and stddev for descriptor.""" mean_list = [self.repinit.mean, self.repformers.mean] stddev_list = [ @@ -1015,9 +1013,9 @@ def deserialize(cls, data: dict) -> "DescrptDPA2": def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. 
Parameters diff --git a/deepmd/dpmodel/descriptor/hybrid.py b/deepmd/dpmodel/descriptor/hybrid.py index 4cd4e230ae..3aa8882db1 100644 --- a/deepmd/dpmodel/descriptor/hybrid.py +++ b/deepmd/dpmodel/descriptor/hybrid.py @@ -2,10 +2,7 @@ import math from typing import ( Any, - Dict, - List, Optional, - Tuple, Union, ) @@ -37,14 +34,14 @@ class DescrptHybrid(BaseDescriptor, NativeOP): Parameters ---------- - list : list : List[Union[BaseDescriptor, Dict[str, Any]]] + list : list : list[Union[BaseDescriptor, dict[str, Any]]] Build a descriptor from the concatenation of the list of descriptors. The descriptor can be either an object or a dictionary. """ def __init__( self, - list: List[Union[BaseDescriptor, Dict[str, Any]]], + list: list[Union[BaseDescriptor, dict[str, Any]]], ) -> None: super().__init__() # warning: list is conflict with built-in list @@ -69,7 +66,7 @@ def __init__( ), f"number of atom types in {ii}th descrptor {self.descrpt_list[0].__class__.__name__} does not match others" # if hybrid sel is larger than sub sel, the nlist needs to be cut for each type hybrid_sel = self.get_sel() - self.nlist_cut_idx: List[np.ndarray] = [] + self.nlist_cut_idx: list[np.ndarray] = [] if self.mixed_types() and not all( descrpt.mixed_types() for descrpt in self.descrpt_list ): @@ -107,7 +104,7 @@ def get_rcut_smth(self) -> float: # Note: Using the minimum rcut_smth might not be appropriate in all scenarios. Consider using a different approach or provide detailed documentation on why the minimum value is chosen. return np.min([descrpt.get_rcut_smth() for descrpt in self.descrpt_list]).item() - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" if self.mixed_types(): return [ @@ -124,7 +121,7 @@ def get_ntypes(self) -> int: """Returns the number of element types.""" return self.descrpt_list[0].get_ntypes() - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.descrpt_list[0].get_type_map() @@ -169,7 +166,7 @@ def share_params(self, base_class, shared_level, resume=False): raise NotImplementedError def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. 
@@ -182,15 +179,15 @@ def change_type_map( else None, ) - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats(self, merged: list[dict], path: Optional[DPPath] = None): """Update mean and stddev for descriptor elements.""" for descrpt in self.descrpt_list: descrpt.compute_input_stats(merged, path) def set_stat_mean_and_stddev( self, - mean: List[Union[np.ndarray, List[np.ndarray]]], - stddev: List[Union[np.ndarray, List[np.ndarray]]], + mean: list[Union[np.ndarray, list[np.ndarray]]], + stddev: list[Union[np.ndarray, list[np.ndarray]]], ) -> None: """Update mean and stddev for descriptor.""" for ii, descrpt in enumerate(self.descrpt_list): @@ -198,9 +195,9 @@ def set_stat_mean_and_stddev( def get_stat_mean_and_stddev( self, - ) -> Tuple[ - List[Union[np.ndarray, List[np.ndarray]]], - List[Union[np.ndarray, List[np.ndarray]]], + ) -> tuple[ + list[Union[np.ndarray, list[np.ndarray]]], + list[Union[np.ndarray, list[np.ndarray]]], ]: """Get mean and stddev for descriptor.""" mean_list = [] @@ -279,9 +276,9 @@ def call( def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters diff --git a/deepmd/dpmodel/descriptor/make_base_descriptor.py b/deepmd/dpmodel/descriptor/make_base_descriptor.py index 6ce54c6f12..a9b434d5f5 100644 --- a/deepmd/dpmodel/descriptor/make_base_descriptor.py +++ b/deepmd/dpmodel/descriptor/make_base_descriptor.py @@ -5,9 +5,7 @@ ) from typing import ( Callable, - List, Optional, - Tuple, Union, ) @@ -61,7 +59,7 @@ def get_rcut_smth(self) -> float: pass @abstractmethod - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected neighboring atoms for each type.""" pass @@ -79,7 +77,7 @@ def get_ntypes(self) -> int: pass @abstractmethod - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" pass @@ -124,7 +122,7 @@ def share_params(self, base_class, shared_level, resume=False): @abstractmethod def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -143,7 +141,7 @@ def get_stat_mean_and_stddev(self): def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """Update mean and stddev for descriptor elements.""" @@ -188,9 +186,9 @@ def deserialize(cls, data: dict) -> "BD": def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. 
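The `update_sel` classmethod recurs with the same shape across every descriptor touched in this patch. A stub of the contract, with the return semantics partly assumed (the updated descriptor section plus an optional neighbor-distance statistic):

```python
from typing import Optional


def update_sel_stub(
    train_data, type_map: Optional[list[str]], local_jdata: dict
) -> tuple[dict, Optional[float]]:
    local_jdata_cpy = dict(local_jdata)
    # neighbor statistics over train_data would adjust local_jdata_cpy["sel"] here
    return local_jdata_cpy, None
```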
Parameters diff --git a/deepmd/dpmodel/descriptor/repformers.py b/deepmd/dpmodel/descriptor/repformers.py index 7254f0bc3d..ec8be21a53 100644 --- a/deepmd/dpmodel/descriptor/repformers.py +++ b/deepmd/dpmodel/descriptor/repformers.py @@ -1,9 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Callable, - List, Optional, - Tuple, Union, ) @@ -110,7 +108,7 @@ class DescrptBlockRepformers(NativeOP, DescriptorBlock): The precision of the embedding net parameters. smooth : bool, optional Whether to use smoothness in processes such as attention weights calculation. - exclude_types : List[List[int]], optional + exclude_types : list[list[int]], optional The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. env_protection : float, optional @@ -159,7 +157,7 @@ def __init__( update_residual_init: str = "norm", set_davg_zero: bool = True, smooth: bool = True, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, precision: str = "float64", trainable_ln: bool = True, @@ -167,7 +165,7 @@ def __init__( g1_out_conv: bool = True, g1_out_mlp: bool = True, ln_eps: Optional[float] = 1e-5, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): super().__init__() self.rcut = rcut @@ -272,7 +270,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return sum(self.sel) - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.sel @@ -337,7 +335,7 @@ def dim_emb(self): def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data.""" @@ -349,7 +347,7 @@ def get_stats(self): def reinit_exclude( self, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): self.exclude_types = exclude_types self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) @@ -436,7 +434,7 @@ def get_residual( _mode: str = "norm", trainable: bool = True, precision: str = "float64", - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ) -> np.ndarray: """ Get residual tensor for one update vector. 
@@ -694,7 +692,7 @@ def __init__( smooth: bool = True, attnw_shift: float = 20.0, precision: str = "float64", - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): """Return neighbor-wise multi-head self-attention maps, with gate mechanism.""" super().__init__() @@ -812,7 +810,7 @@ def __init__( input_dim: int, head_num: int, precision: str = "float64", - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): super().__init__() self.input_dim = input_dim @@ -897,7 +895,7 @@ def __init__( input_dim: int, head_num: int, precision: str = "float64", - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): super().__init__() self.input_dim = input_dim @@ -970,7 +968,7 @@ def __init__( smooth: bool = True, attnw_shift: float = 20.0, precision: str = "float64", - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): super().__init__() self.input_dim = input_dim @@ -1132,7 +1130,7 @@ def __init__( g1_out_conv: bool = True, g1_out_mlp: bool = True, ln_eps: Optional[float] = 1e-5, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): super().__init__() self.epsilon = 1e-4 # protection of 1./nnei @@ -1508,10 +1506,10 @@ def call( assert (nf, nloc) == g1.shape[:2] assert (nf, nloc, nnei) == h2.shape[:3] - g2_update: List[np.ndarray] = [g2] - h2_update: List[np.ndarray] = [h2] - g1_update: List[np.ndarray] = [g1] - g1_mlp: List[np.ndarray] = [g1] if not self.g1_out_mlp else [] + g2_update: list[np.ndarray] = [g2] + h2_update: list[np.ndarray] = [h2] + g1_update: list[np.ndarray] = [g1] + g1_mlp: list[np.ndarray] = [g1] if not self.g1_out_mlp else [] if self.g1_out_mlp: assert self.g1_self_mlp is not None g1_self_mlp = self.act(self.g1_self_mlp(g1)) @@ -1613,7 +1611,7 @@ def call( def list_update_res_avg( self, - update_list: List[np.ndarray], + update_list: list[np.ndarray], ) -> np.ndarray: nitem = len(update_list) uu = update_list[0] @@ -1621,7 +1619,7 @@ def list_update_res_avg( uu = uu + update_list[ii] return uu / (float(nitem) ** 0.5) - def list_update_res_incr(self, update_list: List[np.ndarray]) -> np.ndarray: + def list_update_res_incr(self, update_list: list[np.ndarray]) -> np.ndarray: nitem = len(update_list) uu = update_list[0] scale = 1.0 / (float(nitem - 1) ** 0.5) if nitem > 1 else 0.0 @@ -1630,7 +1628,7 @@ def list_update_res_incr(self, update_list: List[np.ndarray]) -> np.ndarray: return uu def list_update_res_residual( - self, update_list: List[np.ndarray], update_name: str = "g1" + self, update_list: list[np.ndarray], update_name: str = "g1" ) -> np.ndarray: nitem = len(update_list) uu = update_list[0] @@ -1648,7 +1646,7 @@ def list_update_res_residual( return uu def list_update( - self, update_list: List[np.ndarray], update_name: str = "g1" + self, update_list: list[np.ndarray], update_name: str = "g1" ) -> np.ndarray: if self.update_style == "res_avg": return self.list_update_res_avg(update_list) diff --git a/deepmd/dpmodel/descriptor/se_atten_v2.py b/deepmd/dpmodel/descriptor/se_atten_v2.py index d29580062c..e0ac222524 100644 --- a/deepmd/dpmodel/descriptor/se_atten_v2.py +++ b/deepmd/dpmodel/descriptor/se_atten_v2.py @@ -1,9 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Any, - List, Optional, - Tuple, Union, ) @@ -38,9 +36,9 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: Union[List[int], int], + sel: Union[list[int], int], 
ntypes: int, - neuron: List[int] = [25, 50, 100], + neuron: list[int] = [25, 50, 100], axis_neuron: int = 8, tebd_dim: int = 8, resnet_dt: bool = False, @@ -50,7 +48,7 @@ def __init__( attn_layer: int = 2, attn_dotr: bool = True, attn_mask: bool = False, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, set_davg_zero: bool = False, activation_function: str = "tanh", @@ -65,9 +63,9 @@ def __init__( stripped_type_embedding: Optional[bool] = None, use_econf_tebd: bool = False, use_tebd_bias: bool = False, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, # consistent with argcheck, not used though - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ) -> None: DescrptDPA1.__init__( self, diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py index 11856521c8..29577ef79e 100644 --- a/deepmd/dpmodel/descriptor/se_e2_a.py +++ b/deepmd/dpmodel/descriptor/se_e2_a.py @@ -3,9 +3,7 @@ import itertools from typing import ( Any, - List, Optional, - Tuple, Union, ) @@ -108,7 +106,7 @@ class DescrptSeA(NativeOP, BaseDescriptor): If the weights of embedding net are trainable. type_one_side Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. env_protection: float @@ -121,7 +119,7 @@ class DescrptSeA(NativeOP, BaseDescriptor): The precision of the embedding net parameters. Supported options are |PRECISION| spin The deepspin object. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. ntypes : int Number of element types. @@ -147,22 +145,22 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[int], - neuron: List[int] = [24, 48, 96], + sel: list[int], + neuron: list[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: bool = False, trainable: bool = True, type_one_side: bool = True, - exclude_types: List[List[int]] = [], + exclude_types: list[list[int]] = [], env_protection: float = 0.0, set_davg_zero: bool = False, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, spin: Optional[Any] = None, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, ntypes: Optional[int] = None, # to be compat with input # consistent with argcheck, not used though - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ) -> None: del ntypes ## seed, uniform_seed, not included. @@ -282,7 +280,7 @@ def share_params(self, base_class, shared_level, resume=False): raise NotImplementedError def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. 
@@ -297,11 +295,11 @@ def get_ntypes(self) -> int: """Returns the number of element types.""" return self.ntypes - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats(self, merged: list[dict], path: Optional[DPPath] = None): """Update mean and stddev for descriptor elements.""" raise NotImplementedError @@ -314,7 +312,7 @@ def set_stat_mean_and_stddev( self.davg = mean self.dstd = stddev - def get_stat_mean_and_stddev(self) -> Tuple[np.ndarray, np.ndarray]: + def get_stat_mean_and_stddev(self) -> tuple[np.ndarray, np.ndarray]: """Get mean and stddev for descriptor.""" return self.davg, self.dstd @@ -331,7 +329,7 @@ def cal_g( def reinit_exclude( self, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): self.exclude_types = exclude_types self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) @@ -473,9 +471,9 @@ def deserialize(cls, data: dict) -> "DescrptSeA": def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py index 2d9f6f5a52..c9d27175d6 100644 --- a/deepmd/dpmodel/descriptor/se_r.py +++ b/deepmd/dpmodel/descriptor/se_r.py @@ -2,9 +2,7 @@ import copy from typing import ( Any, - List, Optional, - Tuple, Union, ) @@ -68,7 +66,7 @@ class DescrptSeR(NativeOP, BaseDescriptor): If the weights of embedding net are trainable. type_one_side Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. set_davg_zero @@ -79,7 +77,7 @@ class DescrptSeR(NativeOP, BaseDescriptor): The precision of the embedding net parameters. Supported options are |PRECISION| spin The deepspin object. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. ntypes : int Number of element types. @@ -105,21 +103,21 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[int], - neuron: List[int] = [24, 48, 96], + sel: list[int], + neuron: list[int] = [24, 48, 96], resnet_dt: bool = False, trainable: bool = True, type_one_side: bool = True, - exclude_types: List[List[int]] = [], + exclude_types: list[list[int]] = [], env_protection: float = 0.0, set_davg_zero: bool = False, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, spin: Optional[Any] = None, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, ntypes: Optional[int] = None, # to be compat with input # consistent with argcheck, not used though - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ) -> None: del ntypes ## seed, uniform_seed, not included. 
@@ -240,7 +238,7 @@ def share_params(self, base_class, shared_level, resume=False): raise NotImplementedError def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -255,11 +253,11 @@ def get_ntypes(self) -> int: """Returns the number of element types.""" return self.ntypes - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats(self, merged: list[dict], path: Optional[DPPath] = None): """Update mean and stddev for descriptor elements.""" raise NotImplementedError @@ -272,7 +270,7 @@ def set_stat_mean_and_stddev( self.davg = mean self.dstd = stddev - def get_stat_mean_and_stddev(self) -> Tuple[np.ndarray, np.ndarray]: + def get_stat_mean_and_stddev(self) -> tuple[np.ndarray, np.ndarray]: """Get mean and stddev for descriptor.""" return self.davg, self.dstd @@ -398,9 +396,9 @@ def deserialize(cls, data: dict) -> "DescrptSeR": def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters diff --git a/deepmd/dpmodel/descriptor/se_t.py b/deepmd/dpmodel/descriptor/se_t.py index 364600aa8b..f2ea751c50 100644 --- a/deepmd/dpmodel/descriptor/se_t.py +++ b/deepmd/dpmodel/descriptor/se_t.py @@ -2,9 +2,7 @@ import copy import itertools from typing import ( - List, Optional, - Tuple, Union, ) @@ -73,7 +71,7 @@ class DescrptSeT(NativeOP, BaseDescriptor): The activation function in the embedding net. Supported options are |ACTIVATION_FN| env_protection : float Protection parameter to prevent division by zero errors during environment matrix calculations. - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. precision : str @@ -82,7 +80,7 @@ class DescrptSeT(NativeOP, BaseDescriptor): If the weights of embedding net are trainable. seed : int, Optional Random seed for initializing the network parameters. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. ntypes : int Number of element types. 
@@ -93,17 +91,17 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[int], - neuron: List[int] = [24, 48, 96], + sel: list[int], + neuron: list[int] = [24, 48, 96], resnet_dt: bool = False, set_davg_zero: bool = False, activation_function: str = "tanh", env_protection: float = 0.0, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], precision: str = DEFAULT_PRECISION, trainable: bool = True, - seed: Optional[Union[int, List[int]]] = None, - type_map: Optional[List[str]] = None, + seed: Optional[Union[int, list[int]]] = None, + type_map: Optional[list[str]] = None, ntypes: Optional[int] = None, # to be compat with input ) -> None: del ntypes @@ -174,7 +172,7 @@ def dim_out(self): return self.get_dim_out() def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -235,11 +233,11 @@ def get_ntypes(self) -> int: """Returns the number of element types.""" return self.ntypes - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats(self, merged: list[dict], path: Optional[DPPath] = None): """Update mean and stddev for descriptor elements.""" raise NotImplementedError @@ -252,13 +250,13 @@ def set_stat_mean_and_stddev( self.davg = mean self.dstd = stddev - def get_stat_mean_and_stddev(self) -> Tuple[np.ndarray, np.ndarray]: + def get_stat_mean_and_stddev(self) -> tuple[np.ndarray, np.ndarray]: """Get mean and stddev for descriptor.""" return self.davg, self.dstd def reinit_exclude( self, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): self.exclude_types = exclude_types self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) @@ -399,9 +397,9 @@ def deserialize(cls, data: dict) -> "DescrptSeT": def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters diff --git a/deepmd/dpmodel/descriptor/se_t_tebd.py b/deepmd/dpmodel/descriptor/se_t_tebd.py index b6e362d2d7..147a335926 100644 --- a/deepmd/dpmodel/descriptor/se_t_tebd.py +++ b/deepmd/dpmodel/descriptor/se_t_tebd.py @@ -1,9 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Callable, - List, Optional, - Tuple, Union, ) @@ -64,7 +62,7 @@ class DescrptSeTTebd(NativeOP, BaseDescriptor): The cut-off radius rcut_smth From where the environment matrix should be smoothed - sel : Union[List[int], int] + sel : Union[list[int], int] list[int]: sel[i] specifies the maxmum number of type i atoms in the cut-off radius int: the total maxmum number of atoms in the cut-off radius ntypes : int @@ -86,7 +84,7 @@ class DescrptSeTTebd(NativeOP, BaseDescriptor): The activation function in the embedding net. Supported options are |ACTIVATION_FN| env_protection: float Protection parameter to prevent division by zero errors during environment matrix calculations. 
- exclude_types : List[Tuple[int, int]] + exclude_types : list[tuple[int, int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. precision @@ -95,7 +93,7 @@ class DescrptSeTTebd(NativeOP, BaseDescriptor): If the weights of embedding net are trainable. seed Random seed for initializing the network parameters. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. concat_output_tebd: bool Whether to concat type embedding at the output of the descriptor. @@ -112,7 +110,7 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: Union[List[int], int], + sel: Union[list[int], int], ntypes: int, neuron: list = [2, 4, 8], tebd_dim: int = 8, @@ -121,11 +119,11 @@ def __init__( set_davg_zero: bool = True, activation_function: str = "tanh", env_protection: float = 0.0, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], precision: str = "float64", trainable: bool = True, - seed: Optional[Union[int, List[int]]] = None, - type_map: Optional[List[str]] = None, + seed: Optional[Union[int, list[int]]] = None, + type_map: Optional[list[str]] = None, concat_output_tebd: bool = True, use_econf_tebd: bool = False, use_tebd_bias=False, @@ -178,7 +176,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return self.se_ttebd.get_nsel() - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.se_ttebd.get_sel() @@ -186,7 +184,7 @@ def get_ntypes(self) -> int: """Returns the number of element types.""" return self.se_ttebd.get_ntypes() - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map @@ -240,7 +238,7 @@ def dim_out(self): def dim_emb(self): return self.get_dim_emb() - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats(self, merged: list[dict], path: Optional[DPPath] = None): """Update mean and stddev for descriptor elements.""" raise NotImplementedError @@ -253,12 +251,12 @@ def set_stat_mean_and_stddev( self.se_ttebd.mean = mean self.se_ttebd.stddev = stddev - def get_stat_mean_and_stddev(self) -> Tuple[np.ndarray, np.ndarray]: + def get_stat_mean_and_stddev(self) -> tuple[np.ndarray, np.ndarray]: """Get mean and stddev for descriptor.""" return self.se_ttebd.mean, self.se_ttebd.stddev def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -412,9 +410,9 @@ def deserialize(cls, data: dict) -> "DescrptSeTTebd": def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. 
Parameters @@ -447,7 +445,7 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: Union[List[int], int], + sel: Union[list[int], int], ntypes: int, neuron: list = [25, 50, 100], tebd_dim: int = 8, @@ -456,10 +454,10 @@ def __init__( activation_function="tanh", precision: str = "float64", resnet_dt: bool = False, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, smooth: bool = True, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ) -> None: self.rcut = rcut self.rcut_smth = rcut_smth @@ -541,7 +539,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return sum(self.sel) - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.sel @@ -610,7 +608,7 @@ def dim_emb(self): def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data.""" @@ -622,7 +620,7 @@ def get_stats(self): def reinit_exclude( self, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): self.exclude_types = exclude_types self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) diff --git a/deepmd/dpmodel/fitting/dipole_fitting.py b/deepmd/dpmodel/fitting/dipole_fitting.py index 20e732823b..f67bbc93a4 100644 --- a/deepmd/dpmodel/fitting/dipole_fitting.py +++ b/deepmd/dpmodel/fitting/dipole_fitting.py @@ -2,8 +2,6 @@ import copy from typing import ( Any, - Dict, - List, Optional, Union, ) @@ -81,7 +79,7 @@ class DipoleFitting(GeneralFitting): c_differentiable If the variable is differentiated with respect to the cell tensor (pbc case). Only reducible variable are differentiable. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. """ @@ -90,25 +88,25 @@ def __init__( ntypes: int, dim_descrpt: int, embedding_width: int, - neuron: List[int] = [120, 120, 120], + neuron: list[int] = [120, 120, 120], resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, rcond: Optional[float] = None, tot_ener_zero: bool = False, - trainable: Optional[List[bool]] = None, + trainable: Optional[list[bool]] = None, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, - layer_name: Optional[List[Optional[str]]] = None, + layer_name: Optional[list[Optional[str]]] = None, use_aparam_as_mask: bool = False, spin: Any = None, mixed_types: bool = False, - exclude_types: List[int] = [], + exclude_types: list[int] = [], r_differentiable: bool = True, c_differentiable: bool = True, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, old_impl=False, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): if tot_ener_zero: raise NotImplementedError("tot_ener_zero is not implemented") @@ -188,7 +186,7 @@ def call( h2: Optional[np.ndarray] = None, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """Calculate the fitting. 
Parameters diff --git a/deepmd/dpmodel/fitting/dos_fitting.py b/deepmd/dpmodel/fitting/dos_fitting.py index 0d4cee68e2..e9cd4a17ae 100644 --- a/deepmd/dpmodel/fitting/dos_fitting.py +++ b/deepmd/dpmodel/fitting/dos_fitting.py @@ -2,7 +2,6 @@ import copy from typing import ( TYPE_CHECKING, - List, Optional, Union, ) @@ -33,19 +32,19 @@ def __init__( ntypes: int, dim_descrpt: int, numb_dos: int = 300, - neuron: List[int] = [120, 120, 120], + neuron: list[int] = [120, 120, 120], resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, bias_dos: Optional[np.ndarray] = None, rcond: Optional[float] = None, - trainable: Union[bool, List[bool]] = True, + trainable: Union[bool, list[bool]] = True, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, mixed_types: bool = False, - exclude_types: List[int] = [], - type_map: Optional[List[str]] = None, - seed: Optional[Union[int, List[int]]] = None, + exclude_types: list[int] = [], + type_map: Optional[list[str]] = None, + seed: Optional[Union[int, list[int]]] = None, ): if bias_dos is not None: self.bias_dos = bias_dos diff --git a/deepmd/dpmodel/fitting/ener_fitting.py b/deepmd/dpmodel/fitting/ener_fitting.py index 60f23f9628..9a1eae0156 100644 --- a/deepmd/dpmodel/fitting/ener_fitting.py +++ b/deepmd/dpmodel/fitting/ener_fitting.py @@ -3,7 +3,6 @@ from typing import ( TYPE_CHECKING, Any, - List, Optional, Union, ) @@ -30,23 +29,23 @@ def __init__( self, ntypes: int, dim_descrpt: int, - neuron: List[int] = [120, 120, 120], + neuron: list[int] = [120, 120, 120], resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, rcond: Optional[float] = None, tot_ener_zero: bool = False, - trainable: Optional[List[bool]] = None, - atom_ener: Optional[List[float]] = None, + trainable: Optional[list[bool]] = None, + atom_ener: Optional[list[float]] = None, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, - layer_name: Optional[List[Optional[str]]] = None, + layer_name: Optional[list[Optional[str]]] = None, use_aparam_as_mask: bool = False, spin: Any = None, mixed_types: bool = False, - exclude_types: List[int] = [], - type_map: Optional[List[str]] = None, - seed: Optional[Union[int, List[int]]] = None, + exclude_types: list[int] = [], + type_map: Optional[list[str]] = None, + seed: Optional[Union[int, list[int]]] = None, ): super().__init__( var_name="energy", diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py index a20405018e..a587f69449 100644 --- a/deepmd/dpmodel/fitting/general_fitting.py +++ b/deepmd/dpmodel/fitting/general_fitting.py @@ -5,8 +5,6 @@ ) from typing import ( Any, - Dict, - List, Optional, Union, ) @@ -78,15 +76,15 @@ class GeneralFitting(NativeOP, BaseFitting): mixed_types If true, use a uniform fitting net for all atom types, otherwise use different fitting nets for different atom types. - exclude_types: List[int] + exclude_types: list[int] Atomic contributions of the excluded atom types are set zero. - remove_vaccum_contribution: List[bool], optional + remove_vaccum_contribution: list[bool], optional Remove vaccum contribution before the bias is added. The list assigned each type. For `mixed_types` provide `[True]`, otherwise it should be a list of the same length as `ntypes` signaling if or not removing the vaccum contribution for the atom types in the list. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. 
- seed: Optional[Union[int, List[int]]] + seed: Optional[Union[int, list[int]]] Random seed for initializing the network parameters. """ @@ -95,24 +93,24 @@ def __init__( var_name: str, ntypes: int, dim_descrpt: int, - neuron: List[int] = [120, 120, 120], + neuron: list[int] = [120, 120, 120], resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, bias_atom_e: Optional[np.ndarray] = None, rcond: Optional[float] = None, tot_ener_zero: bool = False, - trainable: Optional[List[bool]] = None, + trainable: Optional[list[bool]] = None, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, - layer_name: Optional[List[Optional[str]]] = None, + layer_name: Optional[list[Optional[str]]] = None, use_aparam_as_mask: bool = False, spin: Any = None, mixed_types: bool = True, - exclude_types: List[int] = [], - remove_vaccum_contribution: Optional[List[bool]] = None, - type_map: Optional[List[str]] = None, - seed: Optional[Union[int, List[int]]] = None, + exclude_types: list[int] = [], + remove_vaccum_contribution: Optional[list[bool]] = None, + type_map: Optional[list[str]] = None, + seed: Optional[Union[int, list[int]]] = None, ): self.var_name = var_name self.ntypes = ntypes @@ -192,7 +190,7 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return self.numb_aparam - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. Only atoms with selected atom types have atomic contribution @@ -201,12 +199,12 @@ def get_sel_type(self) -> List[int]: """ return [ii for ii in range(self.ntypes) if ii not in self.exclude_types] - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -261,7 +259,7 @@ def __getitem__(self, key): def reinit_exclude( self, - exclude_types: List[int] = [], + exclude_types: list[int] = [], ): self.exclude_types = exclude_types self.emask = AtomExcludeMask(self.ntypes, self.exclude_types) @@ -322,7 +320,7 @@ def _call_common( h2: Optional[np.ndarray] = None, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """Calculate the fitting. Parameters diff --git a/deepmd/dpmodel/fitting/invar_fitting.py b/deepmd/dpmodel/fitting/invar_fitting.py index 2e469eefe1..893853bb38 100644 --- a/deepmd/dpmodel/fitting/invar_fitting.py +++ b/deepmd/dpmodel/fitting/invar_fitting.py @@ -2,8 +2,6 @@ import copy from typing import ( Any, - Dict, - List, Optional, Union, ) @@ -105,9 +103,9 @@ class InvarFitting(GeneralFitting): And the aparam will not be used as the atomic parameters for embedding. mixed_types If false, different atomic types uses different fitting net, otherwise different atom types share the same fitting net. - exclude_types: List[int] + exclude_types: list[int] Atomic contributions of the excluded atom types are set zero. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. 
""" @@ -118,24 +116,24 @@ def __init__( ntypes: int, dim_descrpt: int, dim_out: int, - neuron: List[int] = [120, 120, 120], + neuron: list[int] = [120, 120, 120], resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, bias_atom: Optional[np.ndarray] = None, rcond: Optional[float] = None, tot_ener_zero: bool = False, - trainable: Optional[List[bool]] = None, - atom_ener: Optional[List[float]] = None, + trainable: Optional[list[bool]] = None, + atom_ener: Optional[list[float]] = None, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, - layer_name: Optional[List[Optional[str]]] = None, + layer_name: Optional[list[Optional[str]]] = None, use_aparam_as_mask: bool = False, spin: Any = None, mixed_types: bool = True, - exclude_types: List[int] = [], - type_map: Optional[List[str]] = None, - seed: Optional[Union[int, List[int]]] = None, + exclude_types: list[int] = [], + type_map: Optional[list[str]] = None, + seed: Optional[Union[int, list[int]]] = None, ): if tot_ener_zero: raise NotImplementedError("tot_ener_zero is not implemented") @@ -219,7 +217,7 @@ def call( h2: Optional[np.ndarray] = None, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """Calculate the fitting. Parameters diff --git a/deepmd/dpmodel/fitting/make_base_fitting.py b/deepmd/dpmodel/fitting/make_base_fitting.py index 417ccc892a..a67273356d 100644 --- a/deepmd/dpmodel/fitting/make_base_fitting.py +++ b/deepmd/dpmodel/fitting/make_base_fitting.py @@ -4,8 +4,6 @@ abstractmethod, ) from typing import ( - Dict, - List, Optional, ) @@ -60,7 +58,7 @@ def fwd( h2: Optional[t_tensor] = None, fparam: Optional[t_tensor] = None, aparam: Optional[t_tensor] = None, - ) -> Dict[str, t_tensor]: + ) -> dict[str, t_tensor]: """Calculate fitting.""" pass @@ -69,13 +67,13 @@ def compute_output_stats(self, merged): raise NotImplementedError @abstractmethod - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" pass @abstractmethod def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. diff --git a/deepmd/dpmodel/fitting/polarizability_fitting.py b/deepmd/dpmodel/fitting/polarizability_fitting.py index d3036fe8b8..2ff5052a83 100644 --- a/deepmd/dpmodel/fitting/polarizability_fitting.py +++ b/deepmd/dpmodel/fitting/polarizability_fitting.py @@ -2,8 +2,6 @@ import copy from typing import ( Any, - Dict, - List, Optional, Union, ) @@ -82,11 +80,11 @@ class PolarFitting(GeneralFitting): fit_diag : bool Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix. - scale : List[float] + scale : list[float] The output of the fitting net (polarizability matrix) for type i atom will be scaled by scale[i] shift_diag : bool Whether to shift the diagonal part of the polarizability matrix. The shift operation is carried out after scale. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. 
""" @@ -95,26 +93,26 @@ def __init__( ntypes: int, dim_descrpt: int, embedding_width: int, - neuron: List[int] = [120, 120, 120], + neuron: list[int] = [120, 120, 120], resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, rcond: Optional[float] = None, tot_ener_zero: bool = False, - trainable: Optional[List[bool]] = None, + trainable: Optional[list[bool]] = None, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, - layer_name: Optional[List[Optional[str]]] = None, + layer_name: Optional[list[Optional[str]]] = None, use_aparam_as_mask: bool = False, spin: Any = None, mixed_types: bool = False, - exclude_types: List[int] = [], + exclude_types: list[int] = [], old_impl: bool = False, fit_diag: bool = True, - scale: Optional[List[float]] = None, + scale: Optional[list[float]] = None, shift_diag: bool = True, - type_map: Optional[List[str]] = None, - seed: Optional[Union[int, List[int]]] = None, + type_map: Optional[list[str]] = None, + seed: Optional[Union[int, list[int]]] = None, ): if tot_ener_zero: raise NotImplementedError("tot_ener_zero is not implemented") @@ -223,7 +221,7 @@ def output_def(self): ) def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -257,7 +255,7 @@ def call( h2: Optional[np.ndarray] = None, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """Calculate the fitting. Parameters diff --git a/deepmd/dpmodel/fitting/property_fitting.py b/deepmd/dpmodel/fitting/property_fitting.py index 014dda4188..1a8fe44aae 100644 --- a/deepmd/dpmodel/fitting/property_fitting.py +++ b/deepmd/dpmodel/fitting/property_fitting.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import copy from typing import ( - List, Optional, Union, ) @@ -60,9 +59,9 @@ class PropertyFittingNet(InvarFitting): The precision of the embedding net parameters. Supported options are |PRECISION| mixed_types If false, different atomic types uses different fitting net, otherwise different atom types share the same fitting net. - exclude_types: List[int] + exclude_types: list[int] Atomic contributions of the excluded atom types are set zero. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. 
""" @@ -71,10 +70,10 @@ def __init__( ntypes: int, dim_descrpt: int, task_dim: int = 1, - neuron: List[int] = [128, 128, 128], + neuron: list[int] = [128, 128, 128], bias_atom_p: Optional[np.ndarray] = None, rcond: Optional[float] = None, - trainable: Union[bool, List[bool]] = True, + trainable: Union[bool, list[bool]] = True, intensive: bool = False, bias_method: str = "normal", resnet_dt: bool = True, @@ -83,8 +82,8 @@ def __init__( activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, mixed_types: bool = True, - exclude_types: List[int] = [], - type_map: Optional[List[str]] = None, + exclude_types: list[int] = [], + type_map: Optional[list[str]] = None, # not used seed: Optional[int] = None, ): diff --git a/deepmd/dpmodel/infer/deep_eval.py b/deepmd/dpmodel/infer/deep_eval.py index 02625f5331..695edb29d2 100644 --- a/deepmd/dpmodel/infer/deep_eval.py +++ b/deepmd/dpmodel/infer/deep_eval.py @@ -4,11 +4,7 @@ TYPE_CHECKING, Any, Callable, - Dict, - List, Optional, - Tuple, - Type, Union, ) @@ -109,7 +105,7 @@ def get_ntypes(self) -> int: """Get the number of atom types of this model.""" return len(self.type_map) - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map (element name of the atom types) of this model.""" return self.type_map @@ -122,7 +118,7 @@ def get_dim_aparam(self) -> int: return self.dp.get_dim_aparam() @property - def model_type(self) -> Type["DeepEvalWrapper"]: + def model_type(self) -> type["DeepEvalWrapper"]: """The the evaluator of the model type.""" model_output_type = self.dp.model_output_type() if "energy" in model_output_type: @@ -138,7 +134,7 @@ def model_type(self) -> Type["DeepEvalWrapper"]: else: raise RuntimeError("Unknown model type") - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. Only atoms with selected atom types have atomic contribution @@ -168,7 +164,7 @@ def eval( fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, **kwargs: Any, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """Evaluate the energy, force and virial by using this DP. Parameters @@ -226,7 +222,7 @@ def eval( ) ) - def _get_request_defs(self, atomic: bool) -> List[OutputVariableDef]: + def _get_request_defs(self, atomic: bool) -> list[OutputVariableDef]: """Get the requested output definitions. When atomic is True, all output_def are requested. @@ -290,7 +286,7 @@ def _get_natoms_and_nframes( coords: np.ndarray, atom_types: np.ndarray, mixed_type: bool = False, - ) -> Tuple[int, int]: + ) -> tuple[int, int]: if mixed_type: natoms = len(atom_types[0]) else: @@ -307,7 +303,7 @@ def _eval_model( coords: np.ndarray, cells: Optional[np.ndarray], atom_types: np.ndarray, - request_defs: List[OutputVariableDef], + request_defs: list[OutputVariableDef], ): model = self.dp diff --git a/deepmd/dpmodel/model/base_model.py b/deepmd/dpmodel/model/base_model.py index c6d482c72f..3f71003bad 100644 --- a/deepmd/dpmodel/model/base_model.py +++ b/deepmd/dpmodel/model/base_model.py @@ -7,10 +7,7 @@ ) from typing import ( Any, - List, Optional, - Tuple, - Type, ) from deepmd.utils.data_system import ( @@ -22,7 +19,7 @@ ) -def make_base_model() -> Type[object]: +def make_base_model() -> type[object]: class BaseBaseModel(ABC, PluginVariant, make_plugin_registry("model")): """Base class for final exported model that will be directly used for inference. 
@@ -67,7 +64,7 @@ def __call__(self, *args: Any, **kwds: Any) -> Any: pass @abstractmethod - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map.""" @abstractmethod @@ -83,7 +80,7 @@ def get_dim_aparam(self): """Get the number (dimension) of atomic parameters of this atomic model.""" @abstractmethod - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. Only atoms with selected atom types have atomic contribution @@ -99,7 +96,7 @@ def is_aparam_nall(self) -> bool: """ @abstractmethod - def model_output_type(self) -> List[str]: + def model_output_type(self) -> list[str]: """Get the output type for the model.""" @abstractmethod @@ -166,9 +163,9 @@ def get_nsel(self) -> int: def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters diff --git a/deepmd/dpmodel/model/dp_model.py b/deepmd/dpmodel/model/dp_model.py index 1597ba0b14..eda0414398 100644 --- a/deepmd/dpmodel/model/dp_model.py +++ b/deepmd/dpmodel/model/dp_model.py @@ -2,9 +2,7 @@ from typing import ( - List, Optional, - Tuple, ) from deepmd.dpmodel.descriptor.base_descriptor import ( @@ -21,9 +19,9 @@ class DPModelCommon: def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters diff --git a/deepmd/dpmodel/model/make_model.py b/deepmd/dpmodel/model/make_model.py index ee4c1f035a..8cdb7e1f25 100644 --- a/deepmd/dpmodel/model/make_model.py +++ b/deepmd/dpmodel/model/make_model.py @@ -1,10 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - Dict, - List, Optional, - Tuple, - Type, ) import numpy as np @@ -42,7 +38,7 @@ ) -def make_model(T_AtomicModel: Type[BaseAtomicModel]): +def make_model(T_AtomicModel: type[BaseAtomicModel]): """Make a model as a derived class of an atomic model. The model provide two interfaces. @@ -87,7 +83,7 @@ def model_output_def(self): """Get the output def for the model.""" return ModelOutputDef(self.atomic_output_def()) - def model_output_type(self) -> List[str]: + def model_output_type(self) -> list[str]: """Get the output type for the model.""" output_def = self.model_output_def() var_defs = output_def.var_defs @@ -106,7 +102,7 @@ def call( fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, do_atomic_virial: bool = False, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """Return model prediction. Parameters @@ -128,7 +124,7 @@ def call( Returns ------- ret_dict - The result dict of type Dict[str,np.ndarray]. + The result dict of type dict[str,np.ndarray]. The keys are defined by the `ModelOutputDef`. 
""" @@ -249,7 +245,7 @@ def input_type_cast( box: Optional[np.ndarray] = None, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, - ) -> Tuple[ + ) -> tuple[ np.ndarray, Optional[np.ndarray], Optional[np.ndarray], @@ -263,7 +259,7 @@ def input_type_cast( ### ### type checking would not pass jit, convert to coord prec anyway ### - _lst: List[Optional[np.ndarray]] = [ + _lst: list[Optional[np.ndarray]] = [ vv.astype(coord.dtype) if vv is not None else None for vv in [box, fparam, aparam] ] @@ -285,9 +281,9 @@ def input_type_cast( def output_type_cast( self, - model_ret: Dict[str, np.ndarray], + model_ret: dict[str, np.ndarray], input_prec: str, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """Convert the model output to the input prec.""" do_cast = ( input_prec @@ -427,7 +423,7 @@ def do_grad_c( return self.atomic_model.do_grad_c(var_name) def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -449,7 +445,7 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return self.atomic_model.get_dim_aparam() - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. Only atoms with selected atom types have atomic contribution @@ -469,7 +465,7 @@ def get_rcut(self) -> float: """Get the cut-off radius.""" return self.atomic_model.get_rcut() - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map.""" return self.atomic_model.get_type_map() @@ -481,7 +477,7 @@ def get_nnei(self) -> int: """Returns the total number of selected neighboring atoms in the cut-off radius.""" return self.atomic_model.get_nnei() - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.atomic_model.get_sel() diff --git a/deepmd/dpmodel/model/spin_model.py b/deepmd/dpmodel/model/spin_model.py index d9c96a979e..b0801fe59e 100644 --- a/deepmd/dpmodel/model/spin_model.py +++ b/deepmd/dpmodel/model/spin_model.py @@ -1,7 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - Dict, - List, Optional, ) @@ -222,7 +220,7 @@ def expand_aparam(aparam, nloc: int): ) return aparam - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map.""" tmap = self.backbone_model.get_type_map() ntypes = len(tmap) // 2 # ignore the virtual type @@ -244,7 +242,7 @@ def get_dim_aparam(self): """Get the number (dimension) of atomic parameters of this atomic model.""" return self.backbone_model.get_dim_aparam() - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. Only atoms with selected atom types have atomic contribution to the result of the model. 
@@ -258,7 +256,7 @@ def is_aparam_nall(self) -> bool: """ return self.backbone_model.is_aparam_nall() - def model_output_type(self) -> List[str]: + def model_output_type(self) -> list[str]: """Get the output type for the model.""" return self.backbone_model.model_output_type() @@ -333,7 +331,7 @@ def call( fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, do_atomic_virial: bool = False, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """Return model prediction. Parameters @@ -358,7 +356,7 @@ def call( Returns ------- ret_dict - The result dict of type Dict[str,np.ndarray]. + The result dict of type dict[str,np.ndarray]. The keys are defined by the `ModelOutputDef`. """ diff --git a/deepmd/dpmodel/model/transform_output.py b/deepmd/dpmodel/model/transform_output.py index 67fb016389..43c275b1be 100644 --- a/deepmd/dpmodel/model/transform_output.py +++ b/deepmd/dpmodel/model/transform_output.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Dict, -) import numpy as np @@ -17,11 +14,11 @@ def fit_output_to_model_output( - fit_ret: Dict[str, np.ndarray], + fit_ret: dict[str, np.ndarray], fit_output_def: FittingOutputDef, coord_ext: np.ndarray, do_atomic_virial: bool = False, -) -> Dict[str, np.ndarray]: +) -> dict[str, np.ndarray]: """Transform the output of the fitting network to the model output. @@ -49,11 +46,11 @@ def fit_output_to_model_output( def communicate_extended_output( - model_ret: Dict[str, np.ndarray], + model_ret: dict[str, np.ndarray], model_output_def: ModelOutputDef, mapping: np.ndarray, # nf x nloc do_atomic_virial: bool = False, -) -> Dict[str, np.ndarray]: +) -> dict[str, np.ndarray]: """Transform the output of the model network defined on local and ghost (extended) atoms to local atoms. 
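`fit_output_to_model_output` turns per-atom fitting outputs into frame-level model outputs; the reduced quantity is stored under the `_redu` suffix produced by `get_reduce_name` from `output_def.py` (shown in the next hunk). A minimal numpy sketch of that reduction, ignoring that only reducible variables are summed in the real code:

```python
import numpy as np


def get_reduce_name(name: str) -> str:
    return name + "_redu"


def reduce_fit_output(fit_ret: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
    # Sum each per-atom quantity of shape (nframes, nloc, *shape) over the
    # atom axis, storing the frame-level result under "<name>_redu".
    model_ret = dict(fit_ret)
    for name, value in fit_ret.items():
        model_ret[get_reduce_name(name)] = value.sum(axis=1)
    return model_ret


energy = np.ones((2, 5, 1))  # 2 frames, 5 local atoms, scalar output
assert reduce_fit_output({"energy": energy})["energy_redu"].shape == (2, 1)
```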
diff --git a/deepmd/dpmodel/output_def.py b/deepmd/dpmodel/output_def.py index d55ea3988d..2ceb4f412a 100644 --- a/deepmd/dpmodel/output_def.py +++ b/deepmd/dpmodel/output_def.py @@ -3,16 +3,11 @@ from enum import ( IntEnum, ) -from typing import ( - Dict, - List, - Tuple, -) def check_shape( - shape: List[int], - def_shape: List[int], + shape: list[int], + def_shape: list[int], ): """Check if the shape satisfies the defined shape.""" assert len(shape) == len(def_shape) @@ -193,7 +188,7 @@ class OutputVariableDef: def __init__( self, name: str, - shape: List[int], + shape: list[int], reducible: bool = False, r_differentiable: bool = False, c_differentiable: bool = False, @@ -256,7 +251,7 @@ class FittingOutputDef: def __init__( self, - var_defs: List[OutputVariableDef], + var_defs: list[OutputVariableDef], ): self.var_defs = {vv.name: vv for vv in var_defs} @@ -266,7 +261,7 @@ def __getitem__( ) -> OutputVariableDef: return self.var_defs[key] - def get_data(self) -> Dict[str, OutputVariableDef]: + def get_data(self) -> dict[str, OutputVariableDef]: return self.var_defs def keys(self): @@ -298,7 +293,7 @@ def __init__( self.def_hess_r, _ = do_derivative(self.def_derv_r) self.def_derv_c_redu = do_reduce(self.def_derv_c) self.def_mask = do_mask(self.def_outp.get_data()) - self.var_defs: Dict[str, OutputVariableDef] = {} + self.var_defs: dict[str, OutputVariableDef] = {} for ii in [ self.def_outp.get_data(), self.def_redu, @@ -318,7 +313,7 @@ def __getitem__( def get_data( self, - ) -> Dict[str, OutputVariableDef]: + ) -> dict[str, OutputVariableDef]: return self.var_defs def keys(self): @@ -347,11 +342,11 @@ def get_reduce_name(name: str) -> str: return name + "_redu" -def get_deriv_name(name: str) -> Tuple[str, str]: +def get_deriv_name(name: str) -> tuple[str, str]: return name + "_derv_r", name + "_derv_c" -def get_deriv_name_mag(name: str) -> Tuple[str, str]: +def get_deriv_name_mag(name: str) -> tuple[str, str]: return name + "_derv_r_mag", name + "_derv_c_mag" @@ -424,9 +419,9 @@ def check_deriv(var_def: OutputVariableDef) -> bool: def do_reduce( - def_outp_data: Dict[str, OutputVariableDef], -) -> Dict[str, OutputVariableDef]: - def_redu: Dict[str, OutputVariableDef] = {} + def_outp_data: dict[str, OutputVariableDef], +) -> dict[str, OutputVariableDef]: + def_redu: dict[str, OutputVariableDef] = {} for kk, vv in def_outp_data.items(): if vv.reducible: rk = get_reduce_name(kk) @@ -443,9 +438,9 @@ def do_reduce( def do_mask( - def_outp_data: Dict[str, OutputVariableDef], -) -> Dict[str, OutputVariableDef]: - def_mask: Dict[str, OutputVariableDef] = {} + def_outp_data: dict[str, OutputVariableDef], +) -> dict[str, OutputVariableDef]: + def_mask: dict[str, OutputVariableDef] = {} # for deep eval when has atomic mask def_mask["mask"] = OutputVariableDef( name="mask", @@ -468,10 +463,10 @@ def do_mask( def do_derivative( - def_outp_data: Dict[str, OutputVariableDef], -) -> Tuple[Dict[str, OutputVariableDef], Dict[str, OutputVariableDef]]: - def_derv_r: Dict[str, OutputVariableDef] = {} - def_derv_c: Dict[str, OutputVariableDef] = {} + def_outp_data: dict[str, OutputVariableDef], +) -> tuple[dict[str, OutputVariableDef], dict[str, OutputVariableDef]]: + def_derv_r: dict[str, OutputVariableDef] = {} + def_derv_c: dict[str, OutputVariableDef] = {} for kk, vv in def_outp_data.items(): rkr, rkc = get_deriv_name(kk) rkrm, rkcm = get_deriv_name_mag(kk) diff --git a/deepmd/dpmodel/utils/exclude_mask.py b/deepmd/dpmodel/utils/exclude_mask.py index ff668b8153..d0a739b9d4 100644 --- 
a/deepmd/dpmodel/utils/exclude_mask.py +++ b/deepmd/dpmodel/utils/exclude_mask.py @@ -1,8 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - List, - Tuple, -) import numpy as np @@ -13,7 +9,7 @@ class AtomExcludeMask: def __init__( self, ntypes: int, - exclude_types: List[int] = [], + exclude_types: list[int] = [], ): self.ntypes = ntypes self.exclude_types = exclude_types @@ -59,7 +55,7 @@ class PairExcludeMask: def __init__( self, ntypes: int, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): self.ntypes = ntypes self.exclude_types = set() diff --git a/deepmd/dpmodel/utils/neighbor_stat.py b/deepmd/dpmodel/utils/neighbor_stat.py index 96b39d20ad..744a4476cd 100644 --- a/deepmd/dpmodel/utils/neighbor_stat.py +++ b/deepmd/dpmodel/utils/neighbor_stat.py @@ -1,8 +1,9 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( +from collections.abc import ( Iterator, +) +from typing import ( Optional, - Tuple, ) import numpy as np @@ -47,7 +48,7 @@ def call( coord: np.ndarray, atype: np.ndarray, cell: Optional[np.ndarray], - ) -> Tuple[float, np.ndarray]: + ) -> tuple[float, np.ndarray]: """Calculate the neareest neighbor distance between atoms, maximum nbor size of atoms and the output data range of the environment matrix. @@ -130,7 +131,7 @@ def __init__( def iterator( self, data: DeepmdDataSystem - ) -> Iterator[Tuple[np.ndarray, float, str]]: + ) -> Iterator[tuple[np.ndarray, float, str]]: """Abstract method for producing data. Yields diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py index 22e85c9890..e1242c3669 100644 --- a/deepmd/dpmodel/utils/network.py +++ b/deepmd/dpmodel/utils/network.py @@ -9,8 +9,6 @@ from typing import ( Callable, ClassVar, - Dict, - List, Optional, Union, ) @@ -86,7 +84,7 @@ def __init__( activation_function: Optional[str] = None, resnet: bool = False, precision: str = DEFAULT_PRECISION, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ) -> None: prec = PRECISION_DICT[precision.lower()] self.precision = precision @@ -347,7 +345,7 @@ def __init__( uni_init: bool = True, trainable: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ) -> None: self.eps = eps self.uni_init = uni_init @@ -494,7 +492,7 @@ class NN(ModuleBase): The layers of the network. 
""" - def __init__(self, layers: Optional[List[dict]] = None) -> None: + def __init__(self, layers: Optional[list[dict]] = None) -> None: super().__init__() if layers is None: layers = [] @@ -604,11 +602,11 @@ class EN(T_Network): def __init__( self, in_dim, - neuron: List[int] = [24, 48, 96], + neuron: list[int] = [24, 48, 96], activation_function: str = "tanh", resnet_dt: bool = False, precision: str = DEFAULT_PRECISION, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, bias: bool = True, ): layers = [] @@ -709,12 +707,12 @@ def __init__( self, in_dim, out_dim, - neuron: List[int] = [24, 48, 96], + neuron: list[int] = [24, 48, 96], activation_function: str = "tanh", resnet_dt: bool = False, precision: str = DEFAULT_PRECISION, bias_out: bool = True, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): super().__init__( in_dim, @@ -804,7 +802,7 @@ class NetworkCollection: """ # subclass may override this - NETWORK_TYPE_MAP: ClassVar[Dict[str, type]] = { + NETWORK_TYPE_MAP: ClassVar[dict[str, type]] = { "network": NativeNet, "embedding_network": EmbeddingNet, "fitting_network": FittingNet, @@ -815,7 +813,7 @@ def __init__( ndim: int, ntypes: int, network_type: str = "network", - networks: List[Union[NativeNet, dict]] = [], + networks: list[Union[NativeNet, dict]] = [], ): self.ndim = ndim self.ntypes = ntypes diff --git a/deepmd/dpmodel/utils/nlist.py b/deepmd/dpmodel/utils/nlist.py index c935377e6a..4d0b3e3286 100644 --- a/deepmd/dpmodel/utils/nlist.py +++ b/deepmd/dpmodel/utils/nlist.py @@ -1,7 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - Dict, - List, Optional, Union, ) @@ -18,7 +16,7 @@ def extend_input_and_build_neighbor_list( coord, atype, rcut: float, - sel: List[int], + sel: list[int], mixed_types: bool = False, box: Optional[np.ndarray] = None, ): @@ -51,7 +49,7 @@ def build_neighbor_list( atype: np.ndarray, nloc: int, rcut: float, - sel: Union[int, List[int]], + sel: Union[int, list[int]], distinguish_types: bool = True, ) -> np.ndarray: """Build neightbor list for a single frame. keeps nsel neighbors. @@ -67,7 +65,7 @@ def build_neighbor_list( number of local atoms. rcut : float cut-off radius - sel : int or List[int] + sel : int or list[int] maximal number of neighbors (of each type). if distinguish_types==True, nsel should be list and the length of nsel should be equal to number of @@ -145,7 +143,7 @@ def build_neighbor_list( def nlist_distinguish_types( nlist: np.ndarray, atype: np.ndarray, - sel: List[int], + sel: list[int], ): """Given a nlist that does not distinguish atom types, return a nlist that distinguish atom types. @@ -179,9 +177,9 @@ def get_multiple_nlist_key(rcut: float, nsel: int) -> str: def build_multiple_neighbor_list( coord: np.ndarray, nlist: np.ndarray, - rcuts: List[float], - nsels: List[int], -) -> Dict[str, np.ndarray]: + rcuts: list[float], + nsels: list[int], +) -> dict[str, np.ndarray]: """Input one neighbor list, and produce multiple neighbor lists with different cutoff radius and numbers of selection out of it. The required rcuts and nsels should be smaller or equal to the input nlist. @@ -193,14 +191,14 @@ def build_multiple_neighbor_list( nlist : np.ndarray Neighbor list of shape [batch_size, nloc, nsel], the neighbors should be stored in an ascending order. - rcuts : List[float] + rcuts : list[float] list of cut-off radius in ascending order. 
- nsels : List[int] + nsels : list[int] maximal number of neighbors in ascending order. Returns ------- - nlist_dict : Dict[str, np.ndarray] + nlist_dict : dict[str, np.ndarray] A dict of nlists, key given by get_multiple_nlist_key(rc, nsel) value being the corresponding nlist. diff --git a/deepmd/dpmodel/utils/seed.py b/deepmd/dpmodel/utils/seed.py index 4ceab80066..165ff558b9 100644 --- a/deepmd/dpmodel/utils/seed.py +++ b/deepmd/dpmodel/utils/seed.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, Union, overload, @@ -12,10 +11,10 @@ def child_seed(seed: None, idx: int) -> None: ... @overload -def child_seed(seed: Union[int, List[int]], idx: int) -> List[int]: ... +def child_seed(seed: Union[int, list[int]], idx: int) -> list[int]: ... -def child_seed(seed: Optional[Union[int, List[int]]], idx: int) -> Optional[List[int]]: +def child_seed(seed: Optional[Union[int, list[int]]], idx: int) -> Optional[list[int]]: """Generate a child seed from a parent seed. Parameters @@ -27,7 +26,7 @@ def child_seed(seed: Optional[Union[int, List[int]]], idx: int) -> Optional[List Returns ------- - Optional[List[int]] + Optional[list[int]] The child seed. """ # See https://numpy.org/doc/stable/reference/random/parallel.html#sequence-of-integer-seeds diff --git a/deepmd/dpmodel/utils/type_embed.py b/deepmd/dpmodel/utils/type_embed.py index 04c05b6a39..d67d8e50fd 100644 --- a/deepmd/dpmodel/utils/type_embed.py +++ b/deepmd/dpmodel/utils/type_embed.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, Union, ) @@ -51,7 +50,7 @@ class TypeEmbedNet(NativeOP): Whether to use electronic configuration type embedding. use_tebd_bias : bool, Optional Whether to use bias in the type embedding layer. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. """ @@ -59,16 +58,16 @@ def __init__( self, *, ntypes: int, - neuron: List[int], + neuron: list[int], resnet_dt: bool = False, activation_function: str = "tanh", precision: str = "default", trainable: bool = True, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, padding: bool = False, use_econf_tebd: bool = False, use_tebd_bias: bool = False, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, ) -> None: self.ntypes = ntypes self.neuron = neuron @@ -162,7 +161,7 @@ def serialize(self) -> dict: } def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. 
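`child_seed` follows numpy's sequence-of-integer-seeds recipe referenced in its body: mixing a child index into the parent entropy yields independent, reproducible streams. One plausible minimal implementation; the exact ordering of the entropy words is an assumption, not the library's code:

```python
from typing import Optional, Union

import numpy as np


def child_seed(
    seed: Optional[Union[int, list[int]]], idx: int
) -> Optional[list[int]]:
    # Extending the entropy sequence with the child index gives each
    # child an independent stream while staying reproducible.
    if seed is None:
        return None
    if isinstance(seed, int):
        return [idx, seed]
    return [idx, *seed]


# Two children of the same parent produce different, reproducible streams.
r0 = np.random.default_rng(child_seed(42, 0))
r1 = np.random.default_rng(child_seed(42, 1))
assert r0.integers(1 << 30) != r1.integers(1 << 30)
```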
diff --git a/deepmd/dpmodel/utils/update_sel.py b/deepmd/dpmodel/utils/update_sel.py index dc38a6a041..3f2900771f 100644 --- a/deepmd/dpmodel/utils/update_sel.py +++ b/deepmd/dpmodel/utils/update_sel.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Type, -) from deepmd.dpmodel.utils.neighbor_stat import ( NeighborStat, @@ -13,5 +10,5 @@ class UpdateSel(BaseUpdateSel): @property - def neighbor_stat(self) -> Type[NeighborStat]: + def neighbor_stat(self) -> type[NeighborStat]: return NeighborStat diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py index ba2eb90247..05f660cb9a 100644 --- a/deepmd/entrypoints/main.py +++ b/deepmd/entrypoints/main.py @@ -43,7 +43,7 @@ def main(args: argparse.Namespace): Parameters ---------- - args : List[str] or argparse.Namespace, optional + args : list[str] or argparse.Namespace, optional list of command line arguments, used to avoid calling from the subprocess, as it is quite slow to import tensorflow; if Namespace is given, it will be used directly diff --git a/deepmd/entrypoints/neighbor_stat.py b/deepmd/entrypoints/neighbor_stat.py index 8840851b91..62dceb24fd 100644 --- a/deepmd/entrypoints/neighbor_stat.py +++ b/deepmd/entrypoints/neighbor_stat.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging from typing import ( - List, Optional, ) @@ -22,7 +21,7 @@ def neighbor_stat( *, system: str, rcut: float, - type_map: Optional[List[str]], + type_map: Optional[list[str]], mixed_type: bool = False, backend: str = "tensorflow", **kwargs, diff --git a/deepmd/entrypoints/show.py b/deepmd/entrypoints/show.py index 6f72c4614d..4cad5f312c 100644 --- a/deepmd/entrypoints/show.py +++ b/deepmd/entrypoints/show.py @@ -1,8 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging -from typing import ( - List, -) from deepmd.infer.deep_eval import ( DeepEval, @@ -14,7 +11,7 @@ def show( *, INPUT: str, - ATTRIBUTES: List[str], + ATTRIBUTES: list[str], **kwargs, ): model = DeepEval(INPUT, head=0) diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py index 6709a9cb29..ad445fdea1 100644 --- a/deepmd/entrypoints/test.py +++ b/deepmd/entrypoints/test.py @@ -7,10 +7,7 @@ ) from typing import ( TYPE_CHECKING, - Dict, - List, Optional, - Tuple, ) import numpy as np @@ -266,7 +263,7 @@ def test_ener( detail_file: Optional[str], has_atom_ener: bool, append_detail: bool = False, -) -> Tuple[List[np.ndarray], List[int]]: +) -> tuple[list[np.ndarray], list[int]]: """Test energy type model. Parameters @@ -288,7 +285,7 @@ def test_ener( Returns ------- - Tuple[List[np.ndarray], List[int]] + tuple[list[np.ndarray], list[int]] arrays with results and their shapes """ data.add("energy", 1, atomic=False, must=False, high_prec=True) @@ -564,7 +561,7 @@ def test_ener( } -def print_ener_sys_avg(avg: Dict[str, float]): +def print_ener_sys_avg(avg: dict[str, float]): """Print errors summary for energy type potential. Parameters @@ -598,7 +595,7 @@ def test_dos( detail_file: Optional[str], has_atom_dos: bool, append_detail: bool = False, -) -> Tuple[List[np.ndarray], List[int]]: +) -> tuple[list[np.ndarray], list[int]]: """Test DOS type model. 
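The `UpdateSel.neighbor_stat` property in the hunk above is annotated `type[NeighborStat]` because it hands back the backend's class rather than an instance, leaving instantiation to the caller. A sketch of that indirection with placeholder classes:

```python
class NeighborStat:
    """Placeholder for a backend-specific neighbor-statistics class."""


class BaseUpdateSel:
    @property
    def neighbor_stat(self) -> type[NeighborStat]:
        raise NotImplementedError


class UpdateSel(BaseUpdateSel):
    @property
    def neighbor_stat(self) -> type[NeighborStat]:
        return NeighborStat  # hand back the class itself


# The caller decides when (and with what arguments) to instantiate.
stat_cls = UpdateSel().neighbor_stat
stat = stat_cls()
```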
Parameters @@ -620,7 +617,7 @@ def test_dos( Returns ------- - Tuple[List[np.ndarray], List[int]] + tuple[list[np.ndarray], list[int]] arrays with results and their shapes """ data.add("dos", dp.numb_dos, atomic=False, must=True, high_prec=True) @@ -736,7 +733,7 @@ def test_dos( } -def print_dos_sys_avg(avg: Dict[str, float]): +def print_dos_sys_avg(avg: dict[str, float]): """Print errors summary for DOS type potential. Parameters @@ -758,7 +755,7 @@ def test_property( detail_file: Optional[str], has_atom_property: bool, append_detail: bool = False, -) -> Tuple[List[np.ndarray], List[int]]: +) -> tuple[list[np.ndarray], list[int]]: """Test Property type model. Parameters @@ -780,7 +777,7 @@ def test_property( Returns ------- - Tuple[List[np.ndarray], List[int]] + tuple[list[np.ndarray], list[int]] arrays with results and their shapes """ data.add("property", dp.task_dim, atomic=False, must=True, high_prec=True) @@ -890,7 +887,7 @@ def test_property( } -def print_property_sys_avg(avg: Dict[str, float]): +def print_property_sys_avg(avg: dict[str, float]): """Print errors summary for Property type potential. Parameters @@ -940,7 +937,7 @@ def test_wfc( data: DeepmdData, numb_test: int, detail_file: Optional[str], -) -> Tuple[List[np.ndarray], List[int]]: +) -> tuple[list[np.ndarray], list[int]]: """Test energy type model. Parameters @@ -956,7 +953,7 @@ def test_wfc( Returns ------- - Tuple[List[np.ndarray], List[int]] + tuple[list[np.ndarray], list[int]] arrays with results and their shapes """ data.add( @@ -1004,7 +1001,7 @@ def test_polar( detail_file: Optional[str], *, atomic: bool, -) -> Tuple[List[np.ndarray], List[int]]: +) -> tuple[list[np.ndarray], list[int]]: """Test energy type model. Parameters @@ -1022,7 +1019,7 @@ def test_polar( Returns ------- - Tuple[List[np.ndarray], List[int]] + tuple[list[np.ndarray], list[int]] arrays with results and their shapes """ data.add( @@ -1145,7 +1142,7 @@ def test_dipole( numb_test: int, detail_file: Optional[str], atomic: bool, -) -> Tuple[List[np.ndarray], List[int]]: +) -> tuple[list[np.ndarray], list[int]]: """Test energy type model. Parameters @@ -1163,7 +1160,7 @@ def test_dipole( Returns ------- - Tuple[List[np.ndarray], List[int]] + tuple[list[np.ndarray], list[int]] arrays with results and their shapes """ data.add( diff --git a/deepmd/env.py b/deepmd/env.py index 139e95b824..605dfeed99 100644 --- a/deepmd/env.py +++ b/deepmd/env.py @@ -7,10 +7,6 @@ from pathlib import ( Path, ) -from typing import ( - Dict, - Tuple, -) import numpy as np @@ -105,7 +101,7 @@ def set_default_nthreads(): set_env_if_empty("DP_INTER_OP_PARALLELISM_THREADS", "0", verbose=False) -def get_default_nthreads() -> Tuple[int, int]: +def get_default_nthreads() -> tuple[int, int]: """Get paralellism settings. The method will first read the environment variables with the prefix `DP_`. @@ -114,7 +110,7 @@ def get_default_nthreads() -> Tuple[int, int]: Returns ------- - Tuple[int, int] + tuple[int, int] number of `DP_INTRA_OP_PARALLELISM_THREADS` and `DP_INTER_OP_PARALLELISM_THREADS` """ @@ -133,7 +129,7 @@ def get_default_nthreads() -> Tuple[int, int]: def _get_package_constants( config_file: Path = CONFIG_FILE, -) -> Dict[str, str]: +) -> dict[str, str]: """Read package constants set at compile time by CMake to dictionary. 
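`get_default_nthreads` packs the two parallelism knobs into a `tuple[int, int]`. A simplified sketch that reads only the `DP_*` variables named in its docstring; whether the real function falls back to backend-specific variables is not shown in this diff:

```python
import os


def get_default_nthreads() -> tuple[int, int]:
    # 0 is the conventional "let the backend decide" value that
    # set_default_nthreads writes when the variables are empty.
    return (
        int(os.environ.get("DP_INTRA_OP_PARALLELISM_THREADS", "0")),
        int(os.environ.get("DP_INTER_OP_PARALLELISM_THREADS", "0")),
    )


intra, inter = get_default_nthreads()
```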
Parameters @@ -143,7 +139,7 @@ def _get_package_constants( Returns ------- - Dict[str, str] + dict[str, str] dictionary with package constants """ if not config_file.is_file(): diff --git a/deepmd/infer/deep_dos.py b/deepmd/infer/deep_dos.py index b26555627f..0d7ccee2b6 100644 --- a/deepmd/infer/deep_dos.py +++ b/deepmd/infer/deep_dos.py @@ -1,9 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Any, - List, Optional, - Tuple, Union, ) @@ -64,13 +62,13 @@ def eval( self, coords: np.ndarray, cells: Optional[np.ndarray], - atom_types: Union[List[int], np.ndarray], + atom_types: Union[list[int], np.ndarray], atomic: bool = False, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, mixed_type: bool = False, **kwargs: Any, - ) -> Tuple[np.ndarray, ...]: + ) -> tuple[np.ndarray, ...]: """Evaluate energy, force, and virial. If atomic is True, also return atomic energy and atomic virial. @@ -81,7 +79,7 @@ def eval( cells : np.ndarray The cell vectors of the system, in shape (nframes, 9). If the system is not periodic, set it to None. - atom_types : List[int] or np.ndarray + atom_types : list[int] or np.ndarray The types of the atoms. If mixed_type is False, the shape is (natoms,); otherwise, the shape is (nframes, natoms). atomic : bool, optional @@ -92,7 +90,7 @@ def eval( The atomic parameters, by default None. mixed_type : bool, optional Whether the atom_types is mixed type, by default False. - **kwargs : Dict[str, Any] + **kwargs : dict[str, Any] Keyword arguments. Returns diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py index f35094df3d..4d0134c37c 100644 --- a/deepmd/infer/deep_eval.py +++ b/deepmd/infer/deep_eval.py @@ -7,11 +7,7 @@ TYPE_CHECKING, Any, ClassVar, - Dict, - List, Optional, - Tuple, - Type, Union, ) @@ -111,7 +107,7 @@ def eval( fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, **kwargs: Any, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """Evaluate the energy, force and virial by using this DP. Parameters @@ -158,7 +154,7 @@ def get_ntypes(self) -> int: """Get the number of atom types of this model.""" @abstractmethod - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map (element name of the atom types) of this model.""" @abstractmethod @@ -256,11 +252,11 @@ def _check_mixed_types(self, atom_types: np.ndarray) -> bool: @property @abstractmethod - def model_type(self) -> Type["DeepEval"]: + def model_type(self) -> type["DeepEval"]: """The the evaluator of the model type.""" @abstractmethod - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. 
Only atoms with selected atom types have atomic contribution @@ -355,7 +351,7 @@ def get_ntypes(self) -> int: """Get the number of atom types of this model.""" return self.deep_eval.get_ntypes() - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map (element name of the atom types) of this model.""" return self.deep_eval.get_type_map() @@ -372,7 +368,7 @@ def _get_natoms_and_nframes( coords: np.ndarray, atom_types: np.ndarray, mixed_type: bool = False, - ) -> Tuple[int, int]: + ) -> tuple[int, int]: if mixed_type or atom_types.ndim > 1: natoms = len(atom_types[0]) else: @@ -525,7 +521,7 @@ def _standard_input(self, coords, cells, atom_types, fparam, aparam, mixed_type) ) return coords, cells, atom_types, fparam, aparam, nframes, natoms - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. Only atoms with selected atom types have atomic contribution diff --git a/deepmd/infer/deep_polar.py b/deepmd/infer/deep_polar.py index 22561a0685..7220e53637 100644 --- a/deepmd/infer/deep_polar.py +++ b/deepmd/infer/deep_polar.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, Union, ) @@ -51,7 +50,7 @@ def eval( self, coords: np.ndarray, cells: Optional[np.ndarray], - atom_types: Union[List[int], np.ndarray], + atom_types: Union[list[int], np.ndarray], atomic: bool = False, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, diff --git a/deepmd/infer/deep_pot.py b/deepmd/infer/deep_pot.py index 0632fd1c84..4755bc276a 100644 --- a/deepmd/infer/deep_pot.py +++ b/deepmd/infer/deep_pot.py @@ -1,10 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Any, - List, Literal, Optional, - Tuple, Union, overload, ) @@ -95,13 +93,13 @@ def eval( self, coords: np.ndarray, cells: Optional[np.ndarray], - atom_types: Union[List[int], np.ndarray], + atom_types: Union[list[int], np.ndarray], atomic: Literal[True], fparam: Optional[np.ndarray], aparam: Optional[np.ndarray], mixed_type: bool, **kwargs: Any, - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: pass @overload @@ -109,13 +107,13 @@ def eval( self, coords: np.ndarray, cells: Optional[np.ndarray], - atom_types: Union[List[int], np.ndarray], + atom_types: Union[list[int], np.ndarray], atomic: Literal[False], fparam: Optional[np.ndarray], aparam: Optional[np.ndarray], mixed_type: bool, **kwargs: Any, - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: pass @overload @@ -123,26 +121,26 @@ def eval( self, coords: np.ndarray, cells: Optional[np.ndarray], - atom_types: Union[List[int], np.ndarray], + atom_types: Union[list[int], np.ndarray], atomic: bool, fparam: Optional[np.ndarray], aparam: Optional[np.ndarray], mixed_type: bool, **kwargs: Any, - ) -> Tuple[np.ndarray, ...]: + ) -> tuple[np.ndarray, ...]: pass def eval( self, coords: np.ndarray, cells: Optional[np.ndarray], - atom_types: Union[List[int], np.ndarray], + atom_types: Union[list[int], np.ndarray], atomic: bool = False, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, mixed_type: bool = False, **kwargs: Any, - ) -> Tuple[np.ndarray, ...]: + ) -> tuple[np.ndarray, ...]: """Evaluate energy, force, and virial. If atomic is True, also return atomic energy and atomic virial. 
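The three `@overload` stubs on `DeepPot.eval` above use `Literal[True]`/`Literal[False]` so static checkers know `atomic=True` returns a 5-tuple and `atomic=False` a 3-tuple, while a single runtime body serves both. The same trick on a self-contained toy function:

```python
from typing import Literal, overload


@overload
def evaluate(atomic: Literal[True]) -> tuple[int, int, int, int, int]: ...
@overload
def evaluate(atomic: Literal[False]) -> tuple[int, int, int]: ...
@overload
def evaluate(atomic: bool) -> tuple[int, ...]: ...


def evaluate(atomic: bool = False) -> tuple[int, ...]:
    # One runtime body; the overloads above only narrow the static type.
    energy, force, virial = 1, 2, 3
    return (energy, force, virial, 4, 5) if atomic else (energy, force, virial)


three = evaluate(False)  # checker infers tuple[int, int, int]
five = evaluate(True)    # checker infers tuple[int, int, int, int, int]
```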
@@ -153,7 +151,7 @@ def eval( cells : np.ndarray The cell vectors of the system, in shape (nframes, 9). If the system is not periodic, set it to None. - atom_types : List[int] or np.ndarray + atom_types : list[int] or np.ndarray The types of the atoms. If mixed_type is False, the shape is (natoms,); otherwise, the shape is (nframes, natoms). atomic : bool, optional @@ -164,7 +162,7 @@ def eval( The atomic parameters, by default None. mixed_type : bool, optional Whether the atom_types is mixed type, by default False. - **kwargs : Dict[str, Any] + **kwargs : dict[str, Any] Keyword arguments. Returns diff --git a/deepmd/infer/deep_property.py b/deepmd/infer/deep_property.py index 5376fb1efc..4a3283cf32 100644 --- a/deepmd/infer/deep_property.py +++ b/deepmd/infer/deep_property.py @@ -1,10 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Any, - Dict, - List, Optional, - Tuple, Union, ) @@ -69,13 +66,13 @@ def eval( self, coords: np.ndarray, cells: Optional[np.ndarray], - atom_types: Union[List[int], np.ndarray], + atom_types: Union[list[int], np.ndarray], atomic: bool = False, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, mixed_type: bool = False, - **kwargs: Dict[str, Any], - ) -> Tuple[np.ndarray, ...]: + **kwargs: dict[str, Any], + ) -> tuple[np.ndarray, ...]: """Evaluate properties. If atomic is True, also return atomic property. Parameters @@ -85,7 +82,7 @@ def eval( cells : np.ndarray The cell vectors of the system, in shape (nframes, 9). If the system is not periodic, set it to None. - atom_types : List[int] or np.ndarray + atom_types : list[int] or np.ndarray The types of the atoms. If mixed_type is False, the shape is (natoms,); otherwise, the shape is (nframes, natoms). atomic : bool, optional @@ -96,7 +93,7 @@ def eval( The atomic parameters, by default None. mixed_type : bool, optional Whether the atom_types is mixed type, by default False. - **kwargs : Dict[str, Any] + **kwargs : dict[str, Any] Keyword arguments. Returns diff --git a/deepmd/infer/deep_tensor.py b/deepmd/infer/deep_tensor.py index 48918e7c75..bb5bc12697 100644 --- a/deepmd/infer/deep_tensor.py +++ b/deepmd/infer/deep_tensor.py @@ -3,9 +3,7 @@ abstractmethod, ) from typing import ( - List, Optional, - Tuple, Union, ) @@ -44,7 +42,7 @@ def eval( self, coords: np.ndarray, cells: Optional[np.ndarray], - atom_types: Union[List[int], np.ndarray], + atom_types: Union[list[int], np.ndarray], atomic: bool = True, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, @@ -119,7 +117,7 @@ def eval_full( aparam: Optional[np.ndarray] = None, mixed_type: bool = False, **kwargs: dict, - ) -> Tuple[np.ndarray, ...]: + ) -> tuple[np.ndarray, ...]: """Evaluate the model with interface similar to the energy model. Will return global tensor, component-wise force and virial and optionally atomic tensor and atomic virial. @@ -250,7 +248,7 @@ def eval_full( aparam: Optional[np.ndarray] = None, mixed_type: bool = False, **kwargs: dict, - ) -> Tuple[np.ndarray, ...]: + ) -> tuple[np.ndarray, ...]: """Unsupported method.""" raise RuntimeError( "This model does not support eval_full method. Use eval instead." 
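The `mixed_type` flag documented above changes the expected shape of `atom_types`: a shared vector of shape `(natoms,)` normally, and per-frame types of shape `(nframes, natoms)` when mixed. A condensed sketch of the `_get_natoms_and_nframes` helpers that recur in the `deep_eval` modules; the `nframes` derivation via reshape is an assumption:

```python
import numpy as np


def get_natoms_and_nframes(
    coords: np.ndarray,
    atom_types: np.ndarray,
    mixed_type: bool = False,
) -> tuple[int, int]:
    # mixed_type=True: atom_types is (nframes, natoms); otherwise (natoms,).
    natoms = len(atom_types[0]) if mixed_type else len(atom_types)
    # Flattened coordinates are (nframes, natoms * 3).
    nframes = coords.reshape(-1, natoms * 3).shape[0]
    return natoms, nframes


coords = np.zeros((4, 3, 3))        # 4 frames, 3 atoms
types = np.array([[0, 0, 1]] * 4)   # per-frame types
assert get_natoms_and_nframes(coords, types, mixed_type=True) == (3, 4)
```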
diff --git a/deepmd/infer/model_devi.py b/deepmd/infer/model_devi.py index 83708c7114..29e1eec741 100644 --- a/deepmd/infer/model_devi.py +++ b/deepmd/infer/model_devi.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Optional, - Tuple, overload, ) @@ -29,7 +28,7 @@ def calc_model_devi_f( real_f: Optional[np.ndarray] = None, relative: Optional[float] = None, atomic: Literal[False] = ..., -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: ... +) -> tuple[np.ndarray, np.ndarray, np.ndarray]: ... @overload @@ -38,7 +37,7 @@ def calc_model_devi_f( real_f: Optional[np.ndarray] = None, relative: Optional[float] = None, atomic: Literal[True] = ..., -) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: ... +) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: ... @overload @@ -47,7 +46,7 @@ def calc_model_devi_f( real_f: Optional[np.ndarray] = None, relative: Optional[float] = None, atomic: bool = False, -) -> Tuple[np.ndarray, ...]: ... +) -> tuple[np.ndarray, ...]: ... def calc_model_devi_f( @@ -55,7 +54,7 @@ def calc_model_devi_f( real_f: Optional[np.ndarray] = None, relative: Optional[float] = None, atomic: bool = False, -) -> Tuple[np.ndarray, ...]: +) -> tuple[np.ndarray, ...]: """Calculate model deviation of force. Parameters @@ -141,7 +140,7 @@ def calc_model_devi_v( vs: np.ndarray, real_v: Optional[np.ndarray] = None, relative: Optional[float] = None, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """Calculate model deviation of virial. Parameters diff --git a/deepmd/loggers/training.py b/deepmd/loggers/training.py index 954473e309..b2fff4788b 100644 --- a/deepmd/loggers/training.py +++ b/deepmd/loggers/training.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - Dict, Optional, ) @@ -16,7 +15,7 @@ def format_training_message( def format_training_message_per_task( batch: int, task_name: str, - rmse: Dict[str, float], + rmse: dict[str, float], learning_rate: Optional[float], ): if task_name: diff --git a/deepmd/main.py b/deepmd/main.py index c271152a06..60b8da2850 100644 --- a/deepmd/main.py +++ b/deepmd/main.py @@ -14,10 +14,7 @@ defaultdict, ) from typing import ( - Dict, - List, Optional, - Type, ) from deepmd.backend.backend import ( @@ -57,10 +54,10 @@ class RawTextArgumentDefaultsHelpFormatter( """This formatter is used to print multile-line help message with default value.""" -BACKENDS: Dict[str, Type[Backend]] = Backend.get_backends_by_feature( +BACKENDS: dict[str, type[Backend]] = Backend.get_backends_by_feature( Backend.Feature.ENTRY_POINT ) -BACKEND_TABLE: Dict[str, str] = {kk: vv.name.lower() for kk, vv in BACKENDS.items()} +BACKEND_TABLE: dict[str, str] = {kk: vv.name.lower() for kk, vv in BACKENDS.items()} class BackendOption(argparse.Action): @@ -130,7 +127,7 @@ def main_parser() -> argparse.ArgumentParser: ), ) - BACKEND_ALIAS: Dict[str, List[str]] = defaultdict(list) + BACKEND_ALIAS: dict[str, list[str]] = defaultdict(list) for alias, backend in BACKEND_TABLE.items(): BACKEND_ALIAS[backend].append(alias) for backend, alias in BACKEND_ALIAS.items(): @@ -856,12 +853,12 @@ def main_parser() -> argparse.ArgumentParser: return parser -def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace: +def parse_args(args: Optional[list[str]] = None) -> argparse.Namespace: """Parse arguments and convert argument strings to objects. 
Parameters ---------- - args : List[str] + args : list[str] list of command line arguments, main purpose is testing default option None takes arguments from sys.argv @@ -880,12 +877,12 @@ def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace: return parsed_args -def main(args: Optional[List[str]] = None): +def main(args: Optional[list[str]] = None): """DeePMD-kit new entry point. Parameters ---------- - args : List[str] + args : list[str] list of command line arguments, main purpose is testing default option None takes arguments from sys.argv diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py index 3df05cbb47..a0694c41c5 100644 --- a/deepmd/pt/entrypoints/main.py +++ b/deepmd/pt/entrypoints/main.py @@ -8,7 +8,6 @@ Path, ) from typing import ( - List, Optional, Union, ) @@ -485,7 +484,7 @@ def change_bias(FLAGS): @record -def main(args: Optional[Union[List[str], argparse.Namespace]] = None): +def main(args: Optional[Union[list[str], argparse.Namespace]] = None): if not isinstance(args, argparse.Namespace): FLAGS = parse_args(args=args) else: diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py index d5eae71731..538dc65371 100644 --- a/deepmd/pt/infer/deep_eval.py +++ b/deepmd/pt/infer/deep_eval.py @@ -4,11 +4,7 @@ TYPE_CHECKING, Any, Callable, - Dict, - List, Optional, - Tuple, - Type, Union, ) @@ -170,7 +166,7 @@ def get_ntypes(self) -> int: """Get the number of atom types of this model.""" return len(self.type_map) - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map (element name of the atom types) of this model.""" return self.type_map @@ -186,7 +182,7 @@ def get_intensive(self) -> bool: return self.dp.model["Default"].get_intensive() @property - def model_type(self) -> Type["DeepEvalWrapper"]: + def model_type(self) -> type["DeepEvalWrapper"]: """The the evaluator of the model type.""" model_output_type = self.dp.model["Default"].model_output_type() if "energy" in model_output_type: @@ -206,7 +202,7 @@ def model_type(self) -> Type["DeepEvalWrapper"]: else: raise RuntimeError("Unknown model type") - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. Only atoms with selected atom types have atomic contribution @@ -244,7 +240,7 @@ def eval( fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, **kwargs: Any, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """Evaluate the energy, force and virial by using this DP. Parameters @@ -311,7 +307,7 @@ def eval( ) ) - def _get_request_defs(self, atomic: bool) -> List[OutputVariableDef]: + def _get_request_defs(self, atomic: bool) -> list[OutputVariableDef]: """Get the requested output definitions. When atomic is True, all output_def are requested. 
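The `model_type` property above inspects `model_output_type()` and returns an evaluator class, hence the `type[...]` annotation. A sketch of that dispatch; only the `energy` branch and the closing `RuntimeError` are taken from the diff, the other names are placeholders:

```python
class DeepEval:
    """Placeholder base evaluator."""


class DeepPot(DeepEval): ...


class DeepDOS(DeepEval): ...


def model_type(model_output_type: list[str]) -> type[DeepEval]:
    # Match declared output variables to the high-level evaluator class.
    if "energy" in model_output_type:
        return DeepPot
    if "dos" in model_output_type:
        return DeepDOS
    raise RuntimeError("Unknown model type")


assert model_type(["energy"]) is DeepPot
```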
@@ -376,7 +372,7 @@ def _get_natoms_and_nframes( coords: np.ndarray, atom_types: np.ndarray, mixed_type: bool = False, - ) -> Tuple[int, int]: + ) -> tuple[int, int]: if mixed_type: natoms = len(atom_types[0]) else: @@ -395,7 +391,7 @@ def _eval_model( atom_types: np.ndarray, fparam: Optional[np.ndarray], aparam: Optional[np.ndarray], - request_defs: List[OutputVariableDef], + request_defs: list[OutputVariableDef], ): model = self.dp.to(DEVICE) @@ -476,7 +472,7 @@ def _eval_model_spin( spins: np.ndarray, fparam: Optional[np.ndarray], aparam: Optional[np.ndarray], - request_defs: List[OutputVariableDef], + request_defs: list[OutputVariableDef], ): model = self.dp.to(DEVICE) diff --git a/deepmd/pt/loss/dos.py b/deepmd/pt/loss/dos.py index 7fd2e04ff2..84513b6bf9 100644 --- a/deepmd/pt/loss/dos.py +++ b/deepmd/pt/loss/dos.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - List, -) import torch @@ -230,7 +227,7 @@ def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False return model_pred, loss, more_loss @property - def label_requirement(self) -> List[DataRequirementItem]: + def label_requirement(self) -> list[DataRequirementItem]: """Return data label requirements needed for this loss calculation.""" label_requirement = [] if self.has_ados or self.has_acdf: diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py index 092fbc1f76..f40110a749 100644 --- a/deepmd/pt/loss/ener.py +++ b/deepmd/pt/loss/ener.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, ) @@ -336,7 +335,7 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): return model_pred, loss, more_loss @property - def label_requirement(self) -> List[DataRequirementItem]: + def label_requirement(self) -> list[DataRequirementItem]: """Return data label requirements needed for this loss calculation.""" label_requirement = [] if self.has_e: diff --git a/deepmd/pt/loss/ener_spin.py b/deepmd/pt/loss/ener_spin.py index 78210a778b..09a053451f 100644 --- a/deepmd/pt/loss/ener_spin.py +++ b/deepmd/pt/loss/ener_spin.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - List, -) import torch import torch.nn.functional as F @@ -276,7 +273,7 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): return model_pred, loss, more_loss @property - def label_requirement(self) -> List[DataRequirementItem]: + def label_requirement(self) -> list[DataRequirementItem]: """Return data label requirements needed for this loss calculation.""" label_requirement = [] if self.has_e: diff --git a/deepmd/pt/loss/loss.py b/deepmd/pt/loss/loss.py index 7e26f6571a..1a091e074e 100644 --- a/deepmd/pt/loss/loss.py +++ b/deepmd/pt/loss/loss.py @@ -3,9 +3,6 @@ ABC, abstractmethod, ) -from typing import ( - List, -) import torch @@ -25,7 +22,7 @@ def forward(self, input_dict, model, label, natoms, learning_rate): @property @abstractmethod - def label_requirement(self) -> List[DataRequirementItem]: + def label_requirement(self) -> list[DataRequirementItem]: """Return data label requirements needed for this loss calculation.""" pass diff --git a/deepmd/pt/loss/property.py b/deepmd/pt/loss/property.py index e4f86091bc..ba120e3d6c 100644 --- a/deepmd/pt/loss/property.py +++ b/deepmd/pt/loss/property.py @@ -1,8 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging -from typing import ( - List, -) import torch import torch.nn.functional as F @@ -138,7 +135,7 @@ def 
forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False return model_pred, loss, more_loss @property - def label_requirement(self) -> List[DataRequirementItem]: + def label_requirement(self) -> list[DataRequirementItem]: """Return data label requirements needed for this loss calculation.""" label_requirement = [] label_requirement.append( diff --git a/deepmd/pt/loss/tensor.py b/deepmd/pt/loss/tensor.py index 3dcf21af1d..32d25cc9f1 100644 --- a/deepmd/pt/loss/tensor.py +++ b/deepmd/pt/loss/tensor.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - List, -) import torch @@ -151,7 +148,7 @@ def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False return model_pred, loss, more_loss @property - def label_requirement(self) -> List[DataRequirementItem]: + def label_requirement(self) -> list[DataRequirementItem]: """Return data label requirements needed for this loss calculation.""" label_requirement = [] if self.has_local_weight: diff --git a/deepmd/pt/model/atomic_model/base_atomic_model.py b/deepmd/pt/model/atomic_model/base_atomic_model.py index 4742fe66a3..bd3c2b49ab 100644 --- a/deepmd/pt/model/atomic_model/base_atomic_model.py +++ b/deepmd/pt/model/atomic_model/base_atomic_model.py @@ -4,10 +4,7 @@ import logging from typing import ( Callable, - Dict, - List, Optional, - Tuple, Union, ) @@ -67,7 +64,7 @@ class BaseAtomicModel(torch.nn.Module, BaseAtomicModel_): of the atomic model. Implemented by removing the pairs from the nlist. rcond : float, optional The condition number for the regression of atomic energy. - preset_out_bias : Dict[str, List[Optional[np.ndarray]]], optional + preset_out_bias : dict[str, list[Optional[np.ndarray]]], optional Specifying atomic energy contribution in vacuum. Given by key:value pairs. The value is a list specifying the bias. the elements can be None or np.ndarray of output shape. For example: [None, [2.]] means type 0 is not set, type 1 is set to [2.] 
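For concreteness, the `[None, [2.]]` convention documented above would look like this for a hypothetical two-type model:

```python
import numpy as np

# Hypothetical model with type_map = ["O", "H"]:
# the bias of type 0 ("O") is left to the regression (None),
# while type 1 ("H") is pinned to 2.0 in vacuum.
preset_out_bias: dict[str, list] = {
    "energy": [None, np.array([2.0])],
}
```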
@@ -77,11 +74,11 @@ class BaseAtomicModel(torch.nn.Module, BaseAtomicModel_): def __init__( self, - type_map: List[str], - atom_exclude_types: List[int] = [], - pair_exclude_types: List[Tuple[int, int]] = [], + type_map: list[str], + atom_exclude_types: list[int] = [], + pair_exclude_types: list[tuple[int, int]] = [], rcond: Optional[float] = None, - preset_out_bias: Optional[Dict[str, np.ndarray]] = None, + preset_out_bias: Optional[dict[str, np.ndarray]] = None, ): torch.nn.Module.__init__(self) BaseAtomicModel_.__init__(self) @@ -94,7 +91,7 @@ def __init__( def init_out_stat(self): """Initialize the output bias.""" ntypes = self.get_ntypes() - self.bias_keys: List[str] = list(self.fitting_output_def().keys()) + self.bias_keys: list[str] = list(self.fitting_output_def().keys()) self.max_out_size = max( [self.atomic_output_def()[kk].size for kk in self.bias_keys] ) @@ -124,13 +121,13 @@ def __getitem__(self, key): raise KeyError(key) @torch.jit.export - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map.""" return self.type_map def reinit_atom_exclude( self, - exclude_types: List[int] = [], + exclude_types: list[int] = [], ): self.atom_exclude_types = exclude_types if exclude_types == []: @@ -140,7 +137,7 @@ def reinit_atom_exclude( def reinit_pair_exclude( self, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): self.pair_exclude_types = exclude_types if exclude_types == []: @@ -195,8 +192,8 @@ def forward_common_atomic( mapping: Optional[torch.Tensor] = None, fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, - ) -> Dict[str, torch.Tensor]: + comm_dict: Optional[dict[str, torch.Tensor]] = None, + ) -> dict[str, torch.Tensor]: """Common interface for atomic inference. This method accept extended coordinates, extended atom typs, neighbor list, @@ -276,8 +273,8 @@ def forward( mapping: Optional[torch.Tensor] = None, fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, - ) -> Dict[str, torch.Tensor]: + comm_dict: Optional[dict[str, torch.Tensor]] = None, + ) -> dict[str, torch.Tensor]: return self.forward_common_atomic( extended_coord, extended_atype, @@ -289,7 +286,7 @@ def forward( ) def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -354,7 +351,7 @@ def deserialize(cls, data: dict) -> "BaseAtomicModel": def compute_or_load_stat( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], stat_file_path: Optional[DPPath] = None, ): """ @@ -362,11 +359,11 @@ def compute_or_load_stat( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. 
- - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. stat_file_path : Optional[DPPath] @@ -377,7 +374,7 @@ def compute_or_load_stat( def compute_or_load_out_stat( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], stat_file_path: Optional[DPPath] = None, ): """ @@ -385,11 +382,11 @@ def compute_or_load_out_stat( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. stat_file_path : Optional[DPPath] @@ -404,7 +401,7 @@ def compute_or_load_out_stat( def apply_out_stat( self, - ret: Dict[str, torch.Tensor], + ret: dict[str, torch.Tensor], atype: torch.Tensor, ): """Apply the stat to each atomic output. @@ -435,11 +432,11 @@ def change_out_bias( Parameters ---------- - sample_merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + sample_merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. 
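The `merged` argument documented above is either the sampled list itself or a zero-argument callable producing it. A small sketch of the lazy variant (`sample_all` is a hypothetical expensive sampler), which pays the sampling cost at most once:

```python
from typing import Callable


def make_lazy_merged(sample_all: Callable[[], list[dict]]) -> Callable[[], list[dict]]:
    cache: list[dict] = []

    def merged() -> list[dict]:
        # Sampling is slow and memory-intensive, so run it on first use only.
        if not cache:
            cache.extend(sample_all())
        return cache

    return merged
```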
bias_adjust_mode : str @@ -480,7 +477,9 @@ def _get_forward_wrapper_func(self) -> Callable[..., torch.Tensor]: """Get a forward wrapper of the atomic model for output bias calculation.""" def model_forward(coord, atype, box, fparam=None, aparam=None): - with torch.no_grad(): # it's essential for pure torch forward function to use auto_batchsize + with ( + torch.no_grad() + ): # it's essential for pure torch forward function to use auto_batchsize ( extended_coord, extended_atype, @@ -520,7 +519,7 @@ def _default_std(self): def _varsize( self, - shape: List[int], + shape: list[int], ) -> int: output_size = 1 len_shape = len(shape) @@ -532,7 +531,7 @@ def _get_bias_index( self, kk: str, ) -> int: - res: List[int] = [] + res: list[int] = [] for i, e in enumerate(self.bias_keys): if e == kk: res.append(i) @@ -541,8 +540,8 @@ def _get_bias_index( def _store_out_stat( self, - out_bias: Dict[str, torch.Tensor], - out_std: Dict[str, torch.Tensor], + out_bias: dict[str, torch.Tensor], + out_std: dict[str, torch.Tensor], add: bool = False, ): ntypes = self.get_ntypes() @@ -562,8 +561,8 @@ def _store_out_stat( def _fetch_out_stat( self, - keys: List[str], - ) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]: + keys: list[str], + ) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]: ret_bias = {} ret_std = {} ntypes = self.get_ntypes() diff --git a/deepmd/pt/model/atomic_model/dipole_atomic_model.py b/deepmd/pt/model/atomic_model/dipole_atomic_model.py index 1723a30f2d..aa28294cc5 100644 --- a/deepmd/pt/model/atomic_model/dipole_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dipole_atomic_model.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Dict, -) import torch @@ -21,7 +18,7 @@ def __init__(self, descriptor, fitting, type_map, **kwargs): def apply_out_stat( self, - ret: Dict[str, torch.Tensor], + ret: dict[str, torch.Tensor], atype: torch.Tensor, ): # dipole not applying bias diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index 8def2e48de..936a1fead3 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -3,8 +3,6 @@ import functools import logging from typing import ( - Dict, - List, Optional, ) @@ -52,7 +50,7 @@ def __init__( self, descriptor, fitting, - type_map: List[str], + type_map: list[str], **kwargs, ): super().__init__(type_map, **kwargs) @@ -79,7 +77,7 @@ def get_rcut(self) -> float: """Get the cut-off radius.""" return self.rcut - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Get the neighbor selection.""" return self.sel @@ -96,7 +94,7 @@ def mixed_types(self) -> bool: return self.descriptor.mixed_types() def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -157,8 +155,8 @@ def forward_atomic( mapping: Optional[torch.Tensor] = None, fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, - ) -> Dict[str, torch.Tensor]: + comm_dict: Optional[dict[str, torch.Tensor]] = None, + ) -> dict[str, torch.Tensor]: """Return atomic prediction. 
Parameters @@ -258,7 +256,7 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return self.fitting_net.get_dim_aparam() - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. Only atoms with selected atom types have atomic contribution diff --git a/deepmd/pt/model/atomic_model/linear_atomic_model.py b/deepmd/pt/model/atomic_model/linear_atomic_model.py index 3c7692212e..d88c4c3af5 100644 --- a/deepmd/pt/model/atomic_model/linear_atomic_model.py +++ b/deepmd/pt/model/atomic_model/linear_atomic_model.py @@ -2,10 +2,7 @@ import copy from typing import ( Callable, - Dict, - List, Optional, - Tuple, Union, ) @@ -55,8 +52,8 @@ class LinearEnergyAtomicModel(BaseAtomicModel): def __init__( self, - models: List[BaseAtomicModel], - type_map: List[str], + models: list[BaseAtomicModel], + type_map: list[str], **kwargs, ): super().__init__(type_map, **kwargs) @@ -119,12 +116,12 @@ def get_rcut(self) -> float: """Get the cut-off radius.""" return max(self.get_model_rcuts()) - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map.""" return self.type_map def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -140,22 +137,22 @@ def change_type_map( else None, ) - def get_model_rcuts(self) -> List[float]: + def get_model_rcuts(self) -> list[float]: """Get the cut-off radius for each individual models.""" return [model.get_rcut() for model in self.models] - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: return [max([model.get_nsel() for model in self.models])] - def get_model_nsels(self) -> List[int]: + def get_model_nsels(self) -> list[int]: """Get the processed sels for each individual models. Not distinguishing types.""" return [model.get_nsel() for model in self.models] - def get_model_sels(self) -> List[List[int]]: + def get_model_sels(self) -> list[list[int]]: """Get the sels for each individual models.""" return [model.get_sel() for model in self.models] - def _sort_rcuts_sels(self) -> Tuple[List[float], List[int]]: + def _sort_rcuts_sels(self) -> tuple[list[float], list[int]]: # sort the pair of rcut and sels in ascending order, first based on sel, then on rcut. zipped = torch.stack( [ @@ -168,8 +165,8 @@ def _sort_rcuts_sels(self) -> Tuple[List[float], List[int]]: inner_sorted = zipped[inner_sorting] outer_sorting = torch.argsort(inner_sorted[:, 0], stable=True) outer_sorted = inner_sorted[outer_sorting] - sorted_rcuts: List[float] = outer_sorted[:, 0].tolist() - sorted_sels: List[int] = outer_sorted[:, 1].to(torch.int64).tolist() + sorted_rcuts: list[float] = outer_sorted[:, 0].tolist() + sorted_sels: list[int] = outer_sorted[:, 1].to(torch.int64).tolist() return sorted_rcuts, sorted_sels def forward_atomic( @@ -180,8 +177,8 @@ def forward_atomic( mapping: Optional[torch.Tensor] = None, fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, - ) -> Dict[str, torch.Tensor]: + comm_dict: Optional[dict[str, torch.Tensor]] = None, + ) -> dict[str, torch.Tensor]: """Return atomic prediction. 
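`_sort_rcuts_sels` above implements a two-key sort with consecutive stable argsorts: sorting by the secondary key (sel) first and then stably by the primary key (rcut) yields a lexicographic order. A standalone check:

```python
import torch

rcuts = torch.tensor([6.0, 4.0, 6.0])
sels = torch.tensor([120.0, 40.0, 60.0])
zipped = torch.stack([rcuts, sels], dim=1)  # rows of (rcut, sel)

inner = zipped[torch.argsort(zipped[:, 1], stable=True)]  # sort by sel first
outer = inner[torch.argsort(inner[:, 0], stable=True)]    # then stably by rcut

sorted_rcuts: list[float] = outer[:, 0].tolist()               # [4.0, 6.0, 6.0]
sorted_sels: list[int] = outer[:, 1].to(torch.int64).tolist()  # [40, 60, 120]
```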
Parameters @@ -252,7 +249,7 @@ def forward_atomic( def apply_out_stat( self, - ret: Dict[str, torch.Tensor], + ret: dict[str, torch.Tensor], atype: torch.Tensor, ): """Apply the stat to each atomic output. @@ -270,16 +267,16 @@ def apply_out_stat( return ret @staticmethod - def remap_atype(ori_map: List[str], new_map: List[str]) -> torch.Tensor: + def remap_atype(ori_map: list[str], new_map: list[str]) -> torch.Tensor: """ This method is used to map the atype from the common type_map to the original type_map of indivial AtomicModels. It creates a index mapping for the conversion. Parameters ---------- - ori_map : List[str] + ori_map : list[str] The original type map of an AtomicModel. - new_map : List[str] + new_map : list[str] The common type map of the DPZBLLinearEnergyAtomicModel, created by the `get_type_map` method, must be a subset of the ori_map. @@ -335,7 +332,7 @@ def deserialize(cls, data: dict) -> "LinearEnergyAtomicModel": def _compute_weight( self, extended_coord, extended_atype, nlists_ - ) -> List[torch.Tensor]: + ) -> list[torch.Tensor]: """This should be a list of user defined weights that matches the number of models to be combined.""" nmodels = len(self.models) nframes, nloc, _ = nlists_[0].shape @@ -354,7 +351,7 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return max([model.get_dim_aparam() for model in self.models]) - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. Only atoms with selected atom types have atomic contribution @@ -383,7 +380,7 @@ def is_aparam_nall(self) -> bool: def compute_or_load_out_stat( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], stat_file_path: Optional[DPPath] = None, ): """ @@ -391,11 +388,11 @@ def compute_or_load_out_stat( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. stat_file_path : Optional[DPPath] @@ -456,7 +453,7 @@ def __init__( zbl_model: PairTabAtomicModel, sw_rmin: float, sw_rmax: float, - type_map: List[str], + type_map: list[str], smin_alpha: Optional[float] = 0.1, **kwargs, ): @@ -503,13 +500,13 @@ def _compute_weight( self, extended_coord: torch.Tensor, extended_atype: torch.Tensor, - nlists_: List[torch.Tensor], - ) -> List[torch.Tensor]: + nlists_: list[torch.Tensor], + ) -> list[torch.Tensor]: """ZBL weight. Returns ------- - List[torch.Tensor] + list[torch.Tensor] the atomic ZBL weight for interpolation. 
(nframes, nloc, 1) """ assert ( diff --git a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py index 7ef87524dd..2918bba947 100644 --- a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py +++ b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py @@ -2,8 +2,6 @@ import copy from typing import ( Callable, - Dict, - List, Optional, Union, ) @@ -55,7 +53,7 @@ class PairTabAtomicModel(BaseAtomicModel): The cutoff radius. sel : int or list[int] The maxmum number of atoms in the cut-off radius. - type_map : List[str] + type_map : list[str] Mapping atom type to the name (str) of the type. For example `type_map[1]` gives the name of the type 1. rcond : float, optional @@ -69,8 +67,8 @@ def __init__( self, tab_file: str, rcut: float, - sel: Union[int, List[int]], - type_map: List[str], + sel: Union[int, list[int]], + type_map: list[str], **kwargs, ): super().__init__(type_map, **kwargs) @@ -87,7 +85,7 @@ def __init__( ( tab_info, tab_data, - ) = self.tab.get() # this returns -> Tuple[np.array, np.array] + ) = self.tab.get() # this returns -> tuple[np.array, np.array] nspline, ntypes_tab = tab_info[-2:].astype(int) self.register_buffer("tab_info", torch.from_numpy(tab_info)) self.register_buffer( @@ -138,10 +136,10 @@ def get_out_bias(self) -> torch.Tensor: def get_rcut(self) -> float: return self.rcut - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: return self.type_map - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: return [self.sel] def get_nsel(self) -> int: @@ -169,7 +167,7 @@ def need_sorted_nlist_for_lower(self) -> bool: return False def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -218,7 +216,7 @@ def deserialize(cls, data) -> "PairTabAtomicModel": def compute_or_load_stat( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], stat_file_path: Optional[DPPath] = None, ): """ @@ -226,11 +224,11 @@ def compute_or_load_stat( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. 
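The pair-table model above keeps its spline data in `register_buffer` storage, so the non-trainable tables still follow the module across devices and into checkpoints. A minimal standalone sketch:

```python
import numpy as np
import torch


class TableHolder(torch.nn.Module):
    """Sketch: hold precomputed tables as non-trainable buffers."""

    def __init__(self, tab_info: np.ndarray, tab_data: np.ndarray):
        super().__init__()
        # Buffers appear in state_dict and move with .to(device), but get no gradients.
        self.register_buffer("tab_info", torch.from_numpy(tab_info))
        self.register_buffer("tab_data", torch.from_numpy(tab_data))


holder = TableHolder(np.zeros(4), np.zeros((2, 2, 8)))
print(list(holder.state_dict()))  # ['tab_info', 'tab_data']
```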
stat_file_path : Optional[DPPath] @@ -248,8 +246,8 @@ def forward_atomic( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, - ) -> Dict[str, torch.Tensor]: + comm_dict: Optional[dict[str, torch.Tensor]] = None, + ) -> dict[str, torch.Tensor]: nframes, nloc, nnei = nlist.shape extended_coord = extended_coord.view(nframes, -1, 3) if self.do_grad_r() or self.do_grad_c(): @@ -470,7 +468,7 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return 0 - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. Only atoms with selected atom types have atomic contribution diff --git a/deepmd/pt/model/atomic_model/polar_atomic_model.py b/deepmd/pt/model/atomic_model/polar_atomic_model.py index 81cf8a23b6..39cda2650d 100644 --- a/deepmd/pt/model/atomic_model/polar_atomic_model.py +++ b/deepmd/pt/model/atomic_model/polar_atomic_model.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Dict, -) import torch @@ -21,7 +18,7 @@ def __init__(self, descriptor, fitting, type_map, **kwargs): def apply_out_stat( self, - ret: Dict[str, torch.Tensor], + ret: dict[str, torch.Tensor], atype: torch.Tensor, ): """Apply the stat to each atomic output. diff --git a/deepmd/pt/model/atomic_model/property_atomic_model.py b/deepmd/pt/model/atomic_model/property_atomic_model.py index 1fb8a5957f..2fac90100f 100644 --- a/deepmd/pt/model/atomic_model/property_atomic_model.py +++ b/deepmd/pt/model/atomic_model/property_atomic_model.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Dict, -) import torch @@ -21,7 +18,7 @@ def __init__(self, descriptor, fitting, type_map, **kwargs): def apply_out_stat( self, - ret: Dict[str, torch.Tensor], + ret: dict[str, torch.Tensor], atype: torch.Tensor, ): """Apply the stat to each atomic output. diff --git a/deepmd/pt/model/descriptor/descriptor.py b/deepmd/pt/model/descriptor/descriptor.py index 16c3d96301..78a4608108 100644 --- a/deepmd/pt/model/descriptor/descriptor.py +++ b/deepmd/pt/model/descriptor/descriptor.py @@ -6,8 +6,6 @@ ) from typing import ( Callable, - Dict, - List, Optional, Union, ) @@ -71,7 +69,7 @@ def get_nsel(self) -> int: pass @abstractmethod - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" pass @@ -102,7 +100,7 @@ def get_env_protection(self) -> float: def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """ @@ -110,11 +108,11 @@ def compute_input_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. 
path : Optional[DPPath] @@ -123,7 +121,7 @@ def compute_input_stats( """ raise NotImplementedError - def get_stats(self) -> Dict[str, StatItem]: + def get_stats(self) -> dict[str, StatItem]: """Get the statistics of the descriptor.""" raise NotImplementedError @@ -203,7 +201,7 @@ def extend_descrpt_stat(des, type_map, des_with_stat=None): ---------- des : DescriptorBlock The descriptor block to be extended. - type_map : List[str] + type_map : list[str] The name of each type of atoms to be extended. des_with_stat : DescriptorBlock, Optional The descriptor block has additional statistics of types from newly provided `type_map`. diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index 14767cb100..617e8b49b6 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -1,10 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Callable, - Dict, - List, Optional, - Tuple, Union, ) @@ -157,7 +154,7 @@ class DescrptDPA1(BaseDescriptor, torch.nn.Module): (Only support False to keep consistent with other backend references.) (Not used in this version. True option is not implemented.) If mask the diagonal of attention weights - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. env_protection: float @@ -191,7 +188,7 @@ class DescrptDPA1(BaseDescriptor, torch.nn.Module): Whether to use electronic configuration type embedding. use_tebd_bias : bool, Optional Whether to use bias in the type embedding layer. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. spin (Only support None to keep consistent with other backend references.) 
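A minimal construction of the descriptor documented above, exercising the builtin-generic parameters; the parameter names come from the signature in this diff, the values are hypothetical:

```python
from deepmd.pt.model.descriptor.dpa1 import DescrptDPA1  # module path from this diff

descrpt = DescrptDPA1(
    rcut=6.0,
    rcut_smth=0.5,
    sel=120,                 # Union[list[int], int]
    ntypes=2,
    exclude_types=[(0, 1)],  # list[tuple[int, int]]: types 0 and 1 do not interact
    type_map=["O", "H"],     # one name per atom type
)
```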
@@ -215,7 +212,7 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: Union[List[int], int], + sel: Union[list[int], int], ntypes: int, neuron: list = [25, 50, 100], axis_neuron: int = 16, @@ -229,7 +226,7 @@ def __init__( activation_function: str = "tanh", precision: str = "float64", resnet_dt: bool = False, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, scaling_factor: int = 1.0, normalize=True, @@ -241,10 +238,10 @@ def __init__( smooth_type_embedding: bool = True, type_one_side: bool = False, stripped_type_embedding: Optional[bool] = None, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, use_econf_tebd: bool = False, use_tebd_bias: bool = False, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, # not implemented spin=None, type: Optional[str] = None, @@ -326,7 +323,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return self.se_atten.get_nsel() - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.se_atten.get_sel() @@ -334,7 +331,7 @@ def get_ntypes(self) -> int: """Returns the number of element types.""" return self.se_atten.get_ntypes() - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map @@ -405,7 +402,7 @@ def dim_emb(self): def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """ @@ -413,11 +410,11 @@ def compute_input_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. path : Optional[DPPath] @@ -435,12 +432,12 @@ def set_stat_mean_and_stddev( self.se_atten.mean = mean self.se_atten.stddev = stddev - def get_stat_mean_and_stddev(self) -> Tuple[torch.Tensor, torch.Tensor]: + def get_stat_mean_and_stddev(self) -> tuple[torch.Tensor, torch.Tensor]: """Get mean and stddev for descriptor.""" return self.se_atten.mean, self.se_atten.stddev def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -562,7 +559,7 @@ def forward( extended_atype: torch.Tensor, nlist: torch.Tensor, mapping: Optional[torch.Tensor] = None, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, + comm_dict: Optional[dict[str, torch.Tensor]] = None, ): """Compute the descriptor. 
@@ -617,9 +614,9 @@ def forward( def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index 9fc4fc4a21..f1ef200b09 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -1,10 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Callable, - Dict, - List, Optional, - Tuple, Union, ) @@ -87,14 +84,14 @@ def __init__( concat_output_tebd: bool = True, precision: str = "float64", smooth: bool = True, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, trainable: bool = True, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, add_tebd_to_repinit_out: bool = False, use_econf_tebd: bool = False, use_tebd_bias: bool = False, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, old_impl: bool = False, ): r"""The DPA-2 descriptor. see https://arxiv.org/abs/2312.15492. @@ -111,7 +108,7 @@ def __init__( The precision of the embedding net parameters. smooth : bool, optional Whether to use smoothness in processes such as attention weights calculation. - exclude_types : List[List[int]], optional + exclude_types : list[list[int]], optional The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. env_protection : float, optional @@ -127,7 +124,7 @@ def __init__( Whether to use electronic configuration type embedding. use_tebd_bias : bool, Optional Whether to use bias in the type embedding layer. - type_map : List[str], Optional + type_map : list[str], Optional A list of strings. Give the name to each type of atoms. Returns @@ -324,7 +321,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return sum(self.sel) - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.sel @@ -332,7 +329,7 @@ def get_ntypes(self) -> int: """Returns the number of element types.""" return self.ntypes - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map @@ -423,7 +420,7 @@ def share_params(self, base_class, shared_level, resume=False): raise NotImplementedError def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -488,7 +485,7 @@ def dim_emb(self): def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """ @@ -496,11 +493,11 @@ def compute_input_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. 
+ merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. path : Optional[DPPath] @@ -515,8 +512,8 @@ def compute_input_stats( def set_stat_mean_and_stddev( self, - mean: List[torch.Tensor], - stddev: List[torch.Tensor], + mean: list[torch.Tensor], + stddev: list[torch.Tensor], ) -> None: """Update mean and stddev for descriptor.""" descrpt_list = [self.repinit, self.repformers] @@ -526,7 +523,7 @@ def set_stat_mean_and_stddev( descrpt.mean = mean[ii] descrpt.stddev = stddev[ii] - def get_stat_mean_and_stddev(self) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: + def get_stat_mean_and_stddev(self) -> tuple[list[torch.Tensor], list[torch.Tensor]]: """Get mean and stddev for descriptor.""" mean_list = [self.repinit.mean, self.repformers.mean] stddev_list = [ @@ -711,7 +708,7 @@ def forward( extended_atype: torch.Tensor, nlist: torch.Tensor, mapping: Optional[torch.Tensor] = None, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, + comm_dict: Optional[dict[str, torch.Tensor]] = None, ): """Compute the descriptor. @@ -816,9 +813,9 @@ def forward( def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters diff --git a/deepmd/pt/model/descriptor/gaussian_lcc.py b/deepmd/pt/model/descriptor/gaussian_lcc.py index 2ae14bd432..8ac52215c0 100644 --- a/deepmd/pt/model/descriptor/gaussian_lcc.py +++ b/deepmd/pt/model/descriptor/gaussian_lcc.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, ) @@ -162,7 +161,7 @@ def dim_emb(self): """Returns the output dimension of pair representation.""" return self.pair_embed_dim - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats(self, merged: list[dict], path: Optional[DPPath] = None): """Update mean and stddev for descriptor elements.""" pass diff --git a/deepmd/pt/model/descriptor/hybrid.py b/deepmd/pt/model/descriptor/hybrid.py index 7156396c48..c8730e3465 100644 --- a/deepmd/pt/model/descriptor/hybrid.py +++ b/deepmd/pt/model/descriptor/hybrid.py @@ -2,10 +2,7 @@ import math from typing import ( Any, - Dict, - List, Optional, - Tuple, Union, ) @@ -38,16 +35,16 @@ class DescrptHybrid(BaseDescriptor, torch.nn.Module): Parameters ---------- - list : list : List[Union[BaseDescriptor, Dict[str, Any]]] + list : list : list[Union[BaseDescriptor, dict[str, Any]]] Build a descriptor from the concatenation of the list of descriptors. The descriptor can be either an object or a dictionary. """ - nlist_cut_idx: List[torch.Tensor] + nlist_cut_idx: list[torch.Tensor] def __init__( self, - list: List[Union[BaseDescriptor, Dict[str, Any]]], + list: list[Union[BaseDescriptor, dict[str, Any]]], **kwargs, ) -> None: super().__init__() @@ -57,7 +54,7 @@ def __init__( raise RuntimeError( "cannot build descriptor from an empty list of descriptors." 
) - formatted_descript_list: List[BaseDescriptor] = [] + formatted_descript_list: list[BaseDescriptor] = [] for ii in descrpt_list: if isinstance(ii, BaseDescriptor): formatted_descript_list.append(ii) @@ -75,7 +72,7 @@ def __init__( self.descrpt_list[ii].get_ntypes() == self.descrpt_list[0].get_ntypes() ), f"number of atom types in {ii}th descrptor does not match others" # if hybrid sel is larger than sub sel, the nlist needs to be cut for each type - self.nlist_cut_idx: List[torch.Tensor] = [] + self.nlist_cut_idx: list[torch.Tensor] = [] if self.mixed_types() and not all( descrpt.mixed_types() for descrpt in self.descrpt_list ): @@ -114,7 +111,7 @@ def get_rcut_smth(self) -> float: # Note: Using the minimum rcut_smth might not be appropriate in all scenarios. Consider using a different approach or provide detailed documentation on why the minimum value is chosen. return min([descrpt.get_rcut_smth() for descrpt in self.descrpt_list]) - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" if self.mixed_types(): return [ @@ -131,7 +128,7 @@ def get_ntypes(self) -> int: """Returns the number of element types.""" return self.descrpt_list[0].get_ntypes() - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.descrpt_list[0].get_type_map() @@ -185,7 +182,7 @@ def share_params(self, base_class, shared_level, resume=False): raise NotImplementedError def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -198,15 +195,15 @@ def change_type_map( else None, ) - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats(self, merged: list[dict], path: Optional[DPPath] = None): """Update mean and stddev for descriptor elements.""" for descrpt in self.descrpt_list: descrpt.compute_input_stats(merged, path) def set_stat_mean_and_stddev( self, - mean: List[Union[torch.Tensor, List[torch.Tensor]]], - stddev: List[Union[torch.Tensor, List[torch.Tensor]]], + mean: list[Union[torch.Tensor, list[torch.Tensor]]], + stddev: list[Union[torch.Tensor, list[torch.Tensor]]], ) -> None: """Update mean and stddev for descriptor.""" for ii, descrpt in enumerate(self.descrpt_list): @@ -214,9 +211,9 @@ def set_stat_mean_and_stddev( def get_stat_mean_and_stddev( self, - ) -> Tuple[ - List[Union[torch.Tensor, List[torch.Tensor]]], - List[Union[torch.Tensor, List[torch.Tensor]]], + ) -> tuple[ + list[Union[torch.Tensor, list[torch.Tensor]]], + list[Union[torch.Tensor, list[torch.Tensor]]], ]: """Get mean and stddev for descriptor.""" mean_list = [] @@ -233,7 +230,7 @@ def forward( atype_ext: torch.Tensor, nlist: torch.Tensor, mapping: Optional[torch.Tensor] = None, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, + comm_dict: Optional[dict[str, torch.Tensor]] = None, ): """Compute the descriptor. @@ -303,9 +300,9 @@ def forward( def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. 
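As the `list` parameter above documents, a hybrid descriptor may mix already-constructed descriptors with plain config dicts. A hedged sketch; the dict keys follow common deepmd input conventions and are assumptions, not taken from this patch:

```python
from deepmd.pt.model.descriptor.hybrid import DescrptHybrid  # module path from this diff

# Hypothetical two-member hybrid built entirely from config dicts.
hybrid = DescrptHybrid(
    list=[
        {"type": "se_e2_a", "rcut": 6.0, "rcut_smth": 0.5, "sel": [46, 92]},
        {"type": "se_e2_a", "rcut": 4.0, "rcut_smth": 0.5, "sel": [20, 40]},
    ]
)
```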
Parameters diff --git a/deepmd/pt/model/descriptor/repformer_layer.py b/deepmd/pt/model/descriptor/repformer_layer.py index 579dc0c81e..92e2404469 100644 --- a/deepmd/pt/model/descriptor/repformer_layer.py +++ b/deepmd/pt/model/descriptor/repformer_layer.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, Union, ) @@ -44,7 +43,7 @@ def get_residual( _mode: str = "norm", trainable: bool = True, precision: str = "float64", - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ) -> torch.Tensor: r""" Get residual tensor for one update vector. @@ -160,7 +159,7 @@ def __init__( smooth: bool = True, attnw_shift: float = 20.0, precision: str = "float64", - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): """Return neighbor-wise multi-head self-attention maps, with gate mechanism.""" super().__init__() @@ -285,7 +284,7 @@ def __init__( input_dim: int, head_num: int, precision: str = "float64", - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): super().__init__() self.input_dim = input_dim @@ -370,7 +369,7 @@ def __init__( input_dim: int, head_num: int, precision: str = "float64", - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): super().__init__() self.input_dim = input_dim @@ -443,7 +442,7 @@ def __init__( smooth: bool = True, attnw_shift: float = 20.0, precision: str = "float64", - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): super().__init__() self.input_dim = input_dim @@ -602,7 +601,7 @@ def __init__( use_sqrt_nnei: bool = True, g1_out_conv: bool = True, g1_out_mlp: bool = True, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): super().__init__() self.epsilon = 1e-4 # protection of 1./nnei @@ -1132,10 +1131,10 @@ def forward( assert (nb, nloc) == g1.shape[:2] assert (nb, nloc, nnei) == h2.shape[:3] - g2_update: List[torch.Tensor] = [g2] - h2_update: List[torch.Tensor] = [h2] - g1_update: List[torch.Tensor] = [g1] - g1_mlp: List[torch.Tensor] = [g1] if not self.g1_out_mlp else [] + g2_update: list[torch.Tensor] = [g2] + h2_update: list[torch.Tensor] = [h2] + g1_update: list[torch.Tensor] = [g1] + g1_mlp: list[torch.Tensor] = [g1] if not self.g1_out_mlp else [] if self.g1_out_mlp: assert self.g1_self_mlp is not None g1_self_mlp = self.act(self.g1_self_mlp(g1)) @@ -1236,7 +1235,7 @@ def forward( @torch.jit.export def list_update_res_avg( self, - update_list: List[torch.Tensor], + update_list: list[torch.Tensor], ) -> torch.Tensor: nitem = len(update_list) uu = update_list[0] @@ -1245,7 +1244,7 @@ def list_update_res_avg( return uu / (float(nitem) ** 0.5) @torch.jit.export - def list_update_res_incr(self, update_list: List[torch.Tensor]) -> torch.Tensor: + def list_update_res_incr(self, update_list: list[torch.Tensor]) -> torch.Tensor: nitem = len(update_list) uu = update_list[0] scale = 1.0 / (float(nitem - 1) ** 0.5) if nitem > 1 else 0.0 @@ -1255,7 +1254,7 @@ def list_update_res_incr(self, update_list: List[torch.Tensor]) -> torch.Tensor: @torch.jit.export def list_update_res_residual( - self, update_list: List[torch.Tensor], update_name: str = "g1" + self, update_list: list[torch.Tensor], update_name: str = "g1" ) -> torch.Tensor: nitem = len(update_list) uu = update_list[0] @@ -1275,7 +1274,7 @@ def list_update_res_residual( @torch.jit.export def list_update( 
- self, update_list: List[torch.Tensor], update_name: str = "g1" + self, update_list: list[torch.Tensor], update_name: str = "g1" ) -> torch.Tensor: if self.update_style == "res_avg": return self.list_update_res_avg(update_list) diff --git a/deepmd/pt/model/descriptor/repformer_layer_old_impl.py b/deepmd/pt/model/descriptor/repformer_layer_old_impl.py index 81ee35c9ab..47b20f7b03 100644 --- a/deepmd/pt/model/descriptor/repformer_layer_old_impl.py +++ b/deepmd/pt/model/descriptor/repformer_layer_old_impl.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Callable, - List, ) import torch @@ -634,10 +633,10 @@ def forward( if self.update_h2: h2 = _apply_h_norm(h2) - g2_update: List[torch.Tensor] = [g2] - h2_update: List[torch.Tensor] = [h2] - g1_update: List[torch.Tensor] = [g1] - g1_mlp: List[torch.Tensor] = [g1] + g2_update: list[torch.Tensor] = [g2] + h2_update: list[torch.Tensor] = [h2] + g1_update: list[torch.Tensor] = [g1] + g1_mlp: list[torch.Tensor] = [g1] if cal_gg1: gg1 = _make_nei_g1(g1_ext, nlist) @@ -704,7 +703,7 @@ def forward( @torch.jit.export def list_update_res_avg( self, - update_list: List[torch.Tensor], + update_list: list[torch.Tensor], ) -> torch.Tensor: nitem = len(update_list) uu = update_list[0] @@ -713,7 +712,7 @@ def list_update_res_avg( return uu / (float(nitem) ** 0.5) @torch.jit.export - def list_update_res_incr(self, update_list: List[torch.Tensor]) -> torch.Tensor: + def list_update_res_incr(self, update_list: list[torch.Tensor]) -> torch.Tensor: nitem = len(update_list) uu = update_list[0] scale = 1.0 / (float(nitem - 1) ** 0.5) if nitem > 1 else 0.0 @@ -722,7 +721,7 @@ def list_update_res_incr(self, update_list: List[torch.Tensor]) -> torch.Tensor: return uu @torch.jit.export - def list_update(self, update_list: List[torch.Tensor]) -> torch.Tensor: + def list_update(self, update_list: list[torch.Tensor]) -> torch.Tensor: if self.update_style == "res_avg": return self.list_update_res_avg(update_list) elif self.update_style == "res_incr": diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py index a9e4ef7893..406758faa6 100644 --- a/deepmd/pt/model/descriptor/repformers.py +++ b/deepmd/pt/model/descriptor/repformers.py @@ -1,10 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Callable, - Dict, - List, Optional, - Tuple, Union, ) @@ -99,12 +96,12 @@ def __init__( update_residual_init: str = "norm", set_davg_zero: bool = True, smooth: bool = True, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, precision: str = "float64", trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, use_sqrt_nnei: bool = True, g1_out_conv: bool = True, g1_out_mlp: bool = True, @@ -177,7 +174,7 @@ def __init__( The precision of the embedding net parameters. smooth : bool, optional Whether to use smoothness in processes such as attention weights calculation. - exclude_types : List[List[int]], optional + exclude_types : list[list[int]], optional The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. 
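The `exclude_types` pairs documented above are naturally symmetric; a tiny sketch of the bookkeeping (hypothetical helper, not the PairExcludeMask implementation):

```python
exclude_types: list[tuple[int, int]] = [(0, 1)]
# Treat exclusions as unordered pairs: (0, 1) also rules out (1, 0).
excluded = {frozenset(pair) for pair in exclude_types}


def interacts(ti: int, tj: int) -> bool:
    return frozenset((ti, tj)) not in excluded


print(interacts(1, 0))  # False: excluded by (0, 1)
print(interacts(0, 0))  # True
```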
env_protection : float, optional @@ -339,7 +336,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return sum(self.sel) - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.sel @@ -408,7 +405,7 @@ def dim_emb(self): def reinit_exclude( self, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): self.exclude_types = exclude_types self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) @@ -420,7 +417,7 @@ def forward( extended_atype: torch.Tensor, extended_atype_embd: Optional[torch.Tensor] = None, mapping: Optional[torch.Tensor] = None, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, + comm_dict: Optional[dict[str, torch.Tensor]] = None, ): if comm_dict is None: assert mapping is not None @@ -530,7 +527,7 @@ def forward( def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """ @@ -538,11 +535,11 @@ def compute_input_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. 
path : Optional[DPPath] @@ -567,7 +564,7 @@ def compute_input_stats( self.mean.copy_(torch.tensor(mean, device=env.DEVICE)) # pylint: disable=no-explicit-dtype self.stddev.copy_(torch.tensor(stddev, device=env.DEVICE)) # pylint: disable=no-explicit-dtype - def get_stats(self) -> Dict[str, StatItem]: + def get_stats(self) -> dict[str, StatItem]: """Get the statistics of the descriptor.""" if self.stats is None: raise RuntimeError( diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py index 44564a6fd3..1b51acfa21 100644 --- a/deepmd/pt/model/descriptor/se_a.py +++ b/deepmd/pt/model/descriptor/se_a.py @@ -3,10 +3,7 @@ from typing import ( Callable, ClassVar, - Dict, - List, Optional, - Tuple, Union, ) @@ -84,14 +81,14 @@ def __init__( activation_function: str = "tanh", precision: str = "float64", resnet_dt: bool = False, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, old_impl: bool = False, type_one_side: bool = True, trainable: bool = True, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ntypes: Optional[int] = None, # to be compat with input - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, # not implemented spin=None, ): @@ -130,7 +127,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return self.sea.get_nsel() - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.sea.get_sel() @@ -138,7 +135,7 @@ def get_ntypes(self) -> int: """Returns the number of element types.""" return self.sea.get_ntypes() - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map @@ -192,7 +189,7 @@ def dim_out(self): return self.sea.dim_out def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -205,7 +202,7 @@ def change_type_map( def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """ @@ -213,11 +210,11 @@ def compute_input_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. 
path : Optional[DPPath] @@ -228,7 +225,7 @@ def compute_input_stats( def reinit_exclude( self, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): """Update the type exclusions.""" self.sea.reinit_exclude(exclude_types) @@ -239,7 +236,7 @@ def forward( atype_ext: torch.Tensor, nlist: torch.Tensor, mapping: Optional[torch.Tensor] = None, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, + comm_dict: Optional[dict[str, torch.Tensor]] = None, ): """Compute the descriptor. @@ -284,7 +281,7 @@ def set_stat_mean_and_stddev( self.sea.mean = mean self.sea.stddev = stddev - def get_stat_mean_and_stddev(self) -> Tuple[torch.Tensor, torch.Tensor]: + def get_stat_mean_and_stddev(self) -> tuple[torch.Tensor, torch.Tensor]: """Get mean and stddev for descriptor.""" return self.sea.mean, self.sea.stddev @@ -342,9 +339,9 @@ def t_cvt(xx): def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters @@ -386,12 +383,12 @@ def __init__( activation_function: str = "tanh", precision: str = "float64", resnet_dt: bool = False, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, old_impl: bool = False, type_one_side: bool = True, trainable: bool = True, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, **kwargs, ): """Construct an embedding net of type `se_a`. @@ -484,7 +481,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return sum(self.sel) - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.sel @@ -548,7 +545,7 @@ def __getitem__(self, key): def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """ @@ -556,11 +553,11 @@ def compute_input_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. 
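`get_stat_mean_and_stddev` and `set_stat_mean_and_stddev` above form a matched pair, so transplanting environment-matrix statistics between two compatible descriptors is a two-liner (a sketch; `src` and `dst` are assumed to be compatible `DescrptSeA` instances):

```python
# Hypothetical transfer of precomputed statistics between compatible descriptors.
mean, stddev = src.get_stat_mean_and_stddev()  # tuple[torch.Tensor, torch.Tensor]
dst.set_stat_mean_and_stddev(mean, stddev)
```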
path : Optional[DPPath] @@ -585,7 +582,7 @@ def compute_input_stats( self.mean.copy_(torch.tensor(mean, device=env.DEVICE)) # pylint: disable=no-explicit-dtype self.stddev.copy_(torch.tensor(stddev, device=env.DEVICE)) # pylint: disable=no-explicit-dtype - def get_stats(self) -> Dict[str, StatItem]: + def get_stats(self) -> dict[str, StatItem]: """Get the statistics of the descriptor.""" if self.stats is None: raise RuntimeError( @@ -595,7 +592,7 @@ def get_stats(self) -> Dict[str, StatItem]: def reinit_exclude( self, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): self.exclude_types = exclude_types self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py index 92d6e223e4..c760f7330b 100644 --- a/deepmd/pt/model/descriptor/se_atten.py +++ b/deepmd/pt/model/descriptor/se_atten.py @@ -1,10 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Callable, - Dict, - List, Optional, - Tuple, Union, ) @@ -63,7 +60,7 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: Union[List[int], int], + sel: Union[list[int], int], ntypes: int, neuron: list = [25, 50, 100], axis_neuron: int = 16, @@ -82,11 +79,11 @@ def __init__( temperature=None, smooth: bool = True, type_one_side: bool = False, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, type: Optional[str] = None, old_impl: bool = False, ): @@ -134,7 +131,7 @@ def __init__( (Only support False to keep consistent with other backend references.) (Not used in this version.) If mask the diagonal of attention weights - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. env_protection : float @@ -304,7 +301,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return sum(self.sel) - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.sel @@ -373,7 +370,7 @@ def dim_emb(self): def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """ @@ -381,11 +378,11 @@ def compute_input_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. 
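As the docstring above explains, `merged` accepts either the sampled data directly or a zero-argument callable, so that the expensive sampling runs only when the statistics are actually computed. A toy sketch of that contract (names here are illustrative, not the deepmd implementation):

```python
from typing import Callable, Union

import torch

def compute_input_stats(merged: Union[Callable[[], list[dict]], list[dict]]) -> torch.Tensor:
    # Resolve the lazy form only at the point where statistics are needed.
    samples = merged() if callable(merged) else merged
    return torch.mean(torch.stack([frame["coord"] for frame in samples]))

data = [{"coord": torch.ones(3)}, {"coord": torch.zeros(3)}]
print(compute_input_stats(data))          # eager: a list of dicts
print(compute_input_stats(lambda: data))  # lazy: sampled once, on demand
```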
path : Optional[DPPath] @@ -410,7 +407,7 @@ def compute_input_stats( self.mean.copy_(torch.tensor(mean, device=env.DEVICE)) # pylint: disable=no-explicit-dtype self.stddev.copy_(torch.tensor(stddev, device=env.DEVICE)) # pylint: disable=no-explicit-dtype - def get_stats(self) -> Dict[str, StatItem]: + def get_stats(self) -> dict[str, StatItem]: """Get the statistics of the descriptor.""" if self.stats is None: raise RuntimeError( @@ -420,7 +417,7 @@ def get_stats(self) -> Dict[str, StatItem]: def reinit_exclude( self, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): self.exclude_types = exclude_types self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) @@ -612,7 +609,7 @@ def __init__( ln_eps: float = 1e-5, smooth: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): """Construct a neighbor-wise attention net.""" super().__init__() @@ -755,7 +752,7 @@ def __init__( trainable_ln: bool = True, ln_eps: float = 1e-5, precision: str = DEFAULT_PRECISION, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): """Construct a neighbor-wise attention layer.""" super().__init__() @@ -862,7 +859,7 @@ def __init__( bias: bool = True, smooth: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): """Construct a multi-head neighbor-wise attention net.""" super().__init__() diff --git a/deepmd/pt/model/descriptor/se_atten_v2.py b/deepmd/pt/model/descriptor/se_atten_v2.py index 41e37eb03c..f73ff255e6 100644 --- a/deepmd/pt/model/descriptor/se_atten_v2.py +++ b/deepmd/pt/model/descriptor/se_atten_v2.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, - Tuple, Union, ) @@ -42,7 +40,7 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: Union[List[int], int], + sel: Union[list[int], int], ntypes: int, neuron: list = [25, 50, 100], axis_neuron: int = 16, @@ -55,7 +53,7 @@ def __init__( activation_function: str = "tanh", precision: str = "float64", resnet_dt: bool = False, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, scaling_factor: int = 1.0, normalize=True, @@ -66,10 +64,10 @@ def __init__( ln_eps: Optional[float] = 1e-5, type_one_side: bool = False, stripped_type_embedding: Optional[bool] = None, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, use_econf_tebd: bool = False, use_tebd_bias: bool = False, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, # not implemented spin=None, type: Optional[str] = None, @@ -113,7 +111,7 @@ def __init__( resnet_dt : bool Time-step `dt` in the resnet construction: y = x + dt * \phi (Wx + b) - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. env_protection : float @@ -149,7 +147,7 @@ def __init__( Whether to use electronic configuration type embedding. use_tebd_bias : bool, Optional Whether to use bias in the type embedding layer. - type_map : List[str], Optional + type_map : list[str], Optional A list of strings. Give the name to each type of atoms. 
spin (Only support None to keep consistent with other backend references.) diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py index da8d422444..b873ee20b8 100644 --- a/deepmd/pt/model/descriptor/se_r.py +++ b/deepmd/pt/model/descriptor/se_r.py @@ -1,10 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Callable, - Dict, - List, Optional, - Tuple, Union, ) @@ -69,12 +66,12 @@ def __init__( activation_function: str = "tanh", precision: str = "float64", resnet_dt: bool = False, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, old_impl: bool = False, trainable: bool = True, - seed: Optional[Union[int, List[int]]] = None, - type_map: Optional[List[str]] = None, + seed: Optional[Union[int, list[int]]] = None, + type_map: Optional[list[str]] = None, **kwargs, ): super().__init__() @@ -143,7 +140,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return sum(self.sel) - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.sel @@ -151,7 +148,7 @@ def get_ntypes(self) -> int: """Returns the number of element types.""" return self.ntypes - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map @@ -225,7 +222,7 @@ def share_params(self, base_class, shared_level, resume=False): raise NotImplementedError def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -238,7 +235,7 @@ def change_type_map( def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """ @@ -246,11 +243,11 @@ def compute_input_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. 
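`exclude_types` is now consistently annotated `list[tuple[int, int]]`: each pair switches off the interaction between two atom types, symmetrically. A minimal stand-in for the pair mask (not the actual `PairExcludeMask`):

```python
def pair_allowed(ti: int, tj: int, exclude_types: list[tuple[int, int]]) -> bool:
    # A pair is excluded regardless of the order in which the types appear.
    excluded = {frozenset(pair) for pair in exclude_types}
    return frozenset((ti, tj)) not in excluded

print(pair_allowed(0, 1, [(0, 1)]))  # False: type 0 and type 1 do not interact
print(pair_allowed(1, 1, [(0, 1)]))  # True
```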
path : Optional[DPPath] @@ -275,7 +272,7 @@ def compute_input_stats( self.mean.copy_(torch.tensor(mean, device=env.DEVICE)) # pylint: disable=no-explicit-dtype self.stddev.copy_(torch.tensor(stddev, device=env.DEVICE)) # pylint: disable=no-explicit-dtype - def get_stats(self) -> Dict[str, StatItem]: + def get_stats(self) -> dict[str, StatItem]: """Get the statistics of the descriptor.""" if self.stats is None: raise RuntimeError( @@ -301,7 +298,7 @@ def __getitem__(self, key): def reinit_exclude( self, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): self.exclude_types = exclude_types self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) @@ -312,7 +309,7 @@ def forward( atype_ext: torch.Tensor, nlist: torch.Tensor, mapping: Optional[torch.Tensor] = None, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, + comm_dict: Optional[dict[str, torch.Tensor]] = None, ): """Compute the descriptor. @@ -404,7 +401,7 @@ def set_stat_mean_and_stddev( self.mean = mean self.stddev = stddev - def get_stat_mean_and_stddev(self) -> Tuple[torch.Tensor, torch.Tensor]: + def get_stat_mean_and_stddev(self) -> tuple[torch.Tensor, torch.Tensor]: """Get mean and stddev for descriptor.""" return self.mean, self.stddev @@ -458,9 +455,9 @@ def t_cvt(xx): def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters diff --git a/deepmd/pt/model/descriptor/se_t.py b/deepmd/pt/model/descriptor/se_t.py index 5e7e507fbf..072457b48f 100644 --- a/deepmd/pt/model/descriptor/se_t.py +++ b/deepmd/pt/model/descriptor/se_t.py @@ -3,10 +3,7 @@ from typing import ( Callable, ClassVar, - Dict, - List, Optional, - Tuple, Union, ) @@ -95,7 +92,7 @@ class DescrptSeT(BaseDescriptor, torch.nn.Module): The activation function in the embedding net. Supported options are |ACTIVATION_FN| env_protection : float Protection parameter to prevent division by zero errors during environment matrix calculations. - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. precision : str @@ -104,7 +101,7 @@ class DescrptSeT(BaseDescriptor, torch.nn.Module): If the weights of embedding net are trainable. seed : int, Optional Random seed for initializing the network parameters. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. 
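`type_map`, returned by `get_type_map` and remapped by `change_type_map`, gives a human-readable element name to each integer atom type. For illustration only:

```python
type_map = ["O", "H"]   # type 0 is oxygen, type 1 is hydrogen
atype = [0, 1, 1]       # atom types of one water molecule
print([type_map[t] for t in atype])  # ['O', 'H', 'H']
```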
""" @@ -112,17 +109,17 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[int], - neuron: List[int] = [24, 48, 96], + sel: list[int], + neuron: list[int] = [24, 48, 96], resnet_dt: bool = False, set_davg_zero: bool = False, activation_function: str = "tanh", env_protection: float = 0.0, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], precision: str = "float64", trainable: bool = True, - seed: Optional[Union[int, List[int]]] = None, - type_map: Optional[List[str]] = None, + seed: Optional[Union[int, list[int]]] = None, + type_map: Optional[list[str]] = None, ntypes: Optional[int] = None, # to be compat with input # not implemented spin=None, @@ -159,7 +156,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return self.seat.get_nsel() - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.seat.get_sel() @@ -167,7 +164,7 @@ def get_ntypes(self) -> int: """Returns the number of element types.""" return self.seat.get_ntypes() - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map @@ -221,7 +218,7 @@ def dim_out(self): return self.seat.dim_out def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -234,7 +231,7 @@ def change_type_map( def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """ @@ -242,11 +239,11 @@ def compute_input_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. path : Optional[DPPath] @@ -257,7 +254,7 @@ def compute_input_stats( def reinit_exclude( self, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): """Update the type exclusions.""" self.seat.reinit_exclude(exclude_types) @@ -268,7 +265,7 @@ def forward( atype_ext: torch.Tensor, nlist: torch.Tensor, mapping: Optional[torch.Tensor] = None, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, + comm_dict: Optional[dict[str, torch.Tensor]] = None, ): """Compute the descriptor. 
@@ -314,7 +311,7 @@ def set_stat_mean_and_stddev( self.seat.mean = mean self.seat.stddev = stddev - def get_stat_mean_and_stddev(self) -> Tuple[torch.Tensor, torch.Tensor]: + def get_stat_mean_and_stddev(self) -> tuple[torch.Tensor, torch.Tensor]: """Get mean and stddev for descriptor.""" return self.seat.mean, self.seat.stddev @@ -367,9 +364,9 @@ def t_cvt(xx): def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters @@ -404,16 +401,16 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[int], - neuron: List[int] = [24, 48, 96], + sel: list[int], + neuron: list[int] = [24, 48, 96], resnet_dt: bool = False, set_davg_zero: bool = False, activation_function: str = "tanh", env_protection: float = 0.0, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], precision: str = "float64", trainable: bool = True, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): r"""Construct an embedding net of type `se_e3`. @@ -438,7 +435,7 @@ def __init__( The activation function in the embedding net. Supported options are |ACTIVATION_FN| env_protection : float Protection parameter to prevent division by zero errors during environment matrix calculations. - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. precision : str @@ -511,7 +508,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return sum(self.sel) - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.sel @@ -575,7 +572,7 @@ def __getitem__(self, key): def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """ @@ -583,11 +580,11 @@ def compute_input_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. 
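`sel` lists the maximum number of neighbours kept per atom type, and `get_nsel` is simply their sum, matching the `sum(self.sel)` seen in the hunks above:

```python
sel = [46, 92]   # at most 46 type-0 and 92 type-1 neighbours per atom
nsel = sum(sel)  # total neighbour slots, what get_nsel() reports
print(nsel)      # 138
```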
path : Optional[DPPath]
@@ -612,7 +609,7 @@ def compute_input_stats(
         self.mean.copy_(torch.tensor(mean, device=env.DEVICE))  # pylint: disable=no-explicit-dtype
         self.stddev.copy_(torch.tensor(stddev, device=env.DEVICE))  # pylint: disable=no-explicit-dtype
 
-    def get_stats(self) -> Dict[str, StatItem]:
+    def get_stats(self) -> dict[str, StatItem]:
         """Get the statistics of the descriptor."""
         if self.stats is None:
             raise RuntimeError(
@@ -622,7 +619,7 @@ def get_stats(self) -> Dict[str, StatItem]:
 
     def reinit_exclude(
         self,
-        exclude_types: List[Tuple[int, int]] = [],
+        exclude_types: list[tuple[int, int]] = [],
     ):
         self.exclude_types = exclude_types
         self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types)
diff --git a/deepmd/pt/model/descriptor/se_t_tebd.py b/deepmd/pt/model/descriptor/se_t_tebd.py
index 774a9154de..437a464709 100644
--- a/deepmd/pt/model/descriptor/se_t_tebd.py
+++ b/deepmd/pt/model/descriptor/se_t_tebd.py
@@ -1,10 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 from typing import (
     Callable,
-    Dict,
-    List,
     Optional,
-    Tuple,
     Union,
 )
 
@@ -79,7 +76,7 @@ class DescrptSeTTebd(BaseDescriptor, torch.nn.Module):
         The cut-off radius
     rcut_smth
         From where the environment matrix should be smoothed
-    sel : Union[List[int], int]
+    sel : Union[list[int], int]
         list[int]: sel[i] specifies the maximum number of type i atoms in the cut-off radius
         int: the total maximum number of atoms in the cut-off radius
     ntypes : int
@@ -101,7 +98,7 @@ class DescrptSeTTebd(BaseDescriptor, torch.nn.Module):
         The activation function in the embedding net. Supported options are |ACTIVATION_FN|
     env_protection: float
         Protection parameter to prevent division by zero errors during environment matrix calculations.
-    exclude_types : List[Tuple[int, int]]
+    exclude_types : list[tuple[int, int]]
         The excluded pairs of types which have no interaction with each other.
         For example, `[[0, 1]]` means no interaction between type 0 and type 1.
     precision
         The precision of the embedding net parameters. Supported options are |PRECISION|
     trainable
         If the weights of embedding net are trainable.
     seed
         Random seed for initializing the network parameters.
-    type_map: List[str], Optional
+    type_map: list[str], Optional
         A list of strings. Give the name to each type of atoms.
     concat_output_tebd: bool
         Whether to concat type embedding at the output of the descriptor.
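`concat_output_tebd` appends the type embedding to the descriptor output along the feature axis; roughly (shapes are illustrative):

```python
import torch

nf, nloc, d_desc, d_tebd = 2, 5, 32, 8
desc = torch.randn(nf, nloc, d_desc)   # per-atom descriptor
tebd = torch.randn(nf, nloc, d_tebd)   # per-atom type embedding
out = torch.cat([desc, tebd], dim=-1)  # output width becomes d_desc + d_tebd
print(out.shape)                       # torch.Size([2, 5, 40])
```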
@@ -127,7 +124,7 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: Union[List[int], int], + sel: Union[list[int], int], ntypes: int, neuron: list = [2, 4, 8], tebd_dim: int = 8, @@ -136,11 +133,11 @@ def __init__( set_davg_zero: bool = True, activation_function: str = "tanh", env_protection: float = 0.0, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], precision: str = "float64", trainable: bool = True, - seed: Optional[Union[int, List[int]]] = None, - type_map: Optional[List[str]] = None, + seed: Optional[Union[int, list[int]]] = None, + type_map: Optional[list[str]] = None, concat_output_tebd: bool = True, use_econf_tebd: bool = False, use_tebd_bias=False, @@ -195,7 +192,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return self.se_ttebd.get_nsel() - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.se_ttebd.get_sel() @@ -203,7 +200,7 @@ def get_ntypes(self) -> int: """Returns the number of element types.""" return self.se_ttebd.get_ntypes() - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map @@ -274,7 +271,7 @@ def dim_emb(self): def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """ @@ -282,11 +279,11 @@ def compute_input_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. path : Optional[DPPath] @@ -304,12 +301,12 @@ def set_stat_mean_and_stddev( self.se_ttebd.mean = mean self.se_ttebd.stddev = stddev - def get_stat_mean_and_stddev(self) -> Tuple[torch.Tensor, torch.Tensor]: + def get_stat_mean_and_stddev(self) -> tuple[torch.Tensor, torch.Tensor]: """Get mean and stddev for descriptor.""" return self.se_ttebd.mean, self.se_ttebd.stddev def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -410,7 +407,7 @@ def forward( extended_atype: torch.Tensor, nlist: torch.Tensor, mapping: Optional[torch.Tensor] = None, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, + comm_dict: Optional[dict[str, torch.Tensor]] = None, ): """Compute the descriptor. 
@@ -465,9 +462,9 @@ def forward( def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters @@ -500,7 +497,7 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: Union[List[int], int], + sel: Union[list[int], int], ntypes: int, neuron: list = [25, 50, 100], tebd_dim: int = 8, @@ -509,10 +506,10 @@ def __init__( activation_function="tanh", precision: str = "float64", resnet_dt: bool = False, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, smooth: bool = True, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): super().__init__() self.rcut = rcut @@ -598,7 +595,7 @@ def get_nsel(self) -> int: """Returns the number of selected atoms in the cut-off radius.""" return sum(self.sel) - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.sel @@ -667,7 +664,7 @@ def dim_emb(self): def compute_input_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], path: Optional[DPPath] = None, ): """ @@ -675,11 +672,11 @@ def compute_input_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. 
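`env_protection`, which appears in every descriptor signature above, guards the environment-matrix division against zero interatomic distances. Reduced to one line, the idea is a protected reciprocal (a sketch, not the deepmd kernel):

```python
import torch

diff = torch.zeros(1, 3)           # coincident atoms: r == 0
r = torch.linalg.norm(diff, dim=-1)
env_protection = 1e-6
print(1.0 / (r + env_protection))  # finite instead of inf
```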
path : Optional[DPPath] @@ -704,7 +701,7 @@ def compute_input_stats( self.mean.copy_(torch.tensor(mean, device=env.DEVICE)) # pylint: disable=no-explicit-dtype self.stddev.copy_(torch.tensor(stddev, device=env.DEVICE)) # pylint: disable=no-explicit-dtype - def get_stats(self) -> Dict[str, StatItem]: + def get_stats(self) -> dict[str, StatItem]: """Get the statistics of the descriptor.""" if self.stats is None: raise RuntimeError( @@ -714,7 +711,7 @@ def get_stats(self) -> Dict[str, StatItem]: def reinit_exclude( self, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): self.exclude_types = exclude_types self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) diff --git a/deepmd/pt/model/model/dipole_model.py b/deepmd/pt/model/model/dipole_model.py index 0d4a53a850..c83d1f0bf7 100644 --- a/deepmd/pt/model/model/dipole_model.py +++ b/deepmd/pt/model/model/dipole_model.py @@ -3,7 +3,6 @@ deepcopy, ) from typing import ( - Dict, Optional, ) @@ -64,7 +63,7 @@ def forward( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: model_ret = self.forward_common( coord, atype, diff --git a/deepmd/pt/model/model/dos_model.py b/deepmd/pt/model/model/dos_model.py index 27d62fa882..abfcd4a2b4 100644 --- a/deepmd/pt/model/model/dos_model.py +++ b/deepmd/pt/model/model/dos_model.py @@ -3,7 +3,6 @@ deepcopy, ) from typing import ( - Dict, Optional, ) @@ -56,7 +55,7 @@ def forward( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: model_ret = self.forward_common( coord, atype, diff --git a/deepmd/pt/model/model/dp_model.py b/deepmd/pt/model/model/dp_model.py index d3a65db287..8659526c49 100644 --- a/deepmd/pt/model/model/dp_model.py +++ b/deepmd/pt/model/model/dp_model.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, - Tuple, ) from deepmd.pt.model.descriptor.base_descriptor import ( @@ -20,9 +18,9 @@ class DPModelCommon: def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters diff --git a/deepmd/pt/model/model/dp_zbl_model.py b/deepmd/pt/model/model/dp_zbl_model.py index 4016f0eb35..59147e1d4c 100644 --- a/deepmd/pt/model/model/dp_zbl_model.py +++ b/deepmd/pt/model/model/dp_zbl_model.py @@ -3,10 +3,7 @@ deepcopy, ) from typing import ( - Dict, - List, Optional, - Tuple, ) import torch @@ -68,7 +65,7 @@ def forward( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: model_ret = self.forward_common( coord, atype, @@ -135,9 +132,9 @@ def forward_lower( def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. 
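Every descriptor and model now types `update_sel` as returning `tuple[dict, Optional[float]]`: the updated input dict plus the minimal neighbour distance found during the statistics pass. The shape of that contract, with hypothetical names standing in for the real neighbour statistics:

```python
from typing import Optional

def update_sel(local_jdata: dict) -> tuple[dict, Optional[float]]:
    # Stand-in: the real method runs neighbor statistics over the training data.
    local_jdata = dict(local_jdata)
    local_jdata.setdefault("sel", [46, 92])
    min_nbor_dist: Optional[float] = 0.9
    return local_jdata, min_nbor_dist

jdata, dmin = update_sel({"rcut": 6.0})
print(jdata["sel"], dmin)  # [46, 92] 0.9
```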
Parameters diff --git a/deepmd/pt/model/model/ener_model.py b/deepmd/pt/model/model/ener_model.py index e58ba1df62..82f429c4ab 100644 --- a/deepmd/pt/model/model/ener_model.py +++ b/deepmd/pt/model/model/ener_model.py @@ -3,7 +3,6 @@ deepcopy, ) from typing import ( - Dict, Optional, ) @@ -64,7 +63,7 @@ def forward( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: model_ret = self.forward_common( coord, atype, @@ -104,7 +103,7 @@ def forward_lower( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, + comm_dict: Optional[dict[str, torch.Tensor]] = None, ): model_ret = self.forward_common_lower( extended_coord, diff --git a/deepmd/pt/model/model/frozen.py b/deepmd/pt/model/model/frozen.py index 395d81c217..431c035339 100644 --- a/deepmd/pt/model/model/frozen.py +++ b/deepmd/pt/model/model/frozen.py @@ -2,10 +2,7 @@ import json import tempfile from typing import ( - Dict, - List, Optional, - Tuple, ) import torch @@ -56,12 +53,12 @@ def get_rcut(self) -> float: return self.model.get_rcut() @torch.jit.export - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map.""" return self.model.get_type_map() @torch.jit.export - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.model.get_sel() @@ -76,7 +73,7 @@ def get_dim_aparam(self) -> int: return self.model.get_dim_aparam() @torch.jit.export - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. Only atoms with selected atom types have atomic contribution @@ -124,7 +121,7 @@ def forward( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: return self.model.forward( coord, atype, @@ -177,9 +174,9 @@ def get_nsel(self) -> int: def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters diff --git a/deepmd/pt/model/model/make_hessian_model.py b/deepmd/pt/model/model/make_hessian_model.py index 9588348f53..d2541a815e 100644 --- a/deepmd/pt/model/model/make_hessian_model.py +++ b/deepmd/pt/model/model/make_hessian_model.py @@ -2,8 +2,6 @@ import copy import math from typing import ( - Dict, - List, Optional, Union, ) @@ -47,7 +45,7 @@ def __init__( def requires_hessian( self, - keys: Union[str, List[str]], + keys: Union[str, list[str]], ): """Set which output variable(s) requires hessian.""" if isinstance(keys, str): @@ -68,7 +66,7 @@ def forward_common( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: """Return model prediction. Parameters @@ -90,7 +88,7 @@ def forward_common( Returns ------- ret_dict - The result dict of type Dict[str,torch.Tensor]. + The result dict of type dict[str,torch.Tensor]. The keys are defined by the `ModelOutputDef`. 
""" @@ -122,7 +120,7 @@ def _cal_hessian_all( box: Optional[torch.Tensor] = None, fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: nf, nloc = atype.shape coord = coord.view([nf, (nloc * 3)]) box = box.view([nf, 9]) if box is not None else None @@ -130,7 +128,7 @@ def _cal_hessian_all( aparam = aparam.view([nf, nloc, -1]) if aparam is not None else None fdef = self.atomic_output_def() # keys of values that require hessian - hess_keys: List[str] = [] + hess_keys: list[str] = [] for kk in fdef.keys(): if fdef[kk].r_hessian: hess_keys.append(kk) diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py index 8207f4961e..46b7e51109 100644 --- a/deepmd/pt/model/model/make_model.py +++ b/deepmd/pt/model/model/make_model.py @@ -1,10 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - Dict, - List, Optional, - Tuple, - Type, ) import torch @@ -43,7 +39,7 @@ ) -def make_model(T_AtomicModel: Type[BaseAtomicModel]): +def make_model(T_AtomicModel: type[BaseAtomicModel]): """Make a model as a derived class of an atomic model. The model provide two interfaces. @@ -89,13 +85,13 @@ def model_output_def(self): return ModelOutputDef(self.atomic_output_def()) @torch.jit.export - def model_output_type(self) -> List[str]: + def model_output_type(self) -> list[str]: """Get the output type for the model.""" output_def = self.model_output_def() var_defs = output_def.var_defs # jit: Comprehension ifs are not supported yet # type hint is critical for JIT - vars: List[str] = [] + vars: list[str] = [] for kk, vv in var_defs.items(): # .value is critical for JIT if vv.category == OutputVariableCategory.OUT.value: @@ -111,7 +107,7 @@ def forward_common( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: """Return model prediction. Parameters @@ -133,7 +129,7 @@ def forward_common( Returns ------- ret_dict - The result dict of type Dict[str,torch.Tensor]. + The result dict of type dict[str,torch.Tensor]. The keys are defined by the `ModelOutputDef`. """ @@ -187,11 +183,11 @@ def change_out_bias( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. bias_adjust_mode : str @@ -214,7 +210,7 @@ def forward_common_lower( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, + comm_dict: Optional[dict[str, torch.Tensor]] = None, extra_nlist_sort: bool = False, ): """Return model prediction. 
Lower interface that takes @@ -283,7 +279,7 @@ def input_type_cast( box: Optional[torch.Tensor] = None, fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, - ) -> Tuple[ + ) -> tuple[ torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor], @@ -302,7 +298,7 @@ def input_type_cast( # " does not match" # f" that of the coordinate {input_prec}" # ) - _lst: List[Optional[torch.Tensor]] = [ + _lst: list[Optional[torch.Tensor]] = [ vv.to(coord.dtype) if vv is not None else None for vv in [box, fparam, aparam] ] @@ -324,9 +320,9 @@ def input_type_cast( def output_type_cast( self, - model_ret: Dict[str, torch.Tensor], + model_ret: dict[str, torch.Tensor], input_prec: str, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: """Convert the model output to the input prec.""" do_cast = ( input_prec @@ -469,7 +465,7 @@ def do_grad_c( return self.atomic_model.do_grad_c(var_name) def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -499,7 +495,7 @@ def get_dim_aparam(self) -> int: return self.atomic_model.get_dim_aparam() @torch.jit.export - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. Only atoms with selected atom types have atomic contribution @@ -522,7 +518,7 @@ def get_rcut(self) -> float: return self.atomic_model.get_rcut() @torch.jit.export - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map.""" return self.atomic_model.get_type_map() @@ -548,7 +544,7 @@ def compute_or_load_stat( """Compute or load the statistics.""" return self.atomic_model.compute_or_load_stat(sampled_func, stat_file_path) - def get_sel(self) -> List[int]: + def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" return self.atomic_model.get_sel() @@ -581,7 +577,7 @@ def forward( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: # directly call the forward_common method when no specific transform rule return self.forward_common( coord, diff --git a/deepmd/pt/model/model/polar_model.py b/deepmd/pt/model/model/polar_model.py index 7fbb7bdcf4..57379ba372 100644 --- a/deepmd/pt/model/model/polar_model.py +++ b/deepmd/pt/model/model/polar_model.py @@ -3,7 +3,6 @@ deepcopy, ) from typing import ( - Dict, Optional, ) @@ -56,7 +55,7 @@ def forward( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: model_ret = self.forward_common( coord, atype, diff --git a/deepmd/pt/model/model/property_model.py b/deepmd/pt/model/model/property_model.py index a5b52139fe..164331f44c 100644 --- a/deepmd/pt/model/model/property_model.py +++ b/deepmd/pt/model/model/property_model.py @@ -3,7 +3,6 @@ deepcopy, ) from typing import ( - Dict, Optional, ) @@ -56,7 +55,7 @@ def forward( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: model_ret = self.forward_common( coord, 
atype, @@ -92,7 +91,7 @@ def forward_lower( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - comm_dict: Optional[Dict[str, torch.Tensor]] = None, + comm_dict: Optional[dict[str, torch.Tensor]] = None, ): model_ret = self.forward_common_lower( extended_coord, diff --git a/deepmd/pt/model/model/spin_model.py b/deepmd/pt/model/model/spin_model.py index 717a7ee7c8..a9f6e4d75a 100644 --- a/deepmd/pt/model/model/spin_model.py +++ b/deepmd/pt/model/model/spin_model.py @@ -4,8 +4,6 @@ deepcopy, ) from typing import ( - Dict, - List, Optional, ) @@ -258,7 +256,7 @@ def expand_aparam(aparam, nloc: int): return aparam @torch.jit.export - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map.""" tmap = self.backbone_model.get_type_map() ntypes = len(tmap) // 2 # ignore the virtual type @@ -285,7 +283,7 @@ def get_dim_aparam(self): return self.backbone_model.get_dim_aparam() @torch.jit.export - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. Only atoms with selected atom types have atomic contribution to the result of the model. @@ -301,7 +299,7 @@ def is_aparam_nall(self) -> bool: return self.backbone_model.is_aparam_nall() @torch.jit.export - def model_output_type(self) -> List[str]: + def model_output_type(self) -> list[str]: """Get the output type for the model.""" return self.backbone_model.model_output_type() @@ -422,7 +420,7 @@ def forward_common( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: nframes, nloc = atype.shape coord_updated, atype_updated = self.process_spin_input(coord, atype, spin) if aparam is not None: @@ -576,7 +574,7 @@ def forward( fparam: Optional[torch.Tensor] = None, aparam: Optional[torch.Tensor] = None, do_atomic_virial: bool = False, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: model_ret = self.forward_common( coord, atype, diff --git a/deepmd/pt/model/model/transform_output.py b/deepmd/pt/model/model/transform_output.py index e8afab15c4..e15eda6a1d 100644 --- a/deepmd/pt/model/model/transform_output.py +++ b/deepmd/pt/model/model/transform_output.py @@ -1,7 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - Dict, - List, Optional, ) @@ -31,7 +29,7 @@ def atomic_virial_corr( ce = coord * atom_energy sumce0, sumce1, sumce2 = torch.split(torch.sum(ce, dim=1), [1, 1, 1], dim=-1) faked_grad = torch.ones_like(sumce0) - lst = torch.jit.annotate(List[Optional[torch.Tensor]], [faked_grad]) + lst = torch.jit.annotate(list[Optional[torch.Tensor]], [faked_grad]) extended_virial_corr0 = torch.autograd.grad( [sumce0], [extended_coord], @@ -76,7 +74,7 @@ def task_deriv_one( create_graph: bool = True, ): faked_grad = torch.ones_like(energy) - lst = torch.jit.annotate(List[Optional[torch.Tensor]], [faked_grad]) + lst = torch.jit.annotate(list[Optional[torch.Tensor]], [faked_grad]) extended_force = torch.autograd.grad( [energy], [extended_coord], @@ -153,12 +151,12 @@ def take_deriv( def fit_output_to_model_output( - fit_ret: Dict[str, torch.Tensor], + fit_ret: dict[str, torch.Tensor], fit_output_def: FittingOutputDef, coord_ext: torch.Tensor, do_atomic_virial: bool = False, create_graph: bool = True, -) -> Dict[str, torch.Tensor]: +) -> dict[str, torch.Tensor]: """Transform the output of the fitting network to the model output. 
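The `torch.jit.annotate(list[Optional[torch.Tensor]], ...)` calls in `transform_output.py` exist because TorchScript cannot infer that element type from a bare list literal; the hunk only swaps `List` for `list`, which the torch 2.x line this series targets accepts. The force-from-energy pattern, stripped down to a runnable sketch:

```python
from typing import Optional

import torch

def force_from_energy(energy: torch.Tensor, coord: torch.Tensor) -> Optional[torch.Tensor]:
    # The explicit annotation keeps this pattern scriptable with torch.jit.
    grad_outputs = torch.jit.annotate(
        list[Optional[torch.Tensor]], [torch.ones_like(energy)]
    )
    return torch.autograd.grad(
        [energy], [coord], grad_outputs=grad_outputs, create_graph=True
    )[0]

coord = torch.randn(5, 3, requires_grad=True)
energy = (coord**2).sum()
print(force_from_energy(energy, coord).shape)  # torch.Size([5, 3])
```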
@@ -197,11 +195,11 @@ def fit_output_to_model_output( def communicate_extended_output( - model_ret: Dict[str, torch.Tensor], + model_ret: dict[str, torch.Tensor], model_output_def: ModelOutputDef, mapping: torch.Tensor, # nf x nloc do_atomic_virial: bool = False, -) -> Dict[str, torch.Tensor]: +) -> dict[str, torch.Tensor]: """Transform the output of the model network defined on local and ghost (extended) atoms to local atoms. diff --git a/deepmd/pt/model/network/layernorm.py b/deepmd/pt/model/network/layernorm.py index c1c2c29c87..76ce90b627 100644 --- a/deepmd/pt/model/network/layernorm.py +++ b/deepmd/pt/model/network/layernorm.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, Union, ) @@ -45,7 +44,7 @@ def __init__( stddev: float = 1.0, precision: str = DEFAULT_PRECISION, trainable: bool = True, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): super().__init__() self.eps = eps diff --git a/deepmd/pt/model/network/mlp.py b/deepmd/pt/model/network/mlp.py index 090d64fbcf..f2137bd004 100644 --- a/deepmd/pt/model/network/mlp.py +++ b/deepmd/pt/model/network/mlp.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( ClassVar, - Dict, - List, Optional, Union, ) @@ -83,7 +81,7 @@ def __init__( stddev: float = 1.0, precision: str = DEFAULT_PRECISION, init: str = "default", - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ): super().__init__() # only use_timestep when skip connection is established. @@ -297,7 +295,7 @@ def __init__(self, *args, **kwargs): class NetworkCollection(DPNetworkCollection, nn.Module): """PyTorch implementation of NetworkCollection.""" - NETWORK_TYPE_MAP: ClassVar[Dict[str, type]] = { + NETWORK_TYPE_MAP: ClassVar[dict[str, type]] = { "network": MLP, "embedding_network": EmbeddingNet, "fitting_network": FittingNet, diff --git a/deepmd/pt/model/network/network.py b/deepmd/pt/model/network/network.py index 0c21a9814b..ef50274b03 100644 --- a/deepmd/pt/model/network/network.py +++ b/deepmd/pt/model/network/network.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, Union, ) @@ -571,7 +570,7 @@ def __init__( bavg=0.0, stddev=1.0, precision="default", - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, use_econf_tebd=False, use_tebd_bias: bool = False, type_map=None, @@ -627,7 +626,7 @@ def share_params(self, base_class, shared_level, resume=False): raise NotImplementedError def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -660,7 +659,7 @@ class TypeEmbedNetConsistent(nn.Module): Whether to use electronic configuration type embedding. use_tebd_bias : bool, Optional Whether to use bias in the type embedding layer. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. 
""" @@ -668,16 +667,16 @@ def __init__( self, *, ntypes: int, - neuron: List[int], + neuron: list[int], resnet_dt: bool = False, activation_function: str = "tanh", precision: str = "default", trainable: bool = True, - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, padding: bool = False, use_econf_tebd: bool = False, use_tebd_bias: bool = False, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, ): """Construct a type embedding net.""" super().__init__() @@ -734,7 +733,7 @@ def forward(self, device: torch.device): return embed def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py index 30c5a341a7..56b14677b9 100644 --- a/deepmd/pt/model/task/dipole.py +++ b/deepmd/pt/model/task/dipole.py @@ -3,7 +3,6 @@ import logging from typing import ( Callable, - List, Optional, Union, ) @@ -45,7 +44,7 @@ class DipoleFittingNet(GeneralFitting): Embedding width per atom. embedding_width : int The dimension of rotation matrix, m1. - neuron : List[int] + neuron : list[int] Number of neurons in each hidden layers of the fitting net. resnet_dt : bool Using time-step in the ResNet construction. @@ -70,7 +69,7 @@ class DipoleFittingNet(GeneralFitting): c_differentiable If the variable is differentiated with respect to the cell tensor (pbc case). Only reducible variable are differentiable. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. """ @@ -79,7 +78,7 @@ def __init__( ntypes: int, dim_descrpt: int, embedding_width: int, - neuron: List[int] = [128, 128, 128], + neuron: list[int] = [128, 128, 128], resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, @@ -87,11 +86,11 @@ def __init__( precision: str = DEFAULT_PRECISION, mixed_types: bool = True, rcond: Optional[float] = None, - seed: Optional[Union[int, List[int]]] = None, - exclude_types: List[int] = [], + seed: Optional[Union[int, list[int]]] = None, + exclude_types: list[int] = [], r_differentiable: bool = True, c_differentiable: bool = True, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, **kwargs, ): self.embedding_width = embedding_width @@ -151,7 +150,7 @@ def output_def(self) -> FittingOutputDef: def compute_output_stats( self, - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], stat_file_path: Optional[DPPath] = None, ): """ @@ -159,11 +158,11 @@ def compute_output_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. 
Since the sampling process can be slow and memory-intensive,
            the lazy function helps by only sampling once.
        stat_file_path : Optional[DPPath]
@@ -197,4 +196,4 @@ def forward(
         return {self.var_name: out.to(env.GLOBAL_PT_FLOAT_PRECISION)}
 
     # make jit happy with torch 2.0.0
-    exclude_types: List[int]
+    exclude_types: list[int]
diff --git a/deepmd/pt/model/task/dos.py b/deepmd/pt/model/task/dos.py
index c27e287728..4f69094b0d 100644
--- a/deepmd/pt/model/task/dos.py
+++ b/deepmd/pt/model/task/dos.py
@@ -2,7 +2,6 @@
 import copy
 import logging
 from typing import (
-    List,
     Optional,
     Union,
 )
@@ -45,19 +44,19 @@ def __init__(
         ntypes: int,
         dim_descrpt: int,
         numb_dos: int = 300,
-        neuron: List[int] = [128, 128, 128],
+        neuron: list[int] = [128, 128, 128],
         resnet_dt: bool = True,
         numb_fparam: int = 0,
         numb_aparam: int = 0,
         rcond: Optional[float] = None,
         bias_dos: Optional[torch.Tensor] = None,
-        trainable: Union[bool, List[bool]] = True,
-        seed: Optional[Union[int, List[int]]] = None,
+        trainable: Union[bool, list[bool]] = True,
+        seed: Optional[Union[int, list[int]]] = None,
         activation_function: str = "tanh",
         precision: str = DEFAULT_PRECISION,
-        exclude_types: List[int] = [],
+        exclude_types: list[int] = [],
         mixed_types: bool = True,
-        type_map: Optional[List[str]] = None,
+        type_map: Optional[list[str]] = None,
     ):
         if bias_dos is not None:
             self.bias_dos = bias_dos
@@ -127,4 +126,4 @@ def serialize(self) -> dict:
         return dd
 
     # make jit happy with torch 2.0.0
-    exclude_types: List[int]
+    exclude_types: list[int]
diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py
index 1737e401fb..2048c05ba9 100644
--- a/deepmd/pt/model/task/ener.py
+++ b/deepmd/pt/model/task/ener.py
@@ -2,9 +2,7 @@
 import copy
 import logging
 from typing import (
-    List,
     Optional,
-    Tuple,
     Union,
 )
@@ -48,7 +46,7 @@ def __init__(
         self,
         ntypes: int,
         dim_descrpt: int,
-        neuron: List[int] = [128, 128, 128],
+        neuron: list[int] = [128, 128, 128],
         bias_atom_e: Optional[torch.Tensor] = None,
         resnet_dt: bool = True,
         numb_fparam: int = 0,
@@ -56,8 +54,8 @@ def __init__(
         activation_function: str = "tanh",
         precision: str = DEFAULT_PRECISION,
         mixed_types: bool = True,
-        seed: Optional[Union[int, List[int]]] = None,
-        type_map: Optional[List[str]] = None,
+        seed: Optional[Union[int, list[int]]] = None,
+        type_map: Optional[list[str]] = None,
         **kwargs,
     ):
         super().__init__(
@@ -94,7 +92,7 @@ def serialize(self) -> dict:
         }
 
     # make jit happy with torch 2.0.0
-    exclude_types: List[int]
+    exclude_types: list[int]
 
 
 @Fitting.register("direct_force")
@@ -185,11 +183,11 @@ def deserialize(self) -> "EnergyFittingNetDirect":
         raise NotImplementedError
 
     def change_type_map(
-        self, type_map: List[str], model_with_new_type_stat=None
+        self, type_map: list[str], model_with_new_type_stat=None
     ) -> None:
         raise NotImplementedError
 
-    def get_type_map(self) -> List[str]:
+    def get_type_map(self) -> list[str]:
         raise NotImplementedError
 
     def forward(
@@ -201,7 +199,7 @@ def forward(
         h2: Optional[torch.Tensor] = None,
         fparam: Optional[torch.Tensor] = None,
         aparam: Optional[torch.Tensor] = None,
-    ) -> Tuple[torch.Tensor, None]:
+    ) -> tuple[torch.Tensor, None]:
         """Based on embedding net output, calculate total energy.
Args:
diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 95242eb67c..1827569a17 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -5,7 +5,6 @@
     abstractmethod,
 )
 from typing import (
-    List,
     Optional,
     Union,
 )
@@ -97,7 +96,7 @@ class GeneralFitting(Fitting):
         Embedding width per atom.
     dim_out : int
         The output dimension of the fitting net.
-    neuron : List[int]
+    neuron : list[int]
         Number of neurons in each hidden layers of the fitting net.
     bias_atom_e : torch.Tensor, optional
         Average energy per atom for each element.
@@ -118,17 +117,17 @@ class GeneralFitting(Fitting):
         The condition number for the regression of atomic energy.
     seed : int, optional
         Random seed.
-    exclude_types: List[int]
+    exclude_types: list[int]
         Atomic contributions of the excluded atom types are set zero.
-    trainable : Union[List[bool], bool]
+    trainable : Union[list[bool], bool]
         If the parameters in the fitting net are trainable.
         Now this only supports setting all the parameters in the fitting net at one state.
-        When in List[bool], the trainable will be True only if all the boolean parameters are True.
-    remove_vaccum_contribution: List[bool], optional
+        When in list[bool], the trainable will be True only if all the boolean parameters are True.
+    remove_vaccum_contribution: list[bool], optional
         Remove vacuum contribution before the bias is added. The list assigned each
         type. For `mixed_types` provide `[True]`, otherwise it should be a list of the same
        length as `ntypes` signaling if or not removing the vacuum contribution for the atom types in the list.
-    type_map: List[str], Optional
+    type_map: list[str], Optional
         A list of strings. Give the name to each type of atoms.
     """
 
@@ -137,7 +136,7 @@ def __init__(
         var_name: str,
         ntypes: int,
         dim_descrpt: int,
-        neuron: List[int] = [128, 128, 128],
+        neuron: list[int] = [128, 128, 128],
         bias_atom_e: Optional[torch.Tensor] = None,
         resnet_dt: bool = True,
         numb_fparam: int = 0,
@@ -146,11 +145,11 @@ def __init__(
         precision: str = DEFAULT_PRECISION,
         mixed_types: bool = True,
         rcond: Optional[float] = None,
-        seed: Optional[Union[int, List[int]]] = None,
-        exclude_types: List[int] = [],
-        trainable: Union[bool, List[bool]] = True,
-        remove_vaccum_contribution: Optional[List[bool]] = None,
-        type_map: Optional[List[str]] = None,
+        seed: Optional[Union[int, list[int]]] = None,
+        exclude_types: list[int] = [],
+        trainable: Union[bool, list[bool]] = True,
+        remove_vaccum_contribution: Optional[list[bool]] = None,
+        type_map: Optional[list[str]] = None,
         **kwargs,
     ):
         super().__init__()
@@ -253,13 +252,13 @@ def __init__(
 
     def reinit_exclude(
         self,
-        exclude_types: List[int] = [],
+        exclude_types: list[int] = [],
     ):
         self.exclude_types = exclude_types
         self.emask = AtomExcludeMask(self.ntypes, self.exclude_types)
 
     def change_type_map(
-        self, type_map: List[str], model_with_new_type_stat=None
+        self, type_map: list[str], model_with_new_type_stat=None
     ) -> None:
         """Change the type related params to new ones, according to `type_map` and the original one in the model.
         If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types.
@@ -342,9 +341,9 @@ def get_dim_aparam(self) -> int:
         return self.numb_aparam
 
     # make jit happy
-    exclude_types: List[int]
+    exclude_types: list[int]
 
-    def get_sel_type(self) -> List[int]:
+    def get_sel_type(self) -> list[int]:
         """Get the selected atom types of this model.
Only atoms with selected atom types have atomic contribution @@ -352,13 +351,13 @@ def get_sel_type(self) -> List[int]: If returning an empty list, all atom types are selected. """ # make jit happy - sel_type: List[int] = [] + sel_type: list[int] = [] for ii in range(self.ntypes): if ii not in self.exclude_types: sel_type.append(ii) return sel_type - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py index 36c416d6e5..230046b74b 100644 --- a/deepmd/pt/model/task/invar_fitting.py +++ b/deepmd/pt/model/task/invar_fitting.py @@ -2,7 +2,6 @@ import copy import logging from typing import ( - List, Optional, Union, ) @@ -48,7 +47,7 @@ class InvarFitting(GeneralFitting): Embedding width per atom. dim_out : int The output dimension of the fitting net. - neuron : List[int] + neuron : list[int] Number of neurons in each hidden layers of the fitting net. bias_atom_e : torch.Tensor, optional Average enery per atom for each element. @@ -69,14 +68,14 @@ class InvarFitting(GeneralFitting): The condition number for the regression of atomic energy. seed : int, optional Random seed. - exclude_types: List[int] + exclude_types: list[int] Atomic contributions of the excluded atom types are set zero. - atom_ener: List[Optional[torch.Tensor]], optional + atom_ener: list[Optional[torch.Tensor]], optional Specifying atomic energy contribution in vacuum. The value is a list specifying the bias. the elements can be None or np.array of output shape. For example: [None, [2.]] means type 0 is not set, type 1 is set to [2.] The `set_davg_zero` key in the descrptor should be set. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. """ @@ -87,7 +86,7 @@ def __init__( ntypes: int, dim_descrpt: int, dim_out: int, - neuron: List[int] = [128, 128, 128], + neuron: list[int] = [128, 128, 128], bias_atom_e: Optional[torch.Tensor] = None, resnet_dt: bool = True, numb_fparam: int = 0, @@ -96,10 +95,10 @@ def __init__( precision: str = DEFAULT_PRECISION, mixed_types: bool = True, rcond: Optional[float] = None, - seed: Optional[Union[int, List[int]]] = None, - exclude_types: List[int] = [], - atom_ener: Optional[List[Optional[torch.Tensor]]] = None, - type_map: Optional[List[str]] = None, + seed: Optional[Union[int, list[int]]] = None, + exclude_types: list[int] = [], + atom_ener: Optional[list[Optional[torch.Tensor]]] = None, + type_map: Optional[list[str]] = None, **kwargs, ): self.dim_out = dim_out @@ -179,4 +178,4 @@ def forward( return self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam) # make jit happy with torch 2.0.0 - exclude_types: List[int] + exclude_types: list[int] diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py index 7345fa296c..a16ab886d4 100644 --- a/deepmd/pt/model/task/polarizability.py +++ b/deepmd/pt/model/task/polarizability.py @@ -2,7 +2,6 @@ import copy import logging from typing import ( - List, Optional, Union, ) @@ -47,7 +46,7 @@ class PolarFittingNet(GeneralFitting): Embedding width per atom. embedding_width : int The dimension of rotation matrix, m1. - neuron : List[int] + neuron : list[int] Number of neurons in each hidden layers of the fitting net. resnet_dt : bool Using time-step in the ResNet construction. 
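The `# make jit happy` attribute annotations and the annotated empty list in `get_sel_type` above exist because TorchScript cannot infer the element type of an empty container. A hedged sketch of the same pattern as a free function, assuming a PyTorch new enough to accept PEP 585 annotations in scripted code (this tree targets torch 2.x):

    import torch

    @torch.jit.script
    def select_types(ntypes: int, exclude_types: list[int]) -> list[int]:
        sel_type: list[int] = []  # explicit annotation: TorchScript cannot infer it
        for ii in range(ntypes):
            if ii not in exclude_types:
                sel_type.append(ii)
        return sel_type

    print(select_types(3, [1]))  # [0, 2]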
@@ -69,11 +68,11 @@ class PolarFittingNet(GeneralFitting): fit_diag : bool Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix. - scale : List[float] + scale : list[float] The output of the fitting net (polarizability matrix) for type i atom will be scaled by scale[i] shift_diag : bool Whether to shift the diagonal part of the polarizability matrix. The shift operation is carried out after scale. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. """ @@ -83,7 +82,7 @@ def __init__( ntypes: int, dim_descrpt: int, embedding_width: int, - neuron: List[int] = [128, 128, 128], + neuron: list[int] = [128, 128, 128], resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, @@ -91,12 +90,12 @@ def __init__( precision: str = DEFAULT_PRECISION, mixed_types: bool = True, rcond: Optional[float] = None, - seed: Optional[Union[int, List[int]]] = None, - exclude_types: List[int] = [], + seed: Optional[Union[int, list[int]]] = None, + exclude_types: list[int] = [], fit_diag: bool = True, - scale: Optional[Union[List[float], float]] = None, + scale: Optional[Union[list[float], float]] = None, shift_diag: bool = True, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, **kwargs, ): self.embedding_width = embedding_width @@ -162,7 +161,7 @@ def __getitem__(self, key): return super().__getitem__(key) def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: """Change the type related params to new ones, according to `type_map` and the original one in the model. If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. @@ -258,4 +257,4 @@ def forward( return {"polarizability": out.to(env.GLOBAL_PT_FLOAT_PRECISION)} # make jit happy with torch 2.0.0 - exclude_types: List[int] + exclude_types: list[int] diff --git a/deepmd/pt/model/task/property.py b/deepmd/pt/model/task/property.py index 804383c57f..cc6a4e8745 100644 --- a/deepmd/pt/model/task/property.py +++ b/deepmd/pt/model/task/property.py @@ -2,7 +2,6 @@ import copy import logging from typing import ( - List, Optional, ) @@ -46,7 +45,7 @@ class PropertyFittingNet(InvarFitting): Embedding width per atom. task_dim : int The dimension of outputs of fitting net. - neuron : List[int] + neuron : list[int] Number of neurons in each hidden layers of the fitting net. bias_atom_p : torch.Tensor, optional Average property per atom for each element. 
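For the `scale : list[float]` parameter documented above, the fitted polarizability of each atom is multiplied by the factor of its type. A small sketch of that broadcast, with made-up shapes:

    import torch

    scale = torch.tensor([1.0, 2.0])   # one factor per atom type
    atype = torch.tensor([[0, 1, 1]])  # (nframes, nloc)
    out = torch.ones(1, 3, 3, 3)       # (nframes, nloc, 3, 3) polarizability
    out = out * scale[atype][..., None, None]
    print(out[0, :, 0, 0])  # tensor([1., 2., 2.])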
@@ -78,7 +77,7 @@ def __init__( ntypes: int, dim_descrpt: int, task_dim: int = 1, - neuron: List[int] = [128, 128, 128], + neuron: list[int] = [128, 128, 128], bias_atom_p: Optional[torch.Tensor] = None, intensive: bool = False, bias_method: str = "normal", @@ -149,4 +148,4 @@ def serialize(self) -> dict: return dd # make jit happy with torch 2.0.0 - exclude_types: List[int] + exclude_types: list[int] diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 9bdc80195f..95c73bd83c 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -10,7 +10,6 @@ ) from typing import ( Any, - Dict, ) import numpy as np @@ -88,7 +87,7 @@ class Trainer: def __init__( self, - config: Dict[str, Any], + config: dict[str, Any], training_data, stat_file_path=None, validation_data=None, diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py index 6bc7cdc87a..922ac296ea 100644 --- a/deepmd/pt/train/wrapper.py +++ b/deepmd/pt/train/wrapper.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging from typing import ( - Dict, Optional, Union, ) @@ -18,8 +17,8 @@ class ModelWrapper(torch.nn.Module): def __init__( self, - model: Union[torch.nn.Module, Dict], - loss: Union[torch.nn.Module, Dict] = None, + model: Union[torch.nn.Module, dict], + loss: Union[torch.nn.Module, dict] = None, model_params=None, shared_links=None, ): @@ -183,12 +182,12 @@ def forward( ) return model_pred, loss, more_loss - def set_extra_state(self, state: Dict): + def set_extra_state(self, state: dict): self.model_params = state["model_params"] self.train_infos = state["train_infos"] return None - def get_extra_state(self) -> Dict: + def get_extra_state(self) -> dict: state = { "model_params": self.model_params, "train_infos": self.train_infos, diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 6a37a4a843..c7f44cfb70 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -9,9 +9,6 @@ from threading import ( Thread, ) -from typing import ( - List, -) import h5py import numpy as np @@ -86,7 +83,7 @@ def __init__( with h5py.File(systems) as file: systems = [os.path.join(systems, item) for item in file.keys()] - self.systems: List[DeepmdDataSetForLoader] = [] + self.systems: list[DeepmdDataSetForLoader] = [] if len(systems) >= 100: log.info(f"Constructing DataLoaders from {len(systems)} systems") @@ -106,7 +103,7 @@ def construct_dataset(system): ) as pool: self.systems = pool.map(construct_dataset, systems) - self.sampler_list: List[DistributedSampler] = [] + self.sampler_list: list[DistributedSampler] = [] self.index = [] self.total_batch = 0 @@ -178,7 +175,7 @@ def __getitem__(self, idx): batch["sid"] = idx return batch - def add_data_requirement(self, data_requirement: List[DataRequirementItem]): + def add_data_requirement(self, data_requirement: list[DataRequirementItem]): """Add data requirement for each system in multiple systems.""" for system in self.systems: system.add_data_requirement(data_requirement) @@ -186,7 +183,7 @@ def add_data_requirement(self, data_requirement: List[DataRequirementItem]): def print_summary( self, name: str, - prob: List[float], + prob: list[float], ): print_summary( name, diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py index dbe4d92a0f..4a29f3f045 100644 --- a/deepmd/pt/utils/dataset.py +++ b/deepmd/pt/utils/dataset.py @@ -2,7 +2,6 @@ from typing import ( - List, Optional, ) @@ -17,7 +16,7 @@ class DeepmdDataSetForLoader(Dataset): - def __init__(self, 
system: str, type_map: Optional[List[str]] = None): + def __init__(self, system: str, type_map: Optional[list[str]] = None): """Construct DeePMD-style dataset containing frames cross different systems. Args: @@ -41,7 +40,7 @@ def __getitem__(self, index): b_data["natoms"] = self._natoms_vec return b_data - def add_data_requirement(self, data_requirement: List[DataRequirementItem]): + def add_data_requirement(self, data_requirement: list[DataRequirementItem]): """Add data requirement for this data system.""" for data_item in data_requirement: self._data_system.add( diff --git a/deepmd/pt/utils/env_mat_stat.py b/deepmd/pt/utils/env_mat_stat.py index 9eaea16c3e..cc30bd5155 100644 --- a/deepmd/pt/utils/env_mat_stat.py +++ b/deepmd/pt/utils/env_mat_stat.py @@ -1,10 +1,9 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from collections.abc import ( + Iterator, +) from typing import ( TYPE_CHECKING, - Dict, - Iterator, - List, - Tuple, Union, ) @@ -38,7 +37,7 @@ class EnvMatStat(BaseEnvMatStat): - def compute_stat(self, env_mat: Dict[str, torch.Tensor]) -> Dict[str, StatItem]: + def compute_stat(self, env_mat: dict[str, torch.Tensor]) -> dict[str, StatItem]: """Compute the statistics of the environment matrix for a single system. Parameters @@ -48,7 +47,7 @@ def compute_stat(self, env_mat: Dict[str, torch.Tensor]) -> Dict[str, StatItem]: Returns ------- - Dict[str, StatItem] + dict[str, StatItem] The statistics of the environment matrix. """ stats = {} @@ -78,18 +77,18 @@ def __init__(self, descriptor: "DescriptorBlock"): ) # se_r=1, se_a=4 def iter( - self, data: List[Dict[str, Union[torch.Tensor, List[Tuple[int, int]]]]] - ) -> Iterator[Dict[str, StatItem]]: + self, data: list[dict[str, Union[torch.Tensor, list[tuple[int, int]]]]] + ) -> Iterator[dict[str, StatItem]]: """Get the iterator of the environment matrix. Parameters ---------- - data : List[Dict[str, Union[torch.Tensor, List[Tuple[int, int]]]]] + data : list[dict[str, Union[torch.Tensor, list[tuple[int, int]]]]] The data. Yields ------ - Dict[str, StatItem] + dict[str, StatItem] The statistics of the environment matrix. 
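Beyond the alias swap, abstract container types such as `Iterator` now come from `collections.abc`, because their `typing` counterparts are deprecated re-exports. A minimal sketch mirroring the shape of the `iter` signature above:

    from collections.abc import Iterator

    def iter_stats(data: list[dict[str, float]]) -> Iterator[dict[str, float]]:
        """Yield the statistics of one system at a time."""
        yield from data

    for stat in iter_stats([{"sumr": 0.1}, {"sumr": 0.2}]):
        print(stat)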
""" zero_mean = torch.zeros( diff --git a/deepmd/pt/utils/exclude_mask.py b/deepmd/pt/utils/exclude_mask.py index c3f3f8eb2f..a5de969c07 100644 --- a/deepmd/pt/utils/exclude_mask.py +++ b/deepmd/pt/utils/exclude_mask.py @@ -1,9 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - List, - Set, - Tuple, -) import numpy as np import torch @@ -19,7 +14,7 @@ class AtomExcludeMask(torch.nn.Module): def __init__( self, ntypes: int, - exclude_types: List[int] = [], + exclude_types: list[int] = [], ): super().__init__() self.reinit(ntypes, exclude_types) @@ -27,7 +22,7 @@ def __init__( def reinit( self, ntypes: int, - exclude_types: List[int] = [], + exclude_types: list[int] = [], ): self.ntypes = ntypes self.exclude_types = exclude_types @@ -72,7 +67,7 @@ class PairExcludeMask(torch.nn.Module): def __init__( self, ntypes: int, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): super().__init__() self.reinit(ntypes, exclude_types) @@ -80,10 +75,10 @@ def __init__( def reinit( self, ntypes: int, - exclude_types: List[Tuple[int, int]] = [], + exclude_types: list[tuple[int, int]] = [], ): self.ntypes = ntypes - self._exclude_types: Set[Tuple[int, int]] = set() + self._exclude_types: set[tuple[int, int]] = set() for tt in exclude_types: assert len(tt) == 2 self._exclude_types.add((tt[0], tt[1])) diff --git a/deepmd/pt/utils/neighbor_stat.py b/deepmd/pt/utils/neighbor_stat.py index d5b5c74bdc..d427dc758a 100644 --- a/deepmd/pt/utils/neighbor_stat.py +++ b/deepmd/pt/utils/neighbor_stat.py @@ -1,8 +1,9 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( +from collections.abc import ( Iterator, +) +from typing import ( Optional, - Tuple, ) import numpy as np @@ -52,7 +53,7 @@ def forward( coord: torch.Tensor, atype: torch.Tensor, cell: Optional[torch.Tensor], - ) -> Tuple[torch.Tensor, torch.Tensor]: + ) -> tuple[torch.Tensor, torch.Tensor]: """Calculate the neareest neighbor distance between atoms, maximum nbor size of atoms and the output data range of the environment matrix. @@ -139,7 +140,7 @@ def __init__( def iterator( self, data: DeepmdDataSystem - ) -> Iterator[Tuple[np.ndarray, float, str]]: + ) -> Iterator[tuple[np.ndarray, float, str]]: """Abstract method for producing data. Yields diff --git a/deepmd/pt/utils/nlist.py b/deepmd/pt/utils/nlist.py index b34c43378c..a4f81a23a5 100644 --- a/deepmd/pt/utils/nlist.py +++ b/deepmd/pt/utils/nlist.py @@ -1,7 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - Dict, - List, Optional, Union, ) @@ -21,7 +19,7 @@ def extend_input_and_build_neighbor_list( coord, atype, rcut: float, - sel: List[int], + sel: list[int], mixed_types: bool = False, box: Optional[torch.Tensor] = None, ): @@ -55,7 +53,7 @@ def build_neighbor_list( atype: torch.Tensor, nloc: int, rcut: float, - sel: Union[int, List[int]], + sel: Union[int, list[int]], distinguish_types: bool = True, ) -> torch.Tensor: """Build neightbor list for a single frame. keeps nsel neighbors. @@ -71,7 +69,7 @@ def build_neighbor_list( number of local atoms. rcut : float cut-off radius - sel : int or List[int] + sel : int or list[int] maximal number of neighbors (of each type). 
if distinguish_types==True, nsel should be list and the length of nsel should be equal to number of @@ -137,7 +135,7 @@ def _trim_mask_distinguish_nlist( rr: torch.Tensor, nlist: torch.Tensor, rcut: float, - sel: List[int], + sel: list[int], distinguish_types: bool, ) -> torch.Tensor: """Trim the size of nlist, mask if any central atom is virtual, distinguish types if necessary.""" @@ -178,7 +176,7 @@ def build_directional_neighbor_list( coord_neig: torch.Tensor, atype_neig: torch.Tensor, rcut: float, - sel: Union[int, List[int]], + sel: Union[int, list[int]], distinguish_types: bool = True, ) -> torch.Tensor: """Build directional neighbor list. @@ -205,7 +203,7 @@ def build_directional_neighbor_list( if type < 0 the atom is treated as virtual atoms. rcut : float cut-off radius - sel : int or List[int] + sel : int or list[int] maximal number of neighbors (of each type). if distinguish_types==True, nsel should be list and the length of nsel should be equal to number of @@ -277,7 +275,7 @@ def build_directional_neighbor_list( def nlist_distinguish_types( nlist: torch.Tensor, atype: torch.Tensor, - sel: List[int], + sel: list[int], ): """Given a nlist that does not distinguish atom types, return a nlist that distinguish atom types. @@ -327,9 +325,9 @@ def get_multiple_nlist_key( def build_multiple_neighbor_list( coord: torch.Tensor, nlist: torch.Tensor, - rcuts: List[float], - nsels: List[int], -) -> Dict[str, torch.Tensor]: + rcuts: list[float], + nsels: list[int], +) -> dict[str, torch.Tensor]: """Input one neighbor list, and produce multiple neighbor lists with different cutoff radius and numbers of selection out of it. The required rcuts and nsels should be smaller or equal to the input nlist. @@ -341,14 +339,14 @@ def build_multiple_neighbor_list( nlist : torch.Tensor Neighbor list of shape [batch_size, nloc, nsel], the neighbors should be stored in an ascending order. - rcuts : List[float] + rcuts : list[float] list of cut-off radius in ascending order. - nsels : List[int] + nsels : list[int] maximal number of neighbors in ascending order. Returns ------- - nlist_dict : Dict[str, torch.Tensor] + nlist_dict : dict[str, torch.Tensor] A dict of nlists, key given by get_multiple_nlist_key(rc, nsel) value being the corresponding nlist. diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 58e02f436d..23fb12f2a4 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -5,8 +5,6 @@ ) from typing import ( Callable, - Dict, - List, Optional, Union, ) @@ -89,7 +87,7 @@ def make_stat_input(datasets, dataloaders, nbatches): def _restore_from_file( stat_file_path: DPPath, - keys: List[str] = ["energy"], + keys: list[str] = ["energy"], ) -> Optional[dict]: if stat_file_path is None: return None, None @@ -147,8 +145,8 @@ def _post_process_stat( def _compute_model_predict( - sampled: Union[Callable[[], List[dict]], List[dict]], - keys: List[str], + sampled: Union[Callable[[], list[dict]], list[dict]], + keys: list[str], model_forward: Callable[..., torch.Tensor], ): auto_batch_size = AutoBatchSize() @@ -187,7 +185,7 @@ def model_forward_auto_batch_size(*args, **kwargs): def _make_preset_out_bias( ntypes: int, - ibias: List[Optional[np.ndarray]], + ibias: list[Optional[np.ndarray]], ) -> Optional[np.ndarray]: """Make preset out bias. 
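`build_multiple_neighbor_list` above returns a plain `dict[str, torch.Tensor]` keyed by `get_multiple_nlist_key(rc, nsel)`. A hedged sketch of how such a dict could be assembled; the key format and the column-truncation "trimming" are assumptions for illustration only:

    import torch

    def get_multiple_nlist_key(rcut: float, nsel: int) -> str:
        return f"{rcut}_{nsel}"  # assumed "rcut_nsel" format

    def build_many(
        nlist: torch.Tensor, rcuts: list[float], nsels: list[int]
    ) -> dict[str, torch.Tensor]:
        # hypothetical trim: keep the first nsel neighbor columns per entry
        return {
            get_multiple_nlist_key(rc, ns): nlist[:, :, :ns]
            for rc, ns in zip(rcuts, nsels)
        }

    nl = torch.zeros(1, 2, 8, dtype=torch.long)
    print(sorted(build_many(nl, [4.0, 6.0], [4, 8])))  # ['4.0_4', '6.0_8']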
@@ -237,12 +235,12 @@ def _fill_stat_with_global( def compute_output_stats( - merged: Union[Callable[[], List[dict]], List[dict]], + merged: Union[Callable[[], list[dict]], list[dict]], ntypes: int, - keys: Union[str, List[str]] = ["energy"], + keys: Union[str, list[str]] = ["energy"], stat_file_path: Optional[DPPath] = None, rcond: Optional[float] = None, - preset_bias: Optional[Dict[str, List[Optional[np.ndarray]]]] = None, + preset_bias: Optional[dict[str, list[Optional[np.ndarray]]]] = None, model_forward: Optional[Callable[..., torch.Tensor]] = None, atomic_output: Optional[FittingOutputDef] = None, ): @@ -251,11 +249,11 @@ def compute_output_stats( Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. ntypes : int @@ -264,7 +262,7 @@ def compute_output_stats( The path to the stat file. rcond : float, optional The condition number for the regression of atomic energy. - preset_bias : Dict[str, List[Optional[np.ndarray]]], optional + preset_bias : dict[str, list[Optional[np.ndarray]]], optional Specifying atomic energy contribution in vacuum. Given by key:value pairs. The value is a list specifying the bias. the elements can be None or np.ndarray of output shape. For example: [None, [2.]] means type 0 is not set, type 1 is set to [2.] @@ -401,12 +399,12 @@ def compute_output_stats( def compute_output_stats_global( - sampled: List[dict], + sampled: list[dict], ntypes: int, - keys: List[str], + keys: list[str], rcond: Optional[float] = None, - preset_bias: Optional[Dict[str, List[Optional[np.ndarray]]]] = None, - model_pred: Optional[Dict[str, np.ndarray]] = None, + preset_bias: Optional[dict[str, list[Optional[np.ndarray]]]] = None, + model_pred: Optional[dict[str, np.ndarray]] = None, atomic_output: Optional[FittingOutputDef] = None, ): """This function only handle stat computation from reduced global labels.""" @@ -526,10 +524,10 @@ def rmse(x): def compute_output_stats_atomic( - sampled: List[dict], + sampled: list[dict], ntypes: int, - keys: List[str], - model_pred: Optional[Dict[str, np.ndarray]] = None, + keys: list[str], + model_pred: Optional[dict[str, np.ndarray]] = None, ): # get label dict from sample; for each key, only picking the system with atomic labels. 
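`compute_output_stats` accepts `merged` either as ready-made samples or as a zero-argument callable, so the expensive sampling runs only when no stat file is available. A minimal sketch of that contract:

    from typing import Callable, Union

    def resolve(merged: Union[Callable[[], list[dict]], list[dict]]) -> list[dict]:
        # invoke only when statistics are actually needed; sampling is slow
        return merged() if callable(merged) else merged

    print(resolve(lambda: [{"energy": 1.0}]))  # [{'energy': 1.0}]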
outputs = { diff --git a/deepmd/pt/utils/update_sel.py b/deepmd/pt/utils/update_sel.py index 7f42a9f91c..e8c40e2626 100644 --- a/deepmd/pt/utils/update_sel.py +++ b/deepmd/pt/utils/update_sel.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Type, -) from deepmd.pt.utils.neighbor_stat import ( NeighborStat, @@ -13,5 +10,5 @@ class UpdateSel(BaseUpdateSel): @property - def neighbor_stat(self) -> Type[NeighborStat]: + def neighbor_stat(self) -> type[NeighborStat]: return NeighborStat diff --git a/deepmd/pt/utils/utils.py b/deepmd/pt/utils/utils.py index 9ccdbfdb5d..43b82efcc1 100644 --- a/deepmd/pt/utils/utils.py +++ b/deepmd/pt/utils/utils.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, Union, overload, @@ -123,7 +122,7 @@ def dict_to_device(sample_dict): XSHIFT = 16 -def hashmix(value: int, hash_const: List[int]): +def hashmix(value: int, hash_const: list[int]): value ^= INIT_A hash_const[0] *= MULT_A value *= INIT_A @@ -142,7 +141,7 @@ def mix(x: int, y: int): return result -def mix_entropy(entropy_array: List[int]) -> int: +def mix_entropy(entropy_array: list[int]) -> int: # https://github.com/numpy/numpy/blob/a4cddb60489f821a1a4dffc16cd5c69755d43bdb/numpy/random/bit_generator.pyx#L341-L374 hash_const = [INIT_A] mixer = hashmix(entropy_array[0], hash_const) @@ -152,7 +151,7 @@ def mix_entropy(entropy_array: List[int]) -> int: def get_generator( - seed: Optional[Union[int, List[int]]] = None, + seed: Optional[Union[int, list[int]]] = None, ) -> Optional[torch.Generator]: if seed is not None: if isinstance(seed, list): diff --git a/deepmd/tf/cluster/__init__.py b/deepmd/tf/cluster/__init__.py index 6735ce92f4..0f8916038d 100644 --- a/deepmd/tf/cluster/__init__.py +++ b/deepmd/tf/cluster/__init__.py @@ -2,9 +2,7 @@ """Module that reads node resources, auto detects if running local or on SLURM.""" from typing import ( - List, Optional, - Tuple, ) from .local import get_resource as get_local_res @@ -12,12 +10,12 @@ __all__ = ["get_resource"] -def get_resource() -> Tuple[str, List[str], Optional[List[int]]]: +def get_resource() -> tuple[str, list[str], Optional[list[int]]]: """Get local or slurm resources: nodename, nodelist, and gpus. Returns ------- - Tuple[str, List[str], Optional[List[int]]] + tuple[str, list[str], Optional[list[int]]] nodename, nodelist, and gpus """ return get_local_res() diff --git a/deepmd/tf/cluster/local.py b/deepmd/tf/cluster/local.py index 009a182e55..a9392bd326 100644 --- a/deepmd/tf/cluster/local.py +++ b/deepmd/tf/cluster/local.py @@ -4,9 +4,7 @@ import subprocess as sp import sys from typing import ( - List, Optional, - Tuple, ) from deepmd.tf.env import ( @@ -25,7 +23,7 @@ def get_gpus(): Returns ------- - Optional[List[int]] + Optional[list[int]] List of available GPU IDs. Otherwise, None. """ if not tf.test.is_built_with_cuda() and not ( @@ -51,12 +49,12 @@ def get_gpus(): return list(range(num_gpus)) if num_gpus > 0 else None -def get_resource() -> Tuple[str, List[str], Optional[List[int]]]: +def get_resource() -> tuple[str, list[str], Optional[list[int]]]: """Get local resources: nodename, nodelist, and gpus. 
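Both `get_resource` variants now advertise the builtin `tuple[str, list[str], Optional[list[int]]]` return type. A sketch of a trivial local-only probe, under the assumption of a single node with no visible GPUs:

    import socket
    from typing import Optional

    def get_resource() -> tuple[str, list[str], Optional[list[int]]]:
        nodename = socket.gethostname()
        return nodename, [nodename], None  # None: no GPUs detected (assumed)

    print(get_resource())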
Returns ------- - Tuple[str, List[str], Optional[List[int]]] + tuple[str, list[str], Optional[list[int]]] nodename, nodelist, and gpus """ nodename, nodelist = get_host_names() diff --git a/deepmd/tf/descriptor/descriptor.py b/deepmd/tf/descriptor/descriptor.py index 2bef63fa5e..ba54ca1309 100644 --- a/deepmd/tf/descriptor/descriptor.py +++ b/deepmd/tf/descriptor/descriptor.py @@ -4,11 +4,7 @@ ) from typing import ( Any, - Dict, - List, Optional, - Set, - Tuple, ) import numpy as np @@ -111,7 +107,7 @@ def get_dim_rot_mat_1(self) -> int: """ raise NotImplementedError - def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + def get_nlist(self) -> tuple[tf.Tensor, tf.Tensor, list[int], list[int]]: """Returns neighbor information. Returns @@ -130,12 +126,12 @@ def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: @abstractmethod def compute_input_stats( self, - data_coord: List[np.ndarray], - data_box: List[np.ndarray], - data_atype: List[np.ndarray], - natoms_vec: List[np.ndarray], - mesh: List[np.ndarray], - input_dict: Dict[str, List[np.ndarray]], + data_coord: list[np.ndarray], + data_box: list[np.ndarray], + data_atype: list[np.ndarray], + natoms_vec: list[np.ndarray], + mesh: list[np.ndarray], + input_dict: dict[str, list[np.ndarray]], **kwargs, ) -> None: """Compute the statisitcs (avg and std) of the training data. The input will be @@ -175,7 +171,7 @@ def build( natoms: tf.Tensor, box_: tf.Tensor, mesh: tf.Tensor, - input_dict: Dict[str, Any], + input_dict: dict[str, Any], reuse: Optional[bool] = None, suffix: str = "", ) -> tf.Tensor: @@ -275,7 +271,7 @@ def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None: @abstractmethod def prod_force_virial( self, atom_ener: tf.Tensor, natoms: tf.Tensor - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + ) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """Compute force and virial. Parameters @@ -323,7 +319,7 @@ def init_variables( f"Descriptor {type(self).__name__} doesn't support initialization from the given variables!" ) - def get_tensor_names(self, suffix: str = "") -> Tuple[str]: + def get_tensor_names(self, suffix: str = "") -> tuple[str]: """Get names of tensors. Parameters @@ -333,7 +329,7 @@ def get_tensor_names(self, suffix: str = "") -> Tuple[str]: Returns ------- - Tuple[str] + tuple[str] Names of tensors """ raise NotImplementedError( @@ -362,9 +358,9 @@ def pass_tensors_from_frz_model( def build_type_exclude_mask( self, - exclude_types: Set[Tuple[int, int]], + exclude_types: set[tuple[int, int]], ntypes: int, - sel: List[int], + sel: list[int], ndescrpt: int, atype: tf.Tensor, shape0: tf.Tensor, @@ -391,12 +387,12 @@ def build_type_exclude_mask( Parameters ---------- - exclude_types : List[Tuple[int, int]] + exclude_types : list[tuple[int, int]] The list of excluded types, e.g. [(0, 1), (1, 0)] means the interaction between type 0 and type 1 is excluded. ntypes : int The number of types. - sel : List[int] + sel : list[int] The list of the number of selected neighbors for each type. ndescrpt : int The number of descriptors for each atom. @@ -469,9 +465,9 @@ def explicit_ntypes(self) -> bool: def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. 
Parameters @@ -535,6 +531,6 @@ def serialize(self, suffix: str = "") -> dict: raise NotImplementedError(f"Not implemented in class {self.__name__}") @property - def input_requirement(self) -> List[DataRequirementItem]: + def input_requirement(self) -> list[DataRequirementItem]: """Return data requirements needed for the model input.""" return [] diff --git a/deepmd/tf/descriptor/hybrid.py b/deepmd/tf/descriptor/hybrid.py index fe4fc2ae6a..e4458476c8 100644 --- a/deepmd/tf/descriptor/hybrid.py +++ b/deepmd/tf/descriptor/hybrid.py @@ -1,10 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Any, - Dict, - List, Optional, - Tuple, Union, ) @@ -41,14 +38,14 @@ class DescrptHybrid(Descriptor): Parameters ---------- - list : list : List[Union[Descriptor, Dict[str, Any]]] + list : list : list[Union[Descriptor, dict[str, Any]]] Build a descriptor from the concatenation of the list of descriptors. The descriptor can be either an object or a dictionary. """ def __init__( self, - list: List[Union[Descriptor, Dict[str, Any]]], + list: list[Union[Descriptor, dict[str, Any]]], ntypes: Optional[int] = None, spin: Optional[Spin] = None, **kwargs, @@ -93,7 +90,7 @@ def get_dim_out(self) -> int: def get_nlist( self, - ) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + ) -> tuple[tf.Tensor, tf.Tensor, list[int], list[int]]: """Get the neighbor information of the descriptor, returns the nlist of the descriptor with the largest cut-off radius. @@ -111,7 +108,7 @@ def get_nlist( maxr_idx = np.argmax([ii.get_rcut() for ii in self.descrpt_list]) return self.get_nlist_i(maxr_idx) - def get_nlist_i(self, ii: int) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + def get_nlist_i(self, ii: int) -> tuple[tf.Tensor, tf.Tensor, list[int], list[int]]: """Get the neighbor information of the ii-th descriptor. Parameters @@ -275,7 +272,7 @@ def build( def prod_force_virial( self, atom_ener: tf.Tensor, natoms: tf.Tensor - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + ) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """Compute force and virial. Parameters @@ -385,7 +382,7 @@ def init_variables( for idx, ii in enumerate(self.descrpt_list): ii.init_variables(graph, graph_def, suffix=f"{suffix}_{idx}") - def get_tensor_names(self, suffix: str = "") -> Tuple[str]: + def get_tensor_names(self, suffix: str = "") -> tuple[str]: """Get names of tensors. Parameters @@ -395,7 +392,7 @@ def get_tensor_names(self, suffix: str = "") -> Tuple[str]: Returns ------- - Tuple[str] + tuple[str] Names of tensors """ tensor_names = [] @@ -429,9 +426,9 @@ def explicit_ntypes(self) -> bool: def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. 
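`DescrptHybrid.get_nlist` above delegates to the sub-descriptor with the largest cutoff; the index selection is just an argmax over the per-descriptor radii:

    import numpy as np

    rcuts = [4.0, 6.0, 5.5]          # get_rcut() of each sub-descriptor
    maxr_idx = int(np.argmax(rcuts))
    print(maxr_idx)                  # 1, so that descriptor's nlist is reused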
Parameters diff --git a/deepmd/tf/descriptor/loc_frame.py b/deepmd/tf/descriptor/loc_frame.py index 4891c5a55f..74ba755b4c 100644 --- a/deepmd/tf/descriptor/loc_frame.py +++ b/deepmd/tf/descriptor/loc_frame.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, - Tuple, ) import numpy as np @@ -60,9 +58,9 @@ class DescrptLocFrame(Descriptor): def __init__( self, rcut: float, - sel_a: List[int], - sel_r: List[int], - axis_rule: List[int], + sel_a: list[int], + sel_r: list[int], + axis_rule: list[int], **kwargs, ) -> None: """Constructor.""" @@ -142,7 +140,7 @@ def get_dim_out(self) -> int: """Returns the output dimension of this descriptor.""" return self.ndescrpt - def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + def get_nlist(self) -> tuple[tf.Tensor, tf.Tensor, list[int], list[int]]: """Returns ------- nlist @@ -320,7 +318,7 @@ def get_rot_mat(self) -> tf.Tensor: def prod_force_virial( self, atom_ener: tf.Tensor, natoms: tf.Tensor - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + ) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """Compute force and virial. Parameters @@ -437,9 +435,9 @@ def init_variables( def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters diff --git a/deepmd/tf/descriptor/se.py b/deepmd/tf/descriptor/se.py index f5f54550f2..319a65f6da 100644 --- a/deepmd/tf/descriptor/se.py +++ b/deepmd/tf/descriptor/se.py @@ -1,10 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import re from typing import ( - List, Optional, - Set, - Tuple, ) from deepmd.dpmodel.utils.network import ( @@ -80,7 +77,7 @@ def _identity_tensors(self, suffix: str = "") -> None: self.rij = tf.identity(self.rij, name="o_rij" + suffix) self.nlist = tf.identity(self.nlist, name="o_nlist" + suffix) - def get_tensor_names(self, suffix: str = "") -> Tuple[str]: + def get_tensor_names(self, suffix: str = "") -> tuple[str]: """Get names of tensors. Parameters @@ -90,7 +87,7 @@ def get_tensor_names(self, suffix: str = "") -> Tuple[str]: Returns ------- - Tuple[str] + tuple[str] Names of tensors """ return ( @@ -157,9 +154,9 @@ def precision(self) -> tf.DType: def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters @@ -190,11 +187,11 @@ def serialize_network( ntypes: int, ndim: int, in_dim: int, - neuron: List[int], + neuron: list[int], activation_function: str, resnet_dt: bool, variables: dict, - excluded_types: Set[Tuple[int, int]] = set(), + excluded_types: set[tuple[int, int]] = set(), suffix: str = "", ) -> dict: """Serialize network. 
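`serialize_network` takes `excluded_types` as a `set[tuple[int, int]]`, matching the pair mask earlier in the series. For a symmetric exclusion both orientations of a pair are stored; the helper below is an illustrative sketch, not code from the tree:

    def symmetric_excludes(pairs: list[list[int]]) -> set[tuple[int, int]]:
        out: set[tuple[int, int]] = set()
        for a, b in pairs:
            out.add((a, b))
            out.add((b, a))  # assumption: exclusion is order-independent
        return out

    print(sorted(symmetric_excludes([[0, 1]])))  # [(0, 1), (1, 0)]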
@@ -207,7 +204,7 @@ def serialize_network( The dimension of elements in_dim : int The input dimension - neuron : List[int] + neuron : list[int] The neuron list activation_function : str The activation function @@ -215,7 +212,7 @@ def serialize_network( Whether to use resnet variables : dict The input variables - excluded_types : Set[Tuple[int, int]], optional + excluded_types : set[tuple[int, int]], optional The excluded types suffix : str, optional The suffix of the scope diff --git a/deepmd/tf/descriptor/se_a.py b/deepmd/tf/descriptor/se_a.py index 721e8e71d1..d5a8ed6815 100644 --- a/deepmd/tf/descriptor/se_a.py +++ b/deepmd/tf/descriptor/se_a.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, - Tuple, ) import numpy as np @@ -141,7 +139,7 @@ class DescrptSeA(DescrptSe): Random seed for initializing the network parameters. type_one_side Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. set_davg_zero @@ -154,7 +152,7 @@ class DescrptSeA(DescrptSe): Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed env_protection: float Protection parameter to prevent division by zero errors during environment matrix calculations. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. References @@ -169,21 +167,21 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[int], - neuron: List[int] = [24, 48, 96], + sel: list[int], + neuron: list[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: bool = False, trainable: bool = True, seed: Optional[int] = None, type_one_side: bool = True, - exclude_types: List[List[int]] = [], + exclude_types: list[list[int]] = [], set_davg_zero: bool = False, activation_function: str = "tanh", precision: str = "default", uniform_seed: bool = False, spin: Optional[Spin] = None, tebd_input_mode: str = "concat", - type_map: Optional[List[str]] = None, # to be compat with input + type_map: Optional[list[str]] = None, # to be compat with input env_protection: float = 0.0, # not implement!! **kwargs, ) -> None: @@ -327,7 +325,7 @@ def get_dim_rot_mat_1(self) -> int: """Returns the first dimension of the rotation matrix. The rotation is of shape dim_1 x 3.""" return self.filter_neuron[-1] - def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + def get_nlist(self) -> tuple[tf.Tensor, tf.Tensor, list[int], list[int]]: """Returns neighbor information. Returns @@ -696,7 +694,7 @@ def get_rot_mat(self) -> tf.Tensor: def prod_force_virial( self, atom_ener: tf.Tensor, natoms: tf.Tensor - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + ) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """Compute force and virial. Parameters diff --git a/deepmd/tf/descriptor/se_a_ebd.py b/deepmd/tf/descriptor/se_a_ebd.py index c558cd285e..ae76308e69 100644 --- a/deepmd/tf/descriptor/se_a_ebd.py +++ b/deepmd/tf/descriptor/se_a_ebd.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, ) @@ -65,7 +64,7 @@ class DescrptSeAEbd(DescrptSeA): The activation function in the embedding net. Supported options are {0} precision The precision of the embedding net parameters. 
Supported options are {1} - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. """ @@ -74,8 +73,8 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[int], - neuron: List[int] = [24, 48, 96], + sel: list[int], + neuron: list[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: bool = False, trainable: bool = True, @@ -87,7 +86,7 @@ def __init__( set_davg_zero: bool = False, activation_function: str = "tanh", precision: str = "default", - exclude_types: List[List[int]] = [], + exclude_types: list[list[int]] = [], **kwargs, ) -> None: """Constructor.""" @@ -600,7 +599,7 @@ def _ebd_filter( return result, qmat @property - def input_requirement(self) -> List[DataRequirementItem]: + def input_requirement(self) -> list[DataRequirementItem]: """Return data requirements needed for the model input.""" data_requirement = super().input_requirement if self.numb_aparam > 0: diff --git a/deepmd/tf/descriptor/se_a_ebd_v2.py b/deepmd/tf/descriptor/se_a_ebd_v2.py index 9afa6598d1..af43eedbbc 100644 --- a/deepmd/tf/descriptor/se_a_ebd_v2.py +++ b/deepmd/tf/descriptor/se_a_ebd_v2.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging from typing import ( - List, Optional, ) @@ -31,14 +30,14 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[int], - neuron: List[int] = [24, 48, 96], + sel: list[int], + neuron: list[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: bool = False, trainable: bool = True, seed: Optional[int] = None, type_one_side: bool = True, - exclude_types: List[List[int]] = [], + exclude_types: list[list[int]] = [], set_davg_zero: bool = False, activation_function: str = "tanh", precision: str = "default", diff --git a/deepmd/tf/descriptor/se_a_ef.py b/deepmd/tf/descriptor/se_a_ef.py index 81f4c8955a..9f70464c56 100644 --- a/deepmd/tf/descriptor/se_a_ef.py +++ b/deepmd/tf/descriptor/se_a_ef.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, - Tuple, ) import numpy as np @@ -57,7 +55,7 @@ class DescrptSeAEf(DescrptSe): Random seed for initializing the network parameters. type_one_side Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. set_davg_zero @@ -74,14 +72,14 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[int], - neuron: List[int] = [24, 48, 96], + sel: list[int], + neuron: list[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: bool = False, trainable: bool = True, seed: Optional[int] = None, type_one_side: bool = True, - exclude_types: List[List[int]] = [], + exclude_types: list[list[int]] = [], set_davg_zero: bool = False, activation_function: str = "tanh", precision: str = "default", @@ -144,7 +142,7 @@ def get_rot_mat(self) -> tf.Tensor: """Get rotational matrix.""" return self.qmat - def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + def get_nlist(self) -> tuple[tf.Tensor, tf.Tensor, list[int], list[int]]: """Returns neighbor information. 
Returns @@ -267,7 +265,7 @@ def build( def prod_force_virial( self, atom_ener: tf.Tensor, natoms: tf.Tensor - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + ) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """Compute force and virial. Parameters @@ -305,14 +303,14 @@ def __init__( op, rcut: float, rcut_smth: float, - sel: List[int], - neuron: List[int] = [24, 48, 96], + sel: list[int], + neuron: list[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: bool = False, trainable: bool = True, seed: Optional[int] = None, type_one_side: bool = True, - exclude_types: List[List[int]] = [], + exclude_types: list[list[int]] = [], set_davg_zero: bool = False, activation_function: str = "tanh", precision: str = "default", @@ -586,7 +584,7 @@ def _compute_dstats_sys_smth( return sysr, sysr2, sysa, sysa2, sysn @property - def input_requirement(self) -> List[DataRequirementItem]: + def input_requirement(self) -> list[DataRequirementItem]: """Return data requirements needed for the model input.""" data_requirement = super().input_requirement data_requirement.append( diff --git a/deepmd/tf/descriptor/se_a_mask.py b/deepmd/tf/descriptor/se_a_mask.py index 316a909be1..e12f6a0fff 100644 --- a/deepmd/tf/descriptor/se_a_mask.py +++ b/deepmd/tf/descriptor/se_a_mask.py @@ -2,10 +2,7 @@ import warnings from typing import ( Any, - Dict, - List, Optional, - Tuple, ) import numpy as np @@ -100,7 +97,7 @@ class DescrptSeAMask(DescrptSeA): Random seed for initializing the network parameters. type_one_side Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. activation_function @@ -120,13 +117,13 @@ class DescrptSeAMask(DescrptSeA): def __init__( self, - sel: List[int], - neuron: List[int] = [24, 48, 96], + sel: list[int], + neuron: list[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: bool = False, trainable: bool = True, type_one_side: bool = False, - exclude_types: List[List[int]] = [], + exclude_types: list[list[int]] = [], seed: Optional[int] = None, activation_function: str = "tanh", precision: str = "default", @@ -271,7 +268,7 @@ def build( natoms: tf.Tensor, box_: tf.Tensor, mesh: tf.Tensor, - input_dict: Dict[str, Any], + input_dict: dict[str, Any], reuse: Optional[bool] = None, suffix: str = "", ) -> tf.Tensor: @@ -384,7 +381,7 @@ def prod_force_virial( self, atom_ener: tf.Tensor, natoms: tf.Tensor, - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + ) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """Compute force and virial. Parameters @@ -430,9 +427,9 @@ def prod_force_virial( def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters diff --git a/deepmd/tf/descriptor/se_atten.py b/deepmd/tf/descriptor/se_atten.py index 37bcd7eea0..963e81ecf0 100644 --- a/deepmd/tf/descriptor/se_atten.py +++ b/deepmd/tf/descriptor/se_atten.py @@ -4,10 +4,7 @@ import warnings from typing import ( Any, - List, Optional, - Set, - Tuple, Union, ) @@ -125,7 +122,7 @@ class DescrptSeAtten(DescrptSeA): If 'False', type embeddings of both neighbor and central atoms are considered. If 'True', only type embeddings of neighbor atoms are considered. Default is 'False'. 
- exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. set_davg_zero: bool @@ -162,7 +159,7 @@ class DescrptSeAtten(DescrptSeA): Setting this parameter to `True` is equivalent to setting `tebd_input_mode` to 'strip'. Setting it to `False` is equivalent to setting `tebd_input_mode` to 'concat'. The default value is `None`, which means the `tebd_input_mode` setting will be used instead. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. Raises @@ -175,16 +172,16 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: Union[List[int], int], + sel: Union[list[int], int], ntypes: int, - neuron: List[int] = [25, 50, 100], + neuron: list[int] = [25, 50, 100], axis_neuron: int = 8, resnet_dt: bool = False, trainable: bool = True, seed: Optional[int] = None, type_one_side: bool = True, set_davg_zero: bool = True, - exclude_types: List[List[int]] = [], + exclude_types: list[list[int]] = [], activation_function: str = "tanh", precision: str = "default", uniform_seed: bool = False, @@ -203,7 +200,7 @@ def __init__( concat_output_tebd: bool = True, env_protection: float = 0.0, # not implement!! stripped_type_embedding: Optional[bool] = None, - type_map: Optional[List[str]] = None, # to be compat with input + type_map: Optional[list[str]] = None, # to be compat with input **kwargs, ) -> None: # Ensure compatibility with the deprecated stripped_type_embedding option. @@ -1420,9 +1417,9 @@ def compat_ln_pattern(old_key): def build_type_exclude_mask_mixed( self, - exclude_types: Set[Tuple[int, int]], + exclude_types: set[tuple[int, int]], ntypes: int, - sel: List[int], + sel: list[int], ndescrpt: int, atype: tf.Tensor, shape0: tf.Tensor, @@ -1441,12 +1438,12 @@ def build_type_exclude_mask_mixed( Parameters ---------- - exclude_types : List[Tuple[int, int]] + exclude_types : list[tuple[int, int]] The list of excluded types, e.g. [(0, 1), (1, 0)] means the interaction between type 0 and type 1 is excluded. ntypes : int The number of types. - sel : List[int] + sel : list[int] The list of the number of selected neighbors for each type. ndescrpt : int The number of descriptors for each atom. @@ -1511,9 +1508,9 @@ def explicit_ntypes(self) -> bool: def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters @@ -1646,7 +1643,7 @@ def serialize_network_strip( ntypes: int, ndim: int, in_dim: int, - neuron: List[int], + neuron: list[int], activation_function: str, resnet_dt: bool, variables: dict, @@ -1663,7 +1660,7 @@ def serialize_network_strip( The dimension of elements in_dim : int The input dimension - neuron : List[int] + neuron : list[int] The neuron list activation_function : str The activation function @@ -2055,7 +2052,7 @@ class DescrptDPA1Compat(DescrptSeAtten): attn_mask: bool (Only support False to keep consistent with other backend references.) If mask the diagonal of attention weights - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. 
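The descriptors in these hunks share the `update_sel` classmethod signature, returning the updated configuration together with an optional float (the minimal neighbor distance when statistics were gathered). A hedged sketch of the contract; the no-op body stands in for the real statistics pass:

    from typing import Optional

    def update_sel(local_jdata: dict) -> tuple[dict, Optional[float]]:
        local_jdata = dict(local_jdata)        # never mutate the caller's config
        local_jdata.setdefault("sel", "auto")
        min_nbor_dist: Optional[float] = None  # None: no stats gathered here
        return local_jdata, min_nbor_dist

    print(update_sel({"rcut": 6.0}))  # ({'rcut': 6.0, 'sel': 'auto'}, None)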
env_protection: float @@ -2088,7 +2085,7 @@ class DescrptDPA1Compat(DescrptSeAtten): Whether to use electronic configuration type embedding. use_tebd_bias : bool, Optional Whether to use bias in the type embedding layer. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. spin (Only support None to keep consistent with old implementation.) @@ -2099,9 +2096,9 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: Union[List[int], int], + sel: Union[list[int], int], ntypes: int, - neuron: List[int] = [25, 50, 100], + neuron: list[int] = [25, 50, 100], axis_neuron: int = 8, tebd_dim: int = 8, tebd_input_mode: str = "concat", @@ -2112,7 +2109,7 @@ def __init__( attn_layer: int = 2, attn_dotr: bool = True, attn_mask: bool = False, - exclude_types: List[List[int]] = [], + exclude_types: list[list[int]] = [], env_protection: float = 0.0, set_davg_zero: bool = False, activation_function: str = "tanh", @@ -2126,7 +2123,7 @@ def __init__( concat_output_tebd: bool = True, use_econf_tebd: bool = False, use_tebd_bias: bool = False, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, spin: Optional[Any] = None, # consistent with argcheck, not used though seed: Optional[int] = None, diff --git a/deepmd/tf/descriptor/se_atten_v2.py b/deepmd/tf/descriptor/se_atten_v2.py index a4fdf24a55..dc71f87523 100644 --- a/deepmd/tf/descriptor/se_atten_v2.py +++ b/deepmd/tf/descriptor/se_atten_v2.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging from typing import ( - List, Optional, ) @@ -44,7 +43,7 @@ class DescrptSeAttenV2(DescrptSeAtten): Random seed for initializing the network parameters. type_one_side Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. set_davg_zero @@ -71,14 +70,14 @@ def __init__( rcut_smth: float, sel: int, ntypes: int, - neuron: List[int] = [24, 48, 96], + neuron: list[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: bool = False, trainable: bool = True, seed: Optional[int] = None, type_one_side: bool = True, set_davg_zero: bool = False, - exclude_types: List[List[int]] = [], + exclude_types: list[list[int]] = [], activation_function: str = "tanh", precision: str = "default", uniform_seed: bool = False, diff --git a/deepmd/tf/descriptor/se_r.py b/deepmd/tf/descriptor/se_r.py index cd99651314..8096ef7c96 100644 --- a/deepmd/tf/descriptor/se_r.py +++ b/deepmd/tf/descriptor/se_r.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, - Tuple, ) import numpy as np @@ -76,7 +74,7 @@ class DescrptSeR(DescrptSe): Random seed for initializing the network parameters. type_one_side Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. activation_function @@ -85,7 +83,7 @@ class DescrptSeR(DescrptSe): The precision of the embedding net parameters. 
Supported options are |PRECISION| uniform_seed Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. """ @@ -93,19 +91,19 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[int], - neuron: List[int] = [24, 48, 96], + sel: list[int], + neuron: list[int] = [24, 48, 96], resnet_dt: bool = False, trainable: bool = True, seed: Optional[int] = None, type_one_side: bool = True, - exclude_types: List[List[int]] = [], + exclude_types: list[list[int]] = [], set_davg_zero: bool = False, activation_function: str = "tanh", precision: str = "default", uniform_seed: bool = False, spin: Optional[Spin] = None, - type_map: Optional[List[str]] = None, # to be compat with input + type_map: Optional[list[str]] = None, # to be compat with input env_protection: float = 0.0, # not implement!! **kwargs, ) -> None: @@ -488,7 +486,7 @@ def build( def prod_force_virial( self, atom_ener: tf.Tensor, natoms: tf.Tensor - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + ) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """Compute force and virial. Parameters diff --git a/deepmd/tf/descriptor/se_t.py b/deepmd/tf/descriptor/se_t.py index d5f5e2ab8a..f96b1ba778 100644 --- a/deepmd/tf/descriptor/se_t.py +++ b/deepmd/tf/descriptor/se_t.py @@ -1,10 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import re from typing import ( - List, Optional, - Set, - Tuple, ) import numpy as np @@ -90,7 +87,7 @@ class DescrptSeT(DescrptSe): Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed env_protection: float Protection parameter to prevent division by zero errors during environment matrix calculations. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. """ @@ -98,17 +95,17 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[int], - neuron: List[int] = [24, 48, 96], + sel: list[int], + neuron: list[int] = [24, 48, 96], resnet_dt: bool = False, trainable: bool = True, seed: Optional[int] = None, - exclude_types: List[List[int]] = [], + exclude_types: list[list[int]] = [], set_davg_zero: bool = False, activation_function: str = "tanh", precision: str = "default", uniform_seed: bool = False, - type_map: Optional[List[str]] = None, # to be compat with input + type_map: Optional[list[str]] = None, # to be compat with input env_protection: float = 0.0, # not implement!! **kwargs, ) -> None: @@ -212,7 +209,7 @@ def get_dim_out(self) -> int: """Returns the output dimension of this descriptor.""" return self.filter_neuron[-1] - def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + def get_nlist(self) -> tuple[tf.Tensor, tf.Tensor, list[int], list[int]]: """Returns neighbor information. Returns @@ -495,7 +492,7 @@ def build( def prod_force_virial( self, atom_ener: tf.Tensor, natoms: tf.Tensor - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + ) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """Compute force and virial. Parameters @@ -724,11 +721,11 @@ def serialize_network( ntypes: int, ndim: int, in_dim: int, - neuron: List[int], + neuron: list[int], activation_function: str, resnet_dt: bool, variables: dict, - excluded_types: Set[Tuple[int, int]] = set(), + excluded_types: set[tuple[int, int]] = set(), suffix: str = "", ) -> dict: """Serialize network. 
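`main` above accepts either raw argv or a pre-parsed `Namespace`, which keeps in-process invocation cheap compared with spawning a subprocess. A minimal sketch of that dispatch, with an illustrative flag:

    import argparse
    from typing import Optional, Union

    def main(args: Optional[Union[list[str], argparse.Namespace]] = None):
        if not isinstance(args, argparse.Namespace):
            parser = argparse.ArgumentParser()
            parser.add_argument("-l", "--log-level", default="INFO")
            args = parser.parse_args(args)  # None falls back to sys.argv[1:]
        print(args.log_level)

    main(["-l", "DEBUG"])  # DEBUG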
@@ -741,7 +738,7 @@ def serialize_network( The dimension of elements in_dim : int The input dimension - neuron : List[int] + neuron : list[int] The neuron list activation_function : str The activation function @@ -749,7 +746,7 @@ def serialize_network( Whether to use resnet variables : dict The input variables - excluded_types : Set[Tuple[int, int]], optional + excluded_types : set[tuple[int, int]], optional The excluded types suffix : str, optional The suffix of the scope diff --git a/deepmd/tf/entrypoints/freeze.py b/deepmd/tf/entrypoints/freeze.py index 787d26e9a4..cee6615abc 100755 --- a/deepmd/tf/entrypoints/freeze.py +++ b/deepmd/tf/entrypoints/freeze.py @@ -15,7 +15,6 @@ Path, ) from typing import ( - List, Optional, Union, ) @@ -80,7 +79,7 @@ def _make_node_names( modifier_type: Optional[str] = None, out_suffix: str = "", node_names: Optional[Union[str, list]] = None, -) -> List[str]: +) -> list[str]: """Get node names based on model type. Parameters @@ -96,7 +95,7 @@ def _make_node_names( Returns ------- - List[str] + list[str] list with all node names to freeze Raises @@ -238,7 +237,7 @@ def freeze_graph( The default session. input_graph : tf.GraphDef The input graph_def stored from the checkpoint. - input_node : List[str] + input_node : list[str] The expected nodes to freeze. freeze_type : str The model type to freeze. diff --git a/deepmd/tf/entrypoints/ipi.py b/deepmd/tf/entrypoints/ipi.py index 1631a35c2e..1183375119 100644 --- a/deepmd/tf/entrypoints/ipi.py +++ b/deepmd/tf/entrypoints/ipi.py @@ -4,9 +4,6 @@ import os import subprocess import sys -from typing import ( - List, -) from deepmd.tf.lmp import ( get_op_dir, @@ -15,7 +12,7 @@ ROOT_DIR = get_op_dir() -def _program(name: str, args: List[str]): +def _program(name: str, args: list[str]): """Execuate a program. Parameters diff --git a/deepmd/tf/entrypoints/main.py b/deepmd/tf/entrypoints/main.py index 493e5b7aa4..d9dff4eb4a 100644 --- a/deepmd/tf/entrypoints/main.py +++ b/deepmd/tf/entrypoints/main.py @@ -6,7 +6,6 @@ Path, ) from typing import ( - List, Optional, Union, ) @@ -39,12 +38,12 @@ __all__ = ["main", "parse_args", "get_ll", "main_parser"] -def main(args: Optional[Union[List[str], argparse.Namespace]] = None): +def main(args: Optional[Union[list[str], argparse.Namespace]] = None): """DeePMD-Kit entry point. Parameters ---------- - args : List[str] or argparse.Namespace, optional + args : list[str] or argparse.Namespace, optional list of command line arguments, used to avoid calling from the subprocess, as it is quite slow to import tensorflow; if Namespace is given, it will be used directly diff --git a/deepmd/tf/entrypoints/train.py b/deepmd/tf/entrypoints/train.py index 12a3c59d70..66622b3182 100755 --- a/deepmd/tf/entrypoints/train.py +++ b/deepmd/tf/entrypoints/train.py @@ -9,7 +9,6 @@ import time from typing import ( Any, - Dict, Optional, ) @@ -186,12 +185,12 @@ def train( _do_work(jdata, run_opt, is_compress) -def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: bool = False): +def _do_work(jdata: dict[str, Any], run_opt: RunOptions, is_compress: bool = False): """Run serial model training. 
    Parameters
     ----------
-    jdata : Dict[str, Any]
+    jdata : dict[str, Any]
         arguments read from json/yaml control file
     run_opt : RunOptions
         object with run configuration
diff --git a/deepmd/tf/entrypoints/transfer.py b/deepmd/tf/entrypoints/transfer.py
index 7c90c77de8..b93caf3cac 100644
--- a/deepmd/tf/entrypoints/transfer.py
+++ b/deepmd/tf/entrypoints/transfer.py
@@ -3,10 +3,11 @@
 import logging
 import re
+from collections.abc import (
+    Sequence,
+)
 from typing import (
-    Dict,
     Optional,
-    Sequence,
 )
 
 import numpy as np
@@ -234,7 +235,7 @@ def check_dim(raw_graph_node: tf.Tensor, old_graph_node: tf.Tensor, node_name: s
     )
 
 
-def load_transform_node(graph: tf.Graph) -> Dict[str, tf.Tensor]:
+def load_transform_node(graph: tf.Graph) -> dict[str, tf.Tensor]:
     """Load nodes and their names from graph to dict.
 
     Parameters
@@ -244,7 +245,7 @@ def load_transform_node(graph: tf.Graph) -> Dict[str, tf.Tensor]:
 
     Returns
     -------
-    Dict[str, tf.Tensor]
+    dict[str, tf.Tensor]
         mapping on graph node names and corresponding tensors
     """
     transform_node_pattern = re.compile(TRANSFER_PATTERN)
diff --git a/deepmd/tf/fit/dipole.py b/deepmd/tf/fit/dipole.py
index fd37b63720..0e5b860fa2 100644
--- a/deepmd/tf/fit/dipole.py
+++ b/deepmd/tf/fit/dipole.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 from typing import (
-    List,
     Optional,
 )
 
@@ -47,12 +46,12 @@ class DipoleFittingSeA(Fitting):
         The dimension of the descrptor :math:`\mathcal{D}`
     embedding_width
         The rotation matrix dimension of the descrptor :math:`\mathcal{D}`
-    neuron : List[int]
+    neuron : list[int]
         Number of neurons in each hidden layer of the fitting net
     resnet_dt : bool
         Time-step `dt` in the resnet construction:
         y = x + dt * \phi (Wx + b)
-    sel_type : List[int]
+    sel_type : list[int]
         The atom types selected to have an atomic dipole prediction. If is None, all atoms are selected.
     seed : int
         Random seed for initializing the network parameters.
@@ -65,7 +64,7 @@ class DipoleFittingSeA(Fitting):
     mixed_types : bool
         If true, use a uniform fitting net for all atom types, otherwise use
         different fitting nets for different atom types.
-    type_map: List[str], Optional
+    type_map: list[str], Optional
         A list of strings. Give the name to each type of atoms.
     """
 
@@ -74,15 +73,15 @@ def __init__(
         self,
         ntypes: int,
         dim_descrpt: int,
         embedding_width: int,
-        neuron: List[int] = [120, 120, 120],
+        neuron: list[int] = [120, 120, 120],
         resnet_dt: bool = True,
-        sel_type: Optional[List[int]] = None,
+        sel_type: Optional[list[int]] = None,
         seed: Optional[int] = None,
         activation_function: str = "tanh",
         precision: str = "default",
         uniform_seed: bool = False,
         mixed_types: bool = False,
-        type_map: Optional[List[str]] = None,  # to be compat with input
+        type_map: Optional[list[str]] = None,  # to be compat with input
         **kwargs,
     ) -> None:
         """Constructor."""
diff --git a/deepmd/tf/fit/dos.py b/deepmd/tf/fit/dos.py
index 382d11f45e..ebc347c2fd 100644
--- a/deepmd/tf/fit/dos.py
+++ b/deepmd/tf/fit/dos.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import logging
 from typing import (
-    List,
     Optional,
 )
 
@@ -100,7 +99,7 @@ class DOSFitting(Fitting):
     mixed_types : bool
         If true, use a uniform fitting net for all atom types, otherwise use
         different fitting nets for different atom types.
-    type_map: List[str], Optional
+    type_map: list[str], Optional
         A list of strings. Give the name to each type of atoms.
""" @@ -108,21 +107,21 @@ def __init__( self, ntypes: int, dim_descrpt: int, - neuron: List[int] = [120, 120, 120], + neuron: list[int] = [120, 120, 120], resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, numb_dos: int = 300, rcond: Optional[float] = None, - trainable: Optional[List[bool]] = None, + trainable: Optional[list[bool]] = None, seed: Optional[int] = None, activation_function: str = "tanh", precision: str = "default", uniform_seed: bool = False, - layer_name: Optional[List[Optional[str]]] = None, + layer_name: Optional[list[Optional[str]]] = None, use_aparam_as_mask: bool = False, mixed_types: bool = False, - type_map: Optional[List[str]] = None, # to be compat with input + type_map: Optional[list[str]] = None, # to be compat with input **kwargs, ) -> None: """Constructor.""" @@ -738,7 +737,7 @@ def serialize(self, suffix: str = "") -> dict: return data @property - def input_requirement(self) -> List[DataRequirementItem]: + def input_requirement(self) -> list[DataRequirementItem]: """Return data requirements needed for the model input.""" data_requirement = [] if self.numb_fparam > 0: diff --git a/deepmd/tf/fit/ener.py b/deepmd/tf/fit/ener.py index c2aef0610a..b01574cf87 100644 --- a/deepmd/tf/fit/ener.py +++ b/deepmd/tf/fit/ener.py @@ -2,7 +2,6 @@ import logging from typing import ( TYPE_CHECKING, - List, Optional, ) @@ -149,7 +148,7 @@ class EnerFitting(Fitting): mixed_types : bool If true, use a uniform fitting net for all atom types, otherwise use different fitting nets for different atom types. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. """ @@ -157,23 +156,23 @@ def __init__( self, ntypes: int, dim_descrpt: int, - neuron: List[int] = [120, 120, 120], + neuron: list[int] = [120, 120, 120], resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, rcond: Optional[float] = None, tot_ener_zero: bool = False, - trainable: Optional[List[bool]] = None, + trainable: Optional[list[bool]] = None, seed: Optional[int] = None, - atom_ener: List[float] = [], + atom_ener: list[float] = [], activation_function: str = "tanh", precision: str = "default", uniform_seed: bool = False, - layer_name: Optional[List[Optional[str]]] = None, + layer_name: Optional[list[Optional[str]]] = None, use_aparam_as_mask: bool = False, spin: Optional[Spin] = None, mixed_types: bool = False, - type_map: Optional[List[str]] = None, # to be compat with input + type_map: Optional[list[str]] = None, # to be compat with input **kwargs, ) -> None: """Constructor.""" @@ -942,7 +941,7 @@ def serialize(self, suffix: str = "") -> dict: return data @property - def input_requirement(self) -> List[DataRequirementItem]: + def input_requirement(self) -> list[DataRequirementItem]: """Return data requirements needed for the model input.""" data_requirement = [] if self.numb_fparam > 0: @@ -963,8 +962,8 @@ def input_requirement(self) -> List[DataRequirementItem]: def change_energy_bias_lower( data: DeepmdDataSystem, dp: DeepEval, - origin_type_map: List[str], - full_type_map: List[str], + origin_type_map: list[str], + full_type_map: list[str], bias_atom_e: np.ndarray, bias_adjust_mode="change-by-statistic", ntest=10, diff --git a/deepmd/tf/fit/fitting.py b/deepmd/tf/fit/fitting.py index 9190261187..f159de1628 100644 --- a/deepmd/tf/fit/fitting.py +++ b/deepmd/tf/fit/fitting.py @@ -4,7 +4,6 @@ abstractmethod, ) from typing import ( - List, Optional, ) @@ -131,7 +130,7 @@ def serialize_network( ntypes: int, ndim: int, in_dim: 
int, - neuron: List[int], + neuron: list[int], activation_function: str, resnet_dt: bool, variables: dict, @@ -148,7 +147,7 @@ def serialize_network( The dimension of elements in_dim : int The input dimension - neuron : List[int] + neuron : list[int] The neuron list activation_function : str The activation function @@ -257,6 +256,6 @@ def deserialize_network(cls, data: dict, suffix: str = "") -> dict: return fitting_net_variables @property - def input_requirement(self) -> List[DataRequirementItem]: + def input_requirement(self) -> list[DataRequirementItem]: """Return data requirements needed for the model input.""" return [] diff --git a/deepmd/tf/fit/polar.py b/deepmd/tf/fit/polar.py index 14902a4d96..cc79e3402a 100644 --- a/deepmd/tf/fit/polar.py +++ b/deepmd/tf/fit/polar.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import warnings from typing import ( - List, Optional, ) @@ -52,18 +51,18 @@ class PolarFittingSeA(Fitting): The dimension of the descrptor :math:`\mathcal{D}` embedding_width The rotation matrix dimension of the descrptor :math:`\mathcal{D}` - neuron : List[int] + neuron : list[int] Number of neurons in each hidden layer of the fitting net resnet_dt : bool Time-step `dt` in the resnet construction: y = x + dt * \phi (Wx + b) - sel_type : List[int] + sel_type : list[int] The atom types selected to have an atomic polarizability prediction. If is None, all atoms are selected. fit_diag : bool Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix. - scale : List[float] + scale : list[float] The output of the fitting net (polarizability matrix) for type i atom will be scaled by scale[i] - diag_shift : List[float] + diag_shift : list[float] The diagonal part of the polarizability matrix of type i will be shifted by diag_shift[i]. The shift operation is carried out after scale. seed : int Random seed for initializing the network parameters. @@ -76,7 +75,7 @@ class PolarFittingSeA(Fitting): mixed_types : bool If true, use a uniform fitting net for all atom types, otherwise use different fitting nets for different atom types. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. 
""" @@ -85,19 +84,19 @@ def __init__( ntypes: int, dim_descrpt: int, embedding_width: int, - neuron: List[int] = [120, 120, 120], + neuron: list[int] = [120, 120, 120], resnet_dt: bool = True, - sel_type: Optional[List[int]] = None, + sel_type: Optional[list[int]] = None, fit_diag: bool = True, - scale: Optional[List[float]] = None, + scale: Optional[list[float]] = None, shift_diag: bool = True, # YWolfeee: will support the user to decide whether to use this function - # diag_shift : List[float] = None, YWolfeee: will not support the user to assign a shift + # diag_shift : list[float] = None, YWolfeee: will not support the user to assign a shift seed: Optional[int] = None, activation_function: str = "tanh", precision: str = "default", uniform_seed: bool = False, mixed_types: bool = False, - type_map: Optional[List[str]] = None, # to be compat with input + type_map: Optional[list[str]] = None, # to be compat with input **kwargs, ) -> None: """Constructor.""" @@ -153,7 +152,7 @@ def __init__( self.mixed_types = mixed_types self.type_map = type_map - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get selected atom types.""" return self.sel_type @@ -620,18 +619,18 @@ class GlobalPolarFittingSeA: ---------- descrpt : tf.Tensor The descrptor - neuron : List[int] + neuron : list[int] Number of neurons in each hidden layer of the fitting net resnet_dt : bool Time-step `dt` in the resnet construction: y = x + dt * \phi (Wx + b) - sel_type : List[int] + sel_type : list[int] The atom types selected to have an atomic polarizability prediction fit_diag : bool Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix. - scale : List[float] + scale : list[float] The output of the fitting net (polarizability matrix) for type i atom will be scaled by scale[i] - diag_shift : List[float] + diag_shift : list[float] The diagonal part of the polarizability matrix of type i will be shifted by diag_shift[i]. The shift operation is carried out after scale. seed : int Random seed for initializing the network parameters. @@ -644,12 +643,12 @@ class GlobalPolarFittingSeA: def __init__( self, descrpt: tf.Tensor, - neuron: List[int] = [120, 120, 120], + neuron: list[int] = [120, 120, 120], resnet_dt: bool = True, - sel_type: Optional[List[int]] = None, + sel_type: Optional[list[int]] = None, fit_diag: bool = True, - scale: Optional[List[float]] = None, - diag_shift: Optional[List[float]] = None, + scale: Optional[list[float]] = None, + diag_shift: Optional[list[float]] = None, seed: Optional[int] = None, activation_function: str = "tanh", precision: str = "default", diff --git a/deepmd/tf/infer/data_modifier.py b/deepmd/tf/infer/data_modifier.py index 08966c3498..ddb1af68d7 100644 --- a/deepmd/tf/infer/data_modifier.py +++ b/deepmd/tf/infer/data_modifier.py @@ -1,9 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import os -from typing import ( - List, - Tuple, -) import numpy as np @@ -47,8 +43,8 @@ class DipoleChargeModifier(DeepDipole): def __init__( self, model_name: str, - model_charge_map: List[float], - sys_charge_map: List[float], + model_charge_map: list[float], + sys_charge_map: list[float], ewald_h: float = 1, ewald_beta: float = 1, ) -> None: @@ -219,7 +215,7 @@ def eval( box: np.ndarray, atype: np.ndarray, eval_fv: bool = True, - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """Evaluate the modification. 
    Parameters
diff --git a/deepmd/tf/infer/deep_eval.py b/deepmd/tf/infer/deep_eval.py
index 0f317bd21f..33725007f3 100644
--- a/deepmd/tf/infer/deep_eval.py
+++ b/deepmd/tf/infer/deep_eval.py
@@ -1,17 +1,13 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import json
 from functools import (
-    lru_cache,
+    cache,
 )
 from typing import (
     TYPE_CHECKING,
     Any,
     Callable,
-    Dict,
-    List,
     Optional,
-    Tuple,
-    Type,
     Union,
 )
 
@@ -268,8 +264,8 @@ def _init_attr(self):
         self.modifier_type = None
 
     @property
-    @lru_cache(maxsize=None)
-    def model_type(self) -> Type["DeepEvalWrapper"]:
+    @cache
+    def model_type(self) -> type["DeepEvalWrapper"]:
         """Get type of model.
 
         :type:str
@@ -293,7 +289,7 @@ def model_type(self) -> Type["DeepEvalWrapper"]:
             raise RuntimeError(f"unknown model type {model_type}")
 
     @property
-    @lru_cache(maxsize=None)
+    @cache
     def model_version(self) -> str:
         """Get version of model.
 
@@ -312,7 +308,7 @@ def model_version(self) -> str:
             return mt.decode("utf-8")
 
     @property
-    @lru_cache(maxsize=None)
+    @cache
     def sess(self) -> tf.Session:
         """Get TF session."""
         # start a tf session associated to the graph
@@ -398,7 +394,7 @@ def _load_graph(
     def sort_input(
         coord: np.ndarray,
         atom_type: np.ndarray,
-        sel_atoms: Optional[List[int]] = None,
+        sel_atoms: Optional[list[int]] = None,
     ):
         """Sort atoms in the system according to their types.
 
@@ -451,7 +447,7 @@ def sort_input(
             return coord, atom_type, idx_map, atom_type, idx_map
 
     @staticmethod
-    def reverse_map(vec: np.ndarray, imap: List[int]) -> np.ndarray:
+    def reverse_map(vec: np.ndarray, imap: list[int]) -> np.ndarray:
         """Reverse mapping of a vector according to the index map.
 
         Parameters
@@ -635,7 +631,7 @@ def get_rcut(self) -> float:
         """Get the cut-off radius of this model."""
         return self.rcut
 
-    def get_type_map(self) -> List[str]:
+    def get_type_map(self) -> list[str]:
         """Get the type map (element name of the atom types) of this model."""
         return self.tmap
 
@@ -687,8 +683,8 @@ def eval_func(*args, **kwargs):
     def _get_natoms_and_nframes(
         self,
         coords: np.ndarray,
-        atom_types: Union[List[int], np.ndarray],
-    ) -> Tuple[int, int]:
+        atom_types: Union[list[int], np.ndarray],
+    ) -> tuple[int, int]:
         natoms = len(atom_types[0])
         if natoms == 0:
             assert coords.size == 0
@@ -707,7 +703,7 @@ def eval(
         aparam: Optional[np.ndarray] = None,
         efield: Optional[np.ndarray] = None,
         **kwargs: Any,
-    ) -> Dict[str, np.ndarray]:
+    ) -> dict[str, np.ndarray]:
         """Evaluate the energy, force and virial by using this DP.
 
         Parameters
@@ -1197,7 +1193,7 @@ def __init__(
         self.neighbor_list = neighbor_list
 
     @property
-    @lru_cache(maxsize=None)
+    @cache
     def model_type(self) -> str:
         """Get type of model.
 
@@ -1208,7 +1204,7 @@ def model_type(self) -> str:
         return mt.decode("utf-8")
 
     @property
-    @lru_cache(maxsize=None)
+    @cache
     def model_version(self) -> str:
         """Get version of model.
 
@@ -1227,7 +1223,7 @@ def model_version(self) -> str:
             return mt.decode("utf-8")
 
     @property
-    @lru_cache(maxsize=None)
+    @cache
     def sess(self) -> tf.Session:
         """Get TF session."""
         # start a tf session associated to the graph
@@ -1319,7 +1315,7 @@ def _load_graph(
     def sort_input(
         coord: np.ndarray,
         atom_type: np.ndarray,
-        sel_atoms: Optional[List[int]] = None,
+        sel_atoms: Optional[list[int]] = None,
         mixed_type: bool = False,
     ):
         """Sort atoms in the system according to their types.
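`functools.cache` (Python ≥ 3.9) is documented as equivalent to `lru_cache(maxsize=None)`, so the swaps above preserve behavior. A small sketch of the `@property` + `@cache` stacking on a hypothetical class (not the real `DeepEval`); one caveat worth knowing is that the cache is keyed on `self`, so it keeps a reference to every instance it has seen:

```python
from functools import cache


class ModelHeader:
    """Hypothetical stand-in for a class caching a decoded attribute."""

    def __init__(self, raw: bytes) -> None:
        self._raw = raw

    @property
    @cache
    def model_version(self) -> str:
        # Runs once per instance; later accesses return the cached string.
        return self._raw.decode("utf-8")


header = ModelHeader(b"2.0")
assert header.model_version == "2.0"
```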
@@ -1382,7 +1378,7 @@ def sort_input( return coord, atom_type, idx_map @staticmethod - def reverse_map(vec: np.ndarray, imap: List[int]) -> np.ndarray: + def reverse_map(vec: np.ndarray, imap: list[int]) -> np.ndarray: """Reverse mapping of a vector according to the index map. Parameters diff --git a/deepmd/tf/infer/deep_tensor.py b/deepmd/tf/infer/deep_tensor.py index b0f2f244e1..a20bbfe513 100644 --- a/deepmd/tf/infer/deep_tensor.py +++ b/deepmd/tf/infer/deep_tensor.py @@ -2,10 +2,7 @@ from typing import ( TYPE_CHECKING, ClassVar, - Dict, - List, Optional, - Tuple, ) import numpy as np @@ -41,7 +38,7 @@ class DeepTensor(DeepEval): The neighbor list object. If None, then build the native neighbor list. """ - tensors: ClassVar[Dict[str, str]] = { + tensors: ClassVar[dict[str, str]] = { # descriptor attrs "t_ntypes": "descrpt_attr/ntypes:0", "t_rcut": "descrpt_attr/rcut:0", @@ -127,11 +124,11 @@ def get_rcut(self) -> float: """Get the cut-off radius of this model.""" return self.rcut - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map (element name of the atom types) of this model.""" return self.tmap - def get_sel_type(self) -> List[int]: + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model.""" return self.tselt @@ -147,7 +144,7 @@ def eval( self, coords: np.ndarray, cells: Optional[np.ndarray], - atom_types: List[int], + atom_types: list[int], atomic: bool = True, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, @@ -277,13 +274,13 @@ def eval_full( self, coords: np.ndarray, cells: Optional[np.ndarray], - atom_types: List[int], + atom_types: list[int], atomic: bool = False, fparam: Optional[np.array] = None, aparam: Optional[np.array] = None, efield: Optional[np.array] = None, mixed_type: bool = False, - ) -> Tuple[np.ndarray, ...]: + ) -> tuple[np.ndarray, ...]: """Evaluate the model with interface similar to the energy model. Will return global tensor, component-wise force and virial and optionally atomic tensor and atomic virial. diff --git a/deepmd/tf/infer/ewald_recp.py b/deepmd/tf/infer/ewald_recp.py index 110188c34f..f4b7d86588 100644 --- a/deepmd/tf/infer/ewald_recp.py +++ b/deepmd/tf/infer/ewald_recp.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Tuple, -) import numpy as np @@ -54,7 +51,7 @@ def __init__(self, hh, beta): def eval( self, coord: np.ndarray, charge: np.ndarray, box: np.ndarray - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """Evaluate. Parameters diff --git a/deepmd/tf/lmp.py b/deepmd/tf/lmp.py index b2e47308ed..f3679847fc 100644 --- a/deepmd/tf/lmp.py +++ b/deepmd/tf/lmp.py @@ -10,7 +10,6 @@ Path, ) from typing import ( - List, Optional, ) @@ -32,12 +31,12 @@ find_libpython = None -def get_env(paths: List[Optional[str]]) -> str: +def get_env(paths: list[Optional[str]]) -> str: """Get the environment variable from given paths.""" return ":".join(p for p in paths if p is not None) -def get_library_path(module: str, filename: str) -> List[str]: +def get_library_path(module: str, filename: str) -> list[str]: """Get library path from a module. 
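As a side note, the `get_env` helper changed just above simply drops `None` entries before joining with `:`. A hedged usage sketch, assuming an installed deepmd package; the paths themselves are placeholders:

```python
import os

from deepmd.tf.lmp import get_env

# Compose an LD_LIBRARY_PATH-style value, skipping entries that are None
# (e.g. an environment variable that happens to be unset).
print(get_env([os.environ.get("LD_LIBRARY_PATH"), "/opt/deepmd/lib", None]))
```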
Parameters diff --git a/deepmd/tf/loss/dos.py b/deepmd/tf/loss/dos.py index 385d2484a8..0b8efe26e0 100644 --- a/deepmd/tf/loss/dos.py +++ b/deepmd/tf/loss/dos.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - List, -) import numpy as np @@ -211,7 +208,7 @@ def eval(self, sess, feed_dict, natoms): return results @property - def label_requirement(self) -> List[DataRequirementItem]: + def label_requirement(self) -> list[DataRequirementItem]: """Return data label requirements needed for this loss calculation.""" data_requirements = [] # data required diff --git a/deepmd/tf/loss/ener.py b/deepmd/tf/loss/ener.py index 7ecb185818..337046836b 100644 --- a/deepmd/tf/loss/ener.py +++ b/deepmd/tf/loss/ener.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, ) @@ -356,7 +355,7 @@ def eval(self, sess, feed_dict, natoms): return results @property - def label_requirement(self) -> List[DataRequirementItem]: + def label_requirement(self) -> list[DataRequirementItem]: """Return data label requirements needed for this loss calculation.""" data_requirements = [] # data required @@ -726,7 +725,7 @@ def print_on_training( return print_str @property - def label_requirement(self) -> List[DataRequirementItem]: + def label_requirement(self) -> list[DataRequirementItem]: """Return data label requirements needed for this loss calculation.""" data_requirements = [] # data required @@ -872,7 +871,7 @@ def eval(self, sess, feed_dict, natoms): return results @property - def label_requirement(self) -> List[DataRequirementItem]: + def label_requirement(self) -> list[DataRequirementItem]: """Return data label requirements needed for this loss calculation.""" data_requirements = [] # data required diff --git a/deepmd/tf/loss/loss.py b/deepmd/tf/loss/loss.py index ca90c2eb64..351da7b748 100644 --- a/deepmd/tf/loss/loss.py +++ b/deepmd/tf/loss/loss.py @@ -3,11 +3,6 @@ ABCMeta, abstractmethod, ) -from typing import ( - Dict, - List, - Tuple, -) import numpy as np @@ -27,10 +22,10 @@ def build( self, learning_rate: tf.Tensor, natoms: tf.Tensor, - model_dict: Dict[str, tf.Tensor], - label_dict: Dict[str, tf.Tensor], + model_dict: dict[str, tf.Tensor], + label_dict: dict[str, tf.Tensor], suffix: str, - ) -> Tuple[tf.Tensor, Dict[str, tf.Tensor]]: + ) -> tuple[tf.Tensor, dict[str, tf.Tensor]]: """Build the loss function graph. Parameters @@ -58,7 +53,7 @@ def build( def eval( self, sess: tf.Session, - feed_dict: Dict[tf.placeholder, tf.Tensor], + feed_dict: dict[tf.placeholder, tf.Tensor], natoms: tf.Tensor, ) -> dict: """Eval the loss function. 
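The `Loss.build` signature above now returns `tuple[tf.Tensor, dict[str, tf.Tensor]]`. A toy numpy sketch of an interface with that shape (not the real TF graph-building API), just to show the builtin generics on a loss-style return value:

```python
import numpy as np


def build_loss(
    model_dict: dict[str, np.ndarray], label_dict: dict[str, np.ndarray]
) -> tuple[np.float64, dict[str, np.float64]]:
    """Toy loss: L2 on energies, plus a dict of monitored quantities."""
    diff = model_dict["energy"] - label_dict["energy"]
    l2 = np.mean(diff**2)
    return l2, {"rmse_e": np.sqrt(l2)}


loss, more = build_loss({"energy": np.ones(3)}, {"energy": np.zeros(3)})
assert loss == 1.0 and more["rmse_e"] == 1.0
```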
@@ -98,5 +93,5 @@ def display_if_exist(loss: tf.Tensor, find_property: float) -> tf.Tensor: @property @abstractmethod - def label_requirement(self) -> List[DataRequirementItem]: + def label_requirement(self) -> list[DataRequirementItem]: """Return data label requirements needed for this loss calculation.""" diff --git a/deepmd/tf/loss/tensor.py b/deepmd/tf/loss/tensor.py index 4a70ae2a96..a5bcbbe025 100644 --- a/deepmd/tf/loss/tensor.py +++ b/deepmd/tf/loss/tensor.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - List, -) import numpy as np @@ -142,7 +139,7 @@ def eval(self, sess, feed_dict, natoms): return results @property - def label_requirement(self) -> List[DataRequirementItem]: + def label_requirement(self) -> list[DataRequirementItem]: """Return data label requirements needed for this loss calculation.""" data_requirements = [] # data required diff --git a/deepmd/tf/model/dos.py b/deepmd/tf/model/dos.py index 61809eff30..7ab068da63 100644 --- a/deepmd/tf/model/dos.py +++ b/deepmd/tf/model/dos.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, Union, ) @@ -51,7 +50,7 @@ def __init__( descriptor: dict, fitting_net: dict, type_embedding: Optional[Union[dict, TypeEmbedNet]] = None, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, data_stat_nbatch: int = 10, data_stat_protect: float = 1e-2, **kwargs, diff --git a/deepmd/tf/model/ener.py b/deepmd/tf/model/ener.py index 66aaff8189..b21c920d9c 100644 --- a/deepmd/tf/model/ener.py +++ b/deepmd/tf/model/ener.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, Union, ) @@ -77,7 +76,7 @@ def __init__( descriptor: dict, fitting_net: dict, type_embedding: Optional[Union[dict, TypeEmbedNet]] = None, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, data_stat_nbatch: int = 10, data_stat_protect: float = 1e-2, use_srtab: Optional[str] = None, diff --git a/deepmd/tf/model/frozen.py b/deepmd/tf/model/frozen.py index 3e296c00f2..05700dc64e 100644 --- a/deepmd/tf/model/frozen.py +++ b/deepmd/tf/model/frozen.py @@ -6,9 +6,7 @@ Enum, ) from typing import ( - List, Optional, - Tuple, Union, ) @@ -244,9 +242,9 @@ def get_type_map(self) -> list: def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters @@ -285,7 +283,7 @@ def deserialize(cls, data: dict, suffix: str = ""): raise RuntimeError("Should not touch here.") @property - def input_requirement(self) -> List[DataRequirementItem]: + def input_requirement(self) -> list[DataRequirementItem]: """Return data requirements needed for the model input.""" data_requirement = [] numb_fparam = self.model.get_dim_fparam() diff --git a/deepmd/tf/model/linear.py b/deepmd/tf/model/linear.py index 1bd1644e54..4c75c2a1d5 100644 --- a/deepmd/tf/model/linear.py +++ b/deepmd/tf/model/linear.py @@ -8,9 +8,7 @@ reduce, ) from typing import ( - List, Optional, - Tuple, Union, ) @@ -50,7 +48,7 @@ class LinearModel(Model): If "sum", the weights are set to be 1. 
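A toy sketch of the weighting rule quoted above (`"sum"` sets every weight to 1, while an explicit `list[float]` is used as-is); the names are hypothetical, and the real `LinearModel` combines TF graph outputs rather than plain floats:

```python
from typing import Union


def combine_energies(
    energies: list[float], weights: Union[str, list[float]]
) -> float:
    if weights == "sum":
        w = [1.0] * len(energies)
    elif isinstance(weights, list):
        w = weights
    else:
        raise ValueError(f"unknown weights: {weights!r}")
    return sum(e * wi for e, wi in zip(energies, w))


assert combine_energies([1.0, 2.0], "sum") == 3.0
```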
""" - def __init__(self, models: List[dict], weights: List[float], **kwargs): + def __init__(self, models: list[dict], weights: list[float], **kwargs): super().__init__(**kwargs) self.models = [Model(**model) for model in models] if isinstance(weights, list): @@ -140,9 +138,9 @@ def get_type_map(self) -> list: def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Parameters @@ -175,7 +173,7 @@ def update_sel( return local_jdata_cpy, min_nbor_dist @property - def input_requirement(self) -> List[DataRequirementItem]: + def input_requirement(self) -> list[DataRequirementItem]: """Return data requirements needed for the model input.""" return reduce( operator.iadd, [model.input_requirement for model in self.models], [] diff --git a/deepmd/tf/model/model.py b/deepmd/tf/model/model.py index 5224fde473..833f8364ae 100644 --- a/deepmd/tf/model/model.py +++ b/deepmd/tf/model/model.py @@ -8,10 +8,7 @@ Enum, ) from typing import ( - Dict, - List, Optional, - Tuple, Union, ) @@ -113,7 +110,7 @@ def __new__(cls, *args, **kwargs): def __init__( self, type_embedding: Optional[Union[dict, TypeEmbedNet]] = None, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, data_stat_nbatch: int = 10, data_bias_nsample: int = 10, data_stat_protect: float = 1e-2, @@ -360,7 +357,7 @@ def build_type_embedding( return dout def _import_graph_def_from_frz_model( - self, frz_model: str, feed_dict: dict, return_elements: List[str] + self, frz_model: str, feed_dict: dict, return_elements: list[str] ): return_nodes = [x[:-2] for x in return_elements] graph, graph_def = load_graph_def(frz_model) @@ -370,7 +367,7 @@ def _import_graph_def_from_frz_model( ) def _import_graph_def_from_ckpt_meta( - self, ckpt_meta: str, feed_dict: dict, return_elements: List[str] + self, ckpt_meta: str, feed_dict: dict, return_elements: list[str] ): return_nodes = [x[:-2] for x in return_elements] with tf.Graph().as_default() as graph: @@ -469,7 +466,7 @@ def get_feed_dict( box: tf.Tensor, mesh: tf.Tensor, **kwargs, - ) -> Dict[str, tf.Tensor]: + ) -> dict[str, tf.Tensor]: """Generate the feed_dict for current descriptor. Parameters @@ -515,9 +512,9 @@ def get_feed_dict( def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Notes @@ -586,7 +583,7 @@ def serialize(self, suffix: str = "") -> dict: @property @abstractmethod - def input_requirement(self) -> List[DataRequirementItem]: + def input_requirement(self) -> list[DataRequirementItem]: """Return data requirements needed for the model input.""" @@ -647,7 +644,7 @@ def __init__( descriptor: Union[dict, Descriptor], fitting_net: Union[dict, Fitting], type_embedding: Optional[Union[dict, TypeEmbedNet]] = None, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, **kwargs, ) -> None: super().__init__( @@ -761,9 +758,9 @@ def get_ntypes(self) -> int: def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. 
Parameters @@ -863,6 +860,6 @@ def serialize(self, suffix: str = "") -> dict: } @property - def input_requirement(self) -> List[DataRequirementItem]: + def input_requirement(self) -> list[DataRequirementItem]: """Return data requirements needed for the model input.""" return self.descrpt.input_requirement + self.fitting.input_requirement diff --git a/deepmd/tf/model/pairtab.py b/deepmd/tf/model/pairtab.py index 29ddfe9499..d54940fec6 100644 --- a/deepmd/tf/model/pairtab.py +++ b/deepmd/tf/model/pairtab.py @@ -3,9 +3,7 @@ Enum, ) from typing import ( - List, Optional, - Tuple, Union, ) @@ -69,7 +67,7 @@ class PairTabModel(Model): model_type = "ener" def __init__( - self, tab_file: str, rcut: float, sel: Union[int, List[int]], **kwargs + self, tab_file: str, rcut: float, sel: Union[int, list[int]], **kwargs ): super().__init__() self.tab_file = tab_file @@ -275,9 +273,9 @@ def enable_compression(self, suffix: str = "") -> None: def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. Notes @@ -308,6 +306,6 @@ def update_sel( return local_jdata_cpy, min_nbor_dist @property - def input_requirement(self) -> List[DataRequirementItem]: + def input_requirement(self) -> list[DataRequirementItem]: """Return data requirements needed for the model input.""" return [] diff --git a/deepmd/tf/model/pairwise_dprc.py b/deepmd/tf/model/pairwise_dprc.py index 6fd8e82f7e..c8a57d90b3 100644 --- a/deepmd/tf/model/pairwise_dprc.py +++ b/deepmd/tf/model/pairwise_dprc.py @@ -1,9 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - Dict, - List, Optional, - Tuple, Union, ) @@ -53,7 +50,7 @@ def __init__( qm_model: dict, qmmm_model: dict, type_embedding: Union[dict, TypeEmbedNet], - type_map: List[str], + type_map: list[str], data_stat_nbatch: int = 10, data_stat_nsample: int = 10, data_stat_protect: float = 1e-2, @@ -373,7 +370,7 @@ def get_feed_dict( box: tf.Tensor, mesh: tf.Tensor, **kwargs, - ) -> Dict[str, tf.Tensor]: + ) -> dict[str, tf.Tensor]: """Generate the feed_dict for current descriptor. Parameters @@ -416,9 +413,9 @@ def get_feed_dict( def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, - ) -> Tuple[dict, Optional[float]]: + ) -> tuple[dict, Optional[float]]: """Update the selection and perform neighbor statistics. 
    Parameters
@@ -442,7 +439,7 @@ def update_sel(
         return local_jdata, min_nbor_dist
 
     @property
-    def input_requirement(self) -> List[DataRequirementItem]:
+    def input_requirement(self) -> list[DataRequirementItem]:
         """Return data requirements needed for the model input."""
         data_requirement = []
         data_requirement.append(
diff --git a/deepmd/tf/model/tensor.py b/deepmd/tf/model/tensor.py
index b2afe0d71f..8514844e03 100644
--- a/deepmd/tf/model/tensor.py
+++ b/deepmd/tf/model/tensor.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 from typing import (
-    List,
     Optional,
     Union,
 )
@@ -50,7 +49,7 @@ def __init__(
         descriptor: dict,
         fitting_net: dict,
         type_embedding: Optional[Union[dict, TypeEmbedNet]] = None,
-        type_map: Optional[List[str]] = None,
+        type_map: Optional[list[str]] = None,
         data_stat_nbatch: int = 10,
         data_stat_protect: float = 1e-2,
         **kwargs,
diff --git a/deepmd/tf/nvnmd/utils/fio.py b/deepmd/tf/nvnmd/utils/fio.py
index 3efd7520dd..9daff62183 100644
--- a/deepmd/tf/nvnmd/utils/fio.py
+++ b/deepmd/tf/nvnmd/utils/fio.py
@@ -3,9 +3,6 @@
 import logging
 import os
 import struct
-from typing import (
-    List,
-)
 
 import numpy as np
 
@@ -168,7 +165,7 @@ def load(self, file_name="", default_value=""):
             log.warning(f"can not find {file_name}")
             return default_value
 
-    def save(self, file_name: str, data: List[str]):
+    def save(self, file_name: str, data: list[str]):
         r"""Save hex string into binary file."""
         log.info(f"write binary to {file_name}")
         Fio().create_file_path(file_name)
diff --git a/deepmd/tf/train/run_options.py b/deepmd/tf/train/run_options.py
index b835d63852..c36b42e194 100644
--- a/deepmd/tf/train/run_options.py
+++ b/deepmd/tf/train/run_options.py
@@ -8,7 +8,6 @@
 )
 from typing import (
     TYPE_CHECKING,
-    List,
     Optional,
 )
 
@@ -80,7 +79,7 @@ class RunOptions:
     Attributes
     ----------
-    gpus: Optional[List[int]]
+    gpus: Optional[list[int]]
         list of GPUs if any are present else None
     is_chief: bool
         in distributed training it is true for the main MPI process in serial it is
         always true
     my_rank: int
         index of the MPI task
     nodename: str
         name of the node
-    node_list_ : List[str]
+    node_list_ : list[str]
         the list of nodes of the current mpirun
     my_device: str
         device type - gpu or cpu
     """
 
-    gpus: Optional[List[int]]
+    gpus: Optional[list[int]]
     world_size: int
     my_rank: int
     nodename: str
-    nodelist: List[int]
+    nodelist: list[int]
     my_device: str
     _HVD: Optional["HVD"]
diff --git a/deepmd/tf/train/trainer.py b/deepmd/tf/train/trainer.py
index 474af1da90..7f9aeb27d2 100644
--- a/deepmd/tf/train/trainer.py
+++ b/deepmd/tf/train/trainer.py
@@ -4,10 +4,6 @@
 import os
 import shutil
 import time
-from typing import (
-    Dict,
-    List,
-)
 
 import google.protobuf.message
 import numpy as np
@@ -891,7 +887,7 @@ def _change_energy_bias(
         )
 
     @property
-    def data_requirements(self) -> List[DataRequirementItem]:
+    def data_requirements(self) -> list[DataRequirementItem]:
         return self.model.input_requirement + self.loss.label_requirement
 
 
@@ -922,17 +918,17 @@ def __init__(self, train_data: DeepmdDataSystem):
         self.data_keys = batch_data.keys()
         self.data_types = [tf.as_dtype(x.dtype) for x in batch_data.values()]
 
-    def build(self) -> List[tf.Tensor]:
+    def build(self) -> list[tf.Tensor]:
         """Build the OP that loads the training data.
 
         Returns
         -------
-        List[tf.Tensor]
+        list[tf.Tensor]
            Tensor of the loaded data.
""" train_data = self.train_data - def get_train_batch() -> List[np.ndarray]: + def get_train_batch() -> list[np.ndarray]: batch_data = train_data.get_batch() # convert dict to list of arryas batch_data = tuple([batch_data[kk] for kk in self.data_keys]) @@ -940,17 +936,17 @@ def get_train_batch() -> List[np.ndarray]: return tf.py_func(get_train_batch, [], self.data_types, name="train_data") - def get_data_dict(self, batch_list: List[np.ndarray]) -> Dict[str, np.ndarray]: + def get_data_dict(self, batch_list: list[np.ndarray]) -> dict[str, np.ndarray]: """Generate a dict of the loaded data. Parameters ---------- - batch_list : List[np.ndarray] + batch_list : list[np.ndarray] The loaded data. Returns ------- - Dict[str, np.ndarray] + dict[str, np.ndarray] The dict of the loaded data. """ return dict(zip(self.data_keys, batch_list)) diff --git a/deepmd/tf/utils/finetune.py b/deepmd/tf/utils/finetune.py index 4e55b9f5bb..4c57246ffd 100644 --- a/deepmd/tf/utils/finetune.py +++ b/deepmd/tf/utils/finetune.py @@ -3,7 +3,6 @@ import logging from typing import ( Any, - Dict, ) from deepmd.tf.utils.errors import ( @@ -17,13 +16,13 @@ def replace_model_params_with_pretrained_model( - jdata: Dict[str, Any], pretrained_model: str + jdata: dict[str, Any], pretrained_model: str ): """Replace the model params in input script according to pretrained model. Parameters ---------- - jdata : Dict[str, Any] + jdata : dict[str, Any] input script pretrained_model : str filename of the pretrained model diff --git a/deepmd/tf/utils/graph.py b/deepmd/tf/utils/graph.py index a891506e95..4fccaac0e8 100644 --- a/deepmd/tf/utils/graph.py +++ b/deepmd/tf/utils/graph.py @@ -1,9 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import re -from typing import ( - Dict, - Tuple, -) import numpy as np @@ -22,7 +18,7 @@ ) -def load_graph_def(model_file: str) -> Tuple[tf.Graph, tf.GraphDef]: +def load_graph_def(model_file: str) -> tuple[tf.Graph, tf.GraphDef]: """Load graph as well as the graph_def from the frozen model(model_file). Parameters @@ -98,7 +94,7 @@ def get_tensor_by_name(model_file: str, tensor_name: str) -> tf.Tensor: return get_tensor_by_name_from_graph(graph, tensor_name) -def get_pattern_nodes_from_graph_def(graph_def: tf.GraphDef, pattern: str) -> Dict: +def get_pattern_nodes_from_graph_def(graph_def: tf.GraphDef, pattern: str) -> dict: """Get the pattern nodes with the given tf.GraphDef object. Parameters @@ -123,7 +119,7 @@ def get_pattern_nodes_from_graph_def(graph_def: tf.GraphDef, pattern: str) -> Di def get_embedding_net_nodes_from_graph_def( graph_def: tf.GraphDef, suffix: str = "" -) -> Dict: +) -> dict: """Get the embedding net nodes with the given tf.GraphDef object. Parameters @@ -154,7 +150,7 @@ def get_embedding_net_nodes_from_graph_def( return embedding_net_nodes -def get_embedding_net_nodes(model_file: str, suffix: str = "") -> Dict: +def get_embedding_net_nodes(model_file: str, suffix: str = "") -> dict: """Get the embedding net nodes with the given frozen model(model_file). Parameters @@ -175,7 +171,7 @@ def get_embedding_net_nodes(model_file: str, suffix: str = "") -> Dict: def get_embedding_net_variables_from_graph_def( graph_def: tf.GraphDef, suffix: str = "" -) -> Dict: +) -> dict: """Get the embedding net variables with the given tf.GraphDef object. Parameters @@ -220,7 +216,7 @@ def get_extra_embedding_net_nodes_from_graph_def( graph_def: tf.GraphDef, suffix: str = "", extra_suffix: str = "", -) -> Dict: +) -> dict: """Get the extra embedding net nodes with the given tf.GraphDef object. 
Parameters @@ -259,7 +255,7 @@ def get_extra_embedding_net_variables_from_graph_def( graph_def: tf.GraphDef, suffix: str = "", extra_suffix: str = "", -) -> Dict: +) -> dict: """Get the embedding net variables with the given tf.GraphDef object. Parameters @@ -282,7 +278,7 @@ def get_extra_embedding_net_variables_from_graph_def( return convert_tensor_to_ndarray_in_dict(extra_embedding_net_nodes) -def get_embedding_net_variables(model_file: str, suffix: str = "") -> Dict: +def get_embedding_net_variables(model_file: str, suffix: str = "") -> dict: """Get the embedding net variables with the given frozen model(model_file). Parameters @@ -303,7 +299,7 @@ def get_embedding_net_variables(model_file: str, suffix: str = "") -> Dict: def get_fitting_net_nodes_from_graph_def( graph_def: tf.GraphDef, suffix: str = "" -) -> Dict: +) -> dict: """Get the fitting net nodes with the given tf.GraphDef object. Parameters @@ -334,7 +330,7 @@ def get_fitting_net_nodes_from_graph_def( return fitting_net_nodes -def get_fitting_net_nodes(model_file: str) -> Dict: +def get_fitting_net_nodes(model_file: str) -> dict: """Get the fitting net nodes with the given frozen model(model_file). Parameters @@ -353,7 +349,7 @@ def get_fitting_net_nodes(model_file: str) -> Dict: def get_fitting_net_variables_from_graph_def( graph_def: tf.GraphDef, suffix: str = "" -) -> Dict: +) -> dict: """Get the fitting net variables with the given tf.GraphDef object. Parameters @@ -372,7 +368,7 @@ def get_fitting_net_variables_from_graph_def( return convert_tensor_to_ndarray_in_dict(fitting_net_nodes) -def get_fitting_net_variables(model_file: str, suffix: str = "") -> Dict: +def get_fitting_net_variables(model_file: str, suffix: str = "") -> dict: """Get the fitting net variables with the given frozen model(model_file). Parameters @@ -393,7 +389,7 @@ def get_fitting_net_variables(model_file: str, suffix: str = "") -> Dict: def get_type_embedding_net_nodes_from_graph_def( graph_def: tf.GraphDef, suffix: str = "" -) -> Dict: +) -> dict: """Get the type embedding net nodes with the given tf.GraphDef object. Parameters @@ -425,7 +421,7 @@ def get_type_embedding_net_nodes_from_graph_def( def get_type_embedding_net_variables_from_graph_def( graph_def: tf.GraphDef, suffix: str = "" -) -> Dict: +) -> dict: """Get the type embedding net variables with the given tf.GraphDef object. Parameters @@ -448,7 +444,7 @@ def get_type_embedding_net_variables_from_graph_def( def get_attention_layer_nodes_from_graph_def( graph_def: tf.GraphDef, suffix: str = "" -) -> Dict: +) -> dict: """Get the attention layer nodes with the given tf.GraphDef object. Parameters @@ -482,7 +478,7 @@ def get_attention_layer_nodes_from_graph_def( def get_attention_layer_variables_from_graph_def( graph_def: tf.GraphDef, suffix: str = "" -) -> Dict: +) -> dict: """Get the attention layer variables with the given tf.GraphDef object. Parameters @@ -504,18 +500,18 @@ def get_attention_layer_variables_from_graph_def( def convert_tensor_to_ndarray_in_dict( - tensor_dict: Dict[str, tf.Tensor], -) -> Dict[str, np.ndarray]: + tensor_dict: dict[str, tf.Tensor], +) -> dict[str, np.ndarray]: """Convert tensor to ndarray in dict. 
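`convert_tensor_to_ndarray_in_dict` mutates its argument in place and returns it. A generic sketch of that pattern (pure numpy, with `evaluate` standing in for however each `tf.Tensor` is materialized; not the verbatim deepmd body):

```python
from typing import Any, Callable

import numpy as np


def convert_in_place(
    tensor_dict: dict[str, Any], evaluate: Callable[[Any], np.ndarray]
) -> dict[str, np.ndarray]:
    # Same in-place conversion pattern as the helper above.
    for key in tensor_dict:
        tensor_dict[key] = evaluate(tensor_dict[key])
    return tensor_dict


converted = convert_in_place({"bias": [1.0, 2.0]}, np.asarray)
assert isinstance(converted["bias"], np.ndarray)
```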
Parameters ---------- - tensor_dict : Dict[str, tf.Tensor] + tensor_dict : dict[str, tf.Tensor] The input tensor dict Returns ------- - Dict[str, np.ndarray] + dict[str, np.ndarray] The converted tensor dict """ for key in tensor_dict: diff --git a/deepmd/tf/utils/neighbor_stat.py b/deepmd/tf/utils/neighbor_stat.py index f668d4a4da..4052c89821 100644 --- a/deepmd/tf/utils/neighbor_stat.py +++ b/deepmd/tf/utils/neighbor_stat.py @@ -1,9 +1,10 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging -from typing import ( +from collections.abc import ( Iterator, +) +from typing import ( Optional, - Tuple, ) import numpy as np @@ -61,7 +62,7 @@ def build( atype: tf.Tensor, cell: tf.Tensor, pbc: tf.Tensor, - ) -> Tuple[tf.Tensor, tf.Tensor]: + ) -> tuple[tf.Tensor, tf.Tensor]: """Calculate the nearest neighbor distance between atoms, maximum nbor size of atoms and the output data range of the environment matrix. @@ -187,7 +188,7 @@ def __init__( self.op = self.build() self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) - def build(self) -> Tuple[tf.Tensor, tf.Tensor]: + def build(self) -> tuple[tf.Tensor, tf.Tensor]: """Build the graph. Returns @@ -215,7 +216,7 @@ def build(self) -> Tuple[tf.Tensor, tf.Tensor]: def iterator( self, data: DeepmdDataSystem - ) -> Iterator[Tuple[np.ndarray, float, str]]: + ) -> Iterator[tuple[np.ndarray, float, str]]: """Produce data. Parameters diff --git a/deepmd/tf/utils/parallel_op.py b/deepmd/tf/utils/parallel_op.py index 5eeb1fab7f..ce43ea8c15 100644 --- a/deepmd/tf/utils/parallel_op.py +++ b/deepmd/tf/utils/parallel_op.py @@ -1,11 +1,11 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from collections.abc import ( + Generator, +) from typing import ( Any, Callable, - Dict, - Generator, Optional, - Tuple, ) from deepmd.tf.env import ( @@ -21,7 +21,7 @@ class ParallelOp: Parameters ---------- - builder : Callable[..., Tuple[Dict[str, tf.Tensor], Tuple[tf.Tensor]]] + builder : Callable[..., tuple[dict[str, tf.Tensor], tuple[tf.Tensor]]] returns two objects: a dict which stores placeholders by key, and a tuple with the final op(s) nthreads : int, optional the number of threads @@ -45,7 +45,7 @@ class ParallelOp: def __init__( self, - builder: Callable[..., Tuple[Dict[str, tf.Tensor], Tuple[tf.Tensor]]], + builder: Callable[..., tuple[dict[str, tf.Tensor], tuple[tf.Tensor]]], nthreads: Optional[int] = None, config: Optional[tf.ConfigProto] = None, ) -> None: @@ -65,8 +65,8 @@ def __init__( self.ops.append(op) def generate( - self, sess: tf.Session, feed: Generator[Dict[str, Any], None, None] - ) -> Generator[Tuple, None, None]: + self, sess: tf.Session, feed: Generator[dict[str, Any], None, None] + ) -> Generator[tuple, None, None]: """Returns a generator. 
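Since Python 3.9, the container ABCs in `typing` (`Iterator`, `Generator`, `Sequence`, ...) are deprecated aliases of their `collections.abc` counterparts, which is what the import moves above reflect. A self-contained sketch of the annotations used here:

```python
from collections.abc import Generator, Iterator
from typing import Any


def feed() -> Generator[dict[str, Any], None, None]:
    # Yields feed-dict style payloads, mirroring the Generator annotation above.
    for step in range(3):
        yield {"step": step}


def steps() -> Iterator[int]:
    return iter(range(3))


assert [d["step"] for d in feed()] == list(steps())
```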
Parameters diff --git a/deepmd/tf/utils/spin.py b/deepmd/tf/utils/spin.py index c20d4dcc7b..ab70bdf319 100644 --- a/deepmd/tf/utils/spin.py +++ b/deepmd/tf/utils/spin.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, ) @@ -25,9 +24,9 @@ class Spin: def __init__( self, - use_spin: Optional[List[bool]] = None, - spin_norm: Optional[List[float]] = None, - virtual_len: Optional[List[float]] = None, + use_spin: Optional[list[bool]] = None, + spin_norm: Optional[list[float]] = None, + virtual_len: Optional[list[float]] = None, ) -> None: """Constructor.""" self.use_spin = use_spin @@ -74,14 +73,14 @@ def get_ntypes_spin(self) -> int: """Returns the number of atom types which contain spin.""" return self.ntypes_spin - def get_use_spin(self) -> List[bool]: + def get_use_spin(self) -> list[bool]: """Returns the list of whether to use spin for each atom type.""" return self.use_spin - def get_spin_norm(self) -> List[float]: + def get_spin_norm(self) -> list[float]: """Returns the list of magnitude of atomic spin for each atom type.""" return self.spin_norm - def get_virtual_len(self) -> List[float]: + def get_virtual_len(self) -> list[float]: """Returns the list of distance between real atom and virtual atom for each atom type.""" return self.virtual_len diff --git a/deepmd/tf/utils/tabulate.py b/deepmd/tf/utils/tabulate.py index e1ab45c44f..afb94bb050 100644 --- a/deepmd/tf/utils/tabulate.py +++ b/deepmd/tf/utils/tabulate.py @@ -5,9 +5,6 @@ ) from typing import ( Callable, - Dict, - List, - Tuple, ) import numpy as np @@ -53,7 +50,7 @@ class DPTabulate: The graph_def of the original model type_one_side Try to build N_types tables. Otherwise, building N_types^2 tables - exclude_types : List[List[int]] + exclude_types : list[list[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. activation_function @@ -65,11 +62,11 @@ class DPTabulate: def __init__( self, descrpt: Descriptor, - neuron: List[int], + neuron: list[int], graph: tf.Graph, graph_def: tf.GraphDef, type_one_side: bool = False, - exclude_types: List[List[int]] = [], + exclude_types: list[list[int]] = [], activation_fn: Callable[[tf.Tensor], tf.Tensor] = tf.nn.tanh, suffix: str = "", ) -> None: @@ -160,7 +157,7 @@ def __init__( def build( self, min_nbor_dist: float, extrapolate: float, stride0: float, stride1: float - ) -> Tuple[Dict[str, int], Dict[str, int]]: + ) -> tuple[dict[str, int], dict[str, int]]: r"""Build the tables for model compression. Parameters diff --git a/deepmd/tf/utils/type_embed.py b/deepmd/tf/utils/type_embed.py index 7d74b0a856..13d02a858c 100644 --- a/deepmd/tf/utils/type_embed.py +++ b/deepmd/tf/utils/type_embed.py @@ -2,7 +2,6 @@ import logging import re from typing import ( - List, Optional, Union, ) @@ -105,7 +104,7 @@ class TypeEmbedNet: Whether to use electronic configuration type embedding. use_tebd_bias : bool, Optional Whether to use bias in the type embedding layer. - type_map: List[str], Optional + type_map: list[str], Optional A list of strings. Give the name to each type of atoms. 
""" @@ -113,7 +112,7 @@ def __init__( self, *, ntypes: int, - neuron: List[int], + neuron: list[int], resnet_dt: bool = False, activation_function: Union[str, None] = "tanh", precision: str = "default", @@ -123,7 +122,7 @@ def __init__( padding: bool = False, use_econf_tebd: bool = False, use_tebd_bias: bool = False, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, **kwargs, ) -> None: """Constructor.""" diff --git a/deepmd/tf/utils/update_sel.py b/deepmd/tf/utils/update_sel.py index 726aec4d41..8915eb0147 100644 --- a/deepmd/tf/utils/update_sel.py +++ b/deepmd/tf/utils/update_sel.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Type, -) from deepmd.tf.utils.neighbor_stat import ( NeighborStat, @@ -13,5 +10,5 @@ class UpdateSel(BaseUpdateSel): @property - def neighbor_stat(self) -> Type[NeighborStat]: + def neighbor_stat(self) -> type[NeighborStat]: return NeighborStat diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index a799b6b0c4..1a5e1cc3b2 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -4,8 +4,6 @@ import warnings from typing import ( Callable, - Dict, - List, Optional, Union, ) @@ -92,7 +90,7 @@ def type_embedding_args(): doc_use_tebd_bias = "Whether to use bias in the type embedding layer." return [ - Argument("neuron", List[int], optional=True, default=[8], doc=doc_neuron), + Argument("neuron", list[int], optional=True, default=[8], doc=doc_neuron), Argument( "activation_function", str, @@ -136,22 +134,22 @@ def spin_args(): ) return [ - Argument("use_spin", [List[bool], List[int]], doc=doc_use_spin), + Argument("use_spin", [list[bool], list[int]], doc=doc_use_spin), Argument( "spin_norm", - List[float], + list[float], optional=True, doc=doc_only_tf_supported + doc_spin_norm, ), Argument( "virtual_len", - List[float], + list[float], optional=True, doc=doc_only_tf_supported + doc_virtual_len, ), Argument( "virtual_scale", - [List[float], float], + [list[float], float], optional=True, doc=doc_only_pt_supported + doc_virtual_scale, ), @@ -166,10 +164,10 @@ def __init__(self) -> None: self.__plugin = Plugin() def register( - self, name: str, alias: Optional[List[str]] = None, doc: str = "" + self, name: str, alias: Optional[list[str]] = None, doc: str = "" ) -> Callable[ - [Union[Callable[[], Argument], Callable[[], List[Argument]]]], - Union[Callable[[], Argument], Callable[[], List[Argument]]], + [Union[Callable[[], Argument], Callable[[], list[Argument]]]], + Union[Callable[[], Argument], Callable[[], list[Argument]]], ]: """Register a descriptor argument plugin. @@ -177,12 +175,12 @@ def register( ---------- name : str the name of a descriptor - alias : List[str], optional + alias : list[str], optional the list of aliases of this descriptor Returns ------- - Callable[[Union[Callable[[], Argument], Callable[[], List[Argument]]]], Union[Callable[[], Argument], Callable[[], List[Argument]]]] + Callable[[Union[Callable[[], Argument], Callable[[], list[Argument]]]], Union[Callable[[], Argument], Callable[[], list[Argument]]]] decorator to return the registered descriptor argument method Examples @@ -197,7 +195,7 @@ def descrpt_some_descrpt_args(): alias = tuple(alias) return self.__plugin.register((name, alias, doc)) - def get_all_argument(self, exclude_hybrid: bool = False) -> List[Argument]: + def get_all_argument(self, exclude_hybrid: bool = False) -> list[Argument]: """Get all arguments. 
Parameters @@ -207,7 +205,7 @@ def get_all_argument(self, exclude_hybrid: bool = False) -> List[Argument]: Returns ------- - List[Argument] + list[Argument] all arguments """ arguments = [] @@ -245,17 +243,17 @@ def descrpt_local_frame_args(): - axis_rule[i*6+5]: index of the axis atom defining the second axis. Note that the neighbors with the same class and type are sorted according to their relative distance." return [ - Argument("sel_a", List[int], optional=False, doc=doc_sel_a), - Argument("sel_r", List[int], optional=False, doc=doc_sel_r), + Argument("sel_a", list[int], optional=False, doc=doc_sel_a), + Argument("sel_r", list[int], optional=False, doc=doc_sel_r), Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), - Argument("axis_rule", List[int], optional=False, doc=doc_axis_rule), + Argument("axis_rule", list[int], optional=False, doc=doc_axis_rule), ] @descrpt_args_plugin.register("se_e2_a", alias=["se_a"]) def descrpt_se_a_args(): doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ - - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ + - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" @@ -272,11 +270,11 @@ def descrpt_se_a_args(): doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used" return [ - Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel), + Argument("sel", [list[int], str], optional=True, default="auto", doc=doc_sel), Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), Argument( - "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron + "neuron", list[int], optional=True, default=[10, 20, 40], doc=doc_neuron ), Argument( "axis_neuron", @@ -302,7 +300,7 @@ def descrpt_se_a_args(): Argument("seed", [int, None], optional=True, doc=doc_seed), Argument( "exclude_types", - List[List[int]], + list[list[int]], optional=True, default=[], doc=doc_exclude_types, @@ -323,7 +321,7 @@ def descrpt_se_a_args(): @descrpt_args_plugin.register("se_e3", alias=["se_at", "se_a_3be", "se_t"]) def descrpt_se_t_args(): doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ - - `List[int]`. 
The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ + - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" @@ -338,11 +336,11 @@ def descrpt_se_t_args(): doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection." return [ - Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel), + Argument("sel", [list[int], str], optional=True, default="auto", doc=doc_sel), Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), Argument( - "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron + "neuron", list[int], optional=True, default=[10, 20, 40], doc=doc_neuron ), Argument( "activation_function", @@ -360,7 +358,7 @@ def descrpt_se_t_args(): ), Argument( "exclude_types", - List[List[int]], + list[list[int]], optional=True, default=[], doc=doc_exclude_types, @@ -392,7 +390,7 @@ def descrpt_se_a_tpe_args(): @descrpt_args_plugin.register("se_e2_r", alias=["se_r"]) def descrpt_se_r_args(): doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ - - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ + - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. 
The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" @@ -408,11 +406,11 @@ def descrpt_se_r_args(): doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection." return [ - Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel), + Argument("sel", [list[int], str], optional=True, default="auto", doc=doc_sel), Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), Argument( - "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron + "neuron", list[int], optional=True, default=[10, 20, 40], doc=doc_neuron ), Argument( "activation_function", @@ -430,7 +428,7 @@ def descrpt_se_r_args(): Argument("seed", [int, None], optional=True, doc=doc_seed), Argument( "exclude_types", - List[List[int]], + list[list[int]], optional=True, default=[], doc=doc_exclude_types, @@ -469,7 +467,7 @@ def descrpt_hybrid_args(): def descrpt_se_atten_common_args(): doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ + - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. 
For example the 1/r term is smoothed from `rcut` to `rcut_smth`" @@ -490,12 +488,12 @@ def descrpt_se_atten_common_args(): return [ Argument( - "sel", [int, List[int], str], optional=True, default="auto", doc=doc_sel + "sel", [int, list[int], str], optional=True, default="auto", doc=doc_sel ), Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), Argument( - "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron + "neuron", list[int], optional=True, default=[10, 20, 40], doc=doc_neuron ), Argument( "axis_neuron", @@ -521,7 +519,7 @@ def descrpt_se_atten_common_args(): Argument("seed", [int, None], optional=True, doc=doc_seed), Argument( "exclude_types", - List[List[int]], + list[list[int]], optional=True, default=[], doc=doc_exclude_types, @@ -666,7 +664,7 @@ def descrpt_se_atten_args(): def descrpt_se_e3_tebd_args(): doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ + - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. 
For example the 1/r term is smoothed from `rcut` to `rcut_smth`" @@ -697,12 +695,12 @@ def descrpt_se_e3_tebd_args(): return [ Argument( - "sel", [int, List[int], str], optional=True, default="auto", doc=doc_sel + "sel", [int, list[int], str], optional=True, default="auto", doc=doc_sel ), Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), Argument( - "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron + "neuron", list[int], optional=True, default=[10, 20, 40], doc=doc_neuron ), Argument( "tebd_dim", @@ -745,7 +743,7 @@ def descrpt_se_e3_tebd_args(): ), Argument( "exclude_types", - List[List[int]], + list[list[int]], optional=True, default=[], doc=doc_exclude_types, @@ -898,7 +896,7 @@ def descrpt_dpa2_args(): Argument("smooth", bool, optional=True, default=True, doc=doc_smooth), Argument( "exclude_types", - List[List[int]], + list[list[int]], optional=True, default=[], doc=doc_exclude_types, @@ -1338,7 +1336,7 @@ def descrpt_se_a_ebd_v2_args(): @descrpt_args_plugin.register("se_a_mask", doc=doc_only_tf_supported) def descrpt_se_a_mask_args(): doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\ - - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ + - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." 
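The `sel = "auto:factor"` rule spelled out in the `doc_sel` strings above (count the per-type neighbor maximum within the cutoff, multiply by the factor, wrap the result up to a multiple of 4) can be sketched as below. This is a hypothetical illustration of the documented rule, not the deepmd-kit routine; `auto_sel` and its inputs are made up for the example:

```python
import math


def auto_sel(max_nbor_per_type: list[int], factor: float = 1.1) -> list[int]:
    """Illustrative reading of the documented "auto:factor" rule."""
    # Multiply each per-type neighbor maximum by `factor`, then round the
    # result up so it is divisible by 4 ("auto" is shorthand for "auto:1.1").
    return [4 * math.ceil(factor * n / 4) for n in max_nbor_per_type]


print(auto_sel([42, 87]))  # -> [48, 96]
```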
@@ -1352,9 +1350,9 @@ def descrpt_se_a_mask_args(): doc_seed = "Random seed for parameter initialization" return [ - Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel), + Argument("sel", [list[int], str], optional=True, default="auto", doc=doc_sel), Argument( - "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron + "neuron", list[int], optional=True, default=[10, 20, 40], doc=doc_neuron ), Argument( "axis_neuron", @@ -1377,7 +1375,7 @@ def descrpt_se_a_mask_args(): ), Argument( "exclude_types", - List[List[int]], + list[list[int]], optional=True, default=[], doc=doc_exclude_types, @@ -1451,7 +1449,7 @@ def fitting_ener(): Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), Argument( "neuron", - List[int], + list[int], optional=True, default=[120, 120, 120], alias=["n_neuron"], @@ -1468,7 +1466,7 @@ def fitting_ener(): Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt), Argument( "trainable", - [List[bool], bool], + [list[bool], bool], optional=True, default=True, doc=doc_trainable, @@ -1479,12 +1477,12 @@ def fitting_ener(): Argument("seed", [int, None], optional=True, doc=doc_seed), Argument( "atom_ener", - List[Optional[float]], + list[Optional[float]], optional=True, default=[], doc=doc_atom_ener, ), - Argument("layer_name", List[str], optional=True, doc=doc_layer_name), + Argument("layer_name", list[str], optional=True, doc=doc_layer_name), Argument( "use_aparam_as_mask", bool, @@ -1516,7 +1514,7 @@ def fitting_dos(): Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), Argument( - "neuron", List[int], optional=True, default=[120, 120, 120], doc=doc_neuron + "neuron", list[int], optional=True, default=[120, 120, 120], doc=doc_neuron ), Argument( "activation_function", @@ -1529,7 +1527,7 @@ def fitting_dos(): Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt), Argument( "trainable", - [List[bool], bool], + [list[bool], bool], optional=True, default=True, doc=doc_trainable, @@ -1559,7 +1557,7 @@ def fitting_property(): Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), Argument( "neuron", - List[int], + list[int], optional=True, default=[120, 120, 120], alias=["n_neuron"], @@ -1601,7 +1599,7 @@ def fitting_polar(): return [ Argument( "neuron", - List[int], + list[int], optional=True, default=[120, 120, 120], alias=["n_neuron"], @@ -1618,13 +1616,13 @@ def fitting_polar(): Argument("precision", str, optional=True, default="default", doc=doc_precision), Argument("fit_diag", bool, optional=True, default=True, doc=doc_fit_diag), Argument( - "scale", [List[float], float], optional=True, default=1.0, doc=doc_scale + "scale", [list[float], float], optional=True, default=1.0, doc=doc_scale ), # Argument("diag_shift", [list,float], optional = True, default = 0.0, doc = doc_diag_shift), Argument("shift_diag", bool, optional=True, default=True, doc=doc_shift_diag), Argument( "sel_type", - [List[int], int, None], + [list[int], int, None], optional=True, alias=["pol_type"], doc=doc_sel_type + doc_only_tf_supported, @@ -1648,7 +1646,7 @@ def fitting_dipole(): return [ Argument( "neuron", - List[int], + list[int], optional=True, default=[120, 120, 120], alias=["n_neuron"], @@ -1665,7 +1663,7 @@ def fitting_dipole(): Argument("precision", str, optional=True, default="default", doc=doc_precision), Argument( "sel_type", - [List[int], int, None], + [list[int], 
int, None], optional=True, alias=["dipole_type"], doc=doc_sel_type + doc_only_tf_supported, @@ -1702,9 +1700,9 @@ def modifier_dipole_charge(): return [ Argument("model_name", str, optional=False, doc=doc_model_name), Argument( - "model_charge_map", List[float], optional=False, doc=doc_model_charge_map + "model_charge_map", list[float], optional=False, doc=doc_model_charge_map ), - Argument("sys_charge_map", List[float], optional=False, doc=doc_sys_charge_map), + Argument("sys_charge_map", list[float], optional=False, doc=doc_sys_charge_map), Argument("ewald_beta", float, optional=True, default=0.4, doc=doc_ewald_beta), Argument("ewald_h", float, optional=True, default=1.0, doc=doc_ewald_h), ] @@ -1733,7 +1731,7 @@ def model_compression(): return [ Argument("model_file", str, optional=False, doc=doc_model_file), - Argument("table_config", List[float], optional=False, doc=doc_table_config), + Argument("table_config", list[float], optional=False, doc=doc_table_config), Argument("min_nbor_dist", float, optional=False, doc=doc_min_nbor_dist), ] @@ -1785,7 +1783,7 @@ def model_args(exclude_hybrid=False): "model", dict, [ - Argument("type_map", List[str], optional=True, doc=doc_type_map), + Argument("type_map", list[str], optional=True, doc=doc_type_map), Argument( "data_stat_nbatch", int, @@ -1837,7 +1835,7 @@ def model_args(exclude_hybrid=False): ), Argument( "preset_out_bias", - Dict[str, List[Optional[Union[float, List[float]]]]], + dict[str, list[Optional[Union[float, list[float]]]]], optional=True, default=None, doc=doc_only_pt_supported + doc_preset_out_bias, @@ -1960,7 +1958,7 @@ def pairtab_model_args() -> Argument: doc_rcut = "The cut-off radius." doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ + - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' ca = Argument( "pairtab", @@ -1968,7 +1966,7 @@ def pairtab_model_args() -> Argument: [ Argument("tab_file", str, optional=False, doc=doc_tab_file), Argument("rcut", float, optional=False, doc=doc_rcut), - Argument("sel", [int, List[int], str], optional=False, doc=doc_sel), + Argument("sel", [int, list[int], str], optional=False, doc=doc_sel), ], doc=doc_only_tf_supported + "Pairwise tabulation energy model.", ) @@ -2494,11 +2492,11 @@ def training_data_args(): # ! 
added by Ziyao: new specification style for data args = [ Argument( - "systems", [List[str], str], optional=False, default=".", doc=doc_systems + "systems", [list[str], str], optional=False, default=".", doc=doc_systems ), Argument( "batch_size", - [List[int], int, str], + [list[int], int, str], optional=True, default="auto", doc=doc_batch_size, @@ -2515,7 +2513,7 @@ def training_data_args(): # ! added by Ziyao: new specification style for data ), Argument( "sys_probs", - List[float], + list[float], optional=True, default=None, doc=doc_sys_probs, @@ -2560,11 +2558,11 @@ def validation_data_args(): # ! added by Ziyao: new specification style for dat args = [ Argument( - "systems", [List[str], str], optional=False, default=".", doc=doc_systems + "systems", [list[str], str], optional=False, default=".", doc=doc_systems ), Argument( "batch_size", - [List[int], int, str], + [list[int], int, str], optional=True, default="auto", doc=doc_batch_size, @@ -2581,7 +2579,7 @@ def validation_data_args(): # ! added by Ziyao: new specification style for dat ), Argument( "sys_probs", - List[float], + list[float], optional=True, default=None, doc=doc_sys_probs, @@ -2877,7 +2875,7 @@ def gen_json(multi_task: bool = False, **kwargs) -> str: ) -def gen_args(multi_task: bool = False) -> List[Argument]: +def gen_args(multi_task: bool = False) -> list[Argument]: if not multi_task: return [ model_args(), diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py index 8fe67ad6fc..0394993854 100644 --- a/deepmd/utils/batch_size.py +++ b/deepmd/utils/batch_size.py @@ -7,7 +7,6 @@ ) from typing import ( Callable, - Tuple, ) import array_api_compat @@ -81,7 +80,7 @@ def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None: def execute( self, callable: Callable, start_index: int, natoms: int - ) -> Tuple[int, tuple]: + ) -> tuple[int, tuple]: """Excuate a method with given batch size. Parameters @@ -153,7 +152,7 @@ def _adjust_batch_size(self, factor: float): def execute_all( self, callable: Callable, total_size: int, natoms: int, *args, **kwargs - ) -> Tuple[np.ndarray]: + ) -> tuple[np.ndarray]: """Excuate a method with all given data. This method is compatible with Array API. @@ -174,7 +173,7 @@ def execute_all( def execute_with_batch_size( batch_size: int, start_index: int - ) -> Tuple[int, Tuple[np.ndarray]]: + ) -> tuple[int, tuple[np.ndarray]]: end_index = start_index + batch_size end_index = min(end_index, total_size) return (end_index - start_index), callable( diff --git a/deepmd/utils/compat.py b/deepmd/utils/compat.py index edd01b8291..83cbe46fad 100644 --- a/deepmd/utils/compat.py +++ b/deepmd/utils/compat.py @@ -3,14 +3,15 @@ import json import warnings +from collections.abc import ( + Sequence, +) from pathlib import ( Path, ) from typing import ( Any, - Dict, Optional, - Sequence, Union, ) @@ -22,13 +23,13 @@ def convert_input_v0_v1( - jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None -) -> Dict[str, Any]: + jdata: dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None +) -> dict[str, Any]: """Convert input from v0 format to v1. 
Parameters ---------- - jdata : Dict[str, Any] + jdata : dict[str, Any] loaded json/yaml file warning : bool, optional whether to show deprecation warning, by default True @@ -37,7 +38,7 @@ def convert_input_v0_v1( Returns ------- - Dict[str, Any] + dict[str, Any] converted output """ output = {} @@ -63,19 +64,19 @@ def _warning_input_v0_v1(fname: Optional[Union[str, Path]]): warnings.warn(msg) -def _model(jdata: Dict[str, Any], smooth: bool) -> Dict[str, Dict[str, Any]]: +def _model(jdata: dict[str, Any], smooth: bool) -> dict[str, dict[str, Any]]: """Convert data to v1 input for non-smooth model. Parameters ---------- - jdata : Dict[str, Any] + jdata : dict[str, Any] parsed input json/yaml data smooth : bool whether to use smooth or non-smooth descriptor version Returns ------- - Dict[str, Dict[str, Any]] + dict[str, dict[str, Any]] dictionary with model input parameters and sub-dictionaries for descriptor and fitting net """ @@ -87,17 +88,17 @@ def _model(jdata: Dict[str, Any], smooth: bool) -> Dict[str, Dict[str, Any]]: return model -def _nonsmth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]: +def _nonsmth_descriptor(jdata: dict[str, Any]) -> dict[str, Any]: """Convert data to v1 input for non-smooth descriptor. Parameters ---------- - jdata : Dict[str, Any] + jdata : dict[str, Any] parsed input json/yaml data Returns ------- - Dict[str, Any] + dict[str, Any] dict with descriptor parameters """ descriptor = {} @@ -106,17 +107,17 @@ def _nonsmth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]: return descriptor -def _smth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]: +def _smth_descriptor(jdata: dict[str, Any]) -> dict[str, Any]: """Convert data to v1 input for smooth descriptor. Parameters ---------- - jdata : Dict[str, Any] + jdata : dict[str, Any] parsed input json/yaml data Returns ------- - Dict[str, Any] + dict[str, Any] dict with descriptor parameters """ descriptor = {} @@ -136,17 +137,17 @@ def _smth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]: return descriptor -def _fitting_net(jdata: Dict[str, Any]) -> Dict[str, Any]: +def _fitting_net(jdata: dict[str, Any]) -> dict[str, Any]: """Convert data to v1 input for fitting net. Parameters ---------- - jdata : Dict[str, Any] + jdata : dict[str, Any] parsed input json/yaml data Returns ------- - Dict[str, Any] + dict[str, Any] dict with fitting net parameters """ fitting_net = {} @@ -163,17 +164,17 @@ def _fitting_net(jdata: Dict[str, Any]) -> Dict[str, Any]: return fitting_net -def _learning_rate(jdata: Dict[str, Any]) -> Dict[str, Any]: +def _learning_rate(jdata: dict[str, Any]) -> dict[str, Any]: """Convert data to v1 input for learning rate section. Parameters ---------- - jdata : Dict[str, Any] + jdata : dict[str, Any] parsed input json/yaml data Returns ------- - Dict[str, Any] + dict[str, Any] dict with learning rate parameters """ learning_rate = {} @@ -182,20 +183,20 @@ def _learning_rate(jdata: Dict[str, Any]) -> Dict[str, Any]: return learning_rate -def _loss(jdata: Dict[str, Any]) -> Dict[str, Any]: +def _loss(jdata: dict[str, Any]) -> dict[str, Any]: """Convert data to v1 input for loss function. 
Parameters ---------- - jdata : Dict[str, Any] + jdata : dict[str, Any] parsed input json/yaml data Returns ------- - Dict[str, Any] + dict[str, Any] dict with loss function parameters """ - loss: Dict[str, Any] = {} + loss: dict[str, Any] = {} _jcopy( jdata, loss, @@ -215,17 +216,17 @@ def _loss(jdata: Dict[str, Any]) -> Dict[str, Any]: return loss -def _training(jdata: Dict[str, Any]) -> Dict[str, Any]: +def _training(jdata: dict[str, Any]) -> dict[str, Any]: """Convert data to v1 input for training. Parameters ---------- - jdata : Dict[str, Any] + jdata : dict[str, Any] parsed input json/yaml data Returns ------- - Dict[str, Any] + dict[str, Any] dict with training parameters """ training = {} @@ -250,14 +251,14 @@ def _training(jdata: Dict[str, Any]) -> Dict[str, Any]: return training -def _jcopy(src: Dict[str, Any], dst: Dict[str, Any], keys: Sequence[str]): +def _jcopy(src: dict[str, Any], dst: dict[str, Any], keys: Sequence[str]): """Copy specified keys from one dict to another. Parameters ---------- - src : Dict[str, Any] + src : dict[str, Any] source dictionary - dst : Dict[str, Any] + dst : dict[str, Any] destination dictionary, will be modified in place keys : Sequence[str] list of keys to copy @@ -267,12 +268,12 @@ def _jcopy(src: Dict[str, Any], dst: Dict[str, Any], keys: Sequence[str]): dst[k] = src[k] -def remove_decay_rate(jdata: Dict[str, Any]): +def remove_decay_rate(jdata: dict[str, Any]): """Convert decay_rate to stop_lr. Parameters ---------- - jdata : Dict[str, Any] + jdata : dict[str, Any] input data """ lr = jdata["learning_rate"] @@ -287,8 +288,8 @@ def remove_decay_rate(jdata: Dict[str, Any]): def convert_input_v1_v2( - jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None -) -> Dict[str, Any]: + jdata: dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None +) -> dict[str, Any]: tr_cfg = jdata["training"] tr_data_keys = { "systems", @@ -334,15 +335,15 @@ def _warning_input_v1_v2(fname: Optional[Union[str, Path]]): def deprecate_numb_test( - jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None -) -> Dict[str, Any]: + jdata: dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None +) -> dict[str, Any]: """Deprecate `numb_test` since v2.1. It has taken no effect since v2.0. See `#1243 `_. 
Parameters ---------- - jdata : Dict[str, Any] + jdata : dict[str, Any] loaded json/yaml file warning : bool, optional whether to show deprecation warning, by default True @@ -351,7 +352,7 @@ def deprecate_numb_test( Returns ------- - Dict[str, Any] + dict[str, Any] converted output """ try: @@ -372,8 +373,8 @@ def deprecate_numb_test( def update_deepmd_input( - jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None -) -> Dict[str, Any]: + jdata: dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None +) -> dict[str, Any]: def is_deepmd_v0_input(jdata): return "model" not in jdata.keys() diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index 5d324afb95..4c77bcf59a 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -4,7 +4,6 @@ import bisect import logging from typing import ( - List, Optional, ) @@ -53,7 +52,7 @@ def __init__( sys_path: str, set_prefix: str = "set", shuffle_test: bool = True, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, optional_type_map: bool = True, modifier=None, trn_all_set: bool = False, @@ -134,7 +133,7 @@ def add( atomic: bool = False, must: bool = False, high_prec: bool = False, - type_sel: Optional[List[int]] = None, + type_sel: Optional[list[int]] = None, repeat: int = 1, default: float = 0.0, dtype: Optional[np.dtype] = None, @@ -304,11 +303,11 @@ def get_ntypes(self) -> int: else: return max(self.get_atom_type()) + 1 - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map.""" return self.type_map - def get_atom_type(self) -> List[int]: + def get_atom_type(self) -> list[int]: """Get atom types.""" return self.atom_type @@ -738,7 +737,7 @@ def __init__( atomic: bool = False, must: bool = False, high_prec: bool = False, - type_sel: Optional[List[int]] = None, + type_sel: Optional[list[int]] = None, repeat: int = 1, default: float = 0.0, dtype: Optional[np.dtype] = None, diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py index 235930527b..e499163e6a 100644 --- a/deepmd/utils/data_system.py +++ b/deepmd/utils/data_system.py @@ -3,12 +3,10 @@ import logging import warnings from functools import ( - lru_cache, + cache, ) from typing import ( Any, - Dict, - List, Optional, Union, ) @@ -45,13 +43,13 @@ class DeepmdDataSystem: def __init__( self, - systems: List[str], + systems: list[str], batch_size: int, test_size: int, rcut: Optional[float] = None, set_prefix: str = "set", shuffle_test: bool = True, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, optional_type_map: bool = True, modifier=None, trn_all_set=False, @@ -241,8 +239,8 @@ def _load_test(self, ntests=-1): self.test_data[nn].append(test_system_data[nn]) @property - @lru_cache(maxsize=None) - def default_mesh(self) -> List[np.ndarray]: + @cache + def default_mesh(self) -> list[np.ndarray]: """Mesh for each system.""" return [ make_default_mesh( @@ -266,7 +264,7 @@ def compute_energy_shift(self, rcond=None, key="energy"): ) return energy_shift.ravel() - def add_dict(self, adict: Dict[str, Dict[str, Any]]) -> None: + def add_dict(self, adict: dict[str, dict[str, Any]]) -> None: """Add items to the data system by a `dict`. `adict` should have items like .. code-block:: python. 
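The `lru_cache(maxsize=None)` to `cache` swap in the `default_mesh` hunk above is a drop-in replacement on Python 3.9+. A minimal sketch of the cached-property pattern, with a toy class standing in for the real data system:

```python
from functools import cache


class Meshes:
    @property
    @cache  # functools.cache == lru_cache(maxsize=None), available since Python 3.9
    def default_mesh(self) -> list[int]:
        print("computed once")
        return [0, 0, 0]


m = Meshes()
m.default_mesh  # prints "computed once"
m.default_mesh  # second access is served from the cache
```

Note that `cache` keyed on `self` keeps a reference to the instance for the lifetime of the cache, which is usually acceptable for long-lived objects such as a data system.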
@@ -299,7 +297,7 @@ def add_dict(self, adict: Dict[str, Dict[str, Any]]) -> None: ) def add_data_requirements( - self, data_requirements: List[DataRequirementItem] + self, data_requirements: list[DataRequirementItem] ) -> None: """Add items to the data system by a list of `DataRequirementItem`.""" self.add_dict({rr.key: rr.dict for rr in data_requirements}) @@ -311,7 +309,7 @@ def add( atomic: bool = False, must: bool = False, high_prec: bool = False, - type_sel: Optional[List[int]] = None, + type_sel: Optional[list[int]] = None, repeat: int = 1, default: float = 0.0, dtype: Optional[np.dtype] = None, @@ -468,7 +466,7 @@ def get_batch_mixed(self) -> dict: b_data = self._merge_batch_data(batch_data) return b_data - def _merge_batch_data(self, batch_data: List[dict]) -> dict: + def _merge_batch_data(self, batch_data: list[dict]) -> dict: """Merge batch data from different systems. Parameters @@ -550,7 +548,7 @@ def get_sys_ntest(self, sys_idx=None): else: return self.test_size[self.pick_idx] - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: """Get the type map.""" return self.type_map @@ -635,12 +633,12 @@ def _format_name_length(name, width): def print_summary( name: str, nsystems: int, - system_dirs: List[str], - natoms: List[int], - batch_size: List[int], - nbatches: List[int], - sys_probs: List[float], - pbc: List[bool], + system_dirs: list[str], + natoms: list[int], + batch_size: list[int], + nbatches: list[int], + sys_probs: list[float], + pbc: list[bool], ): """Print summary of systems. @@ -732,7 +730,7 @@ def prob_sys_size_ext(keywords, nsystems, nbatch): return sys_probs -def process_systems(systems: Union[str, List[str]]) -> List[str]: +def process_systems(systems: Union[str, list[str]]) -> list[str]: """Process the user-input systems. If it is a single directory, search for all the systems in the directory. @@ -773,7 +771,7 @@ def process_systems(systems: Union[str, List[str]]) -> List[str]: def get_data( - jdata: Dict[str, Any], rcut, type_map, modifier, multi_task_mode=False + jdata: dict[str, Any], rcut, type_map, modifier, multi_task_mode=False ) -> DeepmdDataSystem: """Get the data system. 
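`process_systems` above accepts `Union[str, list[str]]` and, per its docstring, expands a single directory to all systems below it. A hedged sketch of that behavior follows; treating any directory holding `type.raw` as a system is an assumption about the on-disk layout, not something this diff states:

```python
from pathlib import Path
from typing import Union


def process_systems(systems: Union[str, list[str]]) -> list[str]:
    # A single string is treated as a root directory and expanded to every
    # system found below it; a list is returned unchanged.
    if isinstance(systems, str):
        return sorted(str(p.parent) for p in Path(systems).rglob("type.raw"))
    return systems


# process_systems("data/")            -> ["data/sys0", "data/sys1", ...]
# process_systems(["sys0", "sys1"])   -> ["sys0", "sys1"]
```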
diff --git a/deepmd/utils/econf_embd.py b/deepmd/utils/econf_embd.py index 7f12206ae3..99c7edf284 100644 --- a/deepmd/utils/econf_embd.py +++ b/deepmd/utils/econf_embd.py @@ -1,8 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Dict, - List, -) import numpy as np from mendeleev import ( @@ -228,8 +224,8 @@ def make_element_embedding_list_vec( def make_econf_embedding( - types: List[str], flatten: bool = True -) -> Dict[str, np.ndarray]: + types: list[str], flatten: bool = True +) -> dict[str, np.ndarray]: """Make the electronic configuration embedding.""" all_ret = {} for ii in types: @@ -240,7 +236,7 @@ def make_econf_embedding( return all_ret -def transform_to_spin_rep(res: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: +def transform_to_spin_rep(res: dict[str, np.ndarray]) -> dict[str, np.ndarray]: """Tranform electron occupation of 0/1/2 to -1,-1/-1,1/1,1.""" ret = {} @@ -261,7 +257,7 @@ def transform(ii): return ret -def print_econf_embedding(res: Dict[str, np.ndarray]): +def print_econf_embedding(res: dict[str, np.ndarray]): """Print electron configuration embedding.""" for kk, vv in res.items(): vvstr = ",".join([str(ii) for ii in vv]) diff --git a/deepmd/utils/env_mat_stat.py b/deepmd/utils/env_mat_stat.py index bbb43fd703..ecc0b7b62f 100644 --- a/deepmd/utils/env_mat_stat.py +++ b/deepmd/utils/env_mat_stat.py @@ -7,10 +7,10 @@ from collections import ( defaultdict, ) -from typing import ( - Dict, +from collections.abc import ( Iterator, - List, +) +from typing import ( Optional, ) @@ -98,12 +98,12 @@ def __init__(self) -> None: super().__init__() self.stats = defaultdict(StatItem) - def compute_stats(self, data: List[Dict[str, np.ndarray]]) -> None: + def compute_stats(self, data: list[dict[str, np.ndarray]]) -> None: """Compute the statistics of the environment matrix. Parameters ---------- - data : List[Dict[str, np.ndarray]] + data : list[dict[str, np.ndarray]] The environment matrix. """ if len(self.stats) > 0: @@ -113,17 +113,17 @@ def compute_stats(self, data: List[Dict[str, np.ndarray]]) -> None: self.stats[kk] += iter_stats[kk] @abstractmethod - def iter(self, data: List[Dict[str, np.ndarray]]) -> Iterator[Dict[str, StatItem]]: + def iter(self, data: list[dict[str, np.ndarray]]) -> Iterator[dict[str, StatItem]]: """Get the iterator of the environment matrix. Parameters ---------- - data : List[Dict[str, np.ndarray]] + data : list[dict[str, np.ndarray]] The environment matrix. Yields ------ - Dict[str, StatItem] + dict[str, StatItem] The statistics of the environment matrix. """ @@ -160,7 +160,7 @@ def load_stats(self, path: DPPath) -> None: ) def load_or_compute_stats( - self, data: List[Dict[str, np.ndarray]], path: Optional[DPPath] = None + self, data: list[dict[str, np.ndarray]], path: Optional[DPPath] = None ) -> None: """Load the statistics of the environment matrix if it exists, otherwise compute and save it. @@ -168,7 +168,7 @@ def load_or_compute_stats( ---------- path : DPPath The path to load the statistics of the environment matrix. - data : List[Dict[str, np.ndarray]] + data : list[dict[str, np.ndarray]] The environment matrix. """ if path is not None and path.is_dir(): @@ -180,7 +180,7 @@ def load_or_compute_stats( self.save_stats(path) log.info(f"Save stats to {path}.") - def get_avg(self, default: float = 0) -> Dict[str, float]: + def get_avg(self, default: float = 0) -> dict[str, float]: """Get the average of the environment matrix. 
Parameters @@ -190,14 +190,14 @@ def get_avg(self, default: float = 0) -> Dict[str, float]: Returns ------- - Dict[str, float] + dict[str, float] The average of the environment matrix. """ return {kk: vv.compute_avg(default=default) for kk, vv in self.stats.items()} def get_std( self, default: float = 1e-1, protection: float = 1e-2 - ) -> Dict[str, float]: + ) -> dict[str, float]: """Get the standard deviation of the environment matrix. Parameters @@ -209,7 +209,7 @@ def get_std( Returns ------- - Dict[str, float] + dict[str, float] The standard deviation of the environment matrix. """ return { diff --git a/deepmd/utils/finetune.py b/deepmd/utils/finetune.py index 9baa1b5aa8..d8d035a853 100644 --- a/deepmd/utils/finetune.py +++ b/deepmd/utils/finetune.py @@ -1,9 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging -from typing import ( - List, - Tuple, -) log = logging.getLogger(__name__) @@ -11,8 +7,8 @@ class FinetuneRuleItem: def __init__( self, - p_type_map: List[str], - type_map: List[str], + p_type_map: list[str], + type_map: list[str], model_branch: str = "Default", random_fitting: bool = False, resuming: bool = False, @@ -74,21 +70,21 @@ def get_finetune_tmap(self): def get_index_between_two_maps( - old_map: List[str], - new_map: List[str], + old_map: list[str], + new_map: list[str], ): """Returns the mapping index of types in new_map to those in the old_map. Parameters ---------- - old_map : List[str] + old_map : list[str] The old list of atom type names. - new_map : List[str] + new_map : list[str] The new list of atom type names. Returns ------- - index_map: List[int] + index_map: list[int] List contains `len(new_map)` indices, where `index_map[i]` is the index of `new_map[i]` in `old_map`. If `new_map[i]` is not in the `old_map`, the index will be `i - len(new_map)`. has_new_type: bool @@ -112,21 +108,21 @@ def get_index_between_two_maps( def map_atom_exclude_types( - atom_exclude_types: List[int], - remap_index: List[int], + atom_exclude_types: list[int], + remap_index: list[int], ): """Return the remapped atom_exclude_types according to remap_index. Parameters ---------- - atom_exclude_types : List[int] + atom_exclude_types : list[int] Exclude the atomic contribution of the given types. - remap_index : List[int] + remap_index : list[int] The indices in the old type list that correspond to the types in the new type list. Returns ------- - remapped_atom_exclude_types: List[int] + remapped_atom_exclude_types: list[int] Remapped atom_exclude_types that only keeps the types in the new type list. """ @@ -137,22 +133,22 @@ def map_atom_exclude_types( def map_pair_exclude_types( - pair_exclude_types: List[Tuple[int, int]], - remap_index: List[int], + pair_exclude_types: list[tuple[int, int]], + remap_index: list[int], ): """Return the remapped atom_exclude_types according to remap_index. Parameters ---------- - pair_exclude_types : List[Tuple[int, int]] + pair_exclude_types : list[tuple[int, int]] Exclude the pair of atoms of the given types from computing the output of the atomic model. - remap_index : List[int] + remap_index : list[int] The indices in the old type list that correspond to the types in the new type list. Returns ------- - remapped_pair_exclude_typess: List[Tuple[int, int]] + remapped_pair_exclude_typess: list[tuple[int, int]] Remapped pair_exclude_types that only keeps the types in the new type list. 
""" diff --git a/deepmd/utils/hostlist.py b/deepmd/utils/hostlist.py index c184b04031..4dac08af19 100644 --- a/deepmd/utils/hostlist.py +++ b/deepmd/utils/hostlist.py @@ -1,12 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import socket -from typing import ( - List, - Tuple, -) -def get_host_names() -> Tuple[str, List[str]]: +def get_host_names() -> tuple[str, list[str]]: """Get host names of all nodes in the cluster. If mpi4py is not installed or MPI is not used, then the @@ -16,7 +12,7 @@ def get_host_names() -> Tuple[str, List[str]]: ------- str Host name of the current node - List[str] + list[str] List of host names of all nodes in the cluster """ host_name = socket.gethostname() diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py index 54a4c16b24..40e629d9db 100644 --- a/deepmd/utils/neighbor_stat.py +++ b/deepmd/utils/neighbor_stat.py @@ -5,9 +5,8 @@ ABC, abstractmethod, ) -from typing import ( +from collections.abc import ( Iterator, - Tuple, ) import numpy as np @@ -46,7 +45,7 @@ def __init__( self.ntypes = ntypes self.mixed_type = mixed_type - def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, np.ndarray]: + def get_stat(self, data: DeepmdDataSystem) -> tuple[float, np.ndarray]: """Get the data statistics of the training data, including nearest nbor distance between atoms, max nbor size of atoms. Parameters @@ -89,7 +88,7 @@ def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, np.ndarray]: @abstractmethod def iterator( self, data: DeepmdDataSystem - ) -> Iterator[Tuple[np.ndarray, float, str]]: + ) -> Iterator[tuple[np.ndarray, float, str]]: """Abstract method for producing data. Yields diff --git a/deepmd/utils/out_stat.py b/deepmd/utils/out_stat.py index fd09e6815b..43af191e62 100644 --- a/deepmd/utils/out_stat.py +++ b/deepmd/utils/out_stat.py @@ -3,7 +3,6 @@ from typing import ( Optional, - Tuple, ) import numpy as np @@ -14,7 +13,7 @@ def compute_stats_from_redu( natoms: np.ndarray, assigned_bias: Optional[np.ndarray] = None, rcond: Optional[float] = None, -) -> Tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: """Compute the output statistics. Given the reduced output value and the number of atoms for each atom, @@ -86,7 +85,7 @@ def compute_stats_from_redu( def compute_stats_from_atomic( output: np.ndarray, atype: np.ndarray, -) -> Tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: """Compute the output statistics. Given the output value and the type of atoms, diff --git a/deepmd/utils/pair_tab.py b/deepmd/utils/pair_tab.py index 73980a2fd6..cddc358f27 100644 --- a/deepmd/utils/pair_tab.py +++ b/deepmd/utils/pair_tab.py @@ -4,7 +4,6 @@ import logging from typing import ( Optional, - Tuple, ) import numpy as np @@ -199,7 +198,7 @@ def _check_table_upper_boundary(self) -> None: self.vdata = np.concatenate((self.vdata, pad_extrapolation), axis=0) - def get(self) -> Tuple[np.array, np.array]: + def get(self) -> tuple[np.array, np.array]: """Get the serialized table.""" return self.tab_info, self.tab_data diff --git a/deepmd/utils/path.py b/deepmd/utils/path.py index e794a36cab..6c52caac1d 100644 --- a/deepmd/utils/path.py +++ b/deepmd/utils/path.py @@ -13,8 +13,6 @@ ) from typing import ( ClassVar, - Dict, - List, Optional, ) @@ -77,7 +75,7 @@ def save_numpy(self, arr: np.ndarray) -> None: """ @abstractmethod - def glob(self, pattern: str) -> List["DPPath"]: + def glob(self, pattern: str) -> list["DPPath"]: """Search path using the glob pattern. 
Parameters @@ -87,12 +85,12 @@ def glob(self, pattern: str) -> List["DPPath"]: Returns ------- - List[DPPath] + list[DPPath] list of paths """ @abstractmethod - def rglob(self, pattern: str) -> List["DPPath"]: + def rglob(self, pattern: str) -> list["DPPath"]: """This is like calling :meth:`DPPath.glob()` with `**/` added in front of the given relative pattern. @@ -103,7 +101,7 @@ def rglob(self, pattern: str) -> List["DPPath"]: Returns ------- - List[DPPath] + list[DPPath] list of paths """ @@ -206,7 +204,7 @@ def save_numpy(self, arr: np.ndarray) -> None: with self.path.open("wb") as f: np.save(f, arr) - def glob(self, pattern: str) -> List["DPPath"]: + def glob(self, pattern: str) -> list["DPPath"]: """Search path using the glob pattern. Parameters @@ -216,13 +214,13 @@ def glob(self, pattern: str) -> List["DPPath"]: Returns ------- - List[DPPath] + list[DPPath] list of paths """ # currently DPOSPath will only derivative DPOSPath return [type(self)(p, mode=self.mode) for p in self.path.glob(pattern)] - def rglob(self, pattern: str) -> List["DPPath"]: + def rglob(self, pattern: str) -> list["DPPath"]: """This is like calling :meth:`DPPath.glob()` with `**/` added in front of the given relative pattern. @@ -233,7 +231,7 @@ def rglob(self, pattern: str) -> List["DPPath"]: Returns ------- - List[DPPath] + list[DPPath] list of paths """ return [type(self)(p, mode=self.mode) for p in self.path.rglob(pattern)] @@ -360,7 +358,7 @@ def save_numpy(self, arr: np.ndarray) -> None: self.root.flush() self._new_keys.append(self._name) - def glob(self, pattern: str) -> List["DPPath"]: + def glob(self, pattern: str) -> list["DPPath"]: """Search path using the glob pattern. Parameters @@ -370,7 +368,7 @@ def glob(self, pattern: str) -> List["DPPath"]: Returns ------- - List[DPPath] + list[DPPath] list of paths """ # got paths starts with current path first, which is faster @@ -384,7 +382,7 @@ def glob(self, pattern: str) -> List["DPPath"]: for pp in globfilter(subpaths, self._connect_path(pattern)) ] - def rglob(self, pattern: str) -> List["DPPath"]: + def rglob(self, pattern: str) -> list["DPPath"]: """This is like calling :meth:`DPPath.glob()` with `**/` added in front of the given relative pattern. @@ -395,17 +393,17 @@ def rglob(self, pattern: str) -> List["DPPath"]: Returns ------- - List[DPPath] + list[DPPath] list of paths """ return self.glob("**" + pattern) @property - def _keys(self) -> List[str]: + def _keys(self) -> list[str]: """Walk all groups and dataset.""" return self._file_keys(self.root) - __file_new_keys: ClassVar[Dict[h5py.File, List[str]]] = {} + __file_new_keys: ClassVar[dict[h5py.File, list[str]]] = {} @property def _new_keys(self): @@ -415,7 +413,7 @@ def _new_keys(self): @classmethod @lru_cache(None) - def _file_keys(cls, file: h5py.File) -> List[str]: + def _file_keys(cls, file: h5py.File) -> list[str]: """Walk all groups and dataset.""" l = [] file.visit(lambda x: l.append("/" + x)) diff --git a/deepmd/utils/plugin.py b/deepmd/utils/plugin.py index b5c89eb4d3..ce8b015ddf 100644 --- a/deepmd/utils/plugin.py +++ b/deepmd/utils/plugin.py @@ -8,9 +8,7 @@ ) from typing import ( Callable, - Dict, Optional, - Type, ) @@ -19,7 +17,7 @@ class Plugin: Attributes ---------- - plugins : Dict[str, object] + plugins : dict[str, object] plugins Examples @@ -99,7 +97,7 @@ class PluginVariant(metaclass=VariantABCMeta): pass -def make_plugin_registry(name: Optional[str] = None) -> Type[object]: +def make_plugin_registry(name: Optional[str] = None) -> type[object]: """Make a plugin registry. 
Parameters @@ -141,7 +139,7 @@ class SomeClass(BaseClass): return PR.__plugins.register(key) @classmethod - def get_class_by_type(cls, class_type: str) -> Type[object]: + def get_class_by_type(cls, class_type: str) -> type[object]: """Get the class by the plugin type.""" if class_type in PR.__plugins.plugins: return PR.__plugins.plugins[class_type] @@ -154,7 +152,7 @@ def get_class_by_type(cls, class_type: str) -> Type[object]: raise RuntimeError(f"Unknown {name} type: {class_type}. {dym_message}") @classmethod - def get_plugins(cls) -> Dict[str, Type[object]]: + def get_plugins(cls) -> dict[str, type[object]]: """Get all the registered plugins.""" return PR.__plugins.plugins diff --git a/deepmd/utils/random.py b/deepmd/utils/random.py index 44ea6a1dac..440faca177 100644 --- a/deepmd/utils/random.py +++ b/deepmd/utils/random.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Optional, - Tuple, Union, ) @@ -12,7 +11,7 @@ def choice( a: Union[np.ndarray, int], - size: Optional[Union[int, Tuple[int, ...]]] = None, + size: Optional[Union[int, tuple[int, ...]]] = None, replace: bool = True, p: Optional[np.ndarray] = None, ): diff --git a/deepmd/utils/spin.py b/deepmd/utils/spin.py index 101867d3e4..41ea52df88 100644 --- a/deepmd/utils/spin.py +++ b/deepmd/utils/spin.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import copy from typing import ( - List, - Tuple, Union, ) @@ -20,10 +18,10 @@ class Spin: Parameters ---------- - use_spin: List[bool] + use_spin: list[bool] A list of boolean values indicating whether to use atomic spin for each atom type. True for spin and False for not. List of bool values with shape of [ntypes]. - virtual_scale: List[float], float + virtual_scale: list[float], float The scaling factor to determine the virtual distance between a virtual atom representing spin and its corresponding real atom for each atom type with spin. This factor is defined as the virtual distance @@ -35,8 +33,8 @@ class Spin: def __init__( self, - use_spin: List[bool], - virtual_scale: Union[List[float], float], + use_spin: list[bool], + virtual_scale: Union[list[float], float], ) -> None: self.ntypes_real = len(use_spin) self.ntypes_spin = use_spin.count(True) @@ -93,7 +91,7 @@ def get_ntypes_input(self) -> int: """Returns the number of double real atom types for input placeholder.""" return self.ntypes_input - def get_use_spin(self) -> List[bool]: + def get_use_spin(self) -> list[bool]: """Returns the list of whether to use spin for each atom type.""" return self.use_spin @@ -127,7 +125,7 @@ def init_atom_exclude_types_placeholder(self) -> None: """ self.atom_exclude_types_p = self.placeholder_type.tolist() - def get_pair_exclude_types(self, exclude_types=None) -> List[Tuple[int, int]]: + def get_pair_exclude_types(self, exclude_types=None) -> list[tuple[int, int]]: """ Return the pair-wise exclusion types for descriptor. The placeholder types for those without spin are excluded. 
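The `make_plugin_registry` changes a few hunks above return a class whose subclasses gain `register` and `get_class_by_type`. A self-contained sketch of that registry pattern, with illustrative names rather than the deepmd-kit implementation:

```python
class BaseDescriptor:
    _plugins: dict[str, type] = {}

    @classmethod
    def register(cls, key: str):
        """Return a decorator that records a class under `key`."""
        def decorator(klass: type) -> type:
            cls._plugins[key] = klass
            return klass
        return decorator

    @classmethod
    def get_class_by_type(cls, class_type: str) -> type:
        try:
            return cls._plugins[class_type]
        except KeyError:
            raise RuntimeError(f"Unknown descriptor type: {class_type}") from None


@BaseDescriptor.register("some_descrpt")
class SomeDescriptor(BaseDescriptor):
    pass


assert BaseDescriptor.get_class_by_type("some_descrpt") is SomeDescriptor
```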
@@ -135,7 +133,7 @@ def get_pair_exclude_types(self, exclude_types=None) -> List[Tuple[int, int]]: if exclude_types is None: return self.pair_exclude_types else: - _exclude_types: List[Tuple[int, int]] = copy.deepcopy( + _exclude_types: list[tuple[int, int]] = copy.deepcopy( self.pair_exclude_types ) for tt in exclude_types: @@ -143,7 +141,7 @@ def get_pair_exclude_types(self, exclude_types=None) -> List[Tuple[int, int]]: _exclude_types.append((tt[0], tt[1])) return _exclude_types - def get_atom_exclude_types(self, exclude_types=None) -> List[int]: + def get_atom_exclude_types(self, exclude_types=None) -> list[int]: """ Return the atom-wise exclusion types for fitting before out_def. Both the placeholder types and spin types are excluded. @@ -151,12 +149,12 @@ def get_atom_exclude_types(self, exclude_types=None) -> List[int]: if exclude_types is None: return self.atom_exclude_types_ps else: - _exclude_types: List[int] = copy.deepcopy(self.atom_exclude_types_ps) + _exclude_types: list[int] = copy.deepcopy(self.atom_exclude_types_ps) _exclude_types += exclude_types _exclude_types = list(set(_exclude_types)) return _exclude_types - def get_atom_exclude_types_placeholder(self, exclude_types=None) -> List[int]: + def get_atom_exclude_types_placeholder(self, exclude_types=None) -> list[int]: """ Return the atom-wise exclusion types for fitting after out_def. The placeholder types for those without spin are excluded. @@ -164,7 +162,7 @@ def get_atom_exclude_types_placeholder(self, exclude_types=None) -> List[int]: if exclude_types is None: return self.atom_exclude_types_p else: - _exclude_types: List[int] = copy.deepcopy(self.atom_exclude_types_p) + _exclude_types: list[int] = copy.deepcopy(self.atom_exclude_types_p) _exclude_types += exclude_types _exclude_types = list(set(_exclude_types)) return _exclude_types diff --git a/deepmd/utils/update_sel.py b/deepmd/utils/update_sel.py index 6feed525e5..ba1457b19c 100644 --- a/deepmd/utils/update_sel.py +++ b/deepmd/utils/update_sel.py @@ -5,10 +5,7 @@ abstractmethod, ) from typing import ( - List, Optional, - Tuple, - Type, Union, ) @@ -28,11 +25,11 @@ class BaseUpdateSel(ABC): def update_one_sel( self, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], rcut: float, - sel: Union[int, List[int], str], + sel: Union[int, list[int], str], mixed_type: bool = False, - ) -> Tuple[float, List[int]]: + ) -> tuple[float, list[int]]: min_nbor_dist, tmp_sel = self.get_nbor_stat( train_data, type_map, @@ -86,17 +83,17 @@ def wrap_up_4(self, xx): def get_nbor_stat( self, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], rcut: float, mixed_type: bool = False, - ) -> Tuple[float, Union[int, List[int]]]: + ) -> tuple[float, Union[int, list[int]]]: """Get the neighbor statistics of the data. Parameters ---------- train_data : DeepmdDataSystem The training data. - type_map : Optional[List[str]] + type_map : Optional[list[str]] The type map. rcut : float The cutoff radius. @@ -107,7 +104,7 @@ def get_nbor_stat( ------- min_nbor_dist : float The minimum neighbor distance. - max_nbor_size : List[int] + max_nbor_size : list[int] The maximum neighbor size. 
""" if type_map and len(type_map) == 0: @@ -128,7 +125,7 @@ def get_nbor_stat( @property @abstractmethod - def neighbor_stat(self) -> Type[NeighborStat]: + def neighbor_stat(self) -> type[NeighborStat]: pass def get_min_nbor_dist( diff --git a/deepmd/utils/weight_avg.py b/deepmd/utils/weight_avg.py index b344d3bb75..7c75d18e68 100644 --- a/deepmd/utils/weight_avg.py +++ b/deepmd/utils/weight_avg.py @@ -2,21 +2,16 @@ from collections import ( defaultdict, ) -from typing import ( - Dict, - List, - Tuple, -) import numpy as np -def weighted_average(errors: List[Dict[str, Tuple[float, float]]]) -> Dict: +def weighted_average(errors: list[dict[str, tuple[float, float]]]) -> dict: """Compute wighted average of prediction errors (MAE or RMSE) for model. Parameters ---------- - errors : List[Dict[str, Tuple[float, float]]] + errors : list[dict[str, tuple[float, float]]] List: the error of systems Dict: the error of quantities, name given by the key str: the name of the quantity, must starts with 'mae' or 'rmse' diff --git a/doc/development/coding-conventions.rst b/doc/development/coding-conventions.rst index 137b0d0d51..bf186d1231 100644 --- a/doc/development/coding-conventions.rst +++ b/doc/development/coding-conventions.rst @@ -30,7 +30,7 @@ Rules ----- The code must be compatible with the oldest supported version of python -which is 3.8. +which is 3.9. The project follows the generic coding conventions as specified in the `Style Guide for Python Code`_, `Docstring diff --git a/doc/development/create-a-model-pt.md b/doc/development/create-a-model-pt.md index 2ab0dee18f..257dd8a25d 100644 --- a/doc/development/create-a-model-pt.md +++ b/doc/development/create-a-model-pt.md @@ -73,7 +73,7 @@ class SomeDescript(BaseDescriptor, torch.nn.Module): def update_sel( cls, train_data: DeepmdDataSystem, - type_map: Optional[List[str]], + type_map: Optional[list[str]], local_jdata: dict, ): pass @@ -149,7 +149,7 @@ from deepmd.utils.argcheck import descrpt_args_plugin @descrpt_args_plugin.register("some_descrpt") -def descrpt_some_args() -> List[Argument]: +def descrpt_some_args() -> list[Argument]: return [ Argument("arg1", bool, optional=False, doc="balabala"), Argument("arg2", float, optional=True, default=6.0, doc="haha"), diff --git a/doc/development/create-a-model-tf.md b/doc/development/create-a-model-tf.md index 9ab3525bb5..95a2f66f23 100644 --- a/doc/development/create-a-model-tf.md +++ b/doc/development/create-a-model-tf.md @@ -37,7 +37,7 @@ from deepmd.utils.argcheck import descrpt_args_plugin @descrpt_args_plugin.register("some_descrpt") -def descrpt_some_args() -> List[Argument]: +def descrpt_some_args() -> list[Argument]: return [ Argument("arg1", bool, optional=False, doc="balabala"), Argument("arg2", float, optional=True, default=6.0, doc="haha"), diff --git a/doc/getting-started/quick_start.ipynb b/doc/getting-started/quick_start.ipynb index d1c45ad0b8..0c9563b9e9 100644 --- a/doc/getting-started/quick_start.ipynb +++ b/doc/getting-started/quick_start.ipynb @@ -523,7 +523,7 @@ " color: #bbbbff;\n", "}\n", "\n", - "
{
  \"_comment\": \"that's all\",
  \"model\"model:
type: dict
: {
    \"type_map\"type_map:
type: typing.List[str], optional
A list of strings. Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment. If not given, type.raw in each system should use the same type indexes, and type_map.raw will take no effect.
: [
     \"H\",
     \"C\"
    ],

    \"descriptor\"descriptor:
type: dict
The descriptor of atomic environment.
: {
      \"type\"type:
type: str
The type of the descritpor. See explanation below.
- loc_frame: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.
- se_e2_a: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.
- se_e2_r: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.
- se_e3: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.
- se_a_tpe: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.
- se_atten: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.
- se_atten_v2: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.
- se_a_mask: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). aparam are required as an indicator matrix for the real/virtual sign of input atoms.
- hybrid: Concatenate of a list of descriptors as a new descriptor.
: \"se_e2_a\",
      \"sel\"sel:
type: str | typing.List[int], optional, default: auto
This parameter set the number of selected neighbors for each type of atom. It can be:
- List[int]. The length of the list should be the same as the number of atom types in the system. sel[i] gives the selected number of type-i neighbors. sel[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.
- str. Can be \"auto:factor\" or \"auto\". \"factor\" is a float number larger than 1. This option will automatically determine the sel. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the \"factor\". Finally the number is wraped up to 4 divisible. The option \"auto\" is equivalent to \"auto:1.1\".
: \"auto\",
      \"rcut_smth\"rcut_smth:
type: float, optional, default: 0.5
Where to start smoothing. For example the 1/r term is smoothed from rcut to rcut_smth
: 0.5,
      \"rcut\"rcut:
type: float, optional, default: 6.0
The cut-off radius.
: 6.0,
      \"neuron\"neuron:
type: typing.List[int], optional, default: [10, 20, 40]
Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
: [
       25,
       50,
       100
      ],

      \"resnet_dt\"resnet_dt:
type: bool, optional, default: False
Whether to use a \"Timestep\" in the skip connection
: false,
      \"axis_neuron\"axis_neuron:
type: int, optional, default: 4, alias: n_axis_neuron
Size of the submatrix of G (embedding matrix).
: 16,
      \"seed\"seed:
type: NoneType | int, optional
Random seed for parameter initialization
: 1,
      \"_comment\": \" that's all\"
    },
    \"fitting_net\"fitting_net:
type: dict
The fitting of physical properties.
: {
      \"neuron\"neuron:
type: typing.List[int], optional, default: [120, 120, 120], alias: n_neuron
The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.
: [
       240,
       240,
       240
      ],

      \"resnet_dt\"resnet_dt:
type: bool, optional, default: True
Whether to use a \"Timestep\" in the skip connection
: true,
      \"seed\"seed:
type: NoneType | int, optional
Random seed for parameter initialization of the fitting net
: 1,
      \"_comment\": \" that's all\"
    },
    \"_comment\": \" that's all\"
  },
  \"learning_rate\"learning_rate:
type: dict, optional
The definitio of learning rate
: {
    \"type\"type:
type: str, default: exp
The type of the learning rate.
: \"exp\",
    \"decay_steps\"decay_steps:
type: int, optional, default: 5000
The learning rate decays every this many training steps.
: 50,
    \"start_lr\"start_lr:
type: float, optional, default: 0.001
The learning rate at the start of the training.
: 0.001,
    \"stop_lr\"stop_lr:
type: float, optional, default: 1e-08
The desired learning rate at the end of the training.
: 3.51e-08,
    \"_comment\": \"that's all\"
  },
  \"loss\"loss:
type: dict, optional
The definition of the loss function. The loss type should be set to tensor or ener, or be left unset.
: {
    \"type\"type:
type: str, default: ener
The type of the loss. When the fitting type is ener, the loss type should be set to ener or left unset. When the fitting type is dipole or polar, the loss type should be set to tensor.
: \"ener\",
    \"start_pref_e\"start_pref_e:
type: float | int, optional, default: 0.02
The prefactor of energy loss at the start of the training. Should be larger than or equal to 0. If set to a non-zero value, the energy label should be provided by file energy.npy in each data system. If both start_pref_e and limit_pref_e are set to 0, then the energy will be ignored.
: 0.02,
    \"limit_pref_e\"limit_pref_e:
type: float | int, optional, default: 1.0
The prefactor of energy loss at the limit of the training, i.e. when the training step goes to infinity. Should be larger than or equal to 0.
: 1,
    \"start_pref_f\"start_pref_f:
type: float | int, optional, default: 1000
The prefactor of force loss at the start of the training. Should be larger than or equal to 0. If set to a non-zero value, the force label should be provided by file force.npy in each data system. If both start_pref_f and limit_pref_f are set to 0, then the force will be ignored.
: 1000,
    \"limit_pref_f\"limit_pref_f:
type: float | int, optional, default: 1.0
The prefactor of force loss at the limit of the training, i.e. when the training step goes to infinity. Should be larger than or equal to 0.
: 1,
    \"start_pref_v\"start_pref_v:
type: float | int, optional, default: 0.0
The prefactor of virial loss at the start of the training. Should be larger than or equal to 0. If set to a non-zero value, the virial label should be provided by file virial.npy in each data system. If both start_pref_v and limit_pref_v are set to 0, then the virial will be ignored.
: 0,
    \"limit_pref_v\"limit_pref_v:
type: float | int, optional, default: 0.0
The prefactor of virial loss at the limit of the training, i.e. when the training step goes to infinity. Should be larger than or equal to 0.
: 0,
    \"_comment\": \" that's all\"
  },
  \"training\"training:
type: dict
The training options.
: {
    \"training_data\"training_data:
type: dict, optional
Configurations of training data.
: {
      \"systems\"systems:
type: str | typing.List[str]
The data systems for training. This key can be provided with a list that specifies the systems, or with a string that gives the prefix of all systems, from which the list of systems is automatically generated.
: [
       \"../00.data/training_data\"
      ],

      \"batch_size\"batch_size:
type: str | typing.List[int] | int, optional, default: auto
This key can be
- list: the length of which is the same as the number of systems. The batch size of each system is given by the elements of the list.
- int: all systems use the same batch size.
- string \"auto\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.
- string \"auto:N\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
- string \"mixed:N\": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. Only support the se_atten descriptor.
If MPI is used, the value should be considered as the batch size per task.
: \"auto\",
      \"_comment\": \"that's all\"
    },
    \"validation_data\"validation_data:
type: NoneType | dict, optional, default: None
Configurations of validation data. Similar to that of training data, except that a numb_btch argument may be configured
: {
      \"systems\"systems:
type: str | typing.List[str]
The data systems for validation. This key can be provided with a list that specifies the systems, or with a string that gives the prefix of all systems, from which the list of systems is automatically generated.
: [
       \"../00.data/validation_data\"
      ],

      \"batch_size\"batch_size:
type: str | typing.List[int] | int, optional, default: auto
This key can be
- list: the length of which is the same as the number of systems. The batch size of each system is given by the elements of the list.
- int: all systems use the same batch size.
- string \"auto\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.
- string \"auto:N\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
: \"auto\",
      \"numb_btch\"numb_btch:
type: int, optional, default: 1, alias: numb_batch
An integer that specifies the number of batches to be sampled for each validation period.
: 1,
      \"_comment\": \"that's all\"
    },
    \"numb_steps\"numb_steps:
type: int, alias: stop_batch
Number of training batches. Each training step uses one batch of data.
: 10000,
    \"seed\"seed:
type: NoneType | int, optional
The random seed for getting frames from the training data set.
: 10,
    \"disp_file\"disp_file:
type: str, optional, default: lcurve.out
The file for printing the learning curve.
: \"lcurve.out\",
    \"disp_freq\"disp_freq:
type: int, optional, default: 1000
The frequency of printing learning curve.
: 200,
    \"save_freq\"save_freq:
type: int, optional, default: 1000
The frequency of saving checkpoints.
: 1000,
    \"_comment\": \"that's all\"
  }
}
" + "
{
  \"_comment\": \"that's all\",
  \"model\"model:
type: dict
: {
    \"type_map\"type_map:
type: typing.list[str], optional
A list of strings. Gives the name to each type of atoms. It is noted that the number of atom types of the training system must be less than 128 in a GPU environment. If not given, type.raw in each system should use the same type indexes, and type_map.raw will take no effect.
: [
     \"H\",
     \"C\"
    ],

    \"descriptor\"descriptor:
type: dict
The descriptor of atomic environment.
: {
      \"type\"type:
type: str
The type of the descriptor. See explanation below.
- loc_frame: Defines a local frame at each atom, and then computes the descriptor as local coordinates under this frame.
- se_e2_a: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.
- se_e2_r: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.
- se_e3: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.
- se_a_tpe: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.
- se_atten: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.
- se_atten_v2: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.
- se_a_mask: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). aparam are required as an indicator matrix for the real/virtual sign of input atoms.
- hybrid: Concatenation of a list of descriptors as a new descriptor.
: \"se_e2_a\",
      \"sel\"sel:
type: str | typing.list[int], optional, default: auto
This parameter sets the number of selected neighbors for each type of atom. It can be:
- list[int]. The length of the list should be the same as the number of atom types in the system. sel[i] gives the selected number of type-i neighbors. sel[i] is recommended to be larger than the maximum possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.
- str. Can be \"auto:factor\" or \"auto\". \"factor\" is a float number larger than 1. This option will automatically determine the sel. In detail, it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiplies the maximum by the \"factor\". Finally the number is rounded up to be divisible by 4. The option \"auto\" is equivalent to \"auto:1.1\".
: \"auto\",
      \"rcut_smth\"rcut_smth:
type: float, optional, default: 0.5
Where to start smoothing. For example, the 1/r term is smoothed from rcut to rcut_smth.
: 0.5,
      \"rcut\"rcut:
type: float, optional, default: 6.0
The cut-off radius.
: 6.0,
      \"neuron\"neuron:
type: typing.list[int], optional, default: [10, 20, 40]
Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
: [
       25,
       50,
       100
      ],

      \"resnet_dt\"resnet_dt:
type: bool, optional, default: False
Whether to use a \"Timestep\" in the skip connection
: false,
      \"axis_neuron\"axis_neuron:
type: int, optional, default: 4, alias: n_axis_neuron
Size of the submatrix of G (embedding matrix).
: 16,
      \"seed\"seed:
type: NoneType | int, optional
Random seed for parameter initialization
: 1,
      \"_comment\": \" that's all\"
    },
    \"fitting_net\"fitting_net:
type: dict
The fitting of physical properties.
: {
      \"neuron\"neuron:
type: typing.list[int], optional, default: [120, 120, 120], alias: n_neuron
The number of neurons in each hidden layer of the fitting net. When two hidden layers are of the same size, a skip connection is built.
: [
       240,
       240,
       240
      ],

      \"resnet_dt\"resnet_dt:
type: bool, optional, default: True
Whether to use a \"Timestep\" in the skip connection
: true,
      \"seed\"seed:
type: NoneType | int, optional
Random seed for parameter initialization of the fitting net
: 1,
      \"_comment\": \" that's all\"
    },
    \"_comment\": \" that's all\"
  },
  \"learning_rate\"learning_rate:
type: dict, optional
The definition of the learning rate
: {
    \"type\"type:
type: str, default: exp
The type of the learning rate.
: \"exp\",
    \"decay_steps\"decay_steps:
type: int, optional, default: 5000
The learning rate decays every this many training steps.
: 50,
    \"start_lr\"start_lr:
type: float, optional, default: 0.001
The learning rate at the start of the training.
: 0.001,
    \"stop_lr\"stop_lr:
type: float, optional, default: 1e-08
The desired learning rate at the end of the training.
: 3.51e-08,
    \"_comment\": \"that's all\"
  },
  \"loss\"loss:
type: dict, optional
The definition of the loss function. The loss type should be set to tensor or ener, or be left unset.
: {
    \"type\"type:
type: str, default: ener
The type of the loss. When the fitting type is ener, the loss type should be set to ener or left unset. When the fitting type is dipole or polar, the loss type should be set to tensor.
: \"ener\",
    \"start_pref_e\"start_pref_e:
type: float | int, optional, default: 0.02
The prefactor of energy loss at the start of the training. Should be larger than or equal to 0. If set to a non-zero value, the energy label should be provided by file energy.npy in each data system. If both start_pref_e and limit_pref_e are set to 0, then the energy will be ignored.
: 0.02,
    \"limit_pref_e\"limit_pref_e:
type: float | int, optional, default: 1.0
The prefactor of energy loss at the limit of the training, i.e. when the training step goes to infinity. Should be larger than or equal to 0.
: 1,
    \"start_pref_f\"start_pref_f:
type: float | int, optional, default: 1000
The prefactor of force loss at the start of the training. Should be larger than or equal to 0. If set to a non-zero value, the force label should be provided by file force.npy in each data system. If both start_pref_f and limit_pref_f are set to 0, then the force will be ignored.
: 1000,
    \"limit_pref_f\"limit_pref_f:
type: float | int, optional, default: 1.0
The prefactor of force loss at the limit of the training, i.e. when the training step goes to infinity. Should be larger than or equal to 0.
: 1,
    \"start_pref_v\"start_pref_v:
type: float | int, optional, default: 0.0
The prefactor of virial loss at the start of the training. Should be larger than or equal to 0. If set to a non-zero value, the virial label should be provided by file virial.npy in each data system. If both start_pref_v and limit_pref_v are set to 0, then the virial will be ignored.
: 0,
    \"limit_pref_v\"limit_pref_v:
type: float | int, optional, default: 0.0
The prefactor of virial loss at the limit of the training, i.e. when the training step goes to infinity. Should be larger than or equal to 0.
: 0,
    \"_comment\": \" that's all\"
  },
  \"training\"training:
type: dict
The training options.
: {
    \"training_data\"training_data:
type: dict, optional
Configurations of training data.
: {
      \"systems\"systems:
type: str | typing.list[str]
The data systems for training. This key can be provided with a list that specifies the systems, or with a string that gives the prefix of all systems, from which the list of systems is automatically generated.
: [
       \"../00.data/training_data\"
      ],

      \"batch_size\"batch_size:
type: str | typing.list[int] | int, optional, default: auto
This key can be
- list: the length of which is the same as the number of systems. The batch size of each system is given by the elements of the list.
- int: all systems use the same batch size.
- string \"auto\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.
- string \"auto:N\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
- string \"mixed:N\": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. Only support the se_atten descriptor.
If MPI is used, the value should be considered as the batch size per task.
: \"auto\",
      \"_comment\": \"that's all\"
    },
    \"validation_data\"validation_data:
type: NoneType | dict, optional, default: None
Configurations of validation data. Similar to that of training data, except that a numb_btch argument may be configured
: {
      \"systems\"systems:
type: str | typing.list[str]
The data systems for validation. This key can be provided with a list that specifies the systems, or with a string that gives the prefix of all systems, from which the list of systems is automatically generated.
: [
       \"../00.data/validation_data\"
      ],

      \"batch_size\"batch_size:
type: str | typing.list[int] | int, optional, default: auto
This key can be
- list: the length of which is the same as the number of systems. The batch size of each system is given by the elements of the list.
- int: all systems use the same batch size.
- string \"auto\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.
- string \"auto:N\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
: \"auto\",
      \"numb_btch\"numb_btch:
type: int, optional, default: 1, alias: numb_batch
An integer that specifies the number of batches to be sampled for each validation period.
: 1,
      \"_comment\": \"that's all\"
    },
    \"numb_steps\"numb_steps:
type: int, alias: stop_batch
Number of training batches. Each training step uses one batch of data.
: 10000,
    \"seed\"seed:
type: NoneType | int, optional
The random seed for getting frames from the training data set.
: 10,
    \"disp_file\"disp_file:
type: str, optional, default: lcurve.out
The file for printing the learning curve.
: \"lcurve.out\",
    \"disp_freq\"disp_freq:
type: int, optional, default: 1000
The frequency of printing learning curve.
: 200,
    \"save_freq\"save_freq:
type: int, optional, default: 1000
The frequency of saving checkpoints.
: 1000,
    \"_comment\": \"that's all\"
  }
}
" ], "text/plain": [ "" diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md index a0c6270287..99962d08b8 100644 --- a/doc/install/easy-install.md +++ b/doc/install/easy-install.md @@ -10,7 +10,7 @@ You can refer to [DeepModeling conda FAQ](https://docs.deepmodeling.com/faq/cond ::: :::{note} -Python 3.8 or above is required for Python interface. +Python 3.9 or above is required for Python interface. ::: - [Install off-line packages](#install-off-line-packages) diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md index a725be0133..4079a8d424 100644 --- a/doc/install/install-from-source.md +++ b/doc/install/install-from-source.md @@ -21,7 +21,7 @@ deepmd_source_dir=`pwd` ### Install Backend's Python interface First, check the Python version on your machine. -Python 3.8 or above is required. +Python 3.9 or above is required. ```bash python --version @@ -95,7 +95,7 @@ deactivate If one has multiple python interpreters named something like python3.x, it can be specified by, for example ```bash -virtualenv -p python3.8 $deepmd_venv +virtualenv -p python3.9 $deepmd_venv ``` One should remember to activate the virtual environment every time he/she uses DeePMD-kit. diff --git a/pyproject.toml b/pyproject.toml index 1b825ef441..6932960ace 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,10 @@ classifiers = [ "Programming Language :: Python :: 3 :: Only", "Environment :: GPU :: NVIDIA CUDA :: 12 :: 12.2", "Intended Audience :: Science/Research", - "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Scientific/Engineering :: Physics", @@ -51,7 +54,7 @@ dependencies = [ 'mendeleev', 'array-api-compat', ] -requires-python = ">=3.8" +requires-python = ">=3.9" keywords = ["deepmd"] [project.entry-points."lammps.plugins"] diff --git a/source/install/build_tf.py b/source/install/build_tf.py index a65e922098..a9e1e247cd 100755 --- a/source/install/build_tf.py +++ b/source/install/build_tf.py @@ -56,8 +56,6 @@ ignore_patterns, ) from typing import ( - Dict, - List, Optional, ) @@ -225,11 +223,11 @@ class Build(metaclass=ABCMeta): """Build process.""" @abstractproperty - def resources(self) -> Dict[str, OnlineResource]: + def resources(self) -> dict[str, OnlineResource]: """Required resources.""" @abstractproperty - def dependencies(self) -> Dict[str, "Build"]: + def dependencies(self) -> dict[str, "Build"]: """Required dependencies.""" def download_all_resources(self): @@ -364,7 +362,7 @@ def _ignore_patterns(path, names): return _ignore_patterns -def call(commands: List[str], env={}, **kwargs): +def call(commands: list[str], env={}, **kwargs): """Call commands and print to screen for debug. 
Raises @@ -423,14 +421,14 @@ def __init__(self, version="1.11.0") -> None: @property @lru_cache - def resources(self) -> Dict[str, OnlineResource]: + def resources(self) -> dict[str, OnlineResource]: return { "bazelisk": RESOURCES["bazelisk-" + self.version], } @property @lru_cache - def dependencies(self) -> Dict[str, Build]: + def dependencies(self) -> dict[str, Build]: return {} def build(self): @@ -449,12 +447,12 @@ class BuildNumPy(Build): @property @lru_cache - def resources(self) -> Dict[str, OnlineResource]: + def resources(self) -> dict[str, OnlineResource]: return {} @property @lru_cache - def dependencies(self) -> Dict[str, Build]: + def dependencies(self) -> dict[str, Build]: return {} @property @@ -481,12 +479,12 @@ class BuildCUDA(Build): @property @lru_cache - def resources(self) -> Dict[str, OnlineResource]: + def resources(self) -> dict[str, OnlineResource]: return {} @property @lru_cache - def dependencies(self) -> Dict[str, Build]: + def dependencies(self) -> dict[str, Build]: return {} def build(self): @@ -554,12 +552,12 @@ class BuildROCM(Build): @property @lru_cache - def resources(self) -> Dict[str, OnlineResource]: + def resources(self) -> dict[str, OnlineResource]: return {} @property @lru_cache - def dependencies(self) -> Dict[str, Build]: + def dependencies(self) -> dict[str, Build]: return {} def build(self): @@ -599,14 +597,14 @@ def __init__( @property @lru_cache - def resources(self) -> Dict[str, OnlineResource]: + def resources(self) -> dict[str, OnlineResource]: return { "tensorflow": RESOURCES["tensorflow-" + self.version], } @property @lru_cache - def dependencies(self) -> Dict[str, Build]: + def dependencies(self) -> dict[str, Build]: optional_dep = {} if self.enable_cuda: optional_dep["cuda"] = BuildCUDA() @@ -778,12 +776,12 @@ def _environments(self) -> dict: } @property - def _build_targets(self) -> List[str]: + def _build_targets(self) -> list[str]: # C++ interface return ["//tensorflow:libtensorflow_cc" + get_shlib_ext()] @property - def _build_opts(self) -> List[str]: + def _build_opts(self) -> list[str]: opts = [ "--logging=6", "--verbose_failures", @@ -798,7 +796,7 @@ def _build_opts(self) -> List[str]: return opts @property - def _bazel_opts(self) -> List[str]: + def _bazel_opts(self) -> list[str]: return [] @property @@ -826,7 +824,7 @@ def clean_package(): # interface -def env() -> Dict[str, str]: +def env() -> dict[str, str]: return { "Python": sys.executable, "CUDA": CUDA_PATH, @@ -855,12 +853,12 @@ class RawTextArgumentDefaultsHelpFormatter( pass -def parse_args(args: Optional[List[str]] = None): +def parse_args(args: Optional[list[str]] = None): """TensorFlow C++ Library Installer commandline options argument parser. 
Parameters ---------- - args : List[str] + args : list[str] list of command line arguments, main purpose is testing default option None takes arguments from sys.argv """ diff --git a/source/tests/common/dpmodel/array_api/test_env_mat.py b/source/tests/common/dpmodel/array_api/test_env_mat.py index d5bc7b6c18..8dfa199d53 100644 --- a/source/tests/common/dpmodel/array_api/test_env_mat.py +++ b/source/tests/common/dpmodel/array_api/test_env_mat.py @@ -1,11 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -import sys import unittest -if sys.version_info >= (3, 9): - import array_api_strict as xp -else: - raise unittest.SkipTest("array_api_strict doesn't support Python<=3.8") +import array_api_strict as xp from deepmd.dpmodel.utils.env_mat import ( compute_smooth_weight, diff --git a/source/tests/common/dpmodel/test_output_def.py b/source/tests/common/dpmodel/test_output_def.py index 9e8ef2940f..03ceb67d01 100644 --- a/source/tests/common/dpmodel/test_output_def.py +++ b/source/tests/common/dpmodel/test_output_def.py @@ -1,8 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import unittest -from typing import ( - List, -) import numpy as np @@ -26,7 +23,7 @@ class VariableDef: def __init__( self, name: str, - shape: List[int], + shape: list[int], atomic: bool = True, ): self.name = name diff --git a/source/tests/common/test_argument_parser.py b/source/tests/common/test_argument_parser.py index 36a2f07be5..1404185607 100644 --- a/source/tests/common/test_argument_parser.py +++ b/source/tests/common/test_argument_parser.py @@ -15,9 +15,6 @@ from typing import ( TYPE_CHECKING, Any, - Dict, - List, - Tuple, Union, ) @@ -33,13 +30,13 @@ from typing_extensions import TypedDict # python<=3.7 class DATA(TypedDict): - type: Union[type, Tuple[type]] + type: Union[type, tuple[type]] value: Any - TEST_DICT = Dict[str, DATA] + TEST_DICT = dict[str, DATA] -def build_args(args: "TEST_DICT", command: str) -> List[str]: +def build_args(args: "TEST_DICT", command: str) -> list[str]: """Build list of arguments similar to one generated by `sys.argv` used by argparse. 
Parameters @@ -51,7 +48,7 @@ def build_args(args: "TEST_DICT", command: str) -> List[str]: Returns ------- - List[str] + list[str] arguments with options as list of strings, goal is to emulate `sys.argv` """ args_list = [command] diff --git a/source/tests/common/test_auto_batch_size.py b/source/tests/common/test_auto_batch_size.py index 0369bbb70c..cc1e6bf25a 100644 --- a/source/tests/common/test_auto_batch_size.py +++ b/source/tests/common/test_auto_batch_size.py @@ -1,8 +1,9 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import os -import sys import unittest +import array_api_strict as xp + from deepmd.utils.batch_size import ( AutoBatchSize, ) @@ -10,11 +11,6 @@ OutOfMemoryError, ) -if sys.version_info >= (3, 9): - import array_api_strict as xp -else: - raise unittest.SkipTest("array_api_strict doesn't support Python<=3.8") - class CustomizedAutoBatchSizeCPU(AutoBatchSize): def is_gpu_available(self): diff --git a/source/tests/consistent/common.py b/source/tests/consistent/common.py index e8873e528a..c64b14c273 100644 --- a/source/tests/consistent/common.py +++ b/source/tests/consistent/common.py @@ -14,10 +14,7 @@ Any, Callable, ClassVar, - Dict, - List, Optional, - Tuple, Union, ) from uuid import ( @@ -75,7 +72,7 @@ class CommonTest(ABC): """PyTorch model class.""" jax_class: ClassVar[Optional[type]] """JAX model class.""" - args: ClassVar[Optional[Union[Argument, List[Argument]]]] + args: ClassVar[Optional[Union[Argument, list[Argument]]]] """Arguments that maps to the `data`.""" skip_dp: ClassVar[bool] = False """Whether to skip the native DP model.""" @@ -118,7 +115,7 @@ def pass_data_to_cls(self, cls, data) -> Any: return cls(**data, **self.addtional_data) @abstractmethod - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: """Build the TF graph. Parameters @@ -175,7 +172,7 @@ class RefBackend(Enum): JAX = 5 @abstractmethod - def extract_ret(self, ret: Any, backend: RefBackend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend: RefBackend) -> tuple[np.ndarray, ...]: """Extract the return value when comparing with other backends. Parameters @@ -193,7 +190,7 @@ def extract_ret(self, ret: Any, backend: RefBackend) -> Tuple[np.ndarray, ...]: def build_eval_tf( self, sess: "tf.Session", obj: Any, suffix: str - ) -> List[np.ndarray]: + ) -> list[np.ndarray]: """Build and evaluate the TF graph.""" t_out, feed_dict = self.build_tf(obj, suffix) @@ -489,7 +486,7 @@ class TestClass(base_class): def parameterize_func( func: Callable, - param_dict_list: Dict[str, Tuple], + param_dict_list: dict[str, tuple], ): """Parameterize functions with different default values. @@ -497,7 +494,7 @@ def parameterize_func( ---------- func : Callable The base function. - param_dict_list : Dict[str, Tuple] + param_dict_list : dict[str, Tuple] Dictionary of parameters with default values to be changed in base function, each of which is a tuple of choices. 
Returns diff --git a/source/tests/consistent/descriptor/test_dpa1.py b/source/tests/consistent/descriptor/test_dpa1.py index 0f44ecaae1..59d7369753 100644 --- a/source/tests/consistent/descriptor/test_dpa1.py +++ b/source/tests/consistent/descriptor/test_dpa1.py @@ -3,7 +3,6 @@ from typing import ( Any, Optional, - Tuple, ) import numpy as np @@ -284,7 +283,7 @@ def setUp(self): use_tebd_bias, ) = self.param - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: return self.build_tf_descriptor( obj, self.natoms, @@ -314,7 +313,7 @@ def eval_pt(self, pt_obj: Any) -> Any: mixed_types=True, ) - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: return (ret[0],) @property diff --git a/source/tests/consistent/descriptor/test_dpa2.py b/source/tests/consistent/descriptor/test_dpa2.py index 144567ae58..53f9ce4200 100644 --- a/source/tests/consistent/descriptor/test_dpa2.py +++ b/source/tests/consistent/descriptor/test_dpa2.py @@ -2,7 +2,6 @@ import unittest from typing import ( Any, - Tuple, ) import numpy as np @@ -338,7 +337,7 @@ def setUp(self): use_tebd_bias, ) = self.param - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: return self.build_tf_descriptor( obj, self.natoms, @@ -368,7 +367,7 @@ def eval_pt(self, pt_obj: Any) -> Any: mixed_types=True, ) - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: return (ret[0],) @property diff --git a/source/tests/consistent/descriptor/test_hybrid.py b/source/tests/consistent/descriptor/test_hybrid.py index 7cfb627d54..cd52eea5be 100644 --- a/source/tests/consistent/descriptor/test_hybrid.py +++ b/source/tests/consistent/descriptor/test_hybrid.py @@ -2,7 +2,6 @@ import unittest from typing import ( Any, - Tuple, ) import numpy as np @@ -105,7 +104,7 @@ def setUp(self): ) self.natoms = np.array([6, 6, 2, 4], dtype=np.int32) - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: return self.build_tf_descriptor( obj, self.natoms, @@ -133,5 +132,5 @@ def eval_pt(self, pt_obj: Any) -> Any: self.box, ) - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: return (ret[0],) diff --git a/source/tests/consistent/descriptor/test_se_atten_v2.py b/source/tests/consistent/descriptor/test_se_atten_v2.py index 989fdc16e7..a3fe4e98b4 100644 --- a/source/tests/consistent/descriptor/test_se_atten_v2.py +++ b/source/tests/consistent/descriptor/test_se_atten_v2.py @@ -3,7 +3,6 @@ from typing import ( Any, Optional, - Tuple, ) import numpy as np @@ -215,7 +214,7 @@ def setUp(self): ) self.natoms = np.array([6, 6, 2, 4], dtype=np.int32) - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: return self.build_tf_descriptor( obj, self.natoms, @@ -245,7 +244,7 @@ def eval_pt(self, pt_obj: Any) -> Any: mixed_types=True, ) - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: return (ret[0],) @property diff --git a/source/tests/consistent/descriptor/test_se_e2_a.py b/source/tests/consistent/descriptor/test_se_e2_a.py index 
1e3e5ae86d..2563ee1d6d 100644 --- a/source/tests/consistent/descriptor/test_se_e2_a.py +++ b/source/tests/consistent/descriptor/test_se_e2_a.py @@ -2,7 +2,6 @@ import unittest from typing import ( Any, - Tuple, ) import numpy as np @@ -150,7 +149,7 @@ def setUp(self): self.atype = self.atype[idx] self.coords = self.coords.reshape(-1, 3)[idx].ravel() - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: return self.build_tf_descriptor( obj, self.natoms, @@ -178,7 +177,7 @@ def eval_pt(self, pt_obj: Any) -> Any: self.box, ) - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: return (ret[0],) @property diff --git a/source/tests/consistent/descriptor/test_se_r.py b/source/tests/consistent/descriptor/test_se_r.py index 8b835f3b5c..7103f60aa7 100644 --- a/source/tests/consistent/descriptor/test_se_r.py +++ b/source/tests/consistent/descriptor/test_se_r.py @@ -2,7 +2,6 @@ import unittest from typing import ( Any, - Tuple, ) import numpy as np @@ -121,7 +120,7 @@ def setUp(self): ) self.natoms = np.array([6, 6, 2, 4], dtype=np.int32) - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: return self.build_tf_descriptor( obj, self.natoms, @@ -149,7 +148,7 @@ def eval_pt(self, pt_obj: Any) -> Any: self.box, ) - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: return (ret[0],) @property diff --git a/source/tests/consistent/descriptor/test_se_t.py b/source/tests/consistent/descriptor/test_se_t.py index 7579344012..833b76f6e1 100644 --- a/source/tests/consistent/descriptor/test_se_t.py +++ b/source/tests/consistent/descriptor/test_se_t.py @@ -2,7 +2,6 @@ import unittest from typing import ( Any, - Tuple, ) import numpy as np @@ -141,7 +140,7 @@ def setUp(self): self.atype = self.atype[idx] self.coords = self.coords.reshape(-1, 3)[idx].ravel() - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: return self.build_tf_descriptor( obj, self.natoms, @@ -169,7 +168,7 @@ def eval_pt(self, pt_obj: Any) -> Any: self.box, ) - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: return (ret[0],) @property diff --git a/source/tests/consistent/descriptor/test_se_t_tebd.py b/source/tests/consistent/descriptor/test_se_t_tebd.py index d9bd00aad3..3299a04c78 100644 --- a/source/tests/consistent/descriptor/test_se_t_tebd.py +++ b/source/tests/consistent/descriptor/test_se_t_tebd.py @@ -2,7 +2,6 @@ import unittest from typing import ( Any, - Tuple, ) import numpy as np @@ -187,7 +186,7 @@ def setUp(self): use_tebd_bias, ) = self.param - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: return self.build_tf_descriptor( obj, self.natoms, @@ -217,7 +216,7 @@ def eval_pt(self, pt_obj: Any) -> Any: mixed_types=True, ) - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: return (ret[0],) @property diff --git a/source/tests/consistent/fitting/test_dipole.py b/source/tests/consistent/fitting/test_dipole.py index 4f33d58c10..5d7be1b0e5 100644 --- 
a/source/tests/consistent/fitting/test_dipole.py +++ b/source/tests/consistent/fitting/test_dipole.py @@ -2,7 +2,6 @@ import unittest from typing import ( Any, - Tuple, ) import numpy as np @@ -97,7 +96,7 @@ def addtional_data(self) -> dict: "embedding_width": 30, } - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: ( resnet_dt, precision, @@ -144,7 +143,7 @@ def eval_dp(self, dp_obj: Any) -> Any: None, )["dipole"] - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: if backend == self.RefBackend.TF: # shape is not same ret = ret[0].reshape(-1, self.natoms[0], 1) diff --git a/source/tests/consistent/fitting/test_dos.py b/source/tests/consistent/fitting/test_dos.py index bfdf76c8ff..ada65c8ac5 100644 --- a/source/tests/consistent/fitting/test_dos.py +++ b/source/tests/consistent/fitting/test_dos.py @@ -2,7 +2,6 @@ import unittest from typing import ( Any, - Tuple, ) import numpy as np @@ -106,7 +105,7 @@ def addtional_data(self) -> dict: "mixed_types": mixed_types, } - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: ( resnet_dt, precision, @@ -158,7 +157,7 @@ def eval_dp(self, dp_obj: Any) -> Any: fparam=self.fparam if numb_fparam else None, )["dos"] - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: if backend == self.RefBackend.TF: # shape is not same ret = ret[0].reshape(-1, self.natoms[0], 1) diff --git a/source/tests/consistent/fitting/test_ener.py b/source/tests/consistent/fitting/test_ener.py index 157b1bab8a..ac4f7ae543 100644 --- a/source/tests/consistent/fitting/test_ener.py +++ b/source/tests/consistent/fitting/test_ener.py @@ -2,7 +2,6 @@ import unittest from typing import ( Any, - Tuple, ) import numpy as np @@ -106,7 +105,7 @@ def addtional_data(self) -> dict: "mixed_types": mixed_types, } - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: ( resnet_dt, precision, @@ -158,7 +157,7 @@ def eval_dp(self, dp_obj: Any) -> Any: fparam=self.fparam if numb_fparam else None, )["energy"] - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: if backend == self.RefBackend.TF: # shape is not same ret = ret[0].reshape(-1, self.natoms[0], 1) diff --git a/source/tests/consistent/fitting/test_polar.py b/source/tests/consistent/fitting/test_polar.py index 808514ade4..6a3465ba24 100644 --- a/source/tests/consistent/fitting/test_polar.py +++ b/source/tests/consistent/fitting/test_polar.py @@ -2,7 +2,6 @@ import unittest from typing import ( Any, - Tuple, ) import numpy as np @@ -97,7 +96,7 @@ def addtional_data(self) -> dict: "embedding_width": 30, } - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: ( resnet_dt, precision, @@ -144,7 +143,7 @@ def eval_dp(self, dp_obj: Any) -> Any: None, )["polarizability"] - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: if backend == self.RefBackend.TF: # shape is not same ret = ret[0].reshape(-1, self.natoms[0], 1) diff --git a/source/tests/consistent/fitting/test_property.py 
b/source/tests/consistent/fitting/test_property.py index 3f406d3a6b..a9fb6b694a 100644 --- a/source/tests/consistent/fitting/test_property.py +++ b/source/tests/consistent/fitting/test_property.py @@ -2,7 +2,6 @@ import unittest from typing import ( Any, - Tuple, ) import numpy as np @@ -113,7 +112,7 @@ def addtional_data(self) -> dict: "mixed_types": mixed_types, } - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: ( resnet_dt, precision, @@ -168,7 +167,7 @@ def eval_dp(self, dp_obj: Any) -> Any: fparam=self.fparam if numb_fparam else None, )["property"] - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: if backend == self.RefBackend.TF: # shape is not same ret = ret[0].reshape(-1, self.natoms[0], 1) diff --git a/source/tests/consistent/model/test_ener.py b/source/tests/consistent/model/test_ener.py index c8ff9e4dcf..692e1287dc 100644 --- a/source/tests/consistent/model/test_ener.py +++ b/source/tests/consistent/model/test_ener.py @@ -2,7 +2,6 @@ import unittest from typing import ( Any, - Tuple, ) import numpy as np @@ -141,7 +140,7 @@ def setUp(self): self.atype = self.atype[:, idx_map] self.coords = self.coords[:, idx_map] - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: return self.build_tf_model( obj, self.natoms, @@ -169,7 +168,7 @@ def eval_pt(self, pt_obj: Any) -> Any: self.box, ) - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: # shape not matched. ravel... if backend is self.RefBackend.DP: return (ret["energy_redu"].ravel(), ret["energy"].ravel()) diff --git a/source/tests/consistent/model/test_frozen.py b/source/tests/consistent/model/test_frozen.py index e362aed511..f11a11914b 100644 --- a/source/tests/consistent/model/test_frozen.py +++ b/source/tests/consistent/model/test_frozen.py @@ -3,7 +3,6 @@ import unittest from typing import ( Any, - Tuple, ) import numpy as np @@ -121,7 +120,7 @@ def setUp(self): self.atype = self.atype[:, idx_map] self.coords = self.coords[:, idx_map] - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: return self.build_tf_model( obj, self.natoms, @@ -149,7 +148,7 @@ def eval_pt(self, pt_obj: Any) -> Any: self.box, ) - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: # shape not matched. ravel... 
if backend is self.RefBackend.DP: return (ret["energy_redu"].ravel(), ret["energy"].ravel()) diff --git a/source/tests/consistent/test_type_embedding.py b/source/tests/consistent/test_type_embedding.py index c66ef0fbaa..1464517581 100644 --- a/source/tests/consistent/test_type_embedding.py +++ b/source/tests/consistent/test_type_embedding.py @@ -2,7 +2,6 @@ import unittest from typing import ( Any, - Tuple, ) import numpy as np @@ -96,7 +95,7 @@ def setUp(self): self.ntypes = 2 - def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: return [ obj.build( obj.ntypes, @@ -121,7 +120,7 @@ def eval_jax(self, jax_obj: Any) -> Any: raise ValueError("Output is numpy array") return [np.array(x) if isinstance(x, jnp.ndarray) else x for x in (out,)] - def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: return (ret[0],) @property diff --git a/source/tests/infer/case.py b/source/tests/infer/case.py index c1bce424c4..4a5ce638d6 100644 --- a/source/tests/infer/case.py +++ b/source/tests/infer/case.py @@ -27,7 +27,6 @@ Path, ) from typing import ( - Dict, Optional, ) @@ -175,12 +174,12 @@ def get_model(self, suffix: str, out_file: Optional[str] = None) -> str: @lru_cache -def get_cases() -> Dict[str, Case]: +def get_cases() -> dict[str, Case]: """Get all test cases. Returns ------- - Dict[str, Case] + dict[str, Case] A dictionary containing all test cases. Examples diff --git a/source/tests/pt/common.py b/source/tests/pt/common.py index 16b343be8a..173e9d52dc 100644 --- a/source/tests/pt/common.py +++ b/source/tests/pt/common.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, Union, ) @@ -46,7 +45,7 @@ def eval_model( model, coords: Union[np.ndarray, torch.Tensor], cells: Optional[Union[np.ndarray, torch.Tensor]], - atom_types: Union[np.ndarray, torch.Tensor, List[int]], + atom_types: Union[np.ndarray, torch.Tensor, list[int]], spins: Optional[Union[np.ndarray, torch.Tensor]] = None, atomic: bool = False, infer_batch_size: int = 2, diff --git a/source/tests/pt/model/test_atomic_model_atomic_stat.py b/source/tests/pt/model/test_atomic_model_atomic_stat.py index 470b01b507..6a21fc6e5a 100644 --- a/source/tests/pt/model/test_atomic_model_atomic_stat.py +++ b/source/tests/pt/model/test_atomic_model_atomic_stat.py @@ -5,7 +5,6 @@ Path, ) from typing import ( - List, Optional, ) @@ -70,11 +69,11 @@ def serialize(self) -> dict: raise NotImplementedError def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: raise NotImplementedError - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: raise NotImplementedError def forward( diff --git a/source/tests/pt/model/test_atomic_model_global_stat.py b/source/tests/pt/model/test_atomic_model_global_stat.py index 11752278e4..9ce5784bfa 100644 --- a/source/tests/pt/model/test_atomic_model_global_stat.py +++ b/source/tests/pt/model/test_atomic_model_global_stat.py @@ -5,7 +5,6 @@ Path, ) from typing import ( - List, Optional, ) @@ -82,11 +81,11 @@ def serialize(self) -> dict: raise NotImplementedError def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: raise NotImplementedError - def get_type_map(self) -> List[str]: + def get_type_map(self) -> 
list[str]: raise NotImplementedError def forward( diff --git a/source/tests/pt/model/test_force_grad.py b/source/tests/pt/model/test_force_grad.py index ddc3c0bccf..d3cd11f71d 100644 --- a/source/tests/pt/model/test_force_grad.py +++ b/source/tests/pt/model/test_force_grad.py @@ -6,7 +6,6 @@ Path, ) from typing import ( - List, Optional, ) @@ -32,7 +31,7 @@ class CheckSymmetry(DeepmdData): def __init__( self, sys_path: str, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, ): super().__init__(sys_path=sys_path, type_map=type_map) self.add("energy", 1, atomic=False, must=False, high_prec=True) diff --git a/source/tests/pt/model/test_linear_atomic_model_stat.py b/source/tests/pt/model/test_linear_atomic_model_stat.py index 604c82f513..49b7a3821f 100644 --- a/source/tests/pt/model/test_linear_atomic_model_stat.py +++ b/source/tests/pt/model/test_linear_atomic_model_stat.py @@ -5,7 +5,6 @@ Path, ) from typing import ( - List, Optional, ) @@ -63,11 +62,11 @@ def serialize(self) -> dict: raise NotImplementedError def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: raise NotImplementedError - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: raise NotImplementedError def forward( @@ -115,11 +114,11 @@ def serialize(self) -> dict: raise NotImplementedError def change_type_map( - self, type_map: List[str], model_with_new_type_stat=None + self, type_map: list[str], model_with_new_type_stat=None ) -> None: raise NotImplementedError - def get_type_map(self) -> List[str]: + def get_type_map(self) -> list[str]: raise NotImplementedError def forward( diff --git a/source/tests/pt/model/test_rotation.py b/source/tests/pt/model/test_rotation.py index caa6385c80..cf947c30b2 100644 --- a/source/tests/pt/model/test_rotation.py +++ b/source/tests/pt/model/test_rotation.py @@ -5,7 +5,6 @@ Path, ) from typing import ( - List, Optional, ) @@ -30,7 +29,7 @@ class CheckSymmetry(DeepmdData): def __init__( self, sys_path: str, - type_map: Optional[List[str]] = None, + type_map: Optional[list[str]] = None, ): super().__init__(sys_path=sys_path, type_map=type_map) self.add("energy", 1, atomic=False, must=False, high_prec=True) diff --git a/source/tests/universal/common/cases/atomic_model/utils.py b/source/tests/universal/common/cases/atomic_model/utils.py index b63563e237..bfd2e2cd5f 100644 --- a/source/tests/universal/common/cases/atomic_model/utils.py +++ b/source/tests/universal/common/cases/atomic_model/utils.py @@ -2,8 +2,6 @@ from typing import ( Any, Callable, - Dict, - List, Optional, ) @@ -21,7 +19,7 @@ class AtomicModelTestCase: """Common test case for atomic model.""" - expected_type_map: List[str] + expected_type_map: list[str] """Expected type map.""" expected_rcut: float """Expected cut-off radius.""" @@ -29,25 +27,25 @@ class AtomicModelTestCase: """Expected number (dimension) of frame parameters.""" expected_dim_aparam: int """Expected number (dimension) of atomic parameters.""" - expected_sel_type: List[int] + expected_sel_type: list[int] """Expected selected atom types.""" expected_aparam_nall: bool """Expected shape of atomic parameters.""" - expected_model_output_type: List[str] + expected_model_output_type: list[str] """Expected output type for the model.""" - model_output_equivariant: List[str] + model_output_equivariant: list[str] """Outputs that are equivariant to the input rotation.""" - expected_sel: List[int] + expected_sel: list[int] 
"""Expected number of neighbors.""" expected_has_message_passing: bool """Expected whether having message passing.""" forward_wrapper: Callable[[Any], Any] """Calss wrapper for forward method.""" - aprec_dict: Dict[str, Optional[float]] + aprec_dict: dict[str, Optional[float]] """Dictionary of absolute precision in each test.""" - rprec_dict: Dict[str, Optional[float]] + rprec_dict: dict[str, Optional[float]] """Dictionary of relative precision in each test.""" - epsilon_dict: Dict[str, Optional[float]] + epsilon_dict: dict[str, Optional[float]] """Dictionary of epsilons in each test.""" def test_get_type_map(self): diff --git a/source/tests/universal/common/cases/model/utils.py b/source/tests/universal/common/cases/model/utils.py index 66b2e64fd3..d583d06b05 100644 --- a/source/tests/universal/common/cases/model/utils.py +++ b/source/tests/universal/common/cases/model/utils.py @@ -6,8 +6,6 @@ from typing import ( Any, Callable, - Dict, - List, Optional, ) @@ -31,7 +29,7 @@ class ModelTestCase: """Common test case for model.""" - expected_type_map: List[str] + expected_type_map: list[str] """Expected type map.""" expected_rcut: float """Expected cut-off radius.""" @@ -39,15 +37,15 @@ class ModelTestCase: """Expected number (dimension) of frame parameters.""" expected_dim_aparam: int """Expected number (dimension) of atomic parameters.""" - expected_sel_type: List[int] + expected_sel_type: list[int] """Expected selected atom types.""" expected_aparam_nall: bool """Expected shape of atomic parameters.""" - expected_model_output_type: List[str] + expected_model_output_type: list[str] """Expected output type for the model.""" - model_output_equivariant: List[str] + model_output_equivariant: list[str] """Outputs that are equivariant to the input rotation.""" - expected_sel: List[int] + expected_sel: list[int] """Expected number of neighbors.""" expected_has_message_passing: bool """Expected whether having message passing.""" @@ -55,11 +53,11 @@ class ModelTestCase: """Class wrapper for forward method.""" forward_wrapper_cpu_ref: Callable[[Any], Any] """Convert model to CPU method.""" - aprec_dict: Dict[str, Optional[float]] + aprec_dict: dict[str, Optional[float]] """Dictionary of absolute precision in each test.""" - rprec_dict: Dict[str, Optional[float]] + rprec_dict: dict[str, Optional[float]] """Dictionary of relative precision in each test.""" - epsilon_dict: Dict[str, Optional[float]] + epsilon_dict: dict[str, Optional[float]] """Dictionary of epsilons in each test.""" def test_get_type_map(self): From c90c4e165b6246076f9048e33c43321617df0e1c Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 6 Oct 2024 13:25:07 -0400 Subject: [PATCH 14/39] fix(tf): set visible_device_list for TF C++ (#4172) Fix #4171. ## Summary by CodeRabbit - **New Features** - Enhanced GPU selection logic for improved resource management. - Added support for single-frame and multi-frame computations with new parameters for atom energy and virial calculations. - Extended functionality for mixed-type computations in the model. - **Bug Fixes** - Improved error handling during initialization and model execution. - Added output tensor dimension validations to ensure expected structures are maintained. - **Documentation** - Clarified output tensor validation to ensure expected dimensions are maintained. 
--------- Signed-off-by: Jinzhe Zeng --- source/api_cc/src/DataModifierTF.cc | 7 +++++-- source/api_cc/src/DeepPotTF.cc | 7 +++++-- source/api_cc/src/DeepTensorTF.cc | 7 +++++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/source/api_cc/src/DataModifierTF.cc b/source/api_cc/src/DataModifierTF.cc index 324cb14098..aaa2252955 100644 --- a/source/api_cc/src/DataModifierTF.cc +++ b/source/api_cc/src/DataModifierTF.cc @@ -49,8 +49,11 @@ void DipoleChargeModifierTF::init(const std::string& model, 0.9); options.config.mutable_gpu_options()->set_allow_growth(true); DPErrcheck(DPSetDevice(gpu_rank % gpu_num)); - std::string str = "/gpu:"; - str += std::to_string(gpu_rank % gpu_num); + std::string str = "/gpu:0"; + // See + // https://github.com/tensorflow/tensorflow/blame/8fac27b486939f40bc8e362b94a16a4a8bb51869/tensorflow/core/protobuf/config.proto#L80 + options.config.mutable_gpu_options()->set_visible_device_list( + std::to_string(gpu_rank % gpu_num)); graph::SetDefaultDevice(str, graph_def); } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/api_cc/src/DeepPotTF.cc b/source/api_cc/src/DeepPotTF.cc index 2c09c17a69..d7a7edfb60 100644 --- a/source/api_cc/src/DeepPotTF.cc +++ b/source/api_cc/src/DeepPotTF.cc @@ -447,8 +447,11 @@ void DeepPotTF::init(const std::string& model, 0.9); options.config.mutable_gpu_options()->set_allow_growth(true); DPErrcheck(DPSetDevice(gpu_rank % gpu_num)); - std::string str = "/gpu:"; - str += std::to_string(gpu_rank % gpu_num); + std::string str = "/gpu:0"; + // See + // https://github.com/tensorflow/tensorflow/blame/8fac27b486939f40bc8e362b94a16a4a8bb51869/tensorflow/core/protobuf/config.proto#L80 + options.config.mutable_gpu_options()->set_visible_device_list( + std::to_string(gpu_rank % gpu_num)); graph::SetDefaultDevice(str, graph_def); } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/api_cc/src/DeepTensorTF.cc b/source/api_cc/src/DeepTensorTF.cc index 34a47bc6f3..c69b7c018e 100644 --- a/source/api_cc/src/DeepTensorTF.cc +++ b/source/api_cc/src/DeepTensorTF.cc @@ -46,8 +46,11 @@ void DeepTensorTF::init(const std::string &model, 0.9); options.config.mutable_gpu_options()->set_allow_growth(true); DPErrcheck(DPSetDevice(gpu_rank % gpu_num)); - std::string str = "/gpu:"; - str += std::to_string(gpu_rank % gpu_num); + std::string str = "/gpu:0"; + // See + // https://github.com/tensorflow/tensorflow/blame/8fac27b486939f40bc8e362b94a16a4a8bb51869/tensorflow/core/protobuf/config.proto#L80 + options.config.mutable_gpu_options()->set_visible_device_list( + std::to_string(gpu_rank % gpu_num)); graph::SetDefaultDevice(str, graph_def); } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM From a0747b92d764aeb383f6d2eef872ee918e0316c0 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 6 Oct 2024 19:49:12 -0400 Subject: [PATCH 15/39] fix(tf): throw errors when loc_cellnum is 0 (#4180) Fix #4122. ## Summary by CodeRabbit - **New Features** - Enhanced error handling in the computation process, providing clearer error messages for invalid local cell numbers. - **Bug Fixes** - Improved robustness of the `compute_cell_info` function to prevent failures due to invalid input conditions. 
Signed-off-by: Jinzhe Zeng --- source/lib/src/coord.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/source/lib/src/coord.cc b/source/lib/src/coord.cc index b1456bc7f1..8e759f372f 100644 --- a/source/lib/src/coord.cc +++ b/source/lib/src/coord.cc @@ -4,6 +4,7 @@ #include #include "SimulationRegion.h" +#include "errors.h" #include "neighbor_list.h" using namespace deepmd; @@ -95,6 +96,12 @@ void deepmd::compute_cell_info( } cell_info[21] = (cell_info[3 + 0]) * (cell_info[3 + 1]) * (cell_info[3 + 2]); // loc_cellnum + if (cell_info[21] <= 0) { + throw deepmd::deepmd_exception( + "loc_cellnum should be positive but is " + + std::to_string(cell_info[21]) + + ". You may give a PBC box with zero volume."); + } cell_info[22] = (2 * cell_info[12 + 0] + cell_info[3 + 0]) * (2 * cell_info[12 + 1] + cell_info[3 + 1]) * (2 * cell_info[12 + 2] + cell_info[3 + 2]); // total_cellnum From 2feb21c6a06420a71919fcd8253d8e7762d90c0a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 7 Oct 2024 02:47:24 +0000 Subject: [PATCH 16/39] [pre-commit.ci] pre-commit autoupdate (#4173) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.6.7 → v0.6.8](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.7...v0.6.8) - [github.com/pre-commit/mirrors-clang-format: v18.1.8 → v19.1.0](https://github.com/pre-commit/mirrors-clang-format/compare/v18.1.8...v19.1.0) - https://github.com/pylint-dev/pylint/: v3.3.0 → v3.3.1 --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jinzhe Zeng --- .pre-commit-config.yaml | 8 ++++---- source/lib/include/gpu_cuda.h | 12 ++++++++---- source/lib/include/gpu_rocm.h | 12 ++++++++---- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5d34f39752..486b5e94fd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: exclude: ^source/3rdparty - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.6.7 + rev: v0.6.8 hooks: - id: ruff args: ["--fix"] @@ -52,10 +52,10 @@ repos: - id: blacken-docs # C++ - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v18.1.8 + rev: v19.1.0 hooks: - id: clang-format - exclude: ^source/3rdparty|source/lib/src/gpu/cudart/.+\.inc + exclude: ^(source/3rdparty|source/lib/src/gpu/cudart/.+\.inc|.+\.ipynb$) # markdown, yaml, CSS, javascript - repo: https://github.com/pre-commit/mirrors-prettier rev: v4.0.0-alpha.8 @@ -146,7 +146,7 @@ repos: exclude: .pre-commit-config.yaml|source/lmp # customized pylint rules - repo: https://github.com/pylint-dev/pylint/ - rev: v3.3.0 + rev: v3.3.1 hooks: - id: pylint entry: env PYTHONPATH=source/checker pylint diff --git a/source/lib/include/gpu_cuda.h b/source/lib/include/gpu_cuda.h index fb467674cb..9504a95b7a 100644 --- a/source/lib/include/gpu_cuda.h +++ b/source/lib/include/gpu_cuda.h @@ -18,8 +18,10 @@ #define gpuMemset cudaMemset #define GPU_MAX_NBOR_SIZE 4096 -#define DPErrcheck(res) \ - { DPAssert((res), __FILE__, __LINE__); } +#define DPErrcheck(res) \ + { \ + DPAssert((res), __FILE__, __LINE__); \ + } inline void DPAssert(cudaError_t code, const char *file, int line, @@ -54,8 +56,10 @@ inline void DPAssert(cudaError_t code, } } -#define nborErrcheck(res) \ - { nborAssert((res), __FILE__, __LINE__); } +#define nborErrcheck(res) \ + { \ + nborAssert((res), __FILE__, __LINE__); \ + } inline void nborAssert(cudaError_t code, const char *file, int line, diff --git a/source/lib/include/gpu_rocm.h b/source/lib/include/gpu_rocm.h index fbd5e1ce3f..abb7ddfa62 100644 --- a/source/lib/include/gpu_rocm.h +++ b/source/lib/include/gpu_rocm.h @@ -20,8 +20,10 @@ #define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice #define gpuMemset hipMemset -#define DPErrcheck(res) \ - { DPAssert((res), __FILE__, __LINE__); } +#define DPErrcheck(res) \ + { \ + DPAssert((res), __FILE__, __LINE__); \ + } inline void DPAssert(hipError_t code, const char *file, int line, @@ -39,8 +41,10 @@ inline void DPAssert(hipError_t code, } } -#define nborErrcheck(res) \ - { nborAssert((res), __FILE__, __LINE__); } +#define nborErrcheck(res) \ + { \ + nborAssert((res), __FILE__, __LINE__); \ + } inline void nborAssert(hipError_t code, const char *file, int line, From d667929bc4ec1b9721dee2c194e39d15cdf7725d Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 7 Oct 2024 13:32:31 -0400 Subject: [PATCH 17/39] docs: add documentation for installation requirements of DPA-2 (#4178) Fix #4161. ## Summary by CodeRabbit - **New Features** - Added installation requirements for the DPA-2 model in the documentation, including customized OP library instructions. - **Improvements** - Enhanced error messaging in the `border_op` function for better user guidance. - Clarified parameter handling and documentation in the `DescrptBlockRepformers` class. - Improved logic for processing input tensors and neighbor lists in the `forward` method. - Strengthened input statistics handling in the `compute_input_stats` method. 
--------- Signed-off-by: Jinzhe Zeng --- deepmd/pt/model/descriptor/repformers.py | 3 ++- doc/model/dpa2.md | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py index 406758faa6..64965825a0 100644 --- a/deepmd/pt/model/descriptor/repformers.py +++ b/deepmd/pt/model/descriptor/repformers.py @@ -57,7 +57,8 @@ def border_op( argument8, ) -> torch.Tensor: raise NotImplementedError( - "border_op is not available since customized PyTorch OP library is not built when freezing the model." + "border_op is not available since the customized PyTorch OP library is not built when freezing the model. " + "See the DPA-2 documentation for details." ) # Note: this hack cannot actually save a model that can be run using LAMMPS. diff --git a/doc/model/dpa2.md b/doc/model/dpa2.md index 5de30ee6b2..24ce5222e9 100644 --- a/doc/model/dpa2.md +++ b/doc/model/dpa2.md @@ -8,6 +8,16 @@ The DPA-2 model implementation. See https://arxiv.org/abs/2312.15492 for more de Training example: `examples/water/dpa2/input_torch_medium.json`, see [README](../../examples/water/dpa2/README.md) for inputs in different levels. +## Requirements of installation {{ pytorch_icon }} + +If one wants to run the DPA-2 model on LAMMPS, the customized OP library for the Python interface must be installed when [freezing the model](../freeze/freeze.md). + +The customized OP library for the Python interface can be installed by setting the environment variable {envvar}`DP_ENABLE_PYTORCH` to `1` during installation. + +If one runs LAMMPS with MPI, the customized OP library for the C++ interface should be compiled against the same MPI library as the runtime MPI. +If one runs LAMMPS with MPI and CUDA devices, it is recommended to compile the customized OP library for the C++ interface with a [CUDA-Aware MPI](https://developer.nvidia.com/mpi-solutions-gpus) library and CUDA, +otherwise the communication between GPU cards falls back to the slower CPU implementation. + ## Data format DPA-2 supports both the [standard data format](../data/system.md) and the [mixed type data format](../data/system.md#mixed-type). From dcdd804a6eb20867d27eff24a9c300f1bc6fe370 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 7 Oct 2024 20:40:40 -0400 Subject: [PATCH 18/39] chore: use `functools.cached_property` for cached properties (#4187) `functools.cached_property` (new in Python 3.8) is more suitable for cached properties. ## Summary by CodeRabbit - **New Features** - Introduced a new parameter `neighbor_list` for enhanced neighbor list handling in model evaluation. - Added support for percentage strings in the `test_size` parameter for flexible test size configuration. - New method `_make_auto_ts` to facilitate test size calculations based on specified percentages. - **Bug Fixes** - Improved caching mechanisms for properties, enhancing performance and memory management. - **Documentation** - Added comments and clarifications in the code to improve understanding of batch and test size handling.
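For context on the diff below: stacking `@property` with `@cache` keeps the result in a module-level cache keyed on `self`, which pins every instance in memory, while `functools.cached_property` stores the value in the instance's own `__dict__`, so the cache is freed with the instance. A small sketch of the behavior; the class and attribute names are illustrative:

```python
from functools import cached_property


class Evaluator:
    @cached_property
    def sess(self):
        # Runs once per instance; the result is then stored in
        # self.__dict__["sess"], so later accesses skip this body and the
        # cached value is garbage-collected together with the instance.
        print("building session")
        return object()


ev = Evaluator()
assert ev.sess is ev.sess  # "building session" is printed only once
```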
--------- Signed-off-by: Jinzhe Zeng Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- deepmd/tf/infer/deep_eval.py | 20 +++++++------------- deepmd/tf/utils/tabulate.py | 4 ++-- deepmd/utils/data_system.py | 5 ++--- 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/deepmd/tf/infer/deep_eval.py b/deepmd/tf/infer/deep_eval.py index 33725007f3..56df7f782f 100644 --- a/deepmd/tf/infer/deep_eval.py +++ b/deepmd/tf/infer/deep_eval.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import json from functools import ( - cache, + cached_property, ) from typing import ( TYPE_CHECKING, @@ -263,8 +263,7 @@ def _init_attr(self): else: self.modifier_type = None - @property - @cache + @cached_property def model_type(self) -> type["DeepEvalWrapper"]: """Get type of model. @@ -288,8 +287,7 @@ def model_type(self) -> type["DeepEvalWrapper"]: else: raise RuntimeError(f"unknown model type {model_type}") - @property - @cache + @cached_property def model_version(self) -> str: """Get version of model. @@ -307,8 +305,7 @@ def model_version(self) -> str: [mt] = run_sess(self.sess, [t_mt], feed_dict={}) return mt.decode("utf-8") - @property - @cache + @cached_property def sess(self) -> tf.Session: """Get TF session.""" # start a tf session associated to the graph @@ -1192,8 +1189,7 @@ def __init__( self.neighbor_list = neighbor_list - @property - @cache + @cached_property def model_type(self) -> str: """Get type of model. @@ -1203,8 +1199,7 @@ def model_type(self) -> str: [mt] = run_sess(self.sess, [t_mt], feed_dict={}) return mt.decode("utf-8") - @property - @cache + @cached_property def model_version(self) -> str: """Get version of model. @@ -1222,8 +1217,7 @@ def model_version(self) -> str: [mt] = run_sess(self.sess, [t_mt], feed_dict={}) return mt.decode("utf-8") - @property - @cache + @cached_property def sess(self) -> tf.Session: """Get TF session.""" # start a tf session associated to the graph diff --git a/deepmd/tf/utils/tabulate.py b/deepmd/tf/utils/tabulate.py index afb94bb050..1dc6128f62 100644 --- a/deepmd/tf/utils/tabulate.py +++ b/deepmd/tf/utils/tabulate.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging from functools import ( + cached_property, lru_cache, ) from typing import ( @@ -770,8 +771,7 @@ def _get_layer_size(self): raise RuntimeError("Unsupported descriptor") return layer_size - @property - @lru_cache + @cached_property def _n_all_excluded(self) -> int: """Then number of types excluding all types.""" return sum(int(self._all_excluded(ii)) for ii in range(0, self.ntypes)) diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py index e499163e6a..7bec0b16f4 100644 --- a/deepmd/utils/data_system.py +++ b/deepmd/utils/data_system.py @@ -3,7 +3,7 @@ import logging import warnings from functools import ( - cache, + cached_property, ) from typing import ( Any, @@ -238,8 +238,7 @@ def _load_test(self, ntests=-1): for nn in test_system_data: self.test_data[nn].append(test_system_data[nn]) - @property - @cache + @cached_property def default_mesh(self) -> list[np.ndarray]: """Mesh for each system.""" return [ From b807bb4d3ac572b5903ec58c906bacffbfef9a9e Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 7 Oct 2024 20:43:06 -0400 Subject: [PATCH 19/39] docs: document more for multiprocessing (#4190) Fix #4182. ## Summary by CodeRabbit - **Documentation** - Updated `lammps-command.md` to clarify GPU usage and unit handling in LAMMPS. 
- Enhanced `howtoset_num_nodes.md` with new sections on MPI and multiprocessing for TensorFlow and PyTorch, improving clarity and usability. - Added guidance on GPU resource allocation for parallel processes. Signed-off-by: Jinzhe Zeng --- doc/third-party/lammps-command.md | 5 +++++ doc/troubleshooting/howtoset_num_nodes.md | 21 +++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/doc/third-party/lammps-command.md b/doc/third-party/lammps-command.md index 4baba00e05..6a16605bfc 100644 --- a/doc/third-party/lammps-command.md +++ b/doc/third-party/lammps-command.md @@ -4,6 +4,11 @@ See [Environment variables](../env.md) for the runtime environment variables. ::: +:::{note} +Each MPI rank can use at most one GPU card. +See [How to control the parallelism of a job](../troubleshooting/howtoset_num_nodes.md) for details. +::: + ## units All units in LAMMPS except `lj` are supported. `lj` is not supported. diff --git a/doc/troubleshooting/howtoset_num_nodes.md b/doc/troubleshooting/howtoset_num_nodes.md index 0c547650fb..b09fb80cb6 100644 --- a/doc/troubleshooting/howtoset_num_nodes.md +++ b/doc/troubleshooting/howtoset_num_nodes.md @@ -4,11 +4,26 @@ DeePMD-kit has three levels of parallelism. To get the best performance, one should control the number of threads used by DeePMD-kit. One should make sure the product of the parallel numbers is less than or equal to the number of cores available. -## MPI (optional) +## MPI or multiprocessing (optional) Parallelism for MPI is optional and used for multiple nodes, multiple GPU cards, or sometimes multiple CPU cores. -To enable MPI support for training, one should [install horovod](../install/install-from-source.md#install-horovod-and-mpi4py) in advance. Note that the parallelism mode is data parallelism, so it is not expected to see the training time per batch decreases. +::::{tab-set} + +:::{tab-item} TensorFlow {{ tensorflow_icon }} + +To enable MPI support for training in the TensorFlow interface, one should [install horovod](../install/install-from-source.md#install-horovod-and-mpi4py) in advance. + +::: +:::{tab-item} PyTorch {{ pytorch_icon }} + +Multiprocessing support for training in the PyTorch backend is implemented with [torchrun](https://pytorch.org/docs/stable/elastic/run.html). + +::: +:::: + +Note that the parallelism mode is data parallelism, so it is not expected to see the training time per batch decrease. +See [Parallel training](../train/parallel-training.md) for details. MPI support for inference is not directly supported by DeePMD-kit, but indirectly supported by the third-party software. For example, [LAMMPS enables running simulations in parallel](https://docs.lammps.org/Developer_parallel.html) using the MPI parallel communication standard with distributed data. That software has to build against MPI. @@ -22,6 +37,8 @@ Note that `mpirun` here should be the same as the MPI used to build software. Fo Sometimes, `$num_nodes` and the nodes information can be directly given by the HPC scheduler system, if the MPI used here is the same as the MPI used to build the scheduler system. Otherwise, one has to manually assign this information. +Each process can use at most one GPU card. ## Parallelism between independent operators For CPU devices, TensorFlow and PyTorch use multiple streams to run independent operators (OP).
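The "at most one GPU card" guidance above is enforced by convention in the launcher: `torchrun` exports `LOCAL_RANK` for each worker it spawns, and each worker binds to that single device. A hedged sketch of the convention, not code from this patch:

```python
import os

import torch

# torchrun --nproc_per_node=4 train.py  ->  four workers with LOCAL_RANK 0..3
local_rank = int(os.environ.get("LOCAL_RANK", "0"))
if torch.cuda.is_available():
    torch.cuda.set_device(local_rank)  # bind this process to a single GPU card
```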
From 9a15bc0ff804e2921bf85fb921e6fe78f6c9bff6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 8 Oct 2024 13:50:00 -0400 Subject: [PATCH 20/39] [pre-commit.ci] pre-commit autoupdate (#4192) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/pre-commit-hooks: v4.6.0 → v5.0.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.6.0...v5.0.0) - [github.com/astral-sh/ruff-pre-commit: v0.6.8 → v0.6.9](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.8...v0.6.9) - [github.com/asottile/blacken-docs: 1.18.0 → 1.19.0](https://github.com/asottile/blacken-docs/compare/1.18.0...1.19.0) - [github.com/pre-commit/mirrors-clang-format: v19.1.0 → v19.1.1](https://github.com/pre-commit/mirrors-clang-format/compare/v19.1.0...v19.1.1) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 486b5e94fd..6a1d303f64 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: trailing-whitespace exclude: "^.+\\.pbtxt$" @@ -29,7 +29,7 @@ repos: exclude: ^source/3rdparty - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.6.8 + rev: v0.6.9 hooks: - id: ruff args: ["--fix"] @@ -47,12 +47,12 @@ repos: exclude: ^source/3rdparty # Python inside docs - repo: https://github.com/asottile/blacken-docs - rev: 1.18.0 + rev: 1.19.0 hooks: - id: blacken-docs # C++ - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v19.1.0 + rev: v19.1.1 hooks: - id: clang-format exclude: ^(source/3rdparty|source/lib/src/gpu/cudart/.+\.inc|.+\.ipynb$) From 3939786f1dab77c18f501c42340535f2a3708141 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 9 Oct 2024 07:19:47 -0400 Subject: [PATCH 21/39] feat(jax/array-api): dpa1 (#4160) ## Summary by CodeRabbit - **New Features** - Updated method for converting input to NumPy arrays, enhancing performance and compatibility with array-like structures. - Simplified handling of weight, bias, and identity variables for improved compatibility with array backends. - Introduced new network classes and enhanced network management functionalities. - Added support for the new `array_api_strict` backend in testing. - **Bug Fixes** - Fixed serialization process to ensure accurate conversion of weights and biases. - **Tests** - Added tests to validate the new functionalities and ensure compatibility across various backends, including JAX and Array API Strict. - **Chores** - Continued improvements to project structure and dependencies for better maintainability. 
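The diffs below apply one idiom over and over: resolve the array namespace from the inputs with `array_api_compat.array_namespace(...)` and call `xp.*` instead of `np.*`, so the same code runs unchanged on NumPy, JAX, or `array_api_strict` arrays. A condensed sketch of the idiom, mirroring the `np_normalize` change rather than quoting it:

```python
import array_api_compat
import numpy as np


def normalize(x, axis=-1):
    # Resolve the namespace from the input: numpy, jax.numpy, array_api_strict, ...
    xp = array_api_compat.array_namespace(x)
    return x / xp.linalg.vector_norm(x, axis=axis, keepdims=True)


print(normalize(np.array([[3.0, 4.0]])))  # [[0.6 0.8]]
```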
--------- Signed-off-by: Jinzhe Zeng Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- deepmd/dpmodel/array_api.py | 40 +++ deepmd/dpmodel/descriptor/dpa1.py | 236 +++++++++++++----- deepmd/dpmodel/utils/env_mat.py | 29 ++- deepmd/dpmodel/utils/exclude_mask.py | 34 ++- deepmd/dpmodel/utils/network.py | 38 +-- deepmd/dpmodel/utils/nlist.py | 118 +++++---- deepmd/dpmodel/utils/region.py | 34 +-- deepmd/dpmodel/utils/type_embed.py | 2 +- deepmd/jax/common.py | 50 +++- deepmd/jax/descriptor/__init__.py | 1 + deepmd/jax/descriptor/dpa1.py | 86 +++++++ deepmd/jax/env.py | 4 + deepmd/jax/utils/exclude_mask.py | 18 ++ deepmd/jax/utils/network.py | 51 +++- deepmd/jax/utils/type_embed.py | 2 + pyproject.toml | 1 + source/tests/array_api_strict/__init__.py | 2 + source/tests/array_api_strict/common.py | 25 ++ .../array_api_strict/descriptor/__init__.py | 1 + .../tests/array_api_strict/descriptor/dpa1.py | 81 ++++++ .../tests/array_api_strict/utils/__init__.py | 1 + .../array_api_strict/utils/exclude_mask.py | 17 ++ .../tests/array_api_strict/utils/network.py | 45 ++++ .../array_api_strict/utils/type_embed.py | 22 ++ .../common/dpmodel/test_descriptor_dpa1.py | 19 ++ source/tests/consistent/common.py | 66 +++++ source/tests/consistent/descriptor/common.py | 63 +++++ .../tests/consistent/descriptor/test_dpa1.py | 96 +++++++ .../tests/consistent/test_type_embedding.py | 13 + 29 files changed, 1022 insertions(+), 173 deletions(-) create mode 100644 deepmd/jax/descriptor/__init__.py create mode 100644 deepmd/jax/descriptor/dpa1.py create mode 100644 deepmd/jax/utils/exclude_mask.py create mode 100644 source/tests/array_api_strict/__init__.py create mode 100644 source/tests/array_api_strict/common.py create mode 100644 source/tests/array_api_strict/descriptor/__init__.py create mode 100644 source/tests/array_api_strict/descriptor/dpa1.py create mode 100644 source/tests/array_api_strict/utils/__init__.py create mode 100644 source/tests/array_api_strict/utils/exclude_mask.py create mode 100644 source/tests/array_api_strict/utils/network.py create mode 100644 source/tests/array_api_strict/utils/type_embed.py diff --git a/deepmd/dpmodel/array_api.py b/deepmd/dpmodel/array_api.py index e4af2ad627..360df78a7b 100644 --- a/deepmd/dpmodel/array_api.py +++ b/deepmd/dpmodel/array_api.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Utilities for the array API.""" +import array_api_compat + def support_array_api(version: str) -> callable: """Mark a function as supporting the specific version of the array API. 
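The hunk that follows adds `xp_take_along_axis`, a pure array-API implementation that is needed because `take_along_axis` had not yet landed in a released version of the standard. Its intended behavior matches `numpy.take_along_axis`; a short reference sketch:

```python
import numpy as np

arr = np.array([[10.0, 20.0, 30.0], [40.0, 50.0, 60.0]])
idx = np.array([[2, 0], [1, 1]])
# Per row, gather the elements at the given column indices.
print(np.take_along_axis(arr, idx, axis=1))
# [[30. 10.]
#  [50. 50.]]
# xp_take_along_axis(arr, idx, axis=1) should produce the same values for
# any array-API-compatible input.
```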
@@ -27,3 +29,41 @@ def set_version(func: callable) -> callable: return func return set_version + + +# array api adds take_along_axis in https://github.com/data-apis/array-api/pull/816 +# but it hasn't been released yet +# below is a pure Python implementation of take_along_axis +# https://github.com/data-apis/array-api/issues/177#issuecomment-2093630595 +def xp_swapaxes(a, axis1, axis2): + xp = array_api_compat.array_namespace(a) + axes = list(range(a.ndim)) + axes[axis1], axes[axis2] = axes[axis2], axes[axis1] + a = xp.permute_dims(a, axes) + return a + + +def xp_take_along_axis(arr, indices, axis): + xp = array_api_compat.array_namespace(arr) + arr = xp_swapaxes(arr, axis, -1) + indices = xp_swapaxes(indices, axis, -1) + + m = arr.shape[-1] + n = indices.shape[-1] + + shape = list(arr.shape) + shape.pop(-1) + shape = [*shape, n] + + arr = xp.reshape(arr, (-1,)) + if n != 0: + indices = xp.reshape(indices, (-1, n)) + else: + indices = xp.reshape(indices, (0, 0)) + + offset = (xp.arange(indices.shape[0]) * m)[:, xp.newaxis] + indices = xp.reshape(offset + indices, (-1,)) + + out = xp.take(arr, indices) + out = xp.reshape(out, shape) + return xp_swapaxes(out, axis, -1) diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py index 5ba3fc11b2..add9cb9f71 100644 --- a/deepmd/dpmodel/descriptor/dpa1.py +++ b/deepmd/dpmodel/descriptor/dpa1.py @@ -6,6 +6,7 @@ Union, ) +import array_api_compat import numpy as np from deepmd.dpmodel import ( @@ -13,6 +14,9 @@ PRECISION_DICT, NativeOP, ) +from deepmd.dpmodel.array_api import ( + xp_take_along_axis, +) from deepmd.dpmodel.utils import ( EmbeddingNet, EnvMat, @@ -32,9 +36,6 @@ from deepmd.dpmodel.utils.update_sel import ( UpdateSel, ) -from deepmd.env import ( - GLOBAL_NP_FLOAT_PRECISION, -) from deepmd.utils.data_system import ( DeepmdDataSystem, ) @@ -59,13 +60,16 @@ def np_softmax(x, axis=-1): - x = np.nan_to_num(x) # to avoid value warning - e_x = np.exp(x - np.max(x, axis=axis, keepdims=True)) - return e_x / np.sum(e_x, axis=axis, keepdims=True) + xp = array_api_compat.array_namespace(x) + # x = xp.nan_to_num(x) # to avoid value warning + x = xp.where(xp.isnan(x), xp.zeros_like(x), x) + e_x = xp.exp(x - xp.max(x, axis=axis, keepdims=True)) + return e_x / xp.sum(e_x, axis=axis, keepdims=True) def np_normalize(x, axis=-1): - return x / np.linalg.norm(x, axis=axis, keepdims=True) + xp = array_api_compat.array_namespace(x) + return x / xp.linalg.vector_norm(x, axis=axis, keepdims=True) @BaseDescriptor.register("se_atten") @@ -474,10 +478,14 @@ def call( The smooth switch function. 
""" del mapping + xp = array_api_compat.array_namespace(coord_ext, atype_ext, nlist) nf, nloc, nnei = nlist.shape - nall = coord_ext.reshape(nf, -1).shape[1] // 3 + nall = xp.reshape(coord_ext, (nf, -1)).shape[1] // 3 # nf x nall x tebd_dim - atype_embd_ext = self.type_embedding.call()[atype_ext] + atype_embd_ext = xp.reshape( + xp.take(self.type_embedding.call(), xp.reshape(atype_ext, [-1]), axis=0), + (nf, nall, self.tebd_dim), + ) # nfnl x tebd_dim atype_embd = atype_embd_ext[:, :nloc, :] grrg, g2, h2, rot_mat, sw = self.se_atten( @@ -489,8 +497,8 @@ def call( ) # nf x nloc x (ng x ng1 + tebd_dim) if self.concat_output_tebd: - grrg = np.concatenate( - [grrg, atype_embd.reshape(nf, nloc, self.tebd_dim)], axis=-1 + grrg = xp.concat( + [grrg, xp.reshape(atype_embd, (nf, nloc, self.tebd_dim))], axis=-1 ) return grrg, rot_mat, None, None, sw @@ -536,8 +544,8 @@ def serialize(self) -> dict: "exclude_types": obj.exclude_types, "env_protection": obj.env_protection, "@variables": { - "davg": obj["davg"], - "dstd": obj["dstd"], + "davg": np.array(obj["davg"]), + "dstd": np.array(obj["dstd"]), }, ## to be updated when the options are supported. "trainable": self.trainable, @@ -683,12 +691,12 @@ def __init__( self.embd_input_dim = 1 + self.tebd_dim_input else: self.embd_input_dim = 1 - self.embeddings = NetworkCollection( + embeddings = NetworkCollection( ndim=0, ntypes=self.ntypes, network_type="embedding_network", ) - self.embeddings[0] = EmbeddingNet( + embeddings[0] = EmbeddingNet( self.embd_input_dim, self.neuron, self.activation_function, @@ -696,13 +704,14 @@ def __init__( self.precision, seed=child_seed(seed, 0), ) + self.embeddings = embeddings if self.tebd_input_mode in ["strip"]: - self.embeddings_strip = NetworkCollection( + embeddings_strip = NetworkCollection( ndim=0, ntypes=self.ntypes, network_type="embedding_network", ) - self.embeddings_strip[0] = EmbeddingNet( + embeddings_strip[0] = EmbeddingNet( self.tebd_dim_input, self.neuron, self.activation_function, @@ -710,6 +719,7 @@ def __init__( self.precision, seed=child_seed(seed, 1), ) + self.embeddings_strip = embeddings_strip else: self.embeddings_strip = None self.dpa1_attention = NeighborGatedAttention( @@ -837,9 +847,10 @@ def cal_g( ss, embedding_idx, ): + xp = array_api_compat.array_namespace(ss) nfnl, nnei = ss.shape[0:2] - shape2 = np.prod(ss.shape[2:]) - ss = ss.reshape(nfnl, nnei, shape2) + shape2 = xp.prod(xp.asarray(ss.shape[2:])) + ss = xp.reshape(ss, (nfnl, nnei, shape2)) # nfnl x nnei x ng gg = self.embeddings[embedding_idx].call(ss) return gg @@ -850,9 +861,10 @@ def cal_g_strip( embedding_idx, ): assert self.embeddings_strip is not None + xp = array_api_compat.array_namespace(ss) nfnl, nnei = ss.shape[0:2] - shape2 = np.prod(ss.shape[2:]) - ss = ss.reshape(nfnl, nnei, shape2) + shape2 = xp.prod(xp.asarray(ss.shape[2:])) + ss = xp.reshape(ss, (nfnl, nnei, shape2)) # nfnl x nnei x ng gg = self.embeddings_strip[embedding_idx].call(ss) return gg @@ -865,6 +877,7 @@ def call( atype_embd_ext: Optional[np.ndarray] = None, mapping: Optional[np.ndarray] = None, ): + xp = array_api_compat.array_namespace(nlist, coord_ext, atype_ext) # nf x nloc x nnei x 4 dmatrix, diff, sw = self.env_mat.call( coord_ext, atype_ext, nlist, self.mean, self.stddev @@ -872,41 +885,42 @@ def call( nf, nloc, nnei, _ = dmatrix.shape exclude_mask = self.emask.build_type_exclude_mask(nlist, atype_ext) # nfnl x nnei - exclude_mask = exclude_mask.reshape(nf * nloc, nnei) + exclude_mask = xp.reshape(exclude_mask, (nf * nloc, nnei)) # nfnl x nnei - nlist 
= nlist.reshape(nf * nloc, nnei) - nlist = np.where(exclude_mask, nlist, -1) + nlist = xp.reshape(nlist, (nf * nloc, nnei)) + nlist = xp.where(exclude_mask, nlist, xp.full_like(nlist, -1)) # nfnl x nnei x 4 - dmatrix = dmatrix.reshape(nf * nloc, nnei, 4) + dmatrix = xp.reshape(dmatrix, (nf * nloc, nnei, 4)) # nfnl x nnei x 1 - sw = sw.reshape(nf * nloc, nnei, 1) + sw = xp.reshape(sw, (nf * nloc, nnei, 1)) # nfnl x tebd_dim - atype_embd = atype_embd_ext[:, :nloc, :].reshape(nf * nloc, self.tebd_dim) + atype_embd = xp.reshape(atype_embd_ext[:, :nloc, :], (nf * nloc, self.tebd_dim)) # nfnl x nnei x tebd_dim - atype_embd_nnei = np.tile(atype_embd[:, np.newaxis, :], (1, nnei, 1)) + atype_embd_nnei = xp.tile(atype_embd[:, xp.newaxis, :], (1, nnei, 1)) # nfnl x nnei nlist_mask = nlist != -1 # nfnl x nnei x 1 - sw = np.where(nlist_mask[:, :, None], sw, 0.0) - nlist_masked = np.where(nlist_mask, nlist, 0) - index = np.tile(nlist_masked.reshape(nf, -1, 1), (1, 1, self.tebd_dim)) + sw = xp.where(nlist_mask[:, :, None], sw, xp.full_like(sw, 0.0)) + nlist_masked = xp.where(nlist_mask, nlist, xp.zeros_like(nlist)) + index = xp.tile(xp.reshape(nlist_masked, (nf, -1, 1)), (1, 1, self.tebd_dim)) # nfnl x nnei x tebd_dim - atype_embd_nlist = np.take_along_axis(atype_embd_ext, index, axis=1).reshape( - nf * nloc, nnei, self.tebd_dim + atype_embd_nlist = xp_take_along_axis(atype_embd_ext, index, axis=1) + atype_embd_nlist = xp.reshape( + atype_embd_nlist, (nf * nloc, nnei, self.tebd_dim) ) ng = self.neuron[-1] # nfnl x nnei x 4 - rr = dmatrix.reshape(nf * nloc, nnei, 4) - rr = rr * exclude_mask[:, :, None] + rr = xp.reshape(dmatrix, (nf * nloc, nnei, 4)) + rr = rr * xp.astype(exclude_mask[:, :, None], rr.dtype) # nfnl x nnei x 1 ss = rr[..., 0:1] if self.tebd_input_mode in ["concat"]: if not self.type_one_side: # nfnl x nnei x (1 + 2 * tebd_dim) - ss = np.concatenate([ss, atype_embd_nlist, atype_embd_nnei], axis=-1) + ss = xp.concat([ss, atype_embd_nlist, atype_embd_nnei], axis=-1) else: # nfnl x nnei x (1 + tebd_dim) - ss = np.concatenate([ss, atype_embd_nlist], axis=-1) + ss = xp.concat([ss, atype_embd_nlist], axis=-1) # calculate gg # nfnl x nnei x ng gg = self.cal_g(ss, 0) @@ -916,42 +930,47 @@ def call( assert self.embeddings_strip is not None if not self.type_one_side: # nfnl x nnei x (tebd_dim * 2) - tt = np.concatenate([atype_embd_nlist, atype_embd_nnei], axis=-1) + tt = xp.concat([atype_embd_nlist, atype_embd_nnei], axis=-1) else: # nfnl x nnei x tebd_dim tt = atype_embd_nlist # nfnl x nnei x ng gg_t = self.cal_g_strip(tt, 0) if self.smooth: - gg_t = gg_t * sw.reshape(-1, self.nnei, 1) + gg_t = gg_t * xp.reshape(sw, (-1, self.nnei, 1)) # nfnl x nnei x ng gg = gg_s * gg_t + gg_s else: raise NotImplementedError - input_r = rr.reshape(-1, nnei, 4)[:, :, 1:4] / np.maximum( - np.linalg.norm(rr.reshape(-1, nnei, 4)[:, :, 1:4], axis=-1, keepdims=True), - 1e-12, + normed = xp.linalg.vector_norm( + xp.reshape(rr, (-1, nnei, 4))[:, :, 1:4], axis=-1, keepdims=True + ) + input_r = xp.reshape(rr, (-1, nnei, 4))[:, :, 1:4] / xp.maximum( + normed, + xp.full_like(normed, 1e-12), ) gg = self.dpa1_attention( gg, nlist_mask, input_r=input_r, sw=sw ) # shape is [nframes*nloc, self.neei, out_size] # nfnl x ng x 4 - gr = np.einsum("lni,lnj->lij", gg, rr) + # gr = xp.einsum("lni,lnj->lij", gg, rr) + gr = xp.sum(gg[:, :, :, None] * rr[:, :, None, :], axis=1) gr /= self.nnei gr1 = gr[:, : self.axis_neuron, :] # nfnl x ng x ng1 - grrg = np.einsum("lid,ljd->lij", gr, gr1) + # grrg = xp.einsum("lid,ljd->lij", gr, gr1) + grrg = 
xp.sum(gr[:, :, None, :] * gr1[:, None, :, :], axis=3) # nf x nloc x (ng x ng1) - grrg = grrg.reshape(nf, nloc, ng * self.axis_neuron).astype( - GLOBAL_NP_FLOAT_PRECISION + grrg = xp.astype( + xp.reshape(grrg, (nf, nloc, ng * self.axis_neuron)), coord_ext.dtype ) return ( - grrg.reshape(nf, nloc, self.filter_neuron[-1] * self.axis_neuron), - gg.reshape(nf, nloc, self.nnei, self.filter_neuron[-1]), - dmatrix.reshape(nf, nloc, self.nnei, 4)[..., 1:], - gr[..., 1:].reshape(nf, nloc, self.filter_neuron[-1], 3), - sw, + xp.reshape(grrg, (nf, nloc, self.filter_neuron[-1] * self.axis_neuron)), + xp.reshape(gg, (nf, nloc, self.nnei, self.filter_neuron[-1])), + xp.reshape(dmatrix, (nf, nloc, self.nnei, 4))[..., 1:], + xp.reshape(gr[..., 1:], (nf, nloc, self.filter_neuron[-1], 3)), + xp.reshape(sw, (nf, nloc, nnei, 1)), ) def has_message_passing(self) -> bool: @@ -962,6 +981,77 @@ def need_sorted_nlist_for_lower(self) -> bool: """Returns whether the descriptor block needs sorted nlist when using `forward_lower`.""" return False + def serialize(self) -> dict: + """Serialize the descriptor to dict.""" + obj = self + data = { + "@class": "DescriptorBlock", + "type": "dpa1", + "@version": 1, + "rcut": obj.rcut, + "rcut_smth": obj.rcut_smth, + "sel": obj.sel, + "ntypes": obj.ntypes, + "neuron": obj.neuron, + "axis_neuron": obj.axis_neuron, + "tebd_dim": obj.tebd_dim, + "tebd_input_mode": obj.tebd_input_mode, + "set_davg_zero": obj.set_davg_zero, + "attn": obj.attn, + "attn_layer": obj.attn_layer, + "attn_dotr": obj.attn_dotr, + "attn_mask": obj.attn_mask, + "activation_function": obj.activation_function, + "resnet_dt": obj.resnet_dt, + "scaling_factor": obj.scaling_factor, + "normalize": obj.normalize, + "temperature": obj.temperature, + "trainable_ln": obj.trainable_ln, + "ln_eps": obj.ln_eps, + "smooth": obj.smooth, + "type_one_side": obj.type_one_side, + # make deterministic + "precision": np.dtype(PRECISION_DICT[obj.precision]).name, + "embeddings": obj.embeddings.serialize(), + "attention_layers": obj.dpa1_attention.serialize(), + "env_mat": obj.env_mat.serialize(), + "exclude_types": obj.exclude_types, + "env_protection": obj.env_protection, + "@variables": { + "davg": np.array(obj["davg"]), + "dstd": np.array(obj["dstd"]), + }, + } + if obj.tebd_input_mode in ["strip"]: + data.update({"embeddings_strip": obj.embeddings_strip.serialize()}) + return data + + @classmethod + def deserialize(cls, data: dict) -> "DescrptDPA1": + """Deserialize from dict.""" + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + data.pop("type") + variables = data.pop("@variables") + embeddings = data.pop("embeddings") + attention_layers = data.pop("attention_layers") + env_mat = data.pop("env_mat") + tebd_input_mode = data["tebd_input_mode"] + if tebd_input_mode in ["strip"]: + embeddings_strip = data.pop("embeddings_strip") + else: + embeddings_strip = None + obj = cls(**data) + + obj["davg"] = variables["davg"] + obj["dstd"] = variables["dstd"] + obj.embeddings = NetworkCollection.deserialize(embeddings) + if tebd_input_mode in ["strip"]: + obj.embeddings_strip = NetworkCollection.deserialize(embeddings_strip) + obj.dpa1_attention = NeighborGatedAttention.deserialize(attention_layers) + return obj + class NeighborGatedAttention(NativeOP): def __init__( @@ -1254,18 +1344,23 @@ def __init__( ) def call(self, query, nei_mask, input_r=None, sw=None, attnw_shift=20.0): + xp = array_api_compat.array_namespace(query, nei_mask) # Linear projection - q, k, v = 
np.split(self.in_proj(query), 3, axis=-1) + # q, k, v = xp.split(self.in_proj(query), 3, axis=-1) + _query = self.in_proj(query) + q = _query[..., 0 : self.head_dim] + k = _query[..., self.head_dim : self.head_dim * 2] + v = _query[..., self.head_dim * 2 : self.head_dim * 3] # Reshape and normalize # (nf x nloc) x num_heads x nnei x head_dim - q = q.reshape(-1, self.nnei, self.num_heads, self.head_dim).transpose( - 0, 2, 1, 3 + q = xp.permute_dims( + xp.reshape(q, (-1, self.nnei, self.num_heads, self.head_dim)), (0, 2, 1, 3) ) - k = k.reshape(-1, self.nnei, self.num_heads, self.head_dim).transpose( - 0, 2, 1, 3 + k = xp.permute_dims( + xp.reshape(k, (-1, self.nnei, self.num_heads, self.head_dim)), (0, 2, 1, 3) ) - v = v.reshape(-1, self.nnei, self.num_heads, self.head_dim).transpose( - 0, 2, 1, 3 + v = xp.permute_dims( + xp.reshape(v, (-1, self.nnei, self.num_heads, self.head_dim)), (0, 2, 1, 3) ) if self.normalize: q = np_normalize(q, axis=-1) @@ -1274,29 +1369,38 @@ def call(self, query, nei_mask, input_r=None, sw=None, attnw_shift=20.0): q = q * self.scaling # Attention weights # (nf x nloc) x num_heads x nnei x nnei - attn_weights = q @ k.transpose(0, 1, 3, 2) - nei_mask = nei_mask.reshape(-1, self.nnei) + attn_weights = q @ xp.permute_dims(k, (0, 1, 3, 2)) + nei_mask = xp.reshape(nei_mask, (-1, self.nnei)) if self.smooth: - sw = sw.reshape(-1, 1, self.nnei) + sw = xp.reshape(sw, (-1, 1, self.nnei)) attn_weights = (attn_weights + attnw_shift) * sw[:, :, :, None] * sw[ :, :, None, : ] - attnw_shift else: - attn_weights = np.where(nei_mask[:, None, None, :], attn_weights, -np.inf) + attn_weights = xp.where( + nei_mask[:, None, None, :], + attn_weights, + xp.full_like(attn_weights, -xp.inf), + ) attn_weights = np_softmax(attn_weights, axis=-1) - attn_weights = np.where(nei_mask[:, None, :, None], attn_weights, 0.0) + attn_weights = xp.where( + nei_mask[:, None, :, None], attn_weights, xp.zeros_like(attn_weights) + ) if self.smooth: attn_weights = attn_weights * sw[:, :, :, None] * sw[:, :, None, :] if self.dotr: - angular_weight = (input_r @ input_r.transpose(0, 2, 1)).reshape( - -1, 1, self.nnei, self.nnei + angular_weight = xp.reshape( + input_r @ xp.permute_dims(input_r, (0, 2, 1)), + (-1, 1, self.nnei, self.nnei), ) attn_weights = attn_weights * angular_weight # Output projection # (nf x nloc) x num_heads x nnei x head_dim o = attn_weights @ v # (nf x nloc) x nnei x (num_heads x head_dim) - o = o.transpose(0, 2, 1, 3).reshape(-1, self.nnei, self.hidden_dim) + o = xp.reshape( + xp.permute_dims(o, (0, 2, 1, 3)), (-1, self.nnei, self.hidden_dim) + ) output = self.out_proj(o) return output, attn_weights diff --git a/deepmd/dpmodel/utils/env_mat.py b/deepmd/dpmodel/utils/env_mat.py index 41f2591279..f4bc333a03 100644 --- a/deepmd/dpmodel/utils/env_mat.py +++ b/deepmd/dpmodel/utils/env_mat.py @@ -12,6 +12,7 @@ ) from deepmd.dpmodel.array_api import ( support_array_api, + xp_take_along_axis, ) @@ -44,33 +45,34 @@ def _make_env_mat( protection: float = 0.0, ): """Make smooth environment matrix.""" + xp = array_api_compat.array_namespace(nlist) nf, nloc, nnei = nlist.shape # nf x nall x 3 - coord = coord.reshape(nf, -1, 3) + coord = xp.reshape(coord, (nf, -1, 3)) mask = nlist >= 0 - nlist = nlist * mask + nlist = nlist * xp.astype(mask, nlist.dtype) # nf x (nloc x nnei) x 3 - index = np.tile(nlist.reshape(nf, -1, 1), (1, 1, 3)) - coord_r = np.take_along_axis(coord, index, 1) + index = xp.tile(xp.reshape(nlist, (nf, -1, 1)), (1, 1, 3)) + coord_r = xp_take_along_axis(coord, index, 1) # nf x nloc 
x nnei x 3 - coord_r = coord_r.reshape(nf, nloc, nnei, 3) + coord_r = xp.reshape(coord_r, (nf, nloc, nnei, 3)) # nf x nloc x 1 x 3 - coord_l = coord[:, :nloc].reshape(nf, -1, 1, 3) + coord_l = xp.reshape(coord[:, :nloc, ...], (nf, -1, 1, 3)) # nf x nloc x nnei x 3 diff = coord_r - coord_l # nf x nloc x nnei - length = np.linalg.norm(diff, axis=-1, keepdims=True) + length = xp.linalg.vector_norm(diff, axis=-1, keepdims=True) # for index 0 nloc atom - length = length + ~np.expand_dims(mask, -1) + length = length + xp.astype(~xp.expand_dims(mask, axis=-1), length.dtype) t0 = 1 / (length + protection) t1 = diff / (length + protection) ** 2 weight = compute_smooth_weight(length, ruct_smth, rcut) - weight = weight * np.expand_dims(mask, -1) + weight = weight * xp.astype(xp.expand_dims(mask, axis=-1), weight.dtype) if radial_only: env_mat = t0 * weight else: - env_mat = np.concatenate([t0, t1], axis=-1) * weight - return env_mat, diff * np.expand_dims(mask, -1), weight + env_mat = xp.concat([t0, t1], axis=-1) * weight + return env_mat, diff * xp.astype(xp.expand_dims(mask, axis=-1), diff.dtype), weight class EnvMat(NativeOP): @@ -122,13 +124,14 @@ def call( switch The value of switch function. shape: nf x nloc x nnei """ + xp = array_api_compat.array_namespace(coord_ext, atype_ext, nlist) em, diff, sw = self._call(nlist, coord_ext, radial_only) nf, nloc, nnei = nlist.shape atype = atype_ext[:, :nloc] if davg is not None: - em -= davg[atype] + em -= xp.reshape(xp.take(davg, xp.reshape(atype, (-1,)), axis=0), em.shape) if dstd is not None: - em /= dstd[atype] + em /= xp.reshape(xp.take(dstd, xp.reshape(atype, (-1,)), axis=0), em.shape) return em, diff, sw def _call(self, nlist, coord_ext, radial_only): diff --git a/deepmd/dpmodel/utils/exclude_mask.py b/deepmd/dpmodel/utils/exclude_mask.py index d0a739b9d4..5469e66d97 100644 --- a/deepmd/dpmodel/utils/exclude_mask.py +++ b/deepmd/dpmodel/utils/exclude_mask.py @@ -1,7 +1,12 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import array_api_compat import numpy as np +from deepmd.dpmodel.array_api import ( + xp_take_along_axis, +) + class AtomExcludeMask: """Computes the type exclusion mask for atoms.""" @@ -45,8 +50,9 @@ def build_type_exclude_mask( otherwise being 1. """ + xp = array_api_compat.array_namespace(atype) nf, natom = atype.shape - return self.type_mask[atype].reshape(nf, natom) + return xp.reshape(self.type_mask[atype], (nf, natom)) class PairExcludeMask: @@ -64,7 +70,7 @@ def __init__( self.exclude_types.add((tt[0], tt[1])) self.exclude_types.add((tt[1], tt[0])) # ntypes + 1 for nlist masks - self.type_mask = np.array( + type_mask = np.array( [ [ 1 if (tt_i, tt_j) not in self.exclude_types else 0 @@ -75,7 +81,7 @@ def __init__( dtype=np.int32, ) # (ntypes+1 x ntypes+1) - self.type_mask = self.type_mask.reshape([-1]) + self.type_mask = type_mask.reshape([-1]) def get_exclude_types(self): return self.exclude_types @@ -102,23 +108,29 @@ def build_type_exclude_mask( otherwise being 1. """ + xp = array_api_compat.array_namespace(nlist, atype_ext) if len(self.exclude_types) == 0: # safely return 1 if nothing is excluded. - return np.ones_like(nlist, dtype=np.int32) + return xp.ones_like(nlist, dtype=xp.int32) nf, nloc, nnei = nlist.shape nall = atype_ext.shape[1] # add virtual atom of type ntypes. 
nf x nall+1 - ae = np.concatenate( - [atype_ext, self.ntypes * np.ones([nf, 1], dtype=atype_ext.dtype)], axis=-1 + ae = xp.concat( + [atype_ext, self.ntypes * xp.ones([nf, 1], dtype=atype_ext.dtype)], axis=-1 ) - type_i = atype_ext[:, :nloc].reshape(nf, nloc) * (self.ntypes + 1) + type_i = xp.reshape(atype_ext[:, :nloc], (nf, nloc)) * (self.ntypes + 1) # nf x nloc x nnei - index = np.where(nlist == -1, nall, nlist).reshape(nf, nloc * nnei) - type_j = np.take_along_axis(ae, index, axis=1).reshape(nf, nloc, nnei) + index = xp.reshape( + xp.where(nlist == -1, xp.full_like(nlist, nall), nlist), (nf, nloc * nnei) + ) + type_j = xp_take_along_axis(ae, index, axis=1) + type_j = xp.reshape(type_j, (nf, nloc, nnei)) type_ij = type_i[:, :, None] + type_j # nf x (nloc x nnei) - type_ij = type_ij.reshape(nf, nloc * nnei) - mask = self.type_mask[type_ij].reshape(nf, nloc, nnei) + type_ij = xp.reshape(type_ij, (nf, nloc * nnei)) + mask = xp.reshape( + xp.take(self.type_mask, xp.reshape(type_ij, (-1,))), (nf, nloc, nnei) + ) return mask def __contains__(self, item): diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py index e1242c3669..339035ff4e 100644 --- a/deepmd/dpmodel/utils/network.py +++ b/deepmd/dpmodel/utils/network.py @@ -146,15 +146,18 @@ def deserialize(cls, data: dict) -> "NativeLayer": num_out, **data, ) - obj.w, obj.b, obj.idt = ( + w, b, idt = ( variables["w"], variables.get("b", None), variables.get("idt", None), ) - if obj.b is not None: - obj.b = obj.b.ravel() - if obj.idt is not None: - obj.idt = obj.idt.ravel() + if b is not None: + b = b.ravel() + if idt is not None: + idt = idt.ravel() + obj.w = w + obj.b = b + obj.idt = idt obj.check_shape_consistency() return obj @@ -175,8 +178,11 @@ def check_type_consistency(self): def check_var(var): if var is not None: + # array api standard doesn't provide a API to get the dtype name + # this is really hacked + dtype_name = str(var.dtype).split(".")[-1] # assertion "float64" == "double" would fail - assert PRECISION_DICT[var.dtype.name] is PRECISION_DICT[precision] + assert PRECISION_DICT[dtype_name] is PRECISION_DICT[precision] check_var(self.w) check_var(self.b) @@ -249,7 +255,7 @@ def call(self, x: np.ndarray) -> np.ndarray: if self.resnet and self.w.shape[1] == self.w.shape[0]: y += x elif self.resnet and self.w.shape[1] == 2 * self.w.shape[0]: - y += xp.concatenate([x, x], axis=-1) + y += xp.concat([x, x], axis=-1) return y @@ -360,10 +366,11 @@ def __init__( precision=precision, seed=seed, ) - self.w = self.w.squeeze(0) # keep the weight shape to be [num_in] + xp = array_api_compat.array_namespace(self.w, self.b) + self.w = xp.squeeze(self.w, 0) # keep the weight shape to be [num_in] if self.uni_init: - self.w = np.ones_like(self.w) - self.b = np.zeros_like(self.b) + self.w = xp.ones_like(self.w) + self.b = xp.zeros_like(self.b) # only to keep consistent with other backends self.trainable = trainable @@ -376,8 +383,8 @@ def serialize(self) -> dict: The serialized layer. 
""" data = { - "w": self.w, - "b": self.b, + "w": to_numpy_array(self.w), + "b": to_numpy_array(self.b), } return { "@class": "LayerNorm", @@ -471,11 +478,12 @@ def call(self, x: np.ndarray) -> np.ndarray: @staticmethod def layer_norm_numpy(x, shape, weight=None, bias=None, eps=1e-5): + xp = array_api_compat.array_namespace(x) # mean and variance - mean = np.mean(x, axis=tuple(range(-len(shape), 0)), keepdims=True) - var = np.var(x, axis=tuple(range(-len(shape), 0)), keepdims=True) + mean = xp.mean(x, axis=tuple(range(-len(shape), 0)), keepdims=True) + var = xp.var(x, axis=tuple(range(-len(shape), 0)), keepdims=True) # normalize - x_normalized = (x - mean) / np.sqrt(var + eps) + x_normalized = (x - mean) / xp.sqrt(var + eps) # shift and scale if weight is not None and bias is not None: x_normalized = x_normalized * weight + bias diff --git a/deepmd/dpmodel/utils/nlist.py b/deepmd/dpmodel/utils/nlist.py index 4d0b3e3286..4806fa4cd8 100644 --- a/deepmd/dpmodel/utils/nlist.py +++ b/deepmd/dpmodel/utils/nlist.py @@ -4,8 +4,13 @@ Union, ) +import array_api_compat import numpy as np +from deepmd.dpmodel.array_api import ( + xp_take_along_axis, +) + from .region import ( normalize_coord, to_face_distance, @@ -88,34 +93,36 @@ def build_neighbor_list( For virtual atoms all neighboring positions are filled with -1. """ + xp = array_api_compat.array_namespace(coord, atype) batch_size = coord.shape[0] - coord = coord.reshape(batch_size, -1) + coord = xp.reshape(coord, (batch_size, -1)) nall = coord.shape[1] // 3 # fill virtual atoms with large coords so they are not neighbors of any # real atom. if coord.size > 0: - xmax = np.max(coord) + 2.0 * rcut + xmax = xp.max(coord) + 2.0 * rcut else: xmax = 2.0 * rcut # nf x nall is_vir = atype < 0 - coord1 = np.where( - is_vir[:, :, None], xmax, coord.reshape(batch_size, nall, 3) - ).reshape(batch_size, nall * 3) + coord1 = xp.where( + is_vir[:, :, None], xmax, xp.reshape(coord, (batch_size, nall, 3)) + ) + coord1 = xp.reshape(coord1, (batch_size, nall * 3)) if isinstance(sel, int): sel = [sel] nsel = sum(sel) coord0 = coord1[:, : nloc * 3] diff = ( - coord1.reshape([batch_size, -1, 3])[:, None, :, :] - - coord0.reshape([batch_size, -1, 3])[:, :, None, :] + xp.reshape(coord1, [batch_size, -1, 3])[:, None, :, :] + - xp.reshape(coord0, [batch_size, -1, 3])[:, :, None, :] ) assert list(diff.shape) == [batch_size, nloc, nall, 3] - rr = np.linalg.norm(diff, axis=-1) + rr = xp.linalg.vector_norm(diff, axis=-1) # if central atom has two zero distances, sorting sometimes can not exclude itself - rr -= np.eye(nloc, nall, dtype=diff.dtype)[np.newaxis, :, :] - nlist = np.argsort(rr, axis=-1) - rr = np.sort(rr, axis=-1) + rr -= xp.eye(nloc, nall, dtype=diff.dtype)[xp.newaxis, :, :] + nlist = xp.argsort(rr, axis=-1) + rr = xp.sort(rr, axis=-1) rr = rr[:, :, 1:] nlist = nlist[:, :, 1:] nnei = rr.shape[2] @@ -123,16 +130,20 @@ def build_neighbor_list( rr = rr[:, :, :nsel] nlist = nlist[:, :, :nsel] else: - rr = np.concatenate( - [rr, np.ones([batch_size, nloc, nsel - nnei]) + rcut], # pylint: disable=no-explicit-dtype + rr = xp.concatenate( + [rr, xp.ones([batch_size, nloc, nsel - nnei]) + rcut], # pylint: disable=no-explicit-dtype axis=-1, ) - nlist = np.concatenate( - [nlist, np.ones([batch_size, nloc, nsel - nnei], dtype=nlist.dtype)], + nlist = xp.concatenate( + [nlist, xp.ones([batch_size, nloc, nsel - nnei], dtype=nlist.dtype)], axis=-1, ) assert list(nlist.shape) == [batch_size, nloc, nsel] - nlist = np.where(np.logical_or((rr > rcut), is_vir[:, :nloc, None]), -1, 
nlist) + nlist = xp.where( + xp.logical_or((rr > rcut), is_vir[:, :nloc, None]), + xp.full_like(nlist, -1), + nlist, + ) if distinguish_types: return nlist_distinguish_types(nlist, atype, sel) @@ -149,23 +160,24 @@ def nlist_distinguish_types( distinguish atom types. """ + xp = array_api_compat.array_namespace(nlist, atype) nf, nloc, _ = nlist.shape ret_nlist = [] - tmp_atype = np.tile(atype[:, None], [1, nloc, 1]) + tmp_atype = xp.tile(atype[:, None], [1, nloc, 1]) mask = nlist == -1 tnlist_0 = nlist.copy() tnlist_0[mask] = 0 - tnlist = np.take_along_axis(tmp_atype, tnlist_0, axis=2).squeeze() - tnlist = np.where(mask, -1, tnlist) + tnlist = xp_take_along_axis(tmp_atype, tnlist_0, axis=2).squeeze() + tnlist = xp.where(mask, -1, tnlist) snsel = tnlist.shape[2] for ii, ss in enumerate(sel): - pick_mask = (tnlist == ii).astype(np.int32) - sorted_indices = np.argsort(-pick_mask, kind="stable", axis=-1) - pick_mask_sorted = -np.sort(-pick_mask, axis=-1) - inlist = np.take_along_axis(nlist, sorted_indices, axis=2) - inlist = np.where(~pick_mask_sorted.astype(bool), -1, inlist) - ret_nlist.append(np.split(inlist, [ss, snsel - ss], axis=-1)[0]) - ret = np.concatenate(ret_nlist, axis=-1) + pick_mask = (tnlist == ii).astype(xp.int32) + sorted_indices = xp.argsort(-pick_mask, kind="stable", axis=-1) + pick_mask_sorted = -xp.sort(-pick_mask, axis=-1) + inlist = xp_take_along_axis(nlist, sorted_indices, axis=2) + inlist = xp.where(~pick_mask_sorted.astype(bool), -1, inlist) + ret_nlist.append(xp.split(inlist, [ss, snsel - ss], axis=-1)[0]) + ret = xp.concat(ret_nlist, axis=-1) return ret @@ -263,36 +275,46 @@ def extend_coord_with_ghosts( maping extended index to the local index """ + xp = array_api_compat.array_namespace(coord, atype) nf, nloc = atype.shape - aidx = np.tile(np.arange(nloc)[np.newaxis, :], (nf, 1)) # pylint: disable=no-explicit-dtype + aidx = xp.tile(xp.arange(nloc)[xp.newaxis, :], (nf, 1)) # pylint: disable=no-explicit-dtype if cell is None: nall = nloc - extend_coord = coord.copy() - extend_atype = atype.copy() - extend_aidx = aidx.copy() + extend_coord = coord + extend_atype = atype + extend_aidx = aidx else: - coord = coord.reshape((nf, nloc, 3)) - cell = cell.reshape((nf, 3, 3)) + coord = xp.reshape(coord, (nf, nloc, 3)) + cell = xp.reshape(cell, (nf, 3, 3)) to_face = to_face_distance(cell) - nbuff = np.ceil(rcut / to_face).astype(int) - nbuff = np.max(nbuff, axis=0) - xi = np.arange(-nbuff[0], nbuff[0] + 1, 1) # pylint: disable=no-explicit-dtype - yi = np.arange(-nbuff[1], nbuff[1] + 1, 1) # pylint: disable=no-explicit-dtype - zi = np.arange(-nbuff[2], nbuff[2] + 1, 1) # pylint: disable=no-explicit-dtype - xyz = np.outer(xi, np.array([1, 0, 0]))[:, np.newaxis, np.newaxis, :] - xyz = xyz + np.outer(yi, np.array([0, 1, 0]))[np.newaxis, :, np.newaxis, :] - xyz = xyz + np.outer(zi, np.array([0, 0, 1]))[np.newaxis, np.newaxis, :, :] - xyz = xyz.reshape(-1, 3) - shift_idx = xyz[np.argsort(np.linalg.norm(xyz, axis=1))] + nbuff = xp.astype(xp.ceil(rcut / to_face), xp.int64) + nbuff = xp.max(nbuff, axis=0) + xi = xp.arange(-int(nbuff[0]), int(nbuff[0]) + 1, 1) # pylint: disable=no-explicit-dtype + yi = xp.arange(-int(nbuff[1]), int(nbuff[1]) + 1, 1) # pylint: disable=no-explicit-dtype + zi = xp.arange(-int(nbuff[2]), int(nbuff[2]) + 1, 1) # pylint: disable=no-explicit-dtype + xyz = xp.linalg.outer(xi, xp.asarray([1, 0, 0]))[:, xp.newaxis, xp.newaxis, :] + xyz = ( + xyz + + xp.linalg.outer(yi, xp.asarray([0, 1, 0]))[xp.newaxis, :, xp.newaxis, :] + ) + xyz = ( + xyz + + xp.linalg.outer(zi, 
xp.asarray([0, 0, 1]))[xp.newaxis, xp.newaxis, :, :] + ) + xyz = xp.reshape(xyz, (-1, 3)) + xyz = xp.astype(xyz, coord.dtype) + shift_idx = xp.take(xyz, xp.argsort(xp.linalg.vector_norm(xyz, axis=1)), axis=0) ns, _ = shift_idx.shape nall = ns * nloc - shift_vec = np.einsum("sd,fdk->fsk", shift_idx, cell) + # shift_vec = xp.einsum("sd,fdk->fsk", shift_idx, cell) + shift_vec = xp.tensordot(shift_idx, cell, axes=([1], [1])) + shift_vec = xp.permute_dims(shift_vec, (1, 0, 2)) extend_coord = coord[:, None, :, :] + shift_vec[:, :, None, :] - extend_atype = np.tile(atype[:, :, np.newaxis], (1, ns, 1)) - extend_aidx = np.tile(aidx[:, :, np.newaxis], (1, ns, 1)) + extend_atype = xp.tile(atype[:, :, xp.newaxis], (1, ns, 1)) + extend_aidx = xp.tile(aidx[:, :, xp.newaxis], (1, ns, 1)) return ( - extend_coord.reshape((nf, nall * 3)), - extend_atype.reshape((nf, nall)), - extend_aidx.reshape((nf, nall)), + xp.reshape(extend_coord, (nf, nall * 3)), + xp.reshape(extend_atype, (nf, nall)), + xp.reshape(extend_aidx, (nf, nall)), ) diff --git a/deepmd/dpmodel/utils/region.py b/deepmd/dpmodel/utils/region.py index ddbc4b29b8..8102020827 100644 --- a/deepmd/dpmodel/utils/region.py +++ b/deepmd/dpmodel/utils/region.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import array_api_compat import numpy as np @@ -21,8 +22,9 @@ def phys2inter( the internal coordinates """ - rec_cell = np.linalg.inv(cell) - return np.matmul(coord, rec_cell) + xp = array_api_compat.array_namespace(coord, cell) + rec_cell = xp.linalg.inv(cell) + return xp.matmul(coord, rec_cell) def inter2phys( @@ -44,7 +46,8 @@ def inter2phys( the physical coordinates """ - return np.matmul(coord, cell) + xp = array_api_compat.array_namespace(coord, cell) + return xp.matmul(coord, cell) def normalize_coord( @@ -66,8 +69,9 @@ def normalize_coord( wrapped coordinates of shape [*, na, 3]. 
""" + xp = array_api_compat.array_namespace(coord, cell) icoord = phys2inter(coord, cell) - icoord = np.remainder(icoord, 1.0) + icoord = xp.remainder(icoord, 1.0) return inter2phys(icoord, cell) @@ -87,17 +91,19 @@ def to_face_distance( the to face distances of shape [*, 3] """ + xp = array_api_compat.array_namespace(cell) cshape = cell.shape - dist = b_to_face_distance(cell.reshape([-1, 3, 3])) - return dist.reshape(list(cshape[:-2]) + [3]) # noqa:RUF005 + dist = b_to_face_distance(xp.reshape(cell, [-1, 3, 3])) + return xp.reshape(dist, list(cshape[:-2]) + [3]) # noqa:RUF005 def b_to_face_distance(cell): - volume = np.linalg.det(cell) - c_yz = np.cross(cell[:, 1], cell[:, 2], axis=-1) - _h2yz = volume / np.linalg.norm(c_yz, axis=-1) - c_zx = np.cross(cell[:, 2], cell[:, 0], axis=-1) - _h2zx = volume / np.linalg.norm(c_zx, axis=-1) - c_xy = np.cross(cell[:, 0], cell[:, 1], axis=-1) - _h2xy = volume / np.linalg.norm(c_xy, axis=-1) - return np.stack([_h2yz, _h2zx, _h2xy], axis=1) + xp = array_api_compat.array_namespace(cell) + volume = xp.linalg.det(cell) + c_yz = xp.linalg.cross(cell[:, 1, ...], cell[:, 2, ...], axis=-1) + _h2yz = volume / xp.linalg.vector_norm(c_yz, axis=-1) + c_zx = xp.linalg.cross(cell[:, 2, ...], cell[:, 0, ...], axis=-1) + _h2zx = volume / xp.linalg.vector_norm(c_zx, axis=-1) + c_xy = xp.linalg.cross(cell[:, 0, ...], cell[:, 1, ...], axis=-1) + _h2xy = volume / xp.linalg.vector_norm(c_xy, axis=-1) + return xp.stack([_h2yz, _h2zx, _h2xy], axis=1) diff --git a/deepmd/dpmodel/utils/type_embed.py b/deepmd/dpmodel/utils/type_embed.py index d67d8e50fd..e28b6abb31 100644 --- a/deepmd/dpmodel/utils/type_embed.py +++ b/deepmd/dpmodel/utils/type_embed.py @@ -106,7 +106,7 @@ def call(self) -> np.ndarray: embed = self.embedding_net(self.econf_tebd) if self.padding: embed_pad = xp.zeros((1, embed.shape[-1]), dtype=embed.dtype) - embed = xp.concatenate([embed, embed_pad], axis=0) + embed = xp.concat([embed, embed_pad], axis=0) return embed @classmethod diff --git a/deepmd/jax/common.py b/deepmd/jax/common.py index 550b168b29..9c144a41d1 100644 --- a/deepmd/jax/common.py +++ b/deepmd/jax/common.py @@ -1,13 +1,18 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - Union, + Any, + Optional, overload, ) import numpy as np +from deepmd.dpmodel.common import ( + NativeOP, +) from deepmd.jax.env import ( jnp, + nnx, ) @@ -19,7 +24,7 @@ def to_jax_array(array: np.ndarray) -> jnp.ndarray: ... def to_jax_array(array: None) -> None: ... -def to_jax_array(array: Union[np.ndarray]) -> Union[jnp.ndarray]: +def to_jax_array(array: Optional[np.ndarray]) -> Optional[jnp.ndarray]: """Convert a numpy array to a JAX array. Parameters @@ -35,3 +40,44 @@ def to_jax_array(array: Union[np.ndarray]) -> Union[jnp.ndarray]: if array is None: return None return jnp.array(array) + + +def flax_module( + module: NativeOP, +) -> nnx.Module: + """Convert a NativeOP to a Flax module. + + Parameters + ---------- + module : NativeOP + The NativeOP to convert. + + Returns + ------- + flax.nnx.Module + The Flax module. + + Examples + -------- + >>> @flax_module + ... class MyModule(NativeOP): + ... 
pass + """ + metas = set() + if not issubclass(type(nnx.Module), type(module)): + metas.add(type(module)) + if not issubclass(type(module), type(nnx.Module)): + metas.add(type(nnx.Module)) + + class MixedMetaClass(*metas): + def __call__(self, *args, **kwargs): + return type(nnx.Module).__call__(self, *args, **kwargs) + + class FlaxModule(module, nnx.Module, metaclass=MixedMetaClass): + def __init_subclass__(cls, **kwargs) -> None: + return super().__init_subclass__(**kwargs) + + def __setattr__(self, name: str, value: Any) -> None: + return super().__setattr__(name, value) + + return FlaxModule diff --git a/deepmd/jax/descriptor/__init__.py b/deepmd/jax/descriptor/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/deepmd/jax/descriptor/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/jax/descriptor/dpa1.py b/deepmd/jax/descriptor/dpa1.py new file mode 100644 index 0000000000..a9b0404970 --- /dev/null +++ b/deepmd/jax/descriptor/dpa1.py @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Any, +) + +from deepmd.dpmodel.descriptor.dpa1 import DescrptBlockSeAtten as DescrptBlockSeAttenDP +from deepmd.dpmodel.descriptor.dpa1 import DescrptDPA1 as DescrptDPA1DP +from deepmd.dpmodel.descriptor.dpa1 import GatedAttentionLayer as GatedAttentionLayerDP +from deepmd.dpmodel.descriptor.dpa1 import ( + NeighborGatedAttention as NeighborGatedAttentionDP, +) +from deepmd.dpmodel.descriptor.dpa1 import ( + NeighborGatedAttentionLayer as NeighborGatedAttentionLayerDP, +) +from deepmd.jax.common import ( + flax_module, + to_jax_array, +) +from deepmd.jax.utils.exclude_mask import ( + PairExcludeMask, +) +from deepmd.jax.utils.network import ( + LayerNorm, + NativeLayer, + NetworkCollection, +) +from deepmd.jax.utils.type_embed import ( + TypeEmbedNet, +) + + +@flax_module +class GatedAttentionLayer(GatedAttentionLayerDP): + def __setattr__(self, name: str, value: Any) -> None: + if name in {"in_proj", "out_proj"}: + value = NativeLayer.deserialize(value.serialize()) + return super().__setattr__(name, value) + + +@flax_module +class NeighborGatedAttentionLayer(NeighborGatedAttentionLayerDP): + def __setattr__(self, name: str, value: Any) -> None: + if name == "attention_layer": + value = GatedAttentionLayer.deserialize(value.serialize()) + elif name == "attn_layer_norm": + value = LayerNorm.deserialize(value.serialize()) + return super().__setattr__(name, value) + + +@flax_module +class NeighborGatedAttention(NeighborGatedAttentionDP): + def __setattr__(self, name: str, value: Any) -> None: + if name == "attention_layers": + value = [ + NeighborGatedAttentionLayer.deserialize(ii.serialize()) for ii in value + ] + return super().__setattr__(name, value) + + +@flax_module +class DescrptBlockSeAtten(DescrptBlockSeAttenDP): + def __setattr__(self, name: str, value: Any) -> None: + if name in {"mean", "stddev"}: + value = to_jax_array(value) + elif name in {"embeddings", "embeddings_strip"}: + if value is not None: + value = NetworkCollection.deserialize(value.serialize()) + elif name == "dpa1_attention": + value = NeighborGatedAttention.deserialize(value.serialize()) + elif name == "env_mat": + # env_mat doesn't store any value + pass + elif name == "emask": + value = PairExcludeMask(value.ntypes, value.exclude_types) + + return super().__setattr__(name, value) + + +@flax_module +class DescrptDPA1(DescrptDPA1DP): + def __setattr__(self, name: str, value: Any) -> None: + if name == "se_atten": + value = 
DescrptBlockSeAtten.deserialize(value.serialize()) + elif name == "type_embedding": + value = TypeEmbedNet.deserialize(value.serialize()) + return super().__setattr__(name, value) diff --git a/deepmd/jax/env.py b/deepmd/jax/env.py index 34e4aa6240..5a5a7f6bf0 100644 --- a/deepmd/jax/env.py +++ b/deepmd/jax/env.py @@ -5,10 +5,14 @@ import jax import jax.numpy as jnp +from flax import ( + nnx, +) jax.config.update("jax_enable_x64", True) __all__ = [ "jax", "jnp", + "nnx", ] diff --git a/deepmd/jax/utils/exclude_mask.py b/deepmd/jax/utils/exclude_mask.py new file mode 100644 index 0000000000..cac4cee092 --- /dev/null +++ b/deepmd/jax/utils/exclude_mask.py @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Any, +) + +from deepmd.dpmodel.utils.exclude_mask import PairExcludeMask as PairExcludeMaskDP +from deepmd.jax.common import ( + flax_module, + to_jax_array, +) + + +@flax_module +class PairExcludeMask(PairExcludeMaskDP): + def __setattr__(self, name: str, value: Any) -> None: + if name in {"type_mask"}: + value = to_jax_array(value) + return super().__setattr__(name, value) diff --git a/deepmd/jax/utils/network.py b/deepmd/jax/utils/network.py index 629b51b8cd..2c406095cd 100644 --- a/deepmd/jax/utils/network.py +++ b/deepmd/jax/utils/network.py @@ -1,29 +1,74 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Any, + ClassVar, ) from deepmd.dpmodel.common import ( NativeOP, ) +from deepmd.dpmodel.utils.network import LayerNorm as LayerNormDP from deepmd.dpmodel.utils.network import NativeLayer as NativeLayerDP +from deepmd.dpmodel.utils.network import NetworkCollection as NetworkCollectionDP from deepmd.dpmodel.utils.network import ( make_embedding_network, make_fitting_network, make_multilayer_network, ) from deepmd.jax.common import ( + flax_module, to_jax_array, ) +from deepmd.jax.env import ( + nnx, +) + + +class ArrayAPIParam(nnx.Param): + def __array__(self, *args, **kwargs): + return self.value.__array__(*args, **kwargs) + + def __array_namespace__(self, *args, **kwargs): + return self.value.__array_namespace__(*args, **kwargs) + def __dlpack__(self, *args, **kwargs): + return self.value.__dlpack__(*args, **kwargs) + def __dlpack_device__(self, *args, **kwargs): + return self.value.__dlpack_device__(*args, **kwargs) + + +@flax_module class NativeLayer(NativeLayerDP): def __setattr__(self, name: str, value: Any) -> None: if name in {"w", "b", "idt"}: value = to_jax_array(value) + if value is not None: + value = ArrayAPIParam(value) return super().__setattr__(name, value) -NativeNet = make_multilayer_network(NativeLayer, NativeOP) -EmbeddingNet = make_embedding_network(NativeNet, NativeLayer) -FittingNet = make_fitting_network(EmbeddingNet, NativeNet, NativeLayer) +@flax_module +class NativeNet(make_multilayer_network(NativeLayer, NativeOP)): + pass + + +class EmbeddingNet(make_embedding_network(NativeNet, NativeLayer)): + pass + + +class FittingNet(make_fitting_network(EmbeddingNet, NativeNet, NativeLayer)): + pass + + +@flax_module +class NetworkCollection(NetworkCollectionDP): + NETWORK_TYPE_MAP: ClassVar[dict[str, type]] = { + "network": NativeNet, + "embedding_network": EmbeddingNet, + "fitting_network": FittingNet, + } + + +class LayerNorm(LayerNormDP, NativeLayer): + pass diff --git a/deepmd/jax/utils/type_embed.py b/deepmd/jax/utils/type_embed.py index bc7c469524..3143460244 100644 --- a/deepmd/jax/utils/type_embed.py +++ b/deepmd/jax/utils/type_embed.py @@ -5,6 +5,7 @@ from deepmd.dpmodel.utils.type_embed import 
TypeEmbedNet as TypeEmbedNetDP
 from deepmd.jax.common import (
+    flax_module,
     to_jax_array,
 )
 from deepmd.jax.utils.network import (
@@ -12,6 +13,7 @@
 )
 
 
+@flax_module
 class TypeEmbedNet(TypeEmbedNetDP):
     def __setattr__(self, name: str, value: Any) -> None:
         if name in {"econf_tebd"}:
diff --git a/pyproject.toml b/pyproject.toml
index 6932960ace..b13dceeb07 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -137,6 +137,7 @@ cu12 = [
 ]
 jax = [
     'jax>=0.4.33;python_version>="3.10"',
+    'flax>=0.8.0;python_version>="3.10"',
 ]
 
 [tool.deepmd_build_backend.scripts]
diff --git a/source/tests/array_api_strict/__init__.py b/source/tests/array_api_strict/__init__.py
new file mode 100644
index 0000000000..27785c2fd5
--- /dev/null
+++ b/source/tests/array_api_strict/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Synchronize with deepmd.jax for test purpose only."""
diff --git a/source/tests/array_api_strict/common.py b/source/tests/array_api_strict/common.py
new file mode 100644
index 0000000000..28f67a97f6
--- /dev/null
+++ b/source/tests/array_api_strict/common.py
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Optional,
+)
+
+import array_api_strict
+import numpy as np
+
+
+def to_array_api_strict_array(array: Optional[np.ndarray]):
+    """Convert a numpy array to an array_api_strict array.
+
+    Parameters
+    ----------
+    array : np.ndarray
+        The numpy array to convert.
+
+    Returns
+    -------
+    array_api_strict array
+        The array_api_strict array.
+    """
+    if array is None:
+        return None
+    return array_api_strict.asarray(array)
diff --git a/source/tests/array_api_strict/descriptor/__init__.py b/source/tests/array_api_strict/descriptor/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/source/tests/array_api_strict/descriptor/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/source/tests/array_api_strict/descriptor/dpa1.py b/source/tests/array_api_strict/descriptor/dpa1.py
new file mode 100644
index 0000000000..ebd688e303
--- /dev/null
+++ b/source/tests/array_api_strict/descriptor/dpa1.py
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Any,
+)
+
+from deepmd.dpmodel.descriptor.dpa1 import DescrptBlockSeAtten as DescrptBlockSeAttenDP
+from deepmd.dpmodel.descriptor.dpa1 import DescrptDPA1 as DescrptDPA1DP
+from deepmd.dpmodel.descriptor.dpa1 import GatedAttentionLayer as GatedAttentionLayerDP
+from deepmd.dpmodel.descriptor.dpa1 import (
+    NeighborGatedAttention as NeighborGatedAttentionDP,
+)
+from deepmd.dpmodel.descriptor.dpa1 import (
+    NeighborGatedAttentionLayer as NeighborGatedAttentionLayerDP,
+)
+
+from ..common import (
+    to_array_api_strict_array,
+)
+from ..utils.exclude_mask import (
+    PairExcludeMask,
+)
+from ..utils.network import (
+    LayerNorm,
+    NativeLayer,
+    NetworkCollection,
+)
+from ..utils.type_embed import (
+    TypeEmbedNet,
+)
+
+
+class GatedAttentionLayer(GatedAttentionLayerDP):
+    def __setattr__(self, name: str, value: Any) -> None:
+        if name in {"in_proj", "out_proj"}:
+            value = NativeLayer.deserialize(value.serialize())
+        return super().__setattr__(name, value)
+
+
+class NeighborGatedAttentionLayer(NeighborGatedAttentionLayerDP):
+    def __setattr__(self, name: str, value: Any) -> None:
+        if name == "attention_layer":
+            value = GatedAttentionLayer.deserialize(value.serialize())
+        elif name == "attn_layer_norm":
+            value = LayerNorm.deserialize(value.serialize())
+        return super().__setattr__(name, value)
+
+
+class 
NeighborGatedAttention(NeighborGatedAttentionDP): + def __setattr__(self, name: str, value: Any) -> None: + if name == "attention_layers": + value = [ + NeighborGatedAttentionLayer.deserialize(ii.serialize()) for ii in value + ] + return super().__setattr__(name, value) + + +class DescrptBlockSeAtten(DescrptBlockSeAttenDP): + def __setattr__(self, name: str, value: Any) -> None: + if name in {"mean", "stddev"}: + value = to_array_api_strict_array(value) + elif name in {"embeddings", "embeddings_strip"}: + if value is not None: + value = NetworkCollection.deserialize(value.serialize()) + elif name == "dpa1_attention": + value = NeighborGatedAttention.deserialize(value.serialize()) + elif name == "env_mat": + # env_mat doesn't store any value + pass + elif name == "emask": + value = PairExcludeMask(value.ntypes, value.exclude_types) + + return super().__setattr__(name, value) + + +class DescrptDPA1(DescrptDPA1DP): + def __setattr__(self, name: str, value: Any) -> None: + if name == "se_atten": + value = DescrptBlockSeAtten.deserialize(value.serialize()) + elif name == "type_embedding": + value = TypeEmbedNet.deserialize(value.serialize()) + return super().__setattr__(name, value) diff --git a/source/tests/array_api_strict/utils/__init__.py b/source/tests/array_api_strict/utils/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/source/tests/array_api_strict/utils/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/source/tests/array_api_strict/utils/exclude_mask.py b/source/tests/array_api_strict/utils/exclude_mask.py new file mode 100644 index 0000000000..06f2e94b52 --- /dev/null +++ b/source/tests/array_api_strict/utils/exclude_mask.py @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Any, +) + +from deepmd.dpmodel.utils.exclude_mask import PairExcludeMask as PairExcludeMaskDP + +from ..common import ( + to_array_api_strict_array, +) + + +class PairExcludeMask(PairExcludeMaskDP): + def __setattr__(self, name: str, value: Any) -> None: + if name in {"type_mask"}: + value = to_array_api_strict_array(value) + return super().__setattr__(name, value) diff --git a/source/tests/array_api_strict/utils/network.py b/source/tests/array_api_strict/utils/network.py new file mode 100644 index 0000000000..42b0bb5c61 --- /dev/null +++ b/source/tests/array_api_strict/utils/network.py @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Any, + ClassVar, +) + +from deepmd.dpmodel.common import ( + NativeOP, +) +from deepmd.dpmodel.utils.network import LayerNorm as LayerNormDP +from deepmd.dpmodel.utils.network import NativeLayer as NativeLayerDP +from deepmd.dpmodel.utils.network import NetworkCollection as NetworkCollectionDP +from deepmd.dpmodel.utils.network import ( + make_embedding_network, + make_fitting_network, + make_multilayer_network, +) + +from ..common import ( + to_array_api_strict_array, +) + + +class NativeLayer(NativeLayerDP): + def __setattr__(self, name: str, value: Any) -> None: + if name in {"w", "b", "idt"}: + value = to_array_api_strict_array(value) + return super().__setattr__(name, value) + + +NativeNet = make_multilayer_network(NativeLayer, NativeOP) +EmbeddingNet = make_embedding_network(NativeNet, NativeLayer) +FittingNet = make_fitting_network(EmbeddingNet, NativeNet, NativeLayer) + + +class NetworkCollection(NetworkCollectionDP): + NETWORK_TYPE_MAP: ClassVar[dict[str, type]] = { + "network": NativeNet, + "embedding_network": EmbeddingNet, + 
"fitting_network": FittingNet, + } + + +class LayerNorm(LayerNormDP, NativeLayer): + pass diff --git a/source/tests/array_api_strict/utils/type_embed.py b/source/tests/array_api_strict/utils/type_embed.py new file mode 100644 index 0000000000..7551279002 --- /dev/null +++ b/source/tests/array_api_strict/utils/type_embed.py @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Any, +) + +from deepmd.dpmodel.utils.type_embed import TypeEmbedNet as TypeEmbedNetDP + +from ..common import ( + to_array_api_strict_array, +) +from ..utils.network import ( + EmbeddingNet, +) + + +class TypeEmbedNet(TypeEmbedNetDP): + def __setattr__(self, name: str, value: Any) -> None: + if name in {"econf_tebd"}: + value = to_array_api_strict_array(value) + if name in {"embedding_net"}: + value = EmbeddingNet.deserialize(value.serialize()) + return super().__setattr__(name, value) diff --git a/source/tests/common/dpmodel/test_descriptor_dpa1.py b/source/tests/common/dpmodel/test_descriptor_dpa1.py index 317f4c3d3d..f441895f15 100644 --- a/source/tests/common/dpmodel/test_descriptor_dpa1.py +++ b/source/tests/common/dpmodel/test_descriptor_dpa1.py @@ -36,3 +36,22 @@ def test_self_consistency( mm1 = em1.call(self.coord_ext, self.atype_ext, self.nlist) for ii in [0, 1, 4]: np.testing.assert_allclose(mm0[ii], mm1[ii]) + + def test_multiple_frames(self): + rng = np.random.default_rng(GLOBAL_SEED) + nf, nloc, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + dstd = 0.1 + np.abs(dstd) + + em0 = DescrptDPA1(self.rcut, self.rcut_smth, self.sel, ntypes=2) + em0.davg = davg + em0.dstd = dstd + two_coord_ext = np.concatenate([self.coord_ext, self.coord_ext], axis=0) + two_atype_ext = np.concatenate([self.atype_ext, self.atype_ext], axis=0) + two_nlist = np.concatenate([self.nlist, self.nlist], axis=0) + + mm0 = em0.call(two_coord_ext, two_atype_ext, two_nlist) + for ii in [0, 1, 4]: + np.testing.assert_allclose(mm0[ii][0], mm0[ii][2], err_msg=f"{ii} 0~2") + np.testing.assert_allclose(mm0[ii][1], mm0[ii][3], err_msg=f"{ii} 1~3") diff --git a/source/tests/consistent/common.py b/source/tests/consistent/common.py index c64b14c273..1070fe0f79 100644 --- a/source/tests/consistent/common.py +++ b/source/tests/consistent/common.py @@ -10,6 +10,9 @@ from enum import ( Enum, ) +from importlib.util import ( + find_spec, +) from typing import ( Any, Callable, @@ -33,6 +36,7 @@ INSTALLED_TF = Backend.get_backend("tensorflow")().is_available() INSTALLED_PT = Backend.get_backend("pytorch")().is_available() INSTALLED_JAX = Backend.get_backend("jax")().is_available() +INSTALLED_ARRAY_API_STRICT = find_spec("array_api_strict") is not None if os.environ.get("CI") and not (INSTALLED_TF and INSTALLED_PT): raise ImportError("TensorFlow or PyTorch should be tested in the CI") @@ -56,6 +60,7 @@ "INSTALLED_TF", "INSTALLED_PT", "INSTALLED_JAX", + "INSTALLED_ARRAY_API_STRICT", ] @@ -72,6 +77,7 @@ class CommonTest(ABC): """PyTorch model class.""" jax_class: ClassVar[Optional[type]] """JAX model class.""" + array_api_strict_class: ClassVar[Optional[type]] args: ClassVar[Optional[Union[Argument, list[Argument]]]] """Arguments that maps to the `data`.""" skip_dp: ClassVar[bool] = False @@ -83,6 +89,8 @@ class CommonTest(ABC): # we may usually skip jax before jax is fully supported skip_jax: ClassVar[bool] = True """Whether to skip the JAX model.""" + skip_array_api_strict: ClassVar[bool] = True + """Whether to skip the array_api_strict model.""" rtol = 1e-10 
"""Relative tolerance for comparing the return value. Override for float32.""" atol = 1e-10 @@ -163,6 +171,16 @@ def eval_jax(self, jax_obj: Any) -> Any: """ raise NotImplementedError("Not implemented") + def eval_array_api_strict(self, array_api_strict_obj: Any) -> Any: + """Evaluate the return value of array_api_strict. + + Parameters + ---------- + array_api_strict_obj : Any + The object of array_api_strict + """ + raise NotImplementedError("Not implemented") + class RefBackend(Enum): """Reference backend.""" @@ -170,6 +188,7 @@ class RefBackend(Enum): DP = 2 PT = 3 JAX = 5 + ARRAY_API_STRICT = 6 @abstractmethod def extract_ret(self, ret: Any, backend: RefBackend) -> tuple[np.ndarray, ...]: @@ -235,6 +254,11 @@ def get_jax_ret_serialization_from_cls(self, obj): data = obj.serialize() return ret, data + def get_array_api_strict_ret_serialization_from_cls(self, obj): + ret = self.eval_array_api_strict(obj) + data = obj.serialize() + return ret, data + def get_reference_backend(self): """Get the reference backend. @@ -248,6 +272,8 @@ def get_reference_backend(self): return self.RefBackend.PT if not self.skip_jax: return self.RefBackend.JAX + if not self.skip_array_api_strict: + return self.RefBackend.ARRAY_API_STRICT raise ValueError("No available reference") def get_reference_ret_serialization(self, ref: RefBackend): @@ -261,6 +287,12 @@ def get_reference_ret_serialization(self, ref: RefBackend): if ref == self.RefBackend.PT: obj = self.init_backend_cls(self.pt_class) return self.get_pt_ret_serialization_from_cls(obj) + if ref == self.RefBackend.JAX: + obj = self.init_backend_cls(self.jax_class) + return self.get_jax_ret_serialization_from_cls(obj) + if ref == self.RefBackend.ARRAY_API_STRICT: + obj = self.init_backend_cls(self.array_api_strict_class) + return self.get_array_api_strict_ret_serialization_from_cls(obj) raise ValueError("No available reference") def test_tf_consistent_with_ref(self): @@ -415,6 +447,40 @@ def test_jax_self_consistent(self): else: self.assertEqual(rr1, rr2) + def test_array_api_strict_consistent_with_ref(self): + """Test whether array_api_strict and reference are consistent.""" + if self.skip_array_api_strict: + self.skipTest("Unsupported backend") + ref_backend = self.get_reference_backend() + if ref_backend == self.RefBackend.ARRAY_API_STRICT: + self.skipTest("Reference is self") + ret1, data1 = self.get_reference_ret_serialization(ref_backend) + ret1 = self.extract_ret(ret1, ref_backend) + array_api_strict_obj = self.array_api_strict_class.deserialize(data1) + ret2 = self.eval_array_api_strict(array_api_strict_obj) + ret2 = self.extract_ret(ret2, self.RefBackend.ARRAY_API_STRICT) + data2 = array_api_strict_obj.serialize() + np.testing.assert_equal(data1, data2) + for rr1, rr2 in zip(ret1, ret2): + np.testing.assert_allclose(rr1, rr2, rtol=self.rtol, atol=self.atol) + assert rr1.dtype == rr2.dtype, f"{rr1.dtype} != {rr2.dtype}" + + def test_array_api_strict_self_consistent(self): + """Test whether array_api_strict is self consistent.""" + if self.skip_array_api_strict: + self.skipTest("Unsupported backend") + obj1 = self.init_backend_cls(self.array_api_strict_class) + ret1, data1 = self.get_array_api_strict_ret_serialization_from_cls(obj1) + obj1 = self.array_api_strict_class.deserialize(data1) + ret2, data2 = self.get_array_api_strict_ret_serialization_from_cls(obj1) + np.testing.assert_equal(data1, data2) + for rr1, rr2 in zip(ret1, ret2): + if isinstance(rr1, np.ndarray) and isinstance(rr2, np.ndarray): + np.testing.assert_allclose(rr1, rr2, 
rtol=self.rtol, atol=self.atol) + assert rr1.dtype == rr2.dtype, f"{rr1.dtype} != {rr2.dtype}" + else: + self.assertEqual(rr1, rr2) + def tearDown(self) -> None: """Clear the TF session.""" if not self.skip_tf: diff --git a/source/tests/consistent/descriptor/common.py b/source/tests/consistent/descriptor/common.py index 74fc3d9b07..e0ca30c799 100644 --- a/source/tests/consistent/descriptor/common.py +++ b/source/tests/consistent/descriptor/common.py @@ -3,6 +3,8 @@ Any, ) +import numpy as np + from deepmd.common import ( make_default_mesh, ) @@ -12,6 +14,8 @@ ) from ..common import ( + INSTALLED_ARRAY_API_STRICT, + INSTALLED_JAX, INSTALLED_PT, INSTALLED_TF, ) @@ -29,6 +33,12 @@ GLOBAL_TF_FLOAT_PRECISION, tf, ) +if INSTALLED_JAX: + from deepmd.jax.env import ( + jnp, + ) +if INSTALLED_ARRAY_API_STRICT: + import array_api_strict class DescriptorTest: @@ -99,3 +109,56 @@ def eval_pt_descriptor( x.detach().cpu().numpy() if torch.is_tensor(x) else x for x in pt_obj(ext_coords, ext_atype, nlist=nlist, mapping=mapping) ] + + def eval_jax_descriptor( + self, jax_obj: Any, natoms, coords, atype, box, mixed_types: bool = False + ) -> Any: + ext_coords, ext_atype, mapping = extend_coord_with_ghosts( + jnp.array(coords).reshape(1, -1, 3), + jnp.array(atype).reshape(1, -1), + jnp.array(box).reshape(1, 3, 3), + jax_obj.get_rcut(), + ) + nlist = build_neighbor_list( + ext_coords, + ext_atype, + natoms[0], + jax_obj.get_rcut(), + jax_obj.get_sel(), + distinguish_types=(not mixed_types), + ) + return [ + np.asarray(x) if isinstance(x, jnp.ndarray) else x + for x in jax_obj(ext_coords, ext_atype, nlist=nlist, mapping=mapping) + ] + + def eval_array_api_strict_descriptor( + self, + array_api_strict_obj: Any, + natoms, + coords, + atype, + box, + mixed_types: bool = False, + ) -> Any: + array_api_strict.set_array_api_strict_flags(api_version="2023.12") + ext_coords, ext_atype, mapping = extend_coord_with_ghosts( + array_api_strict.asarray(coords.reshape(1, -1, 3)), + array_api_strict.asarray(atype.reshape(1, -1)), + array_api_strict.asarray(box.reshape(1, 3, 3)), + array_api_strict_obj.get_rcut(), + ) + nlist = build_neighbor_list( + ext_coords, + ext_atype, + natoms[0], + array_api_strict_obj.get_rcut(), + array_api_strict_obj.get_sel(), + distinguish_types=(not mixed_types), + ) + return [ + np.asarray(x) if hasattr(x, "__array_namespace__") else x + for x in array_api_strict_obj( + ext_coords, ext_atype, nlist=nlist, mapping=mapping + ) + ] diff --git a/source/tests/consistent/descriptor/test_dpa1.py b/source/tests/consistent/descriptor/test_dpa1.py index 59d7369753..ed7884adb9 100644 --- a/source/tests/consistent/descriptor/test_dpa1.py +++ b/source/tests/consistent/descriptor/test_dpa1.py @@ -16,6 +16,8 @@ ) from ..common import ( + INSTALLED_ARRAY_API_STRICT, + INSTALLED_JAX, INSTALLED_PT, INSTALLED_TF, CommonTest, @@ -33,6 +35,14 @@ from deepmd.tf.descriptor.se_atten import DescrptDPA1Compat as DescrptDPA1TF else: DescrptDPA1TF = None +if INSTALLED_JAX: + from deepmd.jax.descriptor.dpa1 import DescrptDPA1 as DescriptorDPA1JAX +else: + DescriptorDPA1JAX = None +if INSTALLED_ARRAY_API_STRICT: + from ...array_api_strict.descriptor.dpa1 import DescrptDPA1 as DescriptorDPA1Strict +else: + DescriptorDPA1Strict = None from deepmd.utils.argcheck import ( descrpt_se_atten_args, ) @@ -183,6 +193,69 @@ def skip_dp(self) -> bool: temperature, ) + @property + def skip_jax(self) -> bool: + ( + tebd_dim, + tebd_input_mode, + resnet_dt, + type_one_side, + attn, + attn_layer, + attn_dotr, + excluded_types, + 
env_protection, + set_davg_zero, + scaling_factor, + normalize, + temperature, + ln_eps, + smooth_type_embedding, + concat_output_tebd, + precision, + use_econf_tebd, + use_tebd_bias, + ) = self.param + return not INSTALLED_JAX or self.is_meaningless_zero_attention_layer_tests( + attn_layer, + attn_dotr, + normalize, + temperature, + ) + + @property + def skip_array_api_strict(self) -> bool: + ( + tebd_dim, + tebd_input_mode, + resnet_dt, + type_one_side, + attn, + attn_layer, + attn_dotr, + excluded_types, + env_protection, + set_davg_zero, + scaling_factor, + normalize, + temperature, + ln_eps, + smooth_type_embedding, + concat_output_tebd, + precision, + use_econf_tebd, + use_tebd_bias, + ) = self.param + return ( + not INSTALLED_ARRAY_API_STRICT + or self.is_meaningless_zero_attention_layer_tests( + attn_layer, + attn_dotr, + normalize, + temperature, + ) + ) + @property def skip_tf(self) -> bool: ( @@ -226,6 +299,9 @@ def skip_tf(self) -> bool: tf_class = DescrptDPA1TF dp_class = DescrptDPA1DP pt_class = DescrptDPA1PT + jax_class = DescriptorDPA1JAX + array_api_strict_class = DescriptorDPA1Strict + args = descrpt_se_atten_args().append(Argument("ntypes", int, optional=False)) def setUp(self): @@ -313,6 +389,26 @@ def eval_pt(self, pt_obj: Any) -> Any: mixed_types=True, ) + def eval_jax(self, jax_obj: Any) -> Any: + return self.eval_jax_descriptor( + jax_obj, + self.natoms, + self.coords, + self.atype, + self.box, + mixed_types=True, + ) + + def eval_array_api_strict(self, array_api_strict_obj: Any) -> Any: + return self.eval_array_api_strict_descriptor( + array_api_strict_obj, + self.natoms, + self.coords, + self.atype, + self.box, + mixed_types=True, + ) + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: return (ret[0],) diff --git a/source/tests/consistent/test_type_embedding.py b/source/tests/consistent/test_type_embedding.py index 1464517581..e2836c7a6c 100644 --- a/source/tests/consistent/test_type_embedding.py +++ b/source/tests/consistent/test_type_embedding.py @@ -12,6 +12,7 @@ ) from .common import ( + INSTALLED_ARRAY_API_STRICT, INSTALLED_JAX, INSTALLED_PT, INSTALLED_TF, @@ -37,6 +38,10 @@ from deepmd.jax.utils.type_embed import TypeEmbedNet as TypeEmbedNetJAX else: TypeEmbedNetJAX = object +if INSTALLED_ARRAY_API_STRICT: + from ..array_api_strict.utils.type_embed import TypeEmbedNet as TypeEmbedNetStrict +else: + TypeEmbedNetStrict = None @parameterized( @@ -71,8 +76,10 @@ def data(self) -> dict: dp_class = TypeEmbedNetDP pt_class = TypeEmbedNetPT jax_class = TypeEmbedNetJAX + array_api_strict_class = TypeEmbedNetStrict args = type_embedding_args() skip_jax = not INSTALLED_JAX + skip_array_api_strict = not INSTALLED_ARRAY_API_STRICT @property def addtional_data(self) -> dict: @@ -120,6 +127,12 @@ def eval_jax(self, jax_obj: Any) -> Any: raise ValueError("Output is numpy array") return [np.array(x) if isinstance(x, jnp.ndarray) else x for x in (out,)] + def eval_array_api_strict(self, array_api_strict_obj: Any) -> Any: + out = array_api_strict_obj() + return [ + np.asarray(x) if hasattr(x, "__array_namespace__") else x for x in (out,) + ] + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: return (ret[0],) From 61f1681b48d2dc8f5a892ac782e5d48369ff7bd0 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Fri, 11 Oct 2024 10:24:13 +0800 Subject: [PATCH 22/39] Feat (pt): Expose Linear Ener Model (#4194) ## Summary by CodeRabbit - **New Features** - Introduced two new JSON configuration files for 
linear energy calculations in water simulations. - Launched the `LinearEnergyModel` class for advanced energy and force calculations. - Added a parameter for customizable model weighting in the linear energy model. - Expanded test suite with new test classes for validating linear energy models. - Added new model configurations and test classes to enhance testing capabilities. - **Bug Fixes** - Corrected input handling in the deserialization method for version compatibility. - Adjusted numerical values in data files for accurate testing. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../model/atomic_model/linear_atomic_model.py | 57 +++++- deepmd/pt/model/model/__init__.py | 62 +++++++ deepmd/pt/model/model/dp_linear_model.py | 166 ++++++++++++++++++ doc/model/linear.md | 4 +- examples/water/d3/dftd3.txt | 2 +- examples/water/d3/input_pt.json | 96 ++++++++++ examples/water/linear/input_pt.json | 124 +++++++++++++ examples/water/zbl/input.json | 2 +- source/tests/common/test_examples.py | 2 + source/tests/pt/model/test_permutation.py | 1 + .../universal/common/cases/model/model.py | 19 ++ source/tests/universal/pt/model/test_model.py | 99 +++++++++++ 12 files changed, 622 insertions(+), 12 deletions(-) create mode 100644 deepmd/pt/model/model/dp_linear_model.py create mode 100644 examples/water/d3/input_pt.json create mode 100644 examples/water/linear/input_pt.json diff --git a/deepmd/pt/model/atomic_model/linear_atomic_model.py b/deepmd/pt/model/atomic_model/linear_atomic_model.py index d88c4c3af5..8d27fbcac4 100644 --- a/deepmd/pt/model/atomic_model/linear_atomic_model.py +++ b/deepmd/pt/model/atomic_model/linear_atomic_model.py @@ -48,12 +48,15 @@ class LinearEnergyAtomicModel(BaseAtomicModel): type_map : list[str] Mapping atom type to the name (str) of the type. For example `type_map[1]` gives the name of the type 1. + weights : Optional[Union[str,list[float]]] + Weights of the models. If str, must be `sum` or `mean`. If list, must be a list of float. """ def __init__( self, models: list[BaseAtomicModel], type_map: list[str], + weights: Optional[Union[str, list[float]]] = "mean", **kwargs, ): super().__init__(type_map, **kwargs) @@ -89,6 +92,16 @@ def __init__( ) self.nsels = torch.tensor(self.get_model_nsels(), device=env.DEVICE) # pylint: disable=no-explicit-dtype + if isinstance(weights, str): + assert weights in ["sum", "mean"] + elif isinstance(weights, list): + assert len(weights) == len(models) + else: + raise ValueError( + f"'weights' must be a string ('sum' or 'mean') or a list of float of length {len(models)}." + ) + self.weights = weights + def mixed_types(self) -> bool: """If true, the model 1. 
assumes total number of atoms aligned across frames; @@ -320,7 +333,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "LinearEnergyAtomicModel": data = copy.deepcopy(data) - check_version_compatibility(data.get("@version", 2), 2, 1) + check_version_compatibility(data.pop("@version", 2), 2, 1) data.pop("@class", None) data.pop("type", None) models = [ @@ -331,16 +344,42 @@ def deserialize(cls, data: dict) -> "LinearEnergyAtomicModel": return super().deserialize(data) def _compute_weight( - self, extended_coord, extended_atype, nlists_ + self, + extended_coord: torch.Tensor, + extended_atype: torch.Tensor, + nlists_: list[torch.Tensor], ) -> list[torch.Tensor]: """This should be a list of user defined weights that matches the number of models to be combined.""" nmodels = len(self.models) nframes, nloc, _ = nlists_[0].shape - return [ - torch.ones((nframes, nloc, 1), dtype=torch.float64, device=env.DEVICE) - / nmodels - for _ in range(nmodels) - ] + if isinstance(self.weights, str): + if self.weights == "sum": + return [ + torch.ones( + (nframes, nloc, 1), dtype=torch.float64, device=env.DEVICE + ) + for _ in range(nmodels) + ] + elif self.weights == "mean": + return [ + torch.ones( + (nframes, nloc, 1), dtype=torch.float64, device=env.DEVICE + ) + / nmodels + for _ in range(nmodels) + ] + else: + raise ValueError( + "`weights` must be 'sum' or 'mean' when provided as a string." + ) + elif isinstance(self.weights, list): + return [ + torch.ones((nframes, nloc, 1), dtype=torch.float64, device=env.DEVICE) + * w + for w in self.weights + ] + else: + raise NotImplementedError def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this atomic model.""" @@ -365,7 +404,9 @@ def get_sel_type(self) -> list[int]: return torch.unique( torch.cat( [ - torch.as_tensor(model.get_sel_type(), dtype=torch.int32) + torch.as_tensor( + model.get_sel_type(), dtype=torch.int64, device=env.DEVICE + ) for model in self.models ] ) diff --git a/deepmd/pt/model/model/__init__.py b/deepmd/pt/model/model/__init__.py index 1c81d42013..26aefa6201 100644 --- a/deepmd/pt/model/model/__init__.py +++ b/deepmd/pt/model/model/__init__.py @@ -36,6 +36,9 @@ from .dos_model import ( DOSModel, ) +from .dp_linear_model import ( + LinearEnergyModel, +) from .dp_model import ( DPModelCommon, ) @@ -105,6 +108,62 @@ def get_spin_model(model_params): return SpinEnergyModel(backbone_model=backbone_model, spin=spin) +def get_linear_model(model_params): + model_params = copy.deepcopy(model_params) + weights = model_params.get("weights", "mean") + list_of_models = [] + ntypes = len(model_params["type_map"]) + for sub_model_params in model_params["models"]: + if "descriptor" in sub_model_params: + # descriptor + sub_model_params["descriptor"]["ntypes"] = ntypes + sub_model_params["descriptor"]["type_map"] = copy.deepcopy( + model_params["type_map"] + ) + descriptor = BaseDescriptor(**sub_model_params["descriptor"]) + # fitting + fitting_net = sub_model_params.get("fitting_net", {}) + fitting_net["type"] = fitting_net.get("type", "ener") + fitting_net["ntypes"] = descriptor.get_ntypes() + fitting_net["type_map"] = copy.deepcopy(model_params["type_map"]) + fitting_net["mixed_types"] = descriptor.mixed_types() + if fitting_net["type"] in ["dipole", "polar"]: + fitting_net["embedding_width"] = descriptor.get_dim_emb() + fitting_net["dim_descrpt"] = descriptor.get_dim_out() + grad_force = "direct" not in fitting_net["type"] + if not grad_force: + fitting_net["out_dim"] = 
descriptor.get_dim_emb() + if "ener" in fitting_net["type"]: + fitting_net["return_energy"] = True + fitting = BaseFitting(**fitting_net) + list_of_models.append( + DPAtomicModel(descriptor, fitting, type_map=model_params["type_map"]) + ) + + else: # must be pairtab + assert ( + "type" in sub_model_params and sub_model_params["type"] == "pairtab" + ), "Sub-models in LinearEnergyModel must be a DPModel or a PairTable Model" + list_of_models.append( + PairTabAtomicModel( + sub_model_params["tab_file"], + sub_model_params["rcut"], + sub_model_params["sel"], + type_map=model_params["type_map"], + ) + ) + + atom_exclude_types = model_params.get("atom_exclude_types", []) + pair_exclude_types = model_params.get("pair_exclude_types", []) + return LinearEnergyModel( + models=list_of_models, + type_map=model_params["type_map"], + weights=weights, + atom_exclude_types=atom_exclude_types, + pair_exclude_types=pair_exclude_types, + ) + + def get_zbl_model(model_params): model_params = copy.deepcopy(model_params) ntypes = len(model_params["type_map"]) @@ -247,6 +306,8 @@ def get_model(model_params): return get_zbl_model(model_params) else: return get_standard_model(model_params) + elif model_type == "linear_ener": + return get_linear_model(model_params) else: return BaseModel.get_class_by_type(model_type).get_model(model_params) @@ -265,4 +326,5 @@ def get_model(model_params): "DPZBLModel", "make_model", "make_hessian_model", + "LinearEnergyModel", ] diff --git a/deepmd/pt/model/model/dp_linear_model.py b/deepmd/pt/model/model/dp_linear_model.py new file mode 100644 index 0000000000..ef2e84bd19 --- /dev/null +++ b/deepmd/pt/model/model/dp_linear_model.py @@ -0,0 +1,166 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from copy import ( + deepcopy, +) +from typing import ( + Optional, +) + +import torch + +from deepmd.pt.model.atomic_model import ( + LinearEnergyAtomicModel, +) +from deepmd.pt.model.model.model import ( + BaseModel, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) + +from .dp_model import ( + DPModelCommon, +) +from .make_model import ( + make_model, +) + +DPLinearModel_ = make_model(LinearEnergyAtomicModel) + + +@BaseModel.register("linear_ener") +class LinearEnergyModel(DPLinearModel_): + model_type = "ener" + + def __init__( + self, + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + + def translated_output_def(self): + out_def_data = self.model_output_def().get_data() + output_def = { + "atom_energy": deepcopy(out_def_data["energy"]), + "energy": deepcopy(out_def_data["energy_redu"]), + } + if self.do_grad_r("energy"): + output_def["force"] = deepcopy(out_def_data["energy_derv_r"]) + output_def["force"].squeeze(-2) + if self.do_grad_c("energy"): + output_def["virial"] = deepcopy(out_def_data["energy_derv_c_redu"]) + output_def["virial"].squeeze(-2) + output_def["atom_virial"] = deepcopy(out_def_data["energy_derv_c"]) + output_def["atom_virial"].squeeze(-3) + if "mask" in out_def_data: + output_def["mask"] = deepcopy(out_def_data["mask"]) + return output_def + + def forward( + self, + coord, + atype, + box: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ) -> dict[str, torch.Tensor]: + model_ret = self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + if 
self.do_grad_r("energy"):
+            model_predict["force"] = model_ret["energy_derv_r"].squeeze(-2)
+        if self.do_grad_c("energy"):
+            model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2)
+            if do_atomic_virial:
+                model_predict["atom_virial"] = model_ret["energy_derv_c"].squeeze(-3)
+        else:
+            model_predict["force"] = model_ret["dforce"]
+        if "mask" in model_ret:
+            model_predict["mask"] = model_ret["mask"]
+        return model_predict
+
+    @torch.jit.export
+    def forward_lower(
+        self,
+        extended_coord,
+        extended_atype,
+        nlist,
+        mapping: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ):
+        model_ret = self.forward_common_lower(
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping=mapping,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+            extra_nlist_sort=self.need_sorted_nlist_for_lower(),
+        )
+
+        model_predict = {}
+        model_predict["atom_energy"] = model_ret["energy"]
+        model_predict["energy"] = model_ret["energy_redu"]
+        if self.do_grad_r("energy"):
+            model_predict["extended_force"] = model_ret["energy_derv_r"].squeeze(-2)
+        if self.do_grad_c("energy"):
+            model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2)
+            if do_atomic_virial:
+                model_predict["extended_virial"] = model_ret["energy_derv_c"].squeeze(
+                    -3
+                )
+        else:
+            assert model_ret["dforce"] is not None
+            model_predict["dforce"] = model_ret["dforce"]
+        return model_predict
+
+    @classmethod
+    def update_sel(
+        cls,
+        train_data: DeepmdDataSystem,
+        type_map: Optional[list[str]],
+        local_jdata: dict,
+    ) -> tuple[dict, Optional[float]]:
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        train_data : DeepmdDataSystem
+            data used to do neighbor statistics
+        type_map : list[str], optional
+            The name of each type of atoms
+        local_jdata : dict
+            The local data referring to the current class
+
+        Returns
+        -------
+        dict
+            The updated local data
+        float
+            The minimum distance between two atoms
+        """
+        local_jdata_cpy = local_jdata.copy()
+        type_map = local_jdata_cpy["type_map"]
+        min_nbor_dist = None
+        for idx, sub_model in enumerate(local_jdata_cpy["models"]):
+            if "tab_file" not in sub_model:
+                sub_model, temp_min = DPModelCommon.update_sel(
+                    train_data, type_map, local_jdata["models"][idx]
+                )
+                if min_nbor_dist is None or temp_min <= min_nbor_dist:
+                    min_nbor_dist = temp_min
+        return local_jdata_cpy, min_nbor_dist
diff --git a/doc/model/linear.md b/doc/model/linear.md
index 3891559d90..47fdd1750b 100644
--- a/doc/model/linear.md
+++ b/doc/model/linear.md
@@ -1,7 +1,7 @@
-## Linear model {{ tensorflow_icon }}
+## Linear model {{ tensorflow_icon }} {{ pytorch_icon }}
 
 :::{note}
-**Supported backends**: TensorFlow {{ tensorflow_icon }}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}
 :::
 
 One can linearly combine existing models with arbitrary coefficients:
diff --git a/examples/water/d3/dftd3.txt b/examples/water/d3/dftd3.txt
index bbc9726134..09e5fb697a 100644
--- a/examples/water/d3/dftd3.txt
+++ b/examples/water/d3/dftd3.txt
@@ -97,4 +97,4 @@
 9.700000000000001066e+00 -1.186747936398473687e-05 -7.637113677130612127e-06 -5.528293849956352819e-06
 9.800000000000000711e+00 -1.114523618469756001e-05 -7.174288601187318493e-06 -5.194401230658985063e-06
 9.900000000000000355e+00 -1.047381249252528874e-05 -6.743886368019750717e-06 -4.883815978498405921e-06
-1.000000000000000000e+01 0.000000000000000e00e+00 0.000000000000000e00e+00 
0.000000000000000e00e+00 +1.000000000000000000e+01 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 diff --git a/examples/water/d3/input_pt.json b/examples/water/d3/input_pt.json new file mode 100644 index 0000000000..c2d9304a7e --- /dev/null +++ b/examples/water/d3/input_pt.json @@ -0,0 +1,96 @@ +{ + "_comment1": " model parameters", + "model": { + "type": "linear_ener", + "weights": "sum", + "type_map": [ + "O", + "H" + ], + "models": [ + { + "descriptor": { + "type": "se_atten", + "sel": [ + 46, + 92 + ], + "rcut_smth": 0.50, + "rcut": 6.00, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "type_one_side": true, + "precision": "float64", + "seed": 1, + "_comment2": " that's all" + }, + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "precision": "float64", + "seed": 1, + "_comment3": " that's all" + }, + "_comment4": " that's all" + }, + { + "type": "pairtab", + "tab_file": "dftd3.txt", + "rcut": 10.0, + "sel": 534 + } + ] + }, + "learning_rate": { + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.001, + "stop_lr": 3.51e-8, + "_comment5": "that's all" + }, + "loss": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment6": " that's all" + }, + "training": { + "training_data": { + "systems": [ + "../data/data_0/", + "../data/data_1/", + "../data/data_2/" + ], + "batch_size": "auto", + "_comment7": "that's all" + }, + "validation_data": { + "systems": [ + "../data/data_3" + ], + "batch_size": 1, + "numb_btch": 3, + "_comment8": "that's all" + }, + "numb_steps": 1000000, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 1000, + "_comment9": "that's all" + }, + "_comment10": "that's all" +} diff --git a/examples/water/linear/input_pt.json b/examples/water/linear/input_pt.json new file mode 100644 index 0000000000..e8d8e07136 --- /dev/null +++ b/examples/water/linear/input_pt.json @@ -0,0 +1,124 @@ +{ + "_comment1": " model parameters", + "model": { + "type": "linear_ener", + "weights": "sum", + "type_map": [ + "O", + "H" + ], + "models": [ + { + "descriptor": { + "type": "se_atten", + "sel": [ + 46, + 92 + ], + "rcut_smth": 0.50, + "rcut": 6.00, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "type_one_side": true, + "precision": "float64", + "seed": 1, + "_comment2": " that's all" + }, + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "precision": "float64", + "seed": 1, + "_comment3": " that's all" + }, + "_comment4": " that's all" + }, + { + "descriptor": { + "type": "se_atten", + "sel": [ + 46, + 92 + ], + "rcut_smth": 0.50, + "rcut": 6.00, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "type_one_side": true, + "precision": "float64", + "seed": 1, + "_comment2": " that's all" + }, + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "precision": "float64", + "seed": 1, + "_comment3": " that's all" + }, + "_comment4": " that's all" + } + ] + }, + "learning_rate": { + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.001, + "stop_lr": 3.51e-8, + "_comment5": "that's all" + }, + "loss": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment6": " that's all" + }, + "training": { + "training_data": { + "systems": [ + 
"../data/data_0/", + "../data/data_1/", + "../data/data_2/" + ], + "batch_size": "auto", + "_comment7": "that's all" + }, + "validation_data": { + "systems": [ + "../data/data_3" + ], + "batch_size": 1, + "numb_btch": 3, + "_comment8": "that's all" + }, + "numb_steps": 1000000, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 1000, + "_comment9": "that's all" + }, + "_comment10": "that's all" +} diff --git a/examples/water/zbl/input.json b/examples/water/zbl/input.json index cb5602d92d..54586ca0cf 100644 --- a/examples/water/zbl/input.json +++ b/examples/water/zbl/input.json @@ -10,7 +10,7 @@ "H" ], "descriptor": { - "type": "se_e2_a", + "type": "se_atten_v2", "sel": [ 46, 92 diff --git a/source/tests/common/test_examples.py b/source/tests/common/test_examples.py index 6abb482824..246e767f01 100644 --- a/source/tests/common/test_examples.py +++ b/source/tests/common/test_examples.py @@ -34,7 +34,9 @@ p_examples / "water" / "hybrid" / "input.json", p_examples / "water" / "dplr" / "train" / "dw.json", p_examples / "water" / "dplr" / "train" / "ener.json", + p_examples / "water" / "d3" / "input_pt.json", p_examples / "water" / "linear" / "input.json", + p_examples / "water" / "linear" / "input_pt.json", p_examples / "nopbc" / "train" / "input.json", p_examples / "water_tensor" / "dipole" / "dipole_input.json", p_examples / "water_tensor" / "polar" / "polar_input.json", diff --git a/source/tests/pt/model/test_permutation.py b/source/tests/pt/model/test_permutation.py index 6aec895041..2d391c7115 100644 --- a/source/tests/pt/model/test_permutation.py +++ b/source/tests/pt/model/test_permutation.py @@ -98,6 +98,7 @@ "data_stat_nbatch": 20, } + model_spin = { "type_map": ["O", "H", "B"], "descriptor": { diff --git a/source/tests/universal/common/cases/model/model.py b/source/tests/universal/common/cases/model/model.py index c31f5cd889..cee69d9d6c 100644 --- a/source/tests/universal/common/cases/model/model.py +++ b/source/tests/universal/common/cases/model/model.py @@ -28,6 +28,25 @@ def setUpClass(cls) -> None: cls.epsilon_dict = {} +class LinearEnerModelTest(ModelTestCase): + @classmethod + def setUpClass(cls) -> None: + cls.expected_rcut = 5.0 + cls.expected_type_map = ["O", "H"] + cls.expected_dim_fparam = 0 + cls.expected_dim_aparam = 0 + cls.expected_sel_type = [0, 1] + cls.expected_aparam_nall = False + cls.expected_model_output_type = ["energy", "mask"] + cls.model_output_equivariant = [] + cls.expected_sel = [46, 92] + cls.expected_sel_mix = sum(cls.expected_sel) + cls.expected_has_message_passing = False + cls.aprec_dict = {} + cls.rprec_dict = {} + cls.epsilon_dict = {} + + class DipoleModelTest(ModelTestCase): @classmethod def setUpClass(cls) -> None: diff --git a/source/tests/universal/pt/model/test_model.py b/source/tests/universal/pt/model/test_model.py index 41df0cf762..81c32eb94c 100644 --- a/source/tests/universal/pt/model/test_model.py +++ b/source/tests/universal/pt/model/test_model.py @@ -21,6 +21,7 @@ DOSModel, DPZBLModel, EnergyModel, + LinearEnergyModel, PolarModel, PropertyModel, SpinEnergyModel, @@ -43,6 +44,7 @@ DipoleModelTest, DosModelTest, EnerModelTest, + LinearEnerModelTest, PolarModelTest, PropertyModelTest, SpinEnerModelTest, @@ -803,3 +805,100 @@ def setUpClass(cls): cls.expected_sel_type = ft.get_sel_type() cls.expected_dim_fparam = ft.get_dim_fparam() cls.expected_dim_aparam = ft.get_dim_aparam() + + +@parameterized( + des_parameterized=( + ( + *[(param_func, DescrptDPA1) for param_func in DescriptorParamDPA1List], + *[(param_func, 
DescrptDPA2) for param_func in DescriptorParamDPA2List], + (DescriptorParamHybridMixed, DescrptHybrid), + (DescriptorParamHybridMixedTTebd, DescrptHybrid), + ), # descrpt_class_param & class + ((FittingParamEnergy, EnergyFittingNet),), # fitting_class_param & class + ), + fit_parameterized=( + ( + (DescriptorParamDPA1, DescrptDPA1), + (DescriptorParamDPA2, DescrptDPA2), + ), # descrpt_class_param & class + ( + *[(param_func, EnergyFittingNet) for param_func in FittingParamEnergyList], + ), # fitting_class_param & class + ), +) +class TestLinearEnergyModelPT(unittest.TestCase, LinearEnerModelTest, PTTestCase): + @property + def modules_to_test(self): + skip_test_jit = getattr(self, "skip_test_jit", False) + modules = PTTestCase.modules_to_test.fget(self) + if not skip_test_jit: + # for Model, we can test script module API + modules += [ + self._script_module + if hasattr(self, "_script_module") + else self.script_module + ] + return modules + + @classmethod + def setUpClass(cls): + LinearEnerModelTest.setUpClass() + (DescriptorParam, Descrpt) = cls.param[0] + (FittingParam, Fitting) = cls.param[1] + # set special precision + cls.aprec_dict["test_smooth"] = 1e-5 + cls.input_dict_ds = DescriptorParam( + len(cls.expected_type_map), + cls.expected_rcut, + cls.expected_rcut / 2, + cls.expected_sel, + cls.expected_type_map, + ) + + # set skip tests + skiptest, skip_reason = skip_model_tests(cls) + if skiptest: + raise cls.skipTest(cls, skip_reason) + + ds1, ds2 = Descrpt(**cls.input_dict_ds), Descrpt(**cls.input_dict_ds) + cls.input_dict_ft = FittingParam( + ntypes=len(cls.expected_type_map), + dim_descrpt=ds1.get_dim_out(), + mixed_types=ds1.mixed_types(), + type_map=cls.expected_type_map, + ) + ft1 = Fitting( + **cls.input_dict_ft, + ) + ft2 = Fitting( + **cls.input_dict_ft, + ) + dp_model1 = DPAtomicModel( + ds1, + ft1, + type_map=cls.expected_type_map, + ) + dp_model2 = DPAtomicModel( + ds2, + ft2, + type_map=cls.expected_type_map, + ) + cls.module = LinearEnergyModel( + [dp_model1, dp_model2], + type_map=cls.expected_type_map, + ) + # only test jit API once for different models + if ( + DescriptorParam not in defalut_des_param + or FittingParam not in defalut_fit_param + ): + cls.skip_test_jit = True + else: + with torch.jit.optimized_execution(False): + cls._script_module = torch.jit.script(cls.module) + cls.output_def = cls.module.translated_output_def() + cls.expected_has_message_passing = ds1.has_message_passing() + cls.expected_dim_fparam = ft1.get_dim_fparam() + cls.expected_dim_aparam = ft1.get_dim_aparam() + cls.expected_sel_type = ft1.get_sel_type() From 2ca1c06c6e24f2742ac6984f7036fddbde617a93 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 10 Oct 2024 22:25:40 -0400 Subject: [PATCH 23/39] chore: cache `deserialized_module` and `script_module` (#4196) ## Summary by CodeRabbit - **New Features** - Enhanced performance of module serialization and deserialization by converting instance properties to class methods for improved access and efficiency. - **Bug Fixes** - Resolved issues related to instance-level access, now allowing direct class-level method access for better functionality. - **Refactor** - Updated property signatures to utilize class methods for caching results, optimizing performance and resource management. 
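For readers skimming the diff: the essence of the change is the pattern sketched below. The expensive conversion moves from a per-test property into a `classmethod` cached with `lru_cache`, and the cache is cleared in `tearDownClass` so the cached module does not outlive its test class. This is a condensed, self-contained sketch of that pattern, not the full diff:

```python
from functools import lru_cache


class BackendTestCase:
    module: object  # concrete test classes assign the module to test

    @classmethod
    @lru_cache(maxsize=1)
    def _get_deserialized_module(cls):
        # evaluated once per test class instead of once per test method
        return cls.module.deserialize(cls.module.serialize())

    @property
    def deserialized_module(self):
        # fall back to the uncached path when `module` is set per instance
        if hasattr(self.__class__, "module"):
            return self._get_deserialized_module()
        return self.module.deserialize(self.module.serialize())

    @classmethod
    def tearDownClass(cls):
        # clearing the cache releases the (possibly large) cached module
        cls._get_deserialized_module.cache_clear()
```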
--------- Signed-off-by: Jinzhe Zeng --- source/tests/universal/dpmodel/backend.py | 18 +++++++++++++++ source/tests/universal/pt/backend.py | 27 +++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/source/tests/universal/dpmodel/backend.py b/source/tests/universal/dpmodel/backend.py index 99170c20e1..4f624ae501 100644 --- a/source/tests/universal/dpmodel/backend.py +++ b/source/tests/universal/dpmodel/backend.py @@ -1,4 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from functools import ( + lru_cache, +) + import numpy as np from deepmd.dpmodel.common import ( @@ -30,8 +34,15 @@ def convert_to_numpy(cls, xx: np.ndarray) -> np.ndarray: def convert_from_numpy(cls, xx: np.ndarray) -> np.ndarray: return xx + @classmethod + @lru_cache(maxsize=1) + def _get_deserialized_module(cls): + return cls.module.deserialize(cls.module.serialize()) + @property def deserialized_module(self): + if hasattr(self.__class__, "module"): + return self._get_deserialized_module() return self.module.deserialize(self.module.serialize()) @property @@ -41,3 +52,10 @@ def modules_to_test(self): self.deserialized_module, ] return modules + + @classmethod + def tearDownClass(cls): + super().tearDownClass() + if hasattr(cls, "module"): + del cls.module + cls._get_deserialized_module.cache_clear() diff --git a/source/tests/universal/pt/backend.py b/source/tests/universal/pt/backend.py index 951bf18262..5146fdc79b 100644 --- a/source/tests/universal/pt/backend.py +++ b/source/tests/universal/pt/backend.py @@ -1,4 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from functools import ( + lru_cache, +) + import numpy as np import torch @@ -18,13 +22,28 @@ class PTTestCase(BackendTestCase): module: "torch.nn.Module" """PT module to test.""" + @classmethod + @lru_cache(maxsize=1) + def _get_script_module(cls): + with torch.jit.optimized_execution(False): + return torch.jit.script(cls.module) + @property def script_module(self): + if hasattr(self.__class__, "module"): + return self._get_script_module() with torch.jit.optimized_execution(False): return torch.jit.script(self.module) + @classmethod + @lru_cache(maxsize=1) + def _get_deserialized_module(cls): + return cls.module.deserialize(cls.module.serialize()) + @property def deserialized_module(self): + if hasattr(self.__class__, "module"): + return self._get_deserialized_module() return self.module.deserialize(self.module.serialize()) @property @@ -35,6 +54,14 @@ def modules_to_test(self): ] return modules + @classmethod + def tearDownClass(cls): + super().tearDownClass() + if hasattr(cls, "module"): + del cls.module + cls._get_deserialized_module.cache_clear() + cls._get_script_module.cache_clear() + def test_jit(self): if getattr(self, "skip_test_jit", False): self.skipTest("Skip test jit.") From 8174cf113625885ed6b58f17149e6a212175945a Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 10 Oct 2024 23:20:55 -0400 Subject: [PATCH 24/39] chore(ci): skip more tests on GPU CI (#4200) Also, only skip these GPU tests on the CI. When we test locally, it's expected to run the tests. ## Summary by CodeRabbit - **New Features** - Introduced a global variable `CI` to enhance test execution control based on the continuous integration environment. - **Bug Fixes** - Updated test skipping conditions across multiple test classes to ensure tests are only executed on CPU when the CI environment is active. - **Documentation** - Enhanced clarity on test conditions by including the `CI` variable in relevant test decorators. 
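Concretely, the skip condition combines a device flag with the new `CI` flag, roughly as in this sketch (the device-detection logic in `source/tests/utils.py` is elided here, and the test body is hypothetical):

```python
import os
import unittest

# GitHub Actions sets CI=true on its runners; it is normally unset locally,
# so the same GPU tests still run during local development
CI = os.environ.get("CI") == "true"
TEST_DEVICE = "cpu"  # or "cuda"; detected in source/tests/utils.py


class TestExample(unittest.TestCase):
    @unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.")
    def test_cpu_only_on_ci(self):
        # skipped on a GPU CI runner, executed everywhere else
        ...
```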
--------- Signed-off-by: Jinzhe Zeng --- source/tests/consistent/common.py | 10 ++++++++++ source/tests/universal/common/cases/model/utils.py | 13 +++++++------ .../dpmodel/atomc_model/test_atomic_model.py | 13 +++++++------ .../universal/dpmodel/descriptor/test_descriptor.py | 3 ++- .../tests/universal/dpmodel/fitting/test_fitting.py | 3 ++- source/tests/universal/dpmodel/model/test_model.py | 5 +++-- .../universal/dpmodel/utils/test_type_embed.py | 3 ++- source/tests/utils.py | 3 +++ 8 files changed, 36 insertions(+), 17 deletions(-) diff --git a/source/tests/consistent/common.py b/source/tests/consistent/common.py index 1070fe0f79..e3bf808978 100644 --- a/source/tests/consistent/common.py +++ b/source/tests/consistent/common.py @@ -3,6 +3,7 @@ import itertools import os import sys +import unittest from abc import ( ABC, abstractmethod, @@ -33,6 +34,11 @@ Backend, ) +from ..utils import ( + CI, + TEST_DEVICE, +) + INSTALLED_TF = Backend.get_backend("tensorflow")().is_available() INSTALLED_PT = Backend.get_backend("pytorch")().is_available() INSTALLED_JAX = Backend.get_backend("jax")().is_available() @@ -340,6 +346,7 @@ def test_tf_self_consistent(self): np.testing.assert_allclose(rr1, rr2, rtol=self.rtol, atol=self.atol) assert rr1.dtype == rr2.dtype, f"{rr1.dtype} != {rr2.dtype}" + @unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") def test_dp_consistent_with_ref(self): """Test whether DP and reference are consistent.""" if self.skip_dp: @@ -358,6 +365,7 @@ def test_dp_consistent_with_ref(self): np.testing.assert_allclose(rr1, rr2, rtol=self.rtol, atol=self.atol) assert rr1.dtype == rr2.dtype, f"{rr1.dtype} != {rr2.dtype}" + @unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") def test_dp_self_consistent(self): """Test whether DP is self consistent.""" if self.skip_dp: @@ -447,6 +455,7 @@ def test_jax_self_consistent(self): else: self.assertEqual(rr1, rr2) + @unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") def test_array_api_strict_consistent_with_ref(self): """Test whether array_api_strict and reference are consistent.""" if self.skip_array_api_strict: @@ -465,6 +474,7 @@ def test_array_api_strict_consistent_with_ref(self): np.testing.assert_allclose(rr1, rr2, rtol=self.rtol, atol=self.atol) assert rr1.dtype == rr2.dtype, f"{rr1.dtype} != {rr2.dtype}" + @unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") def test_array_api_strict_self_consistent(self): """Test whether array_api_strict is self consistent.""" if self.skip_array_api_strict: diff --git a/source/tests/universal/common/cases/model/utils.py b/source/tests/universal/common/cases/model/utils.py index d583d06b05..628c415eb2 100644 --- a/source/tests/universal/common/cases/model/utils.py +++ b/source/tests/universal/common/cases/model/utils.py @@ -22,6 +22,7 @@ GLOBAL_SEED, ) from .....utils import ( + CI, TEST_DEVICE, ) @@ -327,7 +328,7 @@ def test_zero_forward(self): continue np.testing.assert_allclose(rr1, rr2, atol=aprec) - @unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") + @unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") def test_permutation(self): """Test permutation.""" if getattr(self, "skip_test_permutation", False): @@ -413,7 +414,7 @@ def test_permutation(self): else: raise RuntimeError(f"Unknown output key: {kk}") - @unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") + @unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") def test_trans(self): """Test translation.""" if getattr(self, "skip_test_trans", 
False): @@ -482,7 +483,7 @@ def test_trans(self): else: raise RuntimeError(f"Unknown output key: {kk}") - @unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") + @unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") def test_rot(self): """Test rotation.""" if getattr(self, "skip_test_rot", False): @@ -672,7 +673,7 @@ def test_rot(self): else: raise RuntimeError(f"Unknown output key: {kk}") - @unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") + @unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") def test_smooth(self): """Test smooth.""" if getattr(self, "skip_test_smooth", False): @@ -779,7 +780,7 @@ def test_smooth(self): else: raise RuntimeError(f"Unknown output key: {kk}") - @unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") + @unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") def test_autodiff(self): """Test autodiff.""" if getattr(self, "skip_test_autodiff", False): @@ -919,7 +920,7 @@ def ff_cell(bb): # not support virial by far pass - @unittest.skipIf(TEST_DEVICE == "cpu", "Skip test on CPU.") + @unittest.skipIf(TEST_DEVICE == "cpu" and CI, "Skip test on CPU.") def test_device_consistence(self): """Test forward consistency between devices.""" test_spin = getattr(self, "test_spin", False) diff --git a/source/tests/universal/dpmodel/atomc_model/test_atomic_model.py b/source/tests/universal/dpmodel/atomc_model/test_atomic_model.py index 4c5a2b291b..8e7324e2bc 100644 --- a/source/tests/universal/dpmodel/atomc_model/test_atomic_model.py +++ b/source/tests/universal/dpmodel/atomc_model/test_atomic_model.py @@ -26,6 +26,7 @@ parameterized, ) from ....utils import ( + CI, TEST_DEVICE, ) from ...common.cases.atomic_model.atomic_model import ( @@ -98,7 +99,7 @@ ), # fitting_class_param & class ), ) -@unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") +@unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") class TestEnergyAtomicModelDP(unittest.TestCase, EnerAtomicModelTest, DPTestCase): @classmethod def setUpClass(cls): @@ -165,7 +166,7 @@ def setUpClass(cls): ), # fitting_class_param & class ), ) -@unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") +@unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") class TestDosAtomicModelDP(unittest.TestCase, DosAtomicModelTest, DPTestCase): @classmethod def setUpClass(cls): @@ -227,7 +228,7 @@ def setUpClass(cls): ), # fitting_class_param & class ), ) -@unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") +@unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") class TestDipoleAtomicModelDP(unittest.TestCase, DipoleAtomicModelTest, DPTestCase): @classmethod def setUpClass(cls): @@ -290,7 +291,7 @@ def setUpClass(cls): ), # fitting_class_param & class ), ) -@unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") +@unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") class TestPolarAtomicModelDP(unittest.TestCase, PolarAtomicModelTest, DPTestCase): @classmethod def setUpClass(cls): @@ -351,7 +352,7 @@ def setUpClass(cls): ), # fitting_class_param & class ), ) -@unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") +@unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") class TestZBLAtomicModelDP(unittest.TestCase, ZBLAtomicModelTest, DPTestCase): @classmethod def setUpClass(cls): @@ -429,7 +430,7 @@ def setUpClass(cls): ), # fitting_class_param & class ), ) -@unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") +@unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") class 
TestPropertyAtomicModelDP(unittest.TestCase, PropertyAtomicModelTest, DPTestCase): @classmethod def setUpClass(cls): diff --git a/source/tests/universal/dpmodel/descriptor/test_descriptor.py b/source/tests/universal/dpmodel/descriptor/test_descriptor.py index 256bea74f8..fc7ee8b075 100644 --- a/source/tests/universal/dpmodel/descriptor/test_descriptor.py +++ b/source/tests/universal/dpmodel/descriptor/test_descriptor.py @@ -26,6 +26,7 @@ GLOBAL_SEED, ) from ....utils import ( + CI, TEST_DEVICE, ) from ...common.cases.descriptor.descriptor import ( @@ -519,7 +520,7 @@ def DescriptorParamHybridMixedTTebd(ntypes, rcut, rcut_smth, sel, type_map, **kw (DescriptorParamHybridMixedTTebd, DescrptHybrid), ) # class_param & class ) -@unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") +@unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") class TestDescriptorDP(unittest.TestCase, DescriptorTest, DPTestCase): def setUp(self): DescriptorTest.setUp(self) diff --git a/source/tests/universal/dpmodel/fitting/test_fitting.py b/source/tests/universal/dpmodel/fitting/test_fitting.py index 393bab1707..f64faee76f 100644 --- a/source/tests/universal/dpmodel/fitting/test_fitting.py +++ b/source/tests/universal/dpmodel/fitting/test_fitting.py @@ -20,6 +20,7 @@ GLOBAL_SEED, ) from ....utils import ( + CI, TEST_DEVICE, ) from ...common.cases.fitting.fitting import ( @@ -236,7 +237,7 @@ def FittingParamProperty( ), # class_param & class (True, False), # mixed_types ) -@unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") +@unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") class TestFittingDP(unittest.TestCase, FittingTest, DPTestCase): def setUp(self): ((FittingParam, Fitting), self.mixed_types) = self.param diff --git a/source/tests/universal/dpmodel/model/test_model.py b/source/tests/universal/dpmodel/model/test_model.py index 66edc2d50e..265dc43c6c 100644 --- a/source/tests/universal/dpmodel/model/test_model.py +++ b/source/tests/universal/dpmodel/model/test_model.py @@ -25,6 +25,7 @@ parameterized, ) from ....utils import ( + CI, TEST_DEVICE, ) from ...common.cases.model.model import ( @@ -112,7 +113,7 @@ def skip_model_tests(test_obj): ), # fitting_class_param & class ), ) -@unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") +@unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") class TestEnergyModelDP(unittest.TestCase, EnerModelTest, DPTestCase): @classmethod def setUpClass(cls): @@ -200,7 +201,7 @@ def setUpClass(cls): ), # fitting_class_param & class ), ) -@unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") +@unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") class TestSpinEnergyModelDP(unittest.TestCase, SpinEnerModelTest, DPTestCase): @classmethod def setUpClass(cls): diff --git a/source/tests/universal/dpmodel/utils/test_type_embed.py b/source/tests/universal/dpmodel/utils/test_type_embed.py index 67faef0a8d..ee3063af7d 100644 --- a/source/tests/universal/dpmodel/utils/test_type_embed.py +++ b/source/tests/universal/dpmodel/utils/test_type_embed.py @@ -6,6 +6,7 @@ ) from ....utils import ( + CI, TEST_DEVICE, ) from ...common.cases.utils.type_embed import ( @@ -16,7 +17,7 @@ ) -@unittest.skipIf(TEST_DEVICE != "cpu", "Only test on CPU.") +@unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") class TestTypeEmbd(unittest.TestCase, TypeEmbdTest, DPTestCase): def setUp(self): TypeEmbdTest.setUp(self) diff --git a/source/tests/utils.py b/source/tests/utils.py index 694f55186e..bfb3d445af 100644 --- 
a/source/tests/utils.py +++ b/source/tests/utils.py @@ -5,3 +5,6 @@ TEST_DEVICE = "cpu" else: TEST_DEVICE = "cuda" + +# see https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables +CI = os.environ.get("CI") == "true" From c10bc3c7bcf91c7d12b080df3a39181ffaf5bd93 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 13 Oct 2024 00:58:53 -0400 Subject: [PATCH 25/39] chore(tf): filter TF deprecation warnings (#4199) Fix #2367. Fix #3039. These warnings are not true - these deprecated APIs have existed for several years and never been removed. ## Summary by CodeRabbit - **New Features** - Enhanced logging capabilities for TensorFlow warnings. - Introduced a new filter to manage specific warning messages from the TensorFlow logger. - **Bug Fixes** - Improved the configuration sequence for the TensorFlow logger to ensure proper functionality. Signed-off-by: Jinzhe Zeng --- deepmd/tf/env.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/deepmd/tf/env.py b/deepmd/tf/env.py index 03f36fb675..5a66498dba 100644 --- a/deepmd/tf/env.py +++ b/deepmd/tf/env.py @@ -2,6 +2,7 @@ """Module that sets tensorflow working environment and exports inportant constants.""" import ctypes +import logging import os import platform from importlib import ( @@ -75,17 +76,27 @@ def dlopen_library(module: str, filename: str): dlopen_library("nvidia.cusparse.lib", "libcusparse.so*") dlopen_library("nvidia.cudnn.lib", "libcudnn.so*") + +FILTER_MSGS = [ + "is deprecated and will be removed in a future version.", + "disable_mixed_precision_graph_rewrite() called when mixed precision is already disabled.", +] + + +class TFWarningFilter(logging.Filter): + def filter(self, record): + return not any(msg in record.getMessage().strip() for msg in FILTER_MSGS) + + # keras 3 is incompatible with tf.compat.v1 # https://keras.io/getting_started/#tensorflow--keras-2-backwards-compatibility # 2024/04/24: deepmd.tf doesn't import tf.keras any more # import tensorflow v1 compatability -try: - import tensorflow.compat.v1 as tf +import tensorflow.compat.v1 as tf - tf.disable_v2_behavior() -except ImportError: - import tensorflow as tf +tf.get_logger().addFilter(TFWarningFilter()) +tf.disable_v2_behavior() try: import tensorflow.compat.v2 as tfv2 except ImportError: From 8279ccaaf4fa94d2919865128de57f942c30562e Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 13 Oct 2024 01:02:54 -0400 Subject: [PATCH 26/39] feat(jax/array-api): energy fitting (#4204) ## Summary by CodeRabbit ## Release Notes - **New Features** - Introduced a fitting module for energy models using JAX, enhancing compatibility with different array backends. - Added `AtomExcludeMask` class for improved attribute handling in exclusion masks. - **Improvements** - Updated serialization and array handling methods for better integration with array APIs. - Enhanced testing capabilities for energy fitting with support for different backends. - **Documentation** - Added SPDX license identifier to relevant files for licensing clarity. 
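As context for the release notes above: the heart of the backend-agnostic change in the diff below is that `general_fitting.py` now resolves its array namespace from the inputs via `array_api_compat.array_namespace` instead of calling `numpy` directly. A minimal, self-contained sketch of that dispatch pattern — the helper name and shapes here are illustrative, not deepmd APIs:

```python
import array_api_compat
import numpy as np


def normalize_fparam(fparam, avg, inv_std, nloc):
    # Hypothetical helper mirroring the (fparam - avg) * inv_std + tile logic
    # from _call_common, written against the array API standard so the same
    # code runs unchanged on NumPy, JAX, or array-api-strict arrays.
    xp = array_api_compat.array_namespace(fparam)
    nf = fparam.shape[0]
    out = (fparam - avg) * inv_std
    return xp.tile(xp.reshape(out, (nf, 1, -1)), (1, nloc, 1))


print(normalize_fparam(np.ones((2, 3)), 0.5, 2.0, nloc=4).shape)  # (2, 4, 3)
```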
Signed-off-by: Jinzhe Zeng --- deepmd/dpmodel/fitting/general_fitting.py | 48 +++++++------ deepmd/dpmodel/utils/exclude_mask.py | 8 ++- deepmd/jax/fitting/__init__.py | 1 + deepmd/jax/fitting/fitting.py | 39 +++++++++++ deepmd/jax/utils/exclude_mask.py | 9 +++ .../array_api_strict/fitting/__init__.py | 1 + .../tests/array_api_strict/fitting/fitting.py | 38 +++++++++++ .../array_api_strict/utils/exclude_mask.py | 8 +++ source/tests/consistent/fitting/test_ener.py | 67 +++++++++++++++++++ 9 files changed, 197 insertions(+), 22 deletions(-) create mode 100644 deepmd/jax/fitting/__init__.py create mode 100644 deepmd/jax/fitting/fitting.py create mode 100644 source/tests/array_api_strict/fitting/__init__.py create mode 100644 source/tests/array_api_strict/fitting/fitting.py diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py index a587f69449..fd80ccb4aa 100644 --- a/deepmd/dpmodel/fitting/general_fitting.py +++ b/deepmd/dpmodel/fitting/general_fitting.py @@ -9,12 +9,16 @@ Union, ) +import array_api_compat import numpy as np from deepmd.dpmodel import ( DEFAULT_PRECISION, NativeOP, ) +from deepmd.dpmodel.common import ( + to_numpy_array, +) from deepmd.dpmodel.utils import ( AtomExcludeMask, FittingNet, @@ -283,11 +287,11 @@ def serialize(self) -> dict: "exclude_types": self.exclude_types, "nets": self.nets.serialize(), "@variables": { - "bias_atom_e": self.bias_atom_e, - "fparam_avg": self.fparam_avg, - "fparam_inv_std": self.fparam_inv_std, - "aparam_avg": self.aparam_avg, - "aparam_inv_std": self.aparam_inv_std, + "bias_atom_e": to_numpy_array(self.bias_atom_e), + "fparam_avg": to_numpy_array(self.fparam_avg), + "fparam_inv_std": to_numpy_array(self.fparam_inv_std), + "aparam_avg": to_numpy_array(self.aparam_avg), + "aparam_inv_std": to_numpy_array(self.aparam_inv_std), }, "type_map": self.type_map, # not supported @@ -344,6 +348,7 @@ def _call_common( The atomic parameter. shape: nf x nloc x nap. nap being `numb_aparam` """ + xp = array_api_compat.array_namespace(descriptor, atype) nf, nloc, nd = descriptor.shape net_dim_out = self._net_out_dim() # check input dim @@ -359,7 +364,7 @@ def _call_common( # we consider it as always zero for convenience. # Needs a compute_input_stats for vaccum passed from the # descriptor. 
- xx_zeros = np.zeros_like(xx) + xx_zeros = xp.zeros_like(xx) else: xx_zeros = None # check fparam dim, concate to input descriptor @@ -371,13 +376,15 @@ def _call_common( "which is not consistent with {self.numb_fparam}.", ) fparam = (fparam - self.fparam_avg) * self.fparam_inv_std - fparam = np.tile(fparam.reshape([nf, 1, self.numb_fparam]), [1, nloc, 1]) - xx = np.concatenate( + fparam = xp.tile( + xp.reshape(fparam, [nf, 1, self.numb_fparam]), (1, nloc, 1) + ) + xx = xp.concat( [xx, fparam], axis=-1, ) if xx_zeros is not None: - xx_zeros = np.concatenate( + xx_zeros = xp.concat( [xx_zeros, fparam], axis=-1, ) @@ -389,24 +396,24 @@ def _call_common( "get an input aparam of dim {aparam.shape[-1]}, ", "which is not consistent with {self.numb_aparam}.", ) - aparam = aparam.reshape([nf, nloc, self.numb_aparam]) + aparam = xp.reshape(aparam, [nf, nloc, self.numb_aparam]) aparam = (aparam - self.aparam_avg) * self.aparam_inv_std - xx = np.concatenate( + xx = xp.concat( [xx, aparam], axis=-1, ) if xx_zeros is not None: - xx_zeros = np.concatenate( + xx_zeros = xp.concat( [xx_zeros, aparam], axis=-1, ) # calcualte the prediction if not self.mixed_types: - outs = np.zeros([nf, nloc, net_dim_out]) # pylint: disable=no-explicit-dtype + outs = xp.zeros([nf, nloc, net_dim_out]) # pylint: disable=no-explicit-dtype for type_i in range(self.ntypes): - mask = np.tile( - (atype == type_i).reshape([nf, nloc, 1]), [1, 1, net_dim_out] + mask = xp.tile( + xp.reshape((atype == type_i), [nf, nloc, 1]), (1, 1, net_dim_out) ) atom_property = self.nets[(type_i,)](xx) if self.remove_vaccum_contribution is not None and not ( @@ -415,15 +422,18 @@ def _call_common( ): assert xx_zeros is not None atom_property -= self.nets[(type_i,)](xx_zeros) - atom_property = atom_property + self.bias_atom_e[type_i] - atom_property = atom_property * mask + atom_property = atom_property + self.bias_atom_e[type_i, ...] 
+ atom_property = atom_property * xp.astype(mask, atom_property.dtype) outs = outs + atom_property # Shape is [nframes, natoms[0], 1] else: - outs = self.nets[()](xx) + self.bias_atom_e[atype] + outs = self.nets[()](xx) + xp.reshape( + xp.take(self.bias_atom_e, xp.reshape(atype, [-1]), axis=0), + [nf, nloc, net_dim_out], + ) if xx_zeros is not None: outs -= self.nets[()](xx_zeros) # nf x nloc exclude_mask = self.emask.build_type_exclude_mask(atype) # nf x nloc x nod - outs = outs * exclude_mask[:, :, None] + outs = outs * xp.astype(exclude_mask[:, :, None], outs.dtype) return {self.var_name: outs} diff --git a/deepmd/dpmodel/utils/exclude_mask.py b/deepmd/dpmodel/utils/exclude_mask.py index 5469e66d97..b09a9b3e47 100644 --- a/deepmd/dpmodel/utils/exclude_mask.py +++ b/deepmd/dpmodel/utils/exclude_mask.py @@ -18,12 +18,12 @@ def __init__( ): self.ntypes = ntypes self.exclude_types = exclude_types - self.type_mask = np.array( + type_mask = np.array( [1 if tt_i not in self.exclude_types else 0 for tt_i in range(ntypes)], dtype=np.int32, ) # (ntypes) - self.type_mask = self.type_mask.reshape([-1]) + self.type_mask = type_mask.reshape([-1]) def get_exclude_types(self): return self.exclude_types @@ -52,7 +52,9 @@ def build_type_exclude_mask( """ xp = array_api_compat.array_namespace(atype) nf, natom = atype.shape - return xp.reshape(self.type_mask[atype], (nf, natom)) + return xp.reshape( + xp.take(self.type_mask, xp.reshape(atype, [-1]), axis=0), (nf, natom) + ) class PairExcludeMask: diff --git a/deepmd/jax/fitting/__init__.py b/deepmd/jax/fitting/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/deepmd/jax/fitting/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/jax/fitting/fitting.py b/deepmd/jax/fitting/fitting.py new file mode 100644 index 0000000000..27ad791db9 --- /dev/null +++ b/deepmd/jax/fitting/fitting.py @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Any, +) + +from deepmd.dpmodel.fitting.ener_fitting import EnergyFittingNet as EnergyFittingNetDP +from deepmd.jax.common import ( + flax_module, + to_jax_array, +) +from deepmd.jax.utils.exclude_mask import ( + AtomExcludeMask, +) +from deepmd.jax.utils.network import ( + NetworkCollection, +) + + +def setattr_for_general_fitting(name: str, value: Any) -> Any: + if name in { + "bias_atom_e", + "fparam_avg", + "fparam_inv_std", + "aparam_avg", + "aparam_inv_std", + }: + value = to_jax_array(value) + elif name == "emask": + value = AtomExcludeMask(value.ntypes, value.exclude_types) + elif name == "nets": + value = NetworkCollection.deserialize(value.serialize()) + return value + + +@flax_module +class EnergyFittingNet(EnergyFittingNetDP): + def __setattr__(self, name: str, value: Any) -> None: + value = setattr_for_general_fitting(name, value) + return super().__setattr__(name, value) diff --git a/deepmd/jax/utils/exclude_mask.py b/deepmd/jax/utils/exclude_mask.py index cac4cee092..a6cf210f94 100644 --- a/deepmd/jax/utils/exclude_mask.py +++ b/deepmd/jax/utils/exclude_mask.py @@ -3,6 +3,7 @@ Any, ) +from deepmd.dpmodel.utils.exclude_mask import AtomExcludeMask as AtomExcludeMaskDP from deepmd.dpmodel.utils.exclude_mask import PairExcludeMask as PairExcludeMaskDP from deepmd.jax.common import ( flax_module, @@ -10,6 +11,14 @@ ) +@flax_module +class AtomExcludeMask(AtomExcludeMaskDP): + def __setattr__(self, name: str, value: Any) -> None: + if name in {"type_mask"}: + value = to_jax_array(value) + return 
super().__setattr__(name, value) + + @flax_module class PairExcludeMask(PairExcludeMaskDP): def __setattr__(self, name: str, value: Any) -> None: diff --git a/source/tests/array_api_strict/fitting/__init__.py b/source/tests/array_api_strict/fitting/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/source/tests/array_api_strict/fitting/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/source/tests/array_api_strict/fitting/fitting.py b/source/tests/array_api_strict/fitting/fitting.py new file mode 100644 index 0000000000..2e6bd9fe25 --- /dev/null +++ b/source/tests/array_api_strict/fitting/fitting.py @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Any, +) + +from deepmd.dpmodel.fitting.ener_fitting import EnergyFittingNet as EnergyFittingNetDP + +from ..common import ( + to_array_api_strict_array, +) +from ..utils.exclude_mask import ( + AtomExcludeMask, +) +from ..utils.network import ( + NetworkCollection, +) + + +def setattr_for_general_fitting(name: str, value: Any) -> Any: + if name in { + "bias_atom_e", + "fparam_avg", + "fparam_inv_std", + "aparam_avg", + "aparam_inv_std", + }: + value = to_array_api_strict_array(value) + elif name == "emask": + value = AtomExcludeMask(value.ntypes, value.exclude_types) + elif name == "nets": + value = NetworkCollection.deserialize(value.serialize()) + return value + + +class EnergyFittingNet(EnergyFittingNetDP): + def __setattr__(self, name: str, value: Any) -> None: + value = setattr_for_general_fitting(name, value) + return super().__setattr__(name, value) diff --git a/source/tests/array_api_strict/utils/exclude_mask.py b/source/tests/array_api_strict/utils/exclude_mask.py index 06f2e94b52..7f5c29e0a8 100644 --- a/source/tests/array_api_strict/utils/exclude_mask.py +++ b/source/tests/array_api_strict/utils/exclude_mask.py @@ -3,6 +3,7 @@ Any, ) +from deepmd.dpmodel.utils.exclude_mask import AtomExcludeMask as AtomExcludeMaskDP from deepmd.dpmodel.utils.exclude_mask import PairExcludeMask as PairExcludeMaskDP from ..common import ( @@ -10,6 +11,13 @@ ) +class AtomExcludeMask(AtomExcludeMaskDP): + def __setattr__(self, name: str, value: Any) -> None: + if name in {"type_mask"}: + value = to_array_api_strict_array(value) + return super().__setattr__(name, value) + + class PairExcludeMask(PairExcludeMaskDP): def __setattr__(self, name: str, value: Any) -> None: if name in {"type_mask"}: diff --git a/source/tests/consistent/fitting/test_ener.py b/source/tests/consistent/fitting/test_ener.py index ac4f7ae543..ba2be1d86b 100644 --- a/source/tests/consistent/fitting/test_ener.py +++ b/source/tests/consistent/fitting/test_ener.py @@ -12,6 +12,8 @@ ) from ..common import ( + INSTALLED_ARRAY_API_STRICT, + INSTALLED_JAX, INSTALLED_PT, INSTALLED_TF, CommonTest, @@ -36,6 +38,22 @@ fitting_ener, ) +if INSTALLED_JAX: + from deepmd.jax.env import ( + jnp, + ) + from deepmd.jax.fitting.fitting import EnergyFittingNet as EnerFittingJAX +else: + EnerFittingJAX = object +if INSTALLED_ARRAY_API_STRICT: + import array_api_strict + + from ...array_api_strict.fitting.fitting import ( + EnergyFittingNet as EnerFittingStrict, + ) +else: + EnerFittingStrict = None + @parameterized( (True, False), # resnet_dt @@ -74,9 +92,25 @@ def skip_pt(self) -> bool: ) = self.param return CommonTest.skip_pt + skip_jax = not INSTALLED_JAX + + @property + def skip_array_api_strict(self) -> bool: + ( + resnet_dt, + precision, + mixed_types, + numb_fparam, + atom_ener, + ) = self.param + # 
TypeError: The array_api_strict namespace does not support the dtype 'bfloat16' + return not INSTALLED_ARRAY_API_STRICT or precision == "bfloat16" + tf_class = EnerFittingTF dp_class = EnerFittingDP pt_class = EnerFittingPT + jax_class = EnerFittingJAX + array_api_strict_class = EnerFittingStrict args = fitting_ener() def setUp(self): @@ -157,6 +191,39 @@ def eval_dp(self, dp_obj: Any) -> Any: fparam=self.fparam if numb_fparam else None, )["energy"] + def eval_jax(self, jax_obj: Any) -> Any: + ( + resnet_dt, + precision, + mixed_types, + numb_fparam, + atom_ener, + ) = self.param + return np.asarray( + jax_obj( + jnp.asarray(self.inputs), + jnp.asarray(self.atype.reshape(1, -1)), + fparam=jnp.asarray(self.fparam) if numb_fparam else None, + )["energy"] + ) + + def eval_array_api_strict(self, array_api_strict_obj: Any) -> Any: + array_api_strict.set_array_api_strict_flags(api_version="2023.12") + ( + resnet_dt, + precision, + mixed_types, + numb_fparam, + atom_ener, + ) = self.param + return np.asarray( + array_api_strict_obj( + array_api_strict.asarray(self.inputs), + array_api_strict.asarray(self.atype.reshape(1, -1)), + fparam=array_api_strict.asarray(self.fparam) if numb_fparam else None, + )["energy"] + ) + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: if backend == self.RefBackend.TF: # shape is not same From a1f867217e3d06a9f5921cd5b2b76e42649b0882 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Mon, 14 Oct 2024 15:44:02 +0800 Subject: [PATCH 27/39] Chore: refactor get standard model (#4205) ## Summary by CodeRabbit - **Refactor** - Simplified model component creation by introducing a new function for better code clarity and reusability. - Updated model-building functions to utilize the new component creation logic, enhancing maintainability. 
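The refactor summarized above is a classic "extract function" move: three builders (`get_linear_model`, `get_zbl_model`, `get_standard_model`) previously duplicated the descriptor/fitting construction block, and the diff below hoists it into `_get_standard_model_components`, which also returns the fitting type so callers no longer re-read the mutated dict. A toy, self-contained sketch of the shape of the change — stand-in dicts, not the real deepmd classes:

```python
def build_components(params: dict, ntypes: int) -> tuple[dict, dict, str]:
    # Shared construction logic, previously copy-pasted into each builder.
    descriptor = {**params["descriptor"], "ntypes": ntypes}
    fitting = {**params.get("fitting_net", {})}
    fitting.setdefault("type", "ener")
    return descriptor, fitting, fitting["type"]


def build_standard_model(params: dict) -> dict:
    descriptor, fitting, kind = build_components(params, len(params["type_map"]))
    return {"descriptor": descriptor, "fitting": fitting, "kind": kind}


print(build_standard_model({"type_map": ["O", "H"], "descriptor": {"rcut": 6.0}}))
```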
--------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- deepmd/pt/model/model/__init__.py | 96 +++++++++++-------------------- 1 file changed, 35 insertions(+), 61 deletions(-) diff --git a/deepmd/pt/model/model/__init__.py b/deepmd/pt/model/model/__init__.py index 26aefa6201..613baf440e 100644 --- a/deepmd/pt/model/model/__init__.py +++ b/deepmd/pt/model/model/__init__.py @@ -72,6 +72,29 @@ ) +def _get_standard_model_components(model_params, ntypes): + # descriptor + model_params["descriptor"]["ntypes"] = ntypes + model_params["descriptor"]["type_map"] = copy.deepcopy(model_params["type_map"]) + descriptor = BaseDescriptor(**model_params["descriptor"]) + # fitting + fitting_net = model_params.get("fitting_net", {}) + fitting_net["type"] = fitting_net.get("type", "ener") + fitting_net["ntypes"] = descriptor.get_ntypes() + fitting_net["type_map"] = copy.deepcopy(model_params["type_map"]) + fitting_net["mixed_types"] = descriptor.mixed_types() + if fitting_net["type"] in ["dipole", "polar"]: + fitting_net["embedding_width"] = descriptor.get_dim_emb() + fitting_net["dim_descrpt"] = descriptor.get_dim_out() + grad_force = "direct" not in fitting_net["type"] + if not grad_force: + fitting_net["out_dim"] = descriptor.get_dim_emb() + if "ener" in fitting_net["type"]: + fitting_net["return_energy"] = True + fitting = BaseFitting(**fitting_net) + return descriptor, fitting, fitting_net["type"] + + def get_spin_model(model_params): model_params = copy.deepcopy(model_params) if not model_params["spin"]["use_spin"] or isinstance( @@ -117,25 +140,9 @@ def get_linear_model(model_params): if "descriptor" in sub_model_params: # descriptor sub_model_params["descriptor"]["ntypes"] = ntypes - sub_model_params["descriptor"]["type_map"] = copy.deepcopy( - model_params["type_map"] + descriptor, fitting, _ = _get_standard_model_components( + sub_model_params, ntypes ) - descriptor = BaseDescriptor(**sub_model_params["descriptor"]) - # fitting - fitting_net = sub_model_params.get("fitting_net", {}) - fitting_net["type"] = fitting_net.get("type", "ener") - fitting_net["ntypes"] = descriptor.get_ntypes() - fitting_net["type_map"] = copy.deepcopy(model_params["type_map"]) - fitting_net["mixed_types"] = descriptor.mixed_types() - if fitting_net["type"] in ["dipole", "polar"]: - fitting_net["embedding_width"] = descriptor.get_dim_emb() - fitting_net["dim_descrpt"] = descriptor.get_dim_out() - grad_force = "direct" not in fitting_net["type"] - if not grad_force: - fitting_net["out_dim"] = descriptor.get_dim_emb() - if "ener" in fitting_net["type"]: - fitting_net["return_energy"] = True - fitting = BaseFitting(**fitting_net) list_of_models.append( DPAtomicModel(descriptor, fitting, type_map=model_params["type_map"]) ) @@ -167,24 +174,7 @@ def get_linear_model(model_params): def get_zbl_model(model_params): model_params = copy.deepcopy(model_params) ntypes = len(model_params["type_map"]) - # descriptor - model_params["descriptor"]["ntypes"] = ntypes - model_params["descriptor"]["type_map"] = copy.deepcopy(model_params["type_map"]) - descriptor = BaseDescriptor(**model_params["descriptor"]) - # fitting - fitting_net = model_params.get("fitting_net", None) - fitting_net["type"] = fitting_net.get("type", "ener") - fitting_net["ntypes"] = descriptor.get_ntypes() - fitting_net["type_map"] = copy.deepcopy(model_params["type_map"]) - fitting_net["mixed_types"] = descriptor.mixed_types() - fitting_net["embedding_width"] = descriptor.get_dim_out() - fitting_net["dim_descrpt"] = 
descriptor.get_dim_out() - grad_force = "direct" not in fitting_net["type"] - if not grad_force: - fitting_net["out_dim"] = descriptor.get_dim_emb() - if "ener" in fitting_net["type"]: - fitting_net["return_energy"] = True - fitting = BaseFitting(**fitting_net) + descriptor, fitting, _ = _get_standard_model_components(model_params, ntypes) dp_model = DPAtomicModel(descriptor, fitting, type_map=model_params["type_map"]) # pairtab filepath = model_params["use_srtab"] @@ -246,25 +236,9 @@ def get_standard_model(model_params): model_params_old = model_params model_params = copy.deepcopy(model_params) ntypes = len(model_params["type_map"]) - # descriptor - model_params["descriptor"]["ntypes"] = ntypes - model_params["descriptor"]["type_map"] = copy.deepcopy(model_params["type_map"]) - descriptor = BaseDescriptor(**model_params["descriptor"]) - # fitting - fitting_net = model_params.get("fitting_net", {}) - fitting_net["type"] = fitting_net.get("type", "ener") - fitting_net["ntypes"] = descriptor.get_ntypes() - fitting_net["type_map"] = copy.deepcopy(model_params["type_map"]) - fitting_net["mixed_types"] = descriptor.mixed_types() - if fitting_net["type"] in ["dipole", "polar"]: - fitting_net["embedding_width"] = descriptor.get_dim_emb() - fitting_net["dim_descrpt"] = descriptor.get_dim_out() - grad_force = "direct" not in fitting_net["type"] - if not grad_force: - fitting_net["out_dim"] = descriptor.get_dim_emb() - if "ener" in fitting_net["type"]: - fitting_net["return_energy"] = True - fitting = BaseFitting(**fitting_net) + descriptor, fitting, fitting_net_type = _get_standard_model_components( + model_params, ntypes + ) atom_exclude_types = model_params.get("atom_exclude_types", []) pair_exclude_types = model_params.get("pair_exclude_types", []) preset_out_bias = model_params.get("preset_out_bias") @@ -272,18 +246,18 @@ def get_standard_model(model_params): preset_out_bias, model_params["type_map"] ) - if fitting_net["type"] == "dipole": + if fitting_net_type == "dipole": modelcls = DipoleModel - elif fitting_net["type"] == "polar": + elif fitting_net_type == "polar": modelcls = PolarModel - elif fitting_net["type"] == "dos": + elif fitting_net_type == "dos": modelcls = DOSModel - elif fitting_net["type"] in ["ener", "direct_force_ener"]: + elif fitting_net_type in ["ener", "direct_force_ener"]: modelcls = EnergyModel - elif fitting_net["type"] == "property": + elif fitting_net_type == "property": modelcls = PropertyModel else: - raise RuntimeError(f"Unknown fitting type: {fitting_net['type']}") + raise RuntimeError(f"Unknown fitting type: {fitting_net_type}") model = modelcls( descriptor=descriptor, From 6fe8dde1b6649b2a12d46a868da31aad69b8012c Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 14 Oct 2024 20:54:22 -0400 Subject: [PATCH 28/39] ci: pin ubuntu to 22.04 (#4213) It seems that GitHub starts to point ubuntu-latest to ubuntu-24.04 (xref: https://github.com/actions/runner-images/issues/10636), which brings some breaking changes. For example, cuda 11.8 doesn't support the default compiler in ubuntu-24.04. ## Summary by CodeRabbit - **New Features** - Updated build, test, and analysis workflows to run on Ubuntu 22.04, enhancing compatibility and performance for C++ projects. - **Bug Fixes** - Corrected indentation in the permissions section of the CodeQL workflow. - **Chores** - Adjusted timeout settings for the CodeQL analysis job based on the programming language. 
--- .github/workflows/build_cc.yml | 2 +- .github/workflows/codeql.yml | 2 +- .github/workflows/test_cc.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml index 775b88cfd3..a1ac032891 100644 --- a/.github/workflows/build_cc.yml +++ b/.github/workflows/build_cc.yml @@ -11,7 +11,7 @@ name: Build C++ jobs: buildcc: name: Build C++ - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: matrix: include: diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index c912ece8d5..583e7785d9 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -13,7 +13,7 @@ concurrency: jobs: analyze: name: Analyze - runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-22.04' }} timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} permissions: actions: read diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml index ebbfc4d960..768590980f 100644 --- a/.github/workflows/test_cc.yml +++ b/.github/workflows/test_cc.yml @@ -11,7 +11,7 @@ name: Test C++ jobs: testcc: name: Test C++ - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: matrix: check_memleak: [true, false] From 48f8a1ef0815e471b39de4376edf9fae829e2565 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 15 Oct 2024 11:20:48 -0400 Subject: [PATCH 29/39] feat(pt): support `DeepEval.eval_descriptor` (#4214) Fix #4112. ## Summary by CodeRabbit - **New Features** - Introduced a method for evaluating descriptors using the deep potential model. - Added functionality to control descriptor evaluation during model operations. - **Bug Fixes** - Removed conditional skip for descriptor evaluation tests, enhancing test coverage for PyTorch models. - **Tests** - Added a new test class for neighbor list setups in descriptor evaluation. --------- Signed-off-by: Jinzhe Zeng --- deepmd/pt/infer/deep_eval.py | 55 +++++++++++++++++++ .../pt/model/atomic_model/dp_atomic_model.py | 15 +++++ deepmd/pt/model/model/dp_model.py | 12 ++++ source/tests/infer/test_models.py | 2 - 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py index 538dc65371..0a77a38135 100644 --- a/deepmd/pt/infer/deep_eval.py +++ b/deepmd/pt/infer/deep_eval.py @@ -598,3 +598,58 @@ def eval_typeebd(self) -> np.ndarray: def get_model_def_script(self) -> str: """Get model defination script.""" return self.model_def_script + + def eval_descriptor( + self, + coords: np.ndarray, + cells: Optional[np.ndarray], + atom_types: np.ndarray, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + **kwargs: Any, + ) -> np.ndarray: + """Evaluate descriptors by using this DP. + + Parameters + ---------- + coords + The coordinates of atoms. + The array should be of size nframes x natoms x 3 + cells + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. + The array should be of size nframes x 9 + atom_types + The atom types + The list should contain natoms ints + fparam + The frame parameter. + The array can be of size : + - nframes x dim_fparam. + - dim_fparam. Then all frames are assumed to be provided with the same fparam. + aparam + The atomic parameter + The array can be of size : + - nframes x natoms x dim_aparam. + - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam. + - dim_aparam. 
Then all frames and atoms are provided with the same aparam. + + Returns + ------- + descriptor + Descriptors. + """ + model = self.dp.model["Default"] + model.set_eval_descriptor_hook(True) + self.eval( + coords, + cells, + atom_types, + atomic=False, + fparam=fparam, + aparam=aparam, + **kwargs, + ) + descriptor = model.eval_descriptor() + model.set_eval_descriptor_hook(False) + return to_numpy_array(descriptor) diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index 936a1fead3..edb1253234 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -62,6 +62,19 @@ def __init__( self.sel = self.descriptor.get_sel() self.fitting_net = fitting super().init_out_stat() + self.enable_eval_descriptor_hook = False + self.eval_descriptor_list = [] + + eval_descriptor_list: list[torch.Tensor] + + def set_eval_descriptor_hook(self, enable: bool) -> None: + """Set the hook for evaluating descriptor and clear the cache for descriptor list.""" + self.enable_eval_descriptor_hook = enable + self.eval_descriptor_list = [] + + def eval_descriptor(self) -> torch.Tensor: + """Evaluate the descriptor.""" + return torch.concat(self.eval_descriptor_list) @torch.jit.export def fitting_output_def(self) -> FittingOutputDef: @@ -192,6 +205,8 @@ def forward_atomic( comm_dict=comm_dict, ) assert descriptor is not None + if self.enable_eval_descriptor_hook: + self.eval_descriptor_list.append(descriptor) # energy, force fit_ret = self.fitting_net( descriptor, diff --git a/deepmd/pt/model/model/dp_model.py b/deepmd/pt/model/model/dp_model.py index 8659526c49..bd278ed787 100644 --- a/deepmd/pt/model/model/dp_model.py +++ b/deepmd/pt/model/model/dp_model.py @@ -3,6 +3,8 @@ Optional, ) +import torch + from deepmd.pt.model.descriptor.base_descriptor import ( BaseDescriptor, ) @@ -52,3 +54,13 @@ def get_fitting_net(self): def get_descriptor(self): """Get the descriptor.""" return self.atomic_model.descriptor + + @torch.jit.export + def set_eval_descriptor_hook(self, enable: bool) -> None: + """Set the hook for evaluating descriptor and clear the cache for descriptor list.""" + self.atomic_model.set_eval_descriptor_hook(enable) + + @torch.jit.export + def eval_descriptor(self) -> torch.Tensor: + """Evaluate the descriptor.""" + return self.atomic_model.eval_descriptor() diff --git a/source/tests/infer/test_models.py b/source/tests/infer/test_models.py index 6b62e994aa..2b0f292046 100644 --- a/source/tests/infer/test_models.py +++ b/source/tests/infer/test_models.py @@ -153,8 +153,6 @@ def test_1frame_atm(self): def test_descriptor(self): _, extension = self.param - if extension == ".pth": - self.skipTest("eval_descriptor not supported for PyTorch models") for ii, result in enumerate(self.case.results): if result.descriptor is None: continue From 5c092e673e61611e3116696374ad51b22ec37357 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 15 Oct 2024 11:24:34 -0400 Subject: [PATCH 30/39] fix: fix average training time for restart (#4212) Fix #4208. ## Summary by CodeRabbit - **New Features** - Enhanced training step management and logging for improved tracking. - Updated average training time calculations for more accurate reporting. - Refined model and checkpoint saving logic based on new tracking metrics. - Improved logging clarity for learning rates and losses. - **Bug Fixes** - Resolved issues related to inaccurate training time and logging conditions. 
- **Chores** - General code cleanup for better readability and organization. Signed-off-by: Jinzhe Zeng --- deepmd/pt/train/training.py | 14 ++++++++------ deepmd/tf/train/trainer.py | 20 +++++++++++++++----- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 95c73bd83c..4d746e84c0 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -889,8 +889,9 @@ def log_loss_valid(_task_key="Default"): ) # the first training time is not accurate if ( - _step_id + 1 - ) > self.disp_freq or self.num_steps < 2 * self.disp_freq: + (_step_id + 1 - self.start_step) > self.disp_freq + or self.num_steps - self.start_step < 2 * self.disp_freq + ): self.total_train_time += train_time if fout: @@ -981,13 +982,14 @@ def log_loss_valid(_task_key="Default"): with open("checkpoint", "w") as f: f.write(str(self.latest_model)) - if self.timing_in_training and self.num_steps // self.disp_freq > 0: - if self.num_steps >= 2 * self.disp_freq: + elapsed_batch = self.num_steps - self.start_step + if self.timing_in_training and elapsed_batch // self.disp_freq > 0: + if self.start_step >= 2 * self.disp_freq: log.info( "average training time: %.4f s/batch (exclude first %d batches)", self.total_train_time / ( - self.num_steps // self.disp_freq * self.disp_freq + elapsed_batch // self.disp_freq * self.disp_freq - self.disp_freq ), self.disp_freq, @@ -996,7 +998,7 @@ def log_loss_valid(_task_key="Default"): log.info( "average training time: %.4f s/batch", self.total_train_time - / (self.num_steps // self.disp_freq * self.disp_freq), + / (elapsed_batch // self.disp_freq * self.disp_freq), ) if JIT: diff --git a/deepmd/tf/train/trainer.py b/deepmd/tf/train/trainer.py index 7f9aeb27d2..9f353f2e32 100644 --- a/deepmd/tf/train/trainer.py +++ b/deepmd/tf/train/trainer.py @@ -416,6 +416,8 @@ def train(self, train_data=None, valid_data=None): fp = open(self.disp_file, "a") cur_batch = run_sess(self.sess, self.global_step) + start_batch = cur_batch + elapsed_batch = stop_batch - start_batch is_first_step = True self.cur_batch = cur_batch log.info( @@ -552,7 +554,10 @@ def train(self, train_data=None, valid_data=None): ) ) # the first training time is not accurate - if cur_batch > self.disp_freq or stop_batch < 2 * self.disp_freq: + if ( + cur_batch - start_batch > self.disp_freq + or elapsed_batch < 2 * self.disp_freq + ): total_train_time += train_time train_time = 0 wall_time_tic = toc @@ -594,18 +599,23 @@ def train(self, train_data=None, valid_data=None): self.save_checkpoint(cur_batch) if self.run_opt.is_chief: fp.close() - if self.timing_in_training and stop_batch // self.disp_freq > 0: - if stop_batch >= 2 * self.disp_freq: + elapsed_batch = stop_batch - start_batch + if self.timing_in_training and elapsed_batch // self.disp_freq > 0: + if elapsed_batch >= 2 * self.disp_freq: log.info( "average training time: %.4f s/batch (exclude first %d batches)", total_train_time - / (stop_batch // self.disp_freq * self.disp_freq - self.disp_freq), + / ( + elapsed_batch // self.disp_freq * self.disp_freq + - self.disp_freq + ), self.disp_freq, ) else: log.info( "average training time: %.4f s/batch", - total_train_time / (stop_batch // self.disp_freq * self.disp_freq), + total_train_time + / (elapsed_batch // self.disp_freq * self.disp_freq), ) if self.profiling and self.run_opt.is_chief: From 16172e6cbdc881da02779da230d636d41e86c6e9 Mon Sep 17 00:00:00 2001 From: Lysithea <52808607+CaRoLZhangxy@users.noreply.github.com> Date: Tue, 15 Oct 2024 
23:25:30 +0800 Subject: [PATCH 31/39] fix(pt): keep mapping not none during lmp steps when nghost == 0 (#4209) enhancement on https://github.com/deepmodeling/deepmd-kit/pull/4144 ## Summary by CodeRabbit - **New Features** - Enhanced tensor mapping capabilities with the addition of a new `mapping_tensor` variable. - Updated `compute` method to handle ghost atoms and support improved tensor creation logic. - Overloaded `computew` methods to support both double and float types. - **Bug Fixes** - Improved error handling in the `translate_error` method for better exception management. --- source/api_cc/include/DeepPotPT.h | 1 + source/api_cc/src/DeepPotPT.cc | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/source/api_cc/include/DeepPotPT.h b/source/api_cc/include/DeepPotPT.h index 973c02c434..4144249367 100644 --- a/source/api_cc/include/DeepPotPT.h +++ b/source/api_cc/include/DeepPotPT.h @@ -338,6 +338,7 @@ class DeepPotPT : public DeepPotBase { int do_message_passing; // 1:dpa2 model 0:others bool gpu_enabled; at::Tensor firstneigh_tensor; + c10::optional mapping_tensor; torch::Dict comm_dict; /** * @brief Translate PyTorch exceptions to the DeePMD-kit exception. diff --git a/source/api_cc/src/DeepPotPT.cc b/source/api_cc/src/DeepPotPT.cc index c03576635a..84629042f4 100644 --- a/source/api_cc/src/DeepPotPT.cc +++ b/source/api_cc/src/DeepPotPT.cc @@ -164,7 +164,6 @@ void DeepPotPT::compute(ENERGYVTYPE& ener, std::vector atype_64(datype.begin(), datype.end()); at::Tensor atype_Tensor = torch::from_blob(atype_64.data(), {1, nall_real}, int_option).to(device); - c10::optional mapping_tensor; if (ago == 0) { nlist_data.copy_from_nlist(lmp_list); nlist_data.shuffle_exclude_empty(fwd_map); From cfb47310e1d070824347f48883222d3605907ee4 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 15 Oct 2024 23:39:16 -0400 Subject: [PATCH 32/39] feat(jax/array-api): se_e2_a (#4217) ## Summary by CodeRabbit - **New Features** - Introduced a new class `DescrptSeAArrayAPI` for enhanced array compatibility. - Added a new class `DescrptSeA` integrated with the Flax library for neural network modules. - Improved handling of atomic types and neighbor lists for better performance and clarity. - **Tests** - Enhanced test suite to support additional backends and configurations, including JAX and strict array API. - Added new evaluation methods for testing across different frameworks. 
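The JAX and array-api-strict wrappers in the diff below share one mechanism: subclass the pure-`dpmodel` implementation and intercept `__setattr__` so array-valued attributes (`davg`, `dstd`, the embedding networks, the exclude masks) are converted to the target backend's types at assignment time, leaving the numerics untouched. A toy, runnable sketch of that interception pattern — the `tolist()` conversion is a placeholder standing in for `to_jax_array`:

```python
import numpy as np


class DescriptorBase:
    def __init__(self, davg, dstd):
        self.davg = davg  # routed through the subclass __setattr__
        self.dstd = dstd


class DescriptorConverted(DescriptorBase):
    def __setattr__(self, name, value):
        if name in {"davg", "dstd"}:
            value = value.tolist()  # placeholder for to_jax_array(value)
        super().__setattr__(name, value)


d = DescriptorConverted(np.zeros((2, 3)), np.ones((2, 3)))
print(type(d.davg))  # <class 'list'>
```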
--------- Signed-off-by: Jinzhe Zeng Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- deepmd/dpmodel/descriptor/se_e2_a.py | 107 ++++++++++++++++-- deepmd/dpmodel/utils/nlist.py | 20 ++-- deepmd/jax/descriptor/se_e2_a.py | 33 ++++++ .../array_api_strict/descriptor/se_e2_a.py | 32 ++++++ .../consistent/descriptor/test_se_e2_a.py | 55 +++++++++ 5 files changed, 230 insertions(+), 17 deletions(-) create mode 100644 deepmd/jax/descriptor/se_e2_a.py create mode 100644 source/tests/array_api_strict/descriptor/se_e2_a.py diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py index 29577ef79e..d29ce8862e 100644 --- a/deepmd/dpmodel/descriptor/se_e2_a.py +++ b/deepmd/dpmodel/descriptor/se_e2_a.py @@ -7,6 +7,7 @@ Union, ) +import array_api_compat import numpy as np from deepmd.dpmodel import ( @@ -14,6 +15,9 @@ PRECISION_DICT, NativeOP, ) +from deepmd.dpmodel.common import ( + to_numpy_array, +) from deepmd.dpmodel.utils import ( EmbeddingNet, EnvMat, @@ -186,15 +190,15 @@ def __init__( self.reinit_exclude(exclude_types) in_dim = 1 # not considiering type embedding - self.embeddings = NetworkCollection( + embeddings = NetworkCollection( ntypes=self.ntypes, ndim=(1 if self.type_one_side else 2), network_type="embedding_network", ) for ii, embedding_idx in enumerate( - itertools.product(range(self.ntypes), repeat=self.embeddings.ndim) + itertools.product(range(self.ntypes), repeat=embeddings.ndim) ): - self.embeddings[embedding_idx] = EmbeddingNet( + embeddings[embedding_idx] = EmbeddingNet( in_dim, self.neuron, self.activation_function, @@ -202,8 +206,9 @@ def __init__( self.precision, seed=child_seed(seed, ii), ) + self.embeddings = embeddings self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) - self.nnei = np.sum(self.sel) + self.nnei = np.sum(self.sel).item() self.davg = np.zeros( [self.ntypes, self.nnei, 4], dtype=PRECISION_DICT[self.precision] ) @@ -211,6 +216,7 @@ def __init__( [self.ntypes, self.nnei, 4], dtype=PRECISION_DICT[self.precision] ) self.orig_sel = self.sel + self.sel_cumsum = [0, *np.cumsum(self.sel).tolist()] def __setitem__(self, key, value): if key in ("avg", "data_avg", "davg"): @@ -321,8 +327,9 @@ def cal_g( ss, embedding_idx, ): + xp = array_api_compat.array_namespace(ss) nf_times_nloc, nnei = ss.shape[0:2] - ss = ss.reshape(nf_times_nloc, nnei, 1) + ss = xp.reshape(ss, (nf_times_nloc, nnei, 1)) # (nf x nloc) x nnei x ng gg = self.embeddings[embedding_idx].call(ss) return gg @@ -444,8 +451,8 @@ def serialize(self) -> dict: "env_mat": self.env_mat.serialize(), "embeddings": self.embeddings.serialize(), "@variables": { - "davg": self.davg, - "dstd": self.dstd, + "davg": to_numpy_array(self.davg), + "dstd": to_numpy_array(self.dstd), }, "type_map": self.type_map, } @@ -497,3 +504,89 @@ def update_sel( train_data, type_map, local_jdata_cpy["rcut"], local_jdata_cpy["sel"], False ) return local_jdata_cpy, min_nbor_dist + + +class DescrptSeAArrayAPI(DescrptSeA): + def call( + self, + coord_ext, + atype_ext, + nlist, + mapping: Optional[np.ndarray] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. shape: nf x (nallx3) + atype_ext + The extended aotm types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping from extended to lcoal region. not used by this descriptor. + + Returns + ------- + descriptor + The descriptor. 
shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-partical representation. + this descriptor returns None + h2 + The rotationally equivariant pair-partical representation. + this descriptor returns None + sw + The smooth switch function. + """ + if not self.type_one_side: + raise NotImplementedError( + "type_one_side == False is not supported in DescrptSeAArrayAPI" + ) + del mapping + xp = array_api_compat.array_namespace(coord_ext, atype_ext, nlist) + input_dtype = coord_ext.dtype + # nf x nloc x nnei x 4 + rr, diff, ww = self.env_mat.call( + coord_ext, atype_ext, nlist, self.davg, self.dstd + ) + nf, nloc, nnei, _ = rr.shape + sec = xp.asarray(self.sel_cumsum) + + ng = self.neuron[-1] + gr = xp.zeros([nf * nloc, ng, 4], dtype=self.dstd.dtype) + exclude_mask = self.emask.build_type_exclude_mask(nlist, atype_ext) + # merge nf and nloc axis, so for type_one_side == False, + # we don't require atype is the same in all frames + exclude_mask = xp.reshape(exclude_mask, (nf * nloc, nnei)) + rr = xp.reshape(rr, (nf * nloc, nnei, 4)) + rr = xp.astype(rr, self.dstd.dtype) + + for embedding_idx in itertools.product( + range(self.ntypes), repeat=self.embeddings.ndim + ): + (tt,) = embedding_idx + mm = exclude_mask[:, sec[tt] : sec[tt + 1]] + tr = rr[:, sec[tt] : sec[tt + 1], :] + tr = tr * xp.astype(mm[:, :, None], tr.dtype) + ss = tr[..., 0:1] + gg = self.cal_g(ss, embedding_idx) + # gr_tmp = xp.einsum("lni,lnj->lij", gg, tr) + gr_tmp = xp.sum(gg[:, :, :, None] * tr[:, :, None, :], axis=1) + gr += gr_tmp + gr = xp.reshape(gr, (nf, nloc, ng, 4)) + # nf x nloc x ng x 4 + gr /= self.nnei + gr1 = gr[:, :, : self.axis_neuron, :] + # nf x nloc x ng x ng1 + # grrg = xp.einsum("flid,fljd->flij", gr, gr1) + grrg = xp.sum(gr[:, :, :, None, :] * gr1[:, :, None, :, :], axis=4) + # nf x nloc x (ng x ng1) + grrg = xp.astype( + xp.reshape(grrg, (nf, nloc, ng * self.axis_neuron)), input_dtype + ) + return grrg, gr[..., 1:], None, None, ww diff --git a/deepmd/dpmodel/utils/nlist.py b/deepmd/dpmodel/utils/nlist.py index 4806fa4cd8..c56f1bc061 100644 --- a/deepmd/dpmodel/utils/nlist.py +++ b/deepmd/dpmodel/utils/nlist.py @@ -163,20 +163,20 @@ def nlist_distinguish_types( xp = array_api_compat.array_namespace(nlist, atype) nf, nloc, _ = nlist.shape ret_nlist = [] - tmp_atype = xp.tile(atype[:, None], [1, nloc, 1]) + tmp_atype = xp.tile(atype[:, None, :], (1, nloc, 1)) mask = nlist == -1 - tnlist_0 = nlist.copy() - tnlist_0[mask] = 0 - tnlist = xp_take_along_axis(tmp_atype, tnlist_0, axis=2).squeeze() - tnlist = xp.where(mask, -1, tnlist) - snsel = tnlist.shape[2] + tnlist_0 = xp.where(mask, xp.zeros_like(nlist), nlist) + tnlist = xp_take_along_axis(tmp_atype, tnlist_0, axis=2) + tnlist = xp.where(mask, xp.full_like(tnlist, -1), tnlist) for ii, ss in enumerate(sel): - pick_mask = (tnlist == ii).astype(xp.int32) - sorted_indices = xp.argsort(-pick_mask, kind="stable", axis=-1) + pick_mask = xp.astype(tnlist == ii, xp.int32) + sorted_indices = xp.argsort(-pick_mask, stable=True, axis=-1) pick_mask_sorted = -xp.sort(-pick_mask, axis=-1) inlist = xp_take_along_axis(nlist, sorted_indices, axis=2) - inlist = xp.where(~pick_mask_sorted.astype(bool), -1, inlist) - ret_nlist.append(xp.split(inlist, [ss, snsel - ss], axis=-1)[0]) + inlist = xp.where( + ~xp.astype(pick_mask_sorted, xp.bool), xp.full_like(inlist, -1), inlist + ) + ret_nlist.append(inlist[..., :ss]) ret = 
xp.concat(ret_nlist, axis=-1) return ret diff --git a/deepmd/jax/descriptor/se_e2_a.py b/deepmd/jax/descriptor/se_e2_a.py new file mode 100644 index 0000000000..a60a4e9af1 --- /dev/null +++ b/deepmd/jax/descriptor/se_e2_a.py @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Any, +) + +from deepmd.dpmodel.descriptor.se_e2_a import DescrptSeAArrayAPI as DescrptSeADP +from deepmd.jax.common import ( + flax_module, + to_jax_array, +) +from deepmd.jax.utils.exclude_mask import ( + PairExcludeMask, +) +from deepmd.jax.utils.network import ( + NetworkCollection, +) + + +@flax_module +class DescrptSeA(DescrptSeADP): + def __setattr__(self, name: str, value: Any) -> None: + if name in {"dstd", "davg"}: + value = to_jax_array(value) + elif name in {"embeddings"}: + if value is not None: + value = NetworkCollection.deserialize(value.serialize()) + elif name == "env_mat": + # env_mat doesn't store any value + pass + elif name == "emask": + value = PairExcludeMask(value.ntypes, value.exclude_types) + + return super().__setattr__(name, value) diff --git a/source/tests/array_api_strict/descriptor/se_e2_a.py b/source/tests/array_api_strict/descriptor/se_e2_a.py new file mode 100644 index 0000000000..654b9f8925 --- /dev/null +++ b/source/tests/array_api_strict/descriptor/se_e2_a.py @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Any, +) + +from deepmd.dpmodel.descriptor.se_e2_a import DescrptSeAArrayAPI as DescrptSeADP + +from ..common import ( + to_array_api_strict_array, +) +from ..utils.exclude_mask import ( + PairExcludeMask, +) +from ..utils.network import ( + NetworkCollection, +) + + +class DescrptSeA(DescrptSeADP): + def __setattr__(self, name: str, value: Any) -> None: + if name in {"dstd", "davg"}: + value = to_array_api_strict_array(value) + elif name in {"embeddings"}: + if value is not None: + value = NetworkCollection.deserialize(value.serialize()) + elif name == "env_mat": + # env_mat doesn't store any value + pass + elif name == "emask": + value = PairExcludeMask(value.ntypes, value.exclude_types) + + return super().__setattr__(name, value) diff --git a/source/tests/consistent/descriptor/test_se_e2_a.py b/source/tests/consistent/descriptor/test_se_e2_a.py index 2563ee1d6d..286703e21d 100644 --- a/source/tests/consistent/descriptor/test_se_e2_a.py +++ b/source/tests/consistent/descriptor/test_se_e2_a.py @@ -12,6 +12,8 @@ ) from ..common import ( + INSTALLED_ARRAY_API_STRICT, + INSTALLED_JAX, INSTALLED_PT, INSTALLED_TF, CommonTest, @@ -33,6 +35,17 @@ descrpt_se_a_args, ) +if INSTALLED_JAX: + from deepmd.jax.descriptor.se_e2_a import DescrptSeA as DescrptSeAJAX +else: + DescrptSeAJAX = None +if INSTALLED_ARRAY_API_STRICT: + from ...array_api_strict.descriptor.se_e2_a import ( + DescrptSeA as DescrptSeAArrayAPIStrict, + ) +else: + DescrptSeAArrayAPIStrict = None + @parameterized( (True, False), # resnet_dt @@ -98,9 +111,33 @@ def skip_tf(self) -> bool: ) = self.param return env_protection != 0.0 + @property + def skip_jax(self) -> bool: + ( + resnet_dt, + type_one_side, + excluded_types, + precision, + env_protection, + ) = self.param + return not type_one_side or not INSTALLED_JAX + + @property + def skip_array_api_strict(self) -> bool: + ( + resnet_dt, + type_one_side, + excluded_types, + precision, + env_protection, + ) = self.param + return not type_one_side or not INSTALLED_ARRAY_API_STRICT + tf_class = DescrptSeATF dp_class = DescrptSeADP pt_class = DescrptSeAPT + jax_class = DescrptSeAJAX + array_api_strict_class 
= DescrptSeAArrayAPIStrict args = descrpt_se_a_args() def setUp(self): @@ -177,6 +214,24 @@ def eval_pt(self, pt_obj: Any) -> Any: self.box, ) + def eval_jax(self, jax_obj: Any) -> Any: + return self.eval_jax_descriptor( + jax_obj, + self.natoms, + self.coords, + self.atype, + self.box, + ) + + def eval_array_api_strict(self, array_api_strict_obj: Any) -> Any: + return self.eval_array_api_strict_descriptor( + array_api_strict_obj, + self.natoms, + self.coords, + self.atype, + self.box, + ) + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: return (ret[0],) From 5050f611133665580fb44cd62cbe6d84d4864ac8 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 15 Oct 2024 23:40:01 -0400 Subject: [PATCH 33/39] feat(jax/array-api): DOS fitting (#4218) ## Summary by CodeRabbit - **New Features** - Introduced the `DOSFittingNet` class for enhanced fitting capabilities. - Added methods to evaluate different backends (JAX and Array API Strict) for computing density of states. - Enhanced testing framework to conditionally include tests based on library availability. - **Bug Fixes** - Improved serialization of the `bias_atom_e` variable to ensure consistent data representation. - **Tests** - Expanded the `TestDOS` class with new attributes and methods for better backend evaluation. --------- Signed-off-by: Jinzhe Zeng Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- deepmd/dpmodel/fitting/dos_fitting.py | 3 +- deepmd/jax/fitting/fitting.py | 8 +++ .../tests/array_api_strict/fitting/fitting.py | 7 +++ source/tests/consistent/fitting/test_dos.py | 59 +++++++++++++++++++ 4 files changed, 76 insertions(+), 1 deletion(-) diff --git a/deepmd/dpmodel/fitting/dos_fitting.py b/deepmd/dpmodel/fitting/dos_fitting.py index e9cd4a17ae..32225ac6c0 100644 --- a/deepmd/dpmodel/fitting/dos_fitting.py +++ b/deepmd/dpmodel/fitting/dos_fitting.py @@ -10,6 +10,7 @@ from deepmd.dpmodel.common import ( DEFAULT_PRECISION, + to_numpy_array, ) from deepmd.dpmodel.fitting.invar_fitting import ( InvarFitting, @@ -89,6 +90,6 @@ def serialize(self) -> dict: **super().serialize(), "type": "dos", } - dd["@variables"]["bias_atom_e"] = self.bias_atom_e + dd["@variables"]["bias_atom_e"] = to_numpy_array(self.bias_atom_e) return dd diff --git a/deepmd/jax/fitting/fitting.py b/deepmd/jax/fitting/fitting.py index 27ad791db9..284213c70a 100644 --- a/deepmd/jax/fitting/fitting.py +++ b/deepmd/jax/fitting/fitting.py @@ -3,6 +3,7 @@ Any, ) +from deepmd.dpmodel.fitting.dos_fitting import DOSFittingNet as DOSFittingNetDP from deepmd.dpmodel.fitting.ener_fitting import EnergyFittingNet as EnergyFittingNetDP from deepmd.jax.common import ( flax_module, @@ -37,3 +38,10 @@ class EnergyFittingNet(EnergyFittingNetDP): def __setattr__(self, name: str, value: Any) -> None: value = setattr_for_general_fitting(name, value) return super().__setattr__(name, value) + + +@flax_module +class DOSFittingNet(DOSFittingNetDP): + def __setattr__(self, name: str, value: Any) -> None: + value = setattr_for_general_fitting(name, value) + return super().__setattr__(name, value) diff --git a/source/tests/array_api_strict/fitting/fitting.py b/source/tests/array_api_strict/fitting/fitting.py index 2e6bd9fe25..8b65320203 100644 --- a/source/tests/array_api_strict/fitting/fitting.py +++ b/source/tests/array_api_strict/fitting/fitting.py @@ -3,6 +3,7 @@ Any, ) +from deepmd.dpmodel.fitting.dos_fitting import DOSFittingNet as DOSFittingNetDP from deepmd.dpmodel.fitting.ener_fitting import EnergyFittingNet as 
EnergyFittingNetDP from ..common import ( @@ -36,3 +37,9 @@ class EnergyFittingNet(EnergyFittingNetDP): def __setattr__(self, name: str, value: Any) -> None: value = setattr_for_general_fitting(name, value) return super().__setattr__(name, value) + + +class DOSFittingNet(DOSFittingNetDP): + def __setattr__(self, name: str, value: Any) -> None: + value = setattr_for_general_fitting(name, value) + return super().__setattr__(name, value) diff --git a/source/tests/consistent/fitting/test_dos.py b/source/tests/consistent/fitting/test_dos.py index ada65c8ac5..4a78b69341 100644 --- a/source/tests/consistent/fitting/test_dos.py +++ b/source/tests/consistent/fitting/test_dos.py @@ -12,6 +12,8 @@ ) from ..common import ( + INSTALLED_ARRAY_API_STRICT, + INSTALLED_JAX, INSTALLED_PT, INSTALLED_TF, CommonTest, @@ -36,6 +38,20 @@ fitting_dos, ) +if INSTALLED_JAX: + from deepmd.jax.env import ( + jnp, + ) + from deepmd.jax.fitting.fitting import DOSFittingNet as DOSFittingJAX +else: + DOSFittingJAX = object +if INSTALLED_ARRAY_API_STRICT: + import array_api_strict + + from ...array_api_strict.fitting.fitting import DOSFittingNet as DOSFittingStrict +else: + DOSFittingStrict = object + @parameterized( (True, False), # resnet_dt @@ -74,9 +90,19 @@ def skip_pt(self) -> bool: ) = self.param return CommonTest.skip_pt + @property + def skip_jax(self) -> bool: + return not INSTALLED_JAX + + @property + def skip_array_api_strict(self) -> bool: + return not INSTALLED_ARRAY_API_STRICT + tf_class = DOSFittingTF dp_class = DOSFittingDP pt_class = DOSFittingPT + jax_class = DOSFittingJAX + array_api_strict_class = DOSFittingStrict args = fitting_dos() def setUp(self): @@ -157,6 +183,39 @@ def eval_dp(self, dp_obj: Any) -> Any: fparam=self.fparam if numb_fparam else None, )["dos"] + def eval_jax(self, jax_obj: Any) -> Any: + ( + resnet_dt, + precision, + mixed_types, + numb_fparam, + numb_dos, + ) = self.param + return np.asarray( + jax_obj( + jnp.asarray(self.inputs), + jnp.asarray(self.atype.reshape(1, -1)), + fparam=jnp.asarray(self.fparam) if numb_fparam else None, + )["dos"] + ) + + def eval_array_api_strict(self, array_api_strict_obj: Any) -> Any: + array_api_strict.set_array_api_strict_flags(api_version="2023.12") + ( + resnet_dt, + precision, + mixed_types, + numb_fparam, + numb_dos, + ) = self.param + return np.asarray( + array_api_strict_obj( + array_api_strict.asarray(self.inputs), + array_api_strict.asarray(self.atype.reshape(1, -1)), + fparam=array_api_strict.asarray(self.fparam) if numb_fparam else None, + )["dos"] + ) + def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: if backend == self.RefBackend.TF: # shape is not same From d7d221059ea39802354cca8ea2e3d800b62e7563 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 16 Oct 2024 09:50:28 -0400 Subject: [PATCH 34/39] fix(pt): make PT training step idx consistent with TF (#4221) Fix #4206. Currently, the training step index displayed in TF and PT has different meanings: - In TF, step 0 means no training; step 1 means a training step has been performed. The maximum training step is equal to the number of steps. - In PT, step 0 means a training step has been performed. The maximum training step is the number of steps minus 1. This PR corrects the definition of the step-index in PT and makes them consistent. There is still a difference after this PR: TF shows step 0, but PT shows step 1. Showing the loss of step 0 in PT needs heavy refactoring and is thus not included in this PR. 
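A tiny illustration of the convention adopted below: the internal loop variable `_step_id` still runs from 0, but everything user-facing reports `display_step_id = _step_id + 1`, so the first logged batch is 1 and the last equals the configured number of steps, matching TF:

```python
num_steps = 3
for _step_id in range(num_steps):
    display_step_id = _step_id + 1  # what logs, lcurve.out, and TensorBoard see
    print(f"batch {display_step_id} / {num_steps}")
# batch 1 / 3
# batch 2 / 3
# batch 3 / 3
```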
## Summary by CodeRabbit - **New Features** - Improved logging for training progress, starting step count from 1 for better clarity. - Enhanced TensorBoard logging for consistent step tracking. - **Bug Fixes** - Adjusted logging conditions to ensure the first step's results are included in the output. --------- Signed-off-by: Jinzhe Zeng Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- deepmd/pt/train/training.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 4d746e84c0..10e841682a 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -769,7 +769,10 @@ def fake_model(): raise ValueError(f"Not supported optimizer type '{self.opt_type}'") # Log and persist - if self.display_in_training and _step_id % self.disp_freq == 0: + display_step_id = _step_id + 1 + if self.display_in_training and ( + display_step_id % self.disp_freq == 0 or display_step_id == 1 + ): self.wrapper.eval() def log_loss_train(_loss, _more_loss, _task_key="Default"): @@ -821,7 +824,7 @@ def log_loss_valid(_task_key="Default"): if self.rank == 0: log.info( format_training_message_per_task( - batch=_step_id, + batch=display_step_id, task_name="trn", rmse=train_results, learning_rate=cur_lr, @@ -830,7 +833,7 @@ def log_loss_valid(_task_key="Default"): if valid_results: log.info( format_training_message_per_task( - batch=_step_id, + batch=display_step_id, task_name="val", rmse=valid_results, learning_rate=None, @@ -861,7 +864,7 @@ def log_loss_valid(_task_key="Default"): if self.rank == 0: log.info( format_training_message_per_task( - batch=_step_id, + batch=display_step_id, task_name=_key + "_trn", rmse=train_results[_key], learning_rate=cur_lr, @@ -870,7 +873,7 @@ def log_loss_valid(_task_key="Default"): if valid_results[_key]: log.info( format_training_message_per_task( - batch=_step_id, + batch=display_step_id, task_name=_key + "_val", rmse=valid_results[_key], learning_rate=None, @@ -883,7 +886,7 @@ def log_loss_valid(_task_key="Default"): if self.rank == 0 and self.timing_in_training: log.info( format_training_message( - batch=_step_id, + batch=display_step_id, wall_time=train_time, ) ) @@ -899,7 +902,7 @@ def log_loss_valid(_task_key="Default"): self.print_header(fout, train_results, valid_results) self.lcurve_should_print_header = False self.print_on_training( - fout, _step_id, cur_lr, train_results, valid_results + fout, display_step_id, cur_lr, train_results, valid_results ) if ( @@ -921,11 +924,15 @@ def log_loss_valid(_task_key="Default"): f.write(str(self.latest_model)) # tensorboard - if self.enable_tensorboard and _step_id % self.tensorboard_freq == 0: - writer.add_scalar(f"{task_key}/lr", cur_lr, _step_id) - writer.add_scalar(f"{task_key}/loss", loss, _step_id) + if self.enable_tensorboard and ( + display_step_id % self.tensorboard_freq == 0 or display_step_id == 1 + ): + writer.add_scalar(f"{task_key}/lr", cur_lr, display_step_id) + writer.add_scalar(f"{task_key}/loss", loss, display_step_id) for item in more_loss: - writer.add_scalar(f"{task_key}/{item}", more_loss[item], _step_id) + writer.add_scalar( + f"{task_key}/{item}", more_loss[item], display_step_id + ) self.t0 = time.time() self.total_train_time = 0.0 From af86b577089393c519e0c478ae0a50c1766708ab Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 17 Oct 2024 01:03:06 +0800 Subject: [PATCH 35/39] Chore(pt): refactor the command 
function interface (#4225) Fix #3934. ## Summary by CodeRabbit - **New Features** - Enhanced clarity and usability of the training process with explicit parameters for model training, freezing, and bias changing functions. - Improved function interfaces streamline user interactions and understanding. - **Bug Fixes** - Resolved issues related to parameter handling by transitioning from a flags-based system to a more structured approach. - **Refactor** - Updated function signatures for better readability and maintainability, improving the overall structure of the code. - Simplified the freezing mechanism in tests by removing the use of a `Namespace` object. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- deepmd/pt/entrypoints/main.py | 149 +++++++++++++++---------- source/tests/pt/model/test_deeppot.py | 6 +- source/tests/pt/test_init_frz_model.py | 6 +- 3 files changed, 94 insertions(+), 67 deletions(-) diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py index a0694c41c5..7c8a95c5e7 100644 --- a/deepmd/pt/entrypoints/main.py +++ b/deepmd/pt/entrypoints/main.py @@ -239,16 +239,27 @@ def get_backend_info(self) -> dict: } -def train(FLAGS): - log.info("Configuration path: %s", FLAGS.INPUT) +def train( + input_file: str, + init_model: Optional[str], + restart: Optional[str], + finetune: Optional[str], + init_frz_model: Optional[str], + model_branch: str, + skip_neighbor_stat: bool = False, + use_pretrain_script: bool = False, + force_load: bool = False, + output: str = "out.json", +): + log.info("Configuration path: %s", input_file) SummaryPrinter()() - with open(FLAGS.INPUT) as fin: + with open(input_file) as fin: config = json.load(fin) # ensure suffix, as in the command line help, we say "path prefix of checkpoint files" - if FLAGS.init_model is not None and not FLAGS.init_model.endswith(".pt"): - FLAGS.init_model += ".pt" - if FLAGS.restart is not None and not FLAGS.restart.endswith(".pt"): - FLAGS.restart += ".pt" + if init_model is not None and not init_model.endswith(".pt"): + init_model += ".pt" + if restart is not None and not restart.endswith(".pt"): + restart += ".pt" # update multitask config multi_task = "model_dict" in config["model"] @@ -262,26 +273,24 @@ def train(FLAGS): # update fine-tuning config finetune_links = None - if FLAGS.finetune is not None: + if finetune is not None: config["model"], finetune_links = get_finetune_rules( - FLAGS.finetune, + finetune, config["model"], - model_branch=FLAGS.model_branch, - change_model_params=FLAGS.use_pretrain_script, + model_branch=model_branch, + change_model_params=use_pretrain_script, ) # update init_model or init_frz_model config if necessary - if ( - FLAGS.init_model is not None or FLAGS.init_frz_model is not None - ) and FLAGS.use_pretrain_script: - if FLAGS.init_model is not None: - init_state_dict = torch.load(FLAGS.init_model, map_location=DEVICE) + if (init_model is not None or init_frz_model is not None) and use_pretrain_script: + if init_model is not None: + init_state_dict = torch.load(init_model, map_location=DEVICE) if "model" in init_state_dict: init_state_dict = init_state_dict["model"] config["model"] = init_state_dict["_extra_state"]["model_params"] else: config["model"] = json.loads( torch.jit.load( - FLAGS.init_frz_model, map_location=DEVICE + init_frz_model, map_location=DEVICE ).get_model_def_script() ) @@ -291,7 +300,7 @@ def train(FLAGS): # do neighbor stat min_nbor_dist = None - if not FLAGS.skip_neighbor_stat: + if not 
skip_neighbor_stat: log.info( "Calculate neighbor statistics... (add --skip-neighbor-stat to skip this step)" ) @@ -320,16 +329,16 @@ def train(FLAGS): ) ) - with open(FLAGS.output, "w") as fp: + with open(output, "w") as fp: json.dump(config, fp, indent=4) trainer = get_trainer( config, - FLAGS.init_model, - FLAGS.restart, - FLAGS.finetune, - FLAGS.force_load, - FLAGS.init_frz_model, + init_model, + restart, + finetune, + force_load, + init_frz_model, shared_links=shared_links, finetune_links=finetune_links, ) @@ -343,26 +352,39 @@ def train(FLAGS): trainer.run() -def freeze(FLAGS): - model = inference.Tester(FLAGS.model, head=FLAGS.head).model +def freeze( + model: str, + output: str = "frozen_model.pth", + head: Optional[str] = None, +): + model = inference.Tester(model, head=head).model model.eval() model = torch.jit.script(model) extra_files = {} torch.jit.save( model, - FLAGS.output, + output, extra_files, ) - log.info(f"Saved frozen model to {FLAGS.output}") - - -def change_bias(FLAGS): - if FLAGS.INPUT.endswith(".pt"): - old_state_dict = torch.load(FLAGS.INPUT, map_location=env.DEVICE) + log.info(f"Saved frozen model to {output}") + + +def change_bias( + input_file: str, + mode: str = "change", + bias_value: Optional[list] = None, + datafile: Optional[str] = None, + system: str = ".", + numb_batch: int = 0, + model_branch: Optional[str] = None, + output: Optional[str] = None, +): + if input_file.endswith(".pt"): + old_state_dict = torch.load(input_file, map_location=env.DEVICE) model_state_dict = copy.deepcopy(old_state_dict.get("model", old_state_dict)) model_params = model_state_dict["_extra_state"]["model_params"] - elif FLAGS.INPUT.endswith(".pth"): - old_model = torch.jit.load(FLAGS.INPUT, map_location=env.DEVICE) + elif input_file.endswith(".pth"): + old_model = torch.jit.load(input_file, map_location=env.DEVICE) model_params_string = old_model.get_model_def_script() model_params = json.loads(model_params_string) old_state_dict = old_model.state_dict() @@ -373,10 +395,7 @@ def change_bias(FLAGS): "or a frozen model with a .pth extension" ) multi_task = "model_dict" in model_params - model_branch = FLAGS.model_branch - bias_adjust_mode = ( - "change-by-statistic" if FLAGS.mode == "change" else "set-by-statistic" - ) + bias_adjust_mode = "change-by-statistic" if mode == "change" else "set-by-statistic" if multi_task: assert ( model_branch is not None @@ -393,24 +412,24 @@ def change_bias(FLAGS): else model_params["model_dict"][model_branch]["type_map"] ) model_to_change = model if not multi_task else model[model_branch] - if FLAGS.INPUT.endswith(".pt"): + if input_file.endswith(".pt"): wrapper = ModelWrapper(model) wrapper.load_state_dict(old_state_dict["model"]) else: # for .pth model.load_state_dict(old_state_dict) - if FLAGS.bias_value is not None: + if bias_value is not None: # use user-defined bias assert model_to_change.model_type in [ "ener" ], "User-defined bias is only available for energy model!" assert ( - len(FLAGS.bias_value) == len(type_map) + len(bias_value) == len(type_map) ), f"The number of elements in the bias should be the same as that in the type_map: {type_map}." 
old_bias = model_to_change.get_out_bias() bias_to_set = torch.tensor( - FLAGS.bias_value, dtype=old_bias.dtype, device=old_bias.device + bias_value, dtype=old_bias.dtype, device=old_bias.device ).view(old_bias.shape) model_to_change.set_out_bias(bias_to_set) log.info( @@ -421,11 +440,11 @@ def change_bias(FLAGS): updated_model = model_to_change else: # calculate bias on given systems - if FLAGS.datafile is not None: - with open(FLAGS.datafile) as datalist: + if datafile is not None: + with open(datafile) as datalist: all_sys = datalist.read().splitlines() else: - all_sys = expand_sys_str(FLAGS.system) + all_sys = expand_sys_str(system) data_systems = process_systems(all_sys) data_single = DpLoaderSet( data_systems, @@ -438,7 +457,7 @@ def change_bias(FLAGS): data_requirement = mock_loss.label_requirement data_requirement += training.get_additional_data_requirement(model_to_change) data_single.add_data_requirement(data_requirement) - nbatches = FLAGS.numb_batch if FLAGS.numb_batch != 0 else float("inf") + nbatches = numb_batch if numb_batch != 0 else float("inf") sampled_data = make_stat_input( data_single.systems, data_single.dataloaders, @@ -453,11 +472,9 @@ def change_bias(FLAGS): else: model[model_branch] = updated_model - if FLAGS.INPUT.endswith(".pt"): + if input_file.endswith(".pt"): output_path = ( - FLAGS.output - if FLAGS.output is not None - else FLAGS.INPUT.replace(".pt", "_updated.pt") + output if output is not None else input_file.replace(".pt", "_updated.pt") ) wrapper = ModelWrapper(model) if "model" in old_state_dict: @@ -470,9 +487,7 @@ def change_bias(FLAGS): else: # for .pth output_path = ( - FLAGS.output - if FLAGS.output is not None - else FLAGS.INPUT.replace(".pth", "_updated.pth") + output if output is not None else input_file.replace(".pth", "_updated.pth") ) model = torch.jit.script(model) torch.jit.save( @@ -499,7 +514,18 @@ def main(args: Optional[Union[list[str], argparse.Namespace]] = None): log.info("DeePMD version: %s", __version__) if FLAGS.command == "train": - train(FLAGS) + train( + input_file=FLAGS.INPUT, + init_model=FLAGS.init_model, + restart=FLAGS.restart, + finetune=FLAGS.finetune, + init_frz_model=FLAGS.init_frz_model, + model_branch=FLAGS.model_branch, + skip_neighbor_stat=FLAGS.skip_neighbor_stat, + use_pretrain_script=FLAGS.use_pretrain_script, + force_load=FLAGS.force_load, + output=FLAGS.output, + ) elif FLAGS.command == "freeze": if Path(FLAGS.checkpoint_folder).is_dir(): checkpoint_path = Path(FLAGS.checkpoint_folder) @@ -508,9 +534,18 @@ def main(args: Optional[Union[list[str], argparse.Namespace]] = None): else: FLAGS.model = FLAGS.checkpoint_folder FLAGS.output = str(Path(FLAGS.output).with_suffix(".pth")) - freeze(FLAGS) + freeze(model=FLAGS.model, output=FLAGS.output, head=FLAGS.head) elif FLAGS.command == "change-bias": - change_bias(FLAGS) + change_bias( + input_file=FLAGS.INPUT, + mode=FLAGS.mode, + bias_value=FLAGS.bias_value, + datafile=FLAGS.datafile, + system=FLAGS.system, + numb_batch=FLAGS.numb_batch, + model_branch=FLAGS.model_branch, + output=FLAGS.output, + ) else: raise RuntimeError(f"Invalid command {FLAGS.command}!") diff --git a/source/tests/pt/model/test_deeppot.py b/source/tests/pt/model/test_deeppot.py index 8917c62cce..7f530b0a5e 100644 --- a/source/tests/pt/model/test_deeppot.py +++ b/source/tests/pt/model/test_deeppot.py @@ -2,9 +2,6 @@ import json import os import unittest -from argparse import ( - Namespace, -) from copy import ( deepcopy, ) @@ -123,12 +120,11 @@ class TestDeepPotFrozen(TestDeepPot): def 
setUp(self): super().setUp() frozen_model = "frozen_model.pth" - ns = Namespace( + freeze( model=self.model, output=frozen_model, head=None, ) - freeze(ns) self.model = frozen_model # Note: this can not actually disable cuda device to be used diff --git a/source/tests/pt/test_init_frz_model.py b/source/tests/pt/test_init_frz_model.py index 1cbc1b29b6..69c738d6bd 100644 --- a/source/tests/pt/test_init_frz_model.py +++ b/source/tests/pt/test_init_frz_model.py @@ -4,9 +4,6 @@ import shutil import tempfile import unittest -from argparse import ( - Namespace, -) from copy import ( deepcopy, ) @@ -70,12 +67,11 @@ def setUp(self): if imodel in [0, 1]: trainer.run() - ns = Namespace( + freeze( model="model.pt", output=frozen_model, head=None, ) - freeze(ns) self.models.append(frozen_model) def test_dp_test(self): From 2871fec8771c887010d9a660bb40425e603622d4 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 17 Oct 2024 10:06:06 +0800 Subject: [PATCH 36/39] Chore(pt):rm old pt implementation (#4223) Fix #3913. ## Summary by CodeRabbit ## Release Notes - **New Features** - Introduced `exclude_types` parameter in `DipoleFittingNet` and `PolarFittingNet` constructors for improved flexibility. - Added `SimpleLinear` class to enhance network functionality. - **Bug Fixes** - Removed `old_impl` parameter across various classes, streamlining interfaces and ensuring consistent behavior. - **Documentation** - Updated test cases to reflect the removal of `old_impl`, focusing on new implementations. - **Chores** - Deleted obsolete files and classes to simplify the codebase and improve maintainability. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- deepmd/dpmodel/fitting/dipole_fitting.py | 3 - .../dpmodel/fitting/polarizability_fitting.py | 3 - deepmd/pt/model/backbone/__init__.py | 12 - deepmd/pt/model/backbone/backbone.py | 12 - deepmd/pt/model/backbone/evoformer2b.py | 103 -- deepmd/pt/model/descriptor/__init__.py | 4 - deepmd/pt/model/descriptor/dpa1.py | 2 - deepmd/pt/model/descriptor/dpa2.py | 2 - deepmd/pt/model/descriptor/gaussian_lcc.py | 319 ---- .../descriptor/repformer_layer_old_impl.py | 744 -------- deepmd/pt/model/descriptor/repformers.py | 102 +- deepmd/pt/model/descriptor/se_a.py | 158 +- deepmd/pt/model/descriptor/se_atten.py | 218 +-- deepmd/pt/model/descriptor/se_atten_v2.py | 2 - deepmd/pt/model/descriptor/se_r.py | 2 - deepmd/pt/model/network/network.py | 1637 ----------------- deepmd/pt/model/task/__init__.py | 4 - deepmd/pt/model/task/atten_lcc.py | 55 - deepmd/pt/model/task/dipole.py | 2 - deepmd/pt/model/task/fitting.py | 106 +- deepmd/pt/model/task/polarizability.py | 2 - .../tests/pt/model/test_descriptor_hybrid.py | 1 - source/tests/pt/model/test_descriptor_se_r.py | 3 - source/tests/pt/model/test_dpa1.py | 65 - source/tests/pt/model/test_dpa2.py | 41 - source/tests/pt/model/test_embedding_net.py | 7 +- source/tests/pt/model/test_ener_fitting.py | 48 - source/tests/pt/model/test_se_atten_v2.py | 2 - source/tests/pt/model/test_se_e2_a.py | 42 - 29 files changed, 206 insertions(+), 3495 deletions(-) delete mode 100644 deepmd/pt/model/backbone/__init__.py delete mode 100644 deepmd/pt/model/backbone/backbone.py delete mode 100644 deepmd/pt/model/backbone/evoformer2b.py delete mode 100644 deepmd/pt/model/descriptor/gaussian_lcc.py delete mode 100644 deepmd/pt/model/descriptor/repformer_layer_old_impl.py delete mode 100644 deepmd/pt/model/task/atten_lcc.py diff --git 
a/deepmd/dpmodel/fitting/dipole_fitting.py b/deepmd/dpmodel/fitting/dipole_fitting.py index f67bbc93a4..01bd60c777 100644 --- a/deepmd/dpmodel/fitting/dipole_fitting.py +++ b/deepmd/dpmodel/fitting/dipole_fitting.py @@ -105,7 +105,6 @@ def __init__( r_differentiable: bool = True, c_differentiable: bool = True, type_map: Optional[list[str]] = None, - old_impl=False, seed: Optional[Union[int, list[int]]] = None, ): if tot_ener_zero: @@ -141,7 +140,6 @@ def __init__( type_map=type_map, seed=seed, ) - self.old_impl = False def _net_out_dim(self): """Set the FittingNet output dim.""" @@ -151,7 +149,6 @@ def serialize(self) -> dict: data = super().serialize() data["type"] = "dipole" data["embedding_width"] = self.embedding_width - data["old_impl"] = self.old_impl data["r_differentiable"] = self.r_differentiable data["c_differentiable"] = self.c_differentiable return data diff --git a/deepmd/dpmodel/fitting/polarizability_fitting.py b/deepmd/dpmodel/fitting/polarizability_fitting.py index 2ff5052a83..73a691f482 100644 --- a/deepmd/dpmodel/fitting/polarizability_fitting.py +++ b/deepmd/dpmodel/fitting/polarizability_fitting.py @@ -107,7 +107,6 @@ def __init__( spin: Any = None, mixed_types: bool = False, exclude_types: list[int] = [], - old_impl: bool = False, fit_diag: bool = True, scale: Optional[list[float]] = None, shift_diag: bool = True, @@ -165,7 +164,6 @@ def __init__( type_map=type_map, seed=seed, ) - self.old_impl = False def _net_out_dim(self): """Set the FittingNet output dim.""" @@ -192,7 +190,6 @@ def serialize(self) -> dict: data["type"] = "polar" data["@version"] = 3 data["embedding_width"] = self.embedding_width - data["old_impl"] = self.old_impl data["fit_diag"] = self.fit_diag data["shift_diag"] = self.shift_diag data["@variables"]["scale"] = self.scale diff --git a/deepmd/pt/model/backbone/__init__.py b/deepmd/pt/model/backbone/__init__.py deleted file mode 100644 index a76bdb2a2d..0000000000 --- a/deepmd/pt/model/backbone/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -from .backbone import ( - BackBone, -) -from .evoformer2b import ( - Evoformer2bBackBone, -) - -__all__ = [ - "BackBone", - "Evoformer2bBackBone", -] diff --git a/deepmd/pt/model/backbone/backbone.py b/deepmd/pt/model/backbone/backbone.py deleted file mode 100644 index ddeedfeff5..0000000000 --- a/deepmd/pt/model/backbone/backbone.py +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import torch - - -class BackBone(torch.nn.Module): - def __init__(self, **kwargs): - """BackBone base method.""" - super().__init__() - - def forward(self, **kwargs): - """Calculate backBone.""" - raise NotImplementedError diff --git a/deepmd/pt/model/backbone/evoformer2b.py b/deepmd/pt/model/backbone/evoformer2b.py deleted file mode 100644 index 1146b3a298..0000000000 --- a/deepmd/pt/model/backbone/evoformer2b.py +++ /dev/null @@ -1,103 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -from deepmd.pt.model.backbone import ( - BackBone, -) -from deepmd.pt.model.network.network import ( - Evoformer2bEncoder, -) - - -class Evoformer2bBackBone(BackBone): - def __init__( - self, - nnei, - layer_num=6, - attn_head=8, - atomic_dim=1024, - pair_dim=100, - feature_dim=1024, - ffn_dim=2048, - post_ln=False, - final_layer_norm=True, - final_head_layer_norm=False, - emb_layer_norm=False, - atomic_residual=False, - evo_residual=False, - residual_factor=1.0, - activation_function="gelu", - **kwargs, - ): - """Construct an evoformer backBone.""" - super().__init__() - 
self.nnei = nnei - self.layer_num = layer_num - self.attn_head = attn_head - self.atomic_dim = atomic_dim - self.pair_dim = pair_dim - self.feature_dim = feature_dim - self.head_dim = feature_dim // attn_head - assert ( - feature_dim % attn_head == 0 - ), f"feature_dim {feature_dim} must be divided by attn_head {attn_head}!" - self.ffn_dim = ffn_dim - self.post_ln = post_ln - self.final_layer_norm = final_layer_norm - self.final_head_layer_norm = final_head_layer_norm - self.emb_layer_norm = emb_layer_norm - self.activation_function = activation_function - self.atomic_residual = atomic_residual - self.evo_residual = evo_residual - self.residual_factor = float(residual_factor) - self.encoder = Evoformer2bEncoder( - nnei=self.nnei, - layer_num=self.layer_num, - attn_head=self.attn_head, - atomic_dim=self.atomic_dim, - pair_dim=self.pair_dim, - feature_dim=self.feature_dim, - ffn_dim=self.ffn_dim, - post_ln=self.post_ln, - final_layer_norm=self.final_layer_norm, - final_head_layer_norm=self.final_head_layer_norm, - emb_layer_norm=self.emb_layer_norm, - atomic_residual=self.atomic_residual, - evo_residual=self.evo_residual, - residual_factor=self.residual_factor, - activation_function=self.activation_function, - ) - - def forward(self, atomic_rep, pair_rep, nlist, nlist_type, nlist_mask): - """Encoder the atomic and pair representations. - - Args: - - atomic_rep: Atomic representation with shape [nframes, nloc, atomic_dim]. - - pair_rep: Pair representation with shape [nframes, nloc, nnei, pair_dim]. - - nlist: Neighbor list with shape [nframes, nloc, nnei]. - - nlist_type: Neighbor types with shape [nframes, nloc, nnei]. - - nlist_mask: Neighbor mask with shape [nframes, nloc, nnei], `False` if blank. - - Returns - ------- - - atomic_rep: Atomic representation after encoder with shape [nframes, nloc, feature_dim]. - - transformed_atomic_rep: Transformed atomic representation after encoder with shape [nframes, nloc, atomic_dim]. - - pair_rep: Pair representation after encoder with shape [nframes, nloc, nnei, attn_head]. - - delta_pair_rep: Delta pair representation after encoder with shape [nframes, nloc, nnei, attn_head]. - - norm_x: Normalization loss of atomic_rep. - - norm_delta_pair_rep: Normalization loss of delta_pair_rep. - """ - ( - atomic_rep, - transformed_atomic_rep, - pair_rep, - delta_pair_rep, - norm_x, - norm_delta_pair_rep, - ) = self.encoder(atomic_rep, pair_rep, nlist, nlist_type, nlist_mask) - return ( - atomic_rep, - transformed_atomic_rep, - pair_rep, - delta_pair_rep, - norm_x, - norm_delta_pair_rep, - ) diff --git a/deepmd/pt/model/descriptor/__init__.py b/deepmd/pt/model/descriptor/__init__.py index 779e7a562c..4ffa937bcb 100644 --- a/deepmd/pt/model/descriptor/__init__.py +++ b/deepmd/pt/model/descriptor/__init__.py @@ -16,9 +16,6 @@ from .env_mat import ( prod_env_mat, ) -from .gaussian_lcc import ( - DescrptGaussianLcc, -) from .hybrid import ( DescrptHybrid, ) @@ -59,6 +56,5 @@ "DescrptDPA2", "DescrptHybrid", "prod_env_mat", - "DescrptGaussianLcc", "DescrptBlockRepformers", ] diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index 617e8b49b6..322fa3a12d 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -245,7 +245,6 @@ def __init__( # not implemented spin=None, type: Optional[str] = None, - old_impl: bool = False, ): super().__init__() # Ensure compatibility with the deprecated stripped_type_embedding option. 
@@ -290,7 +289,6 @@ def __init__( trainable_ln=trainable_ln, ln_eps=ln_eps, seed=child_seed(seed, 1), - old_impl=old_impl, ) self.use_econf_tebd = use_econf_tebd self.use_tebd_bias = use_tebd_bias diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index f1ef200b09..632efe5dbf 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -92,7 +92,6 @@ def __init__( use_econf_tebd: bool = False, use_tebd_bias: bool = False, type_map: Optional[list[str]] = None, - old_impl: bool = False, ): r"""The DPA-2 descriptor. see https://arxiv.org/abs/2312.15492. @@ -235,7 +234,6 @@ def init_subclass_params(sub_data, sub_class): g1_out_conv=self.repformer_args.g1_out_conv, g1_out_mlp=self.repformer_args.g1_out_mlp, seed=child_seed(seed, 1), - old_impl=old_impl, ) self.rcsl_list = [ (self.repformers.get_rcut(), self.repformers.get_nsel()), diff --git a/deepmd/pt/model/descriptor/gaussian_lcc.py b/deepmd/pt/model/descriptor/gaussian_lcc.py deleted file mode 100644 index 8ac52215c0..0000000000 --- a/deepmd/pt/model/descriptor/gaussian_lcc.py +++ /dev/null @@ -1,319 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Optional, -) - -import torch -import torch.nn as nn - -from deepmd.pt.model.descriptor.base_descriptor import ( - BaseDescriptor, -) -from deepmd.pt.model.network.network import ( - Evoformer3bEncoder, - GaussianEmbedding, - TypeEmbedNet, -) -from deepmd.pt.utils import ( - env, -) -from deepmd.utils.path import ( - DPPath, -) - - -class DescrptGaussianLcc(torch.nn.Module, BaseDescriptor): - def __init__( - self, - rcut, - rcut_smth, - sel: int, - ntypes: int, - num_pair: int, - embed_dim: int = 768, - kernel_num: int = 128, - pair_embed_dim: int = 64, - num_block: int = 1, - layer_num: int = 12, - attn_head: int = 48, - pair_hidden_dim: int = 16, - ffn_embedding_dim: int = 768, - dropout: float = 0.0, - droppath_prob: float = 0.1, - pair_dropout: float = 0.25, - attention_dropout: float = 0.1, - activation_dropout: float = 0.1, - pre_ln: bool = True, - do_tag_embedding: bool = False, - tag_ener_pref: bool = False, - atomic_sum_gbf: bool = False, - pre_add_seq: bool = True, - tri_update: bool = True, - **kwargs, - ): - """Construct a descriptor of Gaussian Based Local Cluster. - - Args: - - rcut: Cut-off radius. - - rcut_smth: Smooth hyper-parameter for pair force & energy. **Not used in this descriptor**. - - sel: For each element type, how many atoms is selected as neighbors. - - ntypes: Number of atom types. - - num_pair: Number of atom type pairs. Default is 2 * ntypes. - - kernel_num: Number of gaussian kernels. - - embed_dim: Dimension of atomic representation. - - pair_embed_dim: Dimension of pair representation. - - num_block: Number of evoformer blocks. - - layer_num: Number of attention layers. - - attn_head: Number of attention heads. - - pair_hidden_dim: Hidden dimension of pair representation during attention process. - - ffn_embedding_dim: Dimension during feed forward network. - - dropout: Dropout probability of atomic representation. - - droppath_prob: If not zero, it will use drop paths (Stochastic Depth) per sample and ignore `dropout`. - - pair_dropout: Dropout probability of pair representation during triangular update. - - attention_dropout: Dropout probability during attetion process. - - activation_dropout: Dropout probability of pair feed forward network. - - pre_ln: Do previous layer norm or not. - - do_tag_embedding: Add tag embedding to atomic and pair representations. 
(`tags`, `tags2`, `tags3` must exist) - - atomic_sum_gbf: Add sum of gaussian outputs to atomic representation or not. - - pre_add_seq: Add output of other descriptor (if has) to the atomic representation before attention. - """ - super().__init__() - self.rcut = rcut - self.rcut_smth = rcut_smth - self.embed_dim = embed_dim - self.num_pair = num_pair - self.kernel_num = kernel_num - self.pair_embed_dim = pair_embed_dim - self.num_block = num_block - self.layer_num = layer_num - self.attention_heads = attn_head - self.pair_hidden_dim = pair_hidden_dim - self.ffn_embedding_dim = ffn_embedding_dim - self.dropout = dropout - self.droppath_prob = droppath_prob - self.pair_dropout = pair_dropout - self.attention_dropout = attention_dropout - self.activation_dropout = activation_dropout - self.pre_ln = pre_ln - self.do_tag_embedding = do_tag_embedding - self.tag_ener_pref = tag_ener_pref - self.atomic_sum_gbf = atomic_sum_gbf - self.local_cluster = True - self.pre_add_seq = pre_add_seq - self.tri_update = tri_update - - if isinstance(sel, int): - sel = [sel] - - self.ntypes = ntypes - self.sec = torch.tensor(sel) # pylint: disable=no-explicit-dtype,no-explicit-device - self.nnei = sum(sel) - - if self.do_tag_embedding: - self.tag_encoder = nn.Embedding(3, self.embed_dim) - self.tag_encoder2 = nn.Embedding(2, self.embed_dim) - self.tag_type_embedding = TypeEmbedNet(10, pair_embed_dim) - self.edge_type_embedding = nn.Embedding( - (ntypes + 1) * (ntypes + 1), - pair_embed_dim, - padding_idx=(ntypes + 1) * (ntypes + 1) - 1, - dtype=env.GLOBAL_PT_FLOAT_PRECISION, - ) - self.gaussian_encoder = GaussianEmbedding( - rcut, - kernel_num, - num_pair, - embed_dim, - pair_embed_dim, - sel, - ntypes, - atomic_sum_gbf, - ) - self.backbone = Evoformer3bEncoder( - self.nnei, - layer_num=self.layer_num, - attn_head=self.attention_heads, - atomic_dim=self.embed_dim, - pair_dim=self.pair_embed_dim, - pair_hidden_dim=self.pair_hidden_dim, - ffn_embedding_dim=self.ffn_embedding_dim, - dropout=self.dropout, - droppath_prob=self.droppath_prob, - pair_dropout=self.pair_dropout, - attention_dropout=self.attention_dropout, - activation_dropout=self.activation_dropout, - pre_ln=self.pre_ln, - tri_update=self.tri_update, - ) - - @property - def dim_out(self): - """Returns the output dimension of atomic representation.""" - return self.embed_dim - - @property - def dim_in(self): - """Returns the atomic input dimension of this descriptor.""" - return self.embed_dim - - @property - def dim_emb(self): - """Returns the output dimension of pair representation.""" - return self.pair_embed_dim - - def compute_input_stats(self, merged: list[dict], path: Optional[DPPath] = None): - """Update mean and stddev for descriptor elements.""" - pass - - def forward( - self, - extended_coord, - nlist, - atype, - nlist_type, - nlist_loc=None, - atype_tebd=None, - nlist_tebd=None, - seq_input=None, - ): - """Calculate the atomic and pair representations of this descriptor. - - Args: - - extended_coord: Copied atom coordinates with shape [nframes, nall, 3]. - - nlist: Neighbor list with shape [nframes, nloc, nnei]. - - atype: Atom type with shape [nframes, nloc]. - - nlist_type: Atom type of neighbors with shape [nframes, nloc, nnei]. - - nlist_loc: Local index of neighbor list with shape [nframes, nloc, nnei]. - - atype_tebd: Atomic type embedding with shape [nframes, nloc, tebd_dim]. - - nlist_tebd: Type embeddings of neighbor with shape [nframes, nloc, nnei, tebd_dim]. 
- - seq_input: The sequential input from other descriptor with - shape [nframes, nloc, tebd_dim] or [nframes * nloc, 1 + nnei, tebd_dim] - - Returns - ------- - - result: descriptor with shape [nframes, nloc, self.filter_neuron[-1] * self.axis_neuron]. - - ret: environment matrix with shape [nframes, nloc, self.neei, out_size] - """ - nframes, nloc = nlist.shape[:2] - nall = extended_coord.shape[1] - nlist2 = torch.cat( - [ - torch.arange(0, nloc, device=nlist.device) # pylint: disable=no-explicit-dtype - .reshape(1, nloc, 1) - .expand(nframes, -1, -1), - nlist, - ], - dim=-1, - ) - nlist_loc2 = torch.cat( - [ - torch.arange(0, nloc, device=nlist_loc.device) # pylint: disable=no-explicit-dtype - .reshape(1, nloc, 1) - .expand(nframes, -1, -1), - nlist_loc, - ], - dim=-1, - ) - nlist_type2 = torch.cat([atype.reshape(nframes, nloc, 1), nlist_type], dim=-1) - nnei2_mask = nlist2 != -1 - padding_mask = nlist2 == -1 - nlist2 = nlist2 * nnei2_mask - nlist_loc2 = nlist_loc2 * nnei2_mask - - # nframes x nloc x (1 + nnei2) x (1 + nnei2) - pair_mask = nnei2_mask.unsqueeze(-1) * nnei2_mask.unsqueeze(-2) - # nframes x nloc x (1 + nnei2) x (1 + nnei2) x head - attn_mask = torch.zeros( - [nframes, nloc, 1 + self.nnei, 1 + self.nnei, self.attention_heads], - device=nlist.device, - dtype=extended_coord.dtype, - ) - attn_mask.masked_fill_(padding_mask.unsqueeze(2).unsqueeze(-1), float("-inf")) - # (nframes x nloc) x head x (1 + nnei2) x (1 + nnei2) - attn_mask = ( - attn_mask.reshape( - nframes * nloc, 1 + self.nnei, 1 + self.nnei, self.attention_heads - ) - .permute(0, 3, 1, 2) - .contiguous() - ) - - # Atomic feature - # [(nframes x nloc) x (1 + nnei2) x tebd_dim] - atom_feature = torch.gather( - atype_tebd, - dim=1, - index=nlist_loc2.reshape(nframes, -1) - .unsqueeze(-1) - .expand(-1, -1, self.embed_dim), - ).reshape(nframes * nloc, 1 + self.nnei, self.embed_dim) - if self.pre_add_seq and seq_input is not None: - first_dim = seq_input.shape[0] - if first_dim == nframes * nloc: - atom_feature += seq_input - elif first_dim == nframes: - atom_feature_seq = torch.gather( - seq_input, - dim=1, - index=nlist_loc2.reshape(nframes, -1) - .unsqueeze(-1) - .expand(-1, -1, self.embed_dim), - ).reshape(nframes * nloc, 1 + self.nnei, self.embed_dim) - atom_feature += atom_feature_seq - else: - raise RuntimeError - atom_feature = atom_feature * nnei2_mask.reshape( - nframes * nloc, 1 + self.nnei, 1 - ) - - # Pair feature - # [(nframes x nloc) x (1 + nnei2)] - nlist_type2_reshape = nlist_type2.reshape(nframes * nloc, 1 + self.nnei) - # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2)] - edge_type = nlist_type2_reshape.unsqueeze(-1) * ( - self.ntypes + 1 - ) + nlist_type2_reshape.unsqueeze(-2) - # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x pair_dim] - edge_feature = self.edge_type_embedding(edge_type) - - # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x 2] - edge_type_2dim = torch.cat( - [ - nlist_type2_reshape.view(nframes * nloc, 1 + self.nnei, 1, 1).expand( - -1, -1, 1 + self.nnei, -1 - ), - nlist_type2_reshape.view(nframes * nloc, 1, 1 + self.nnei, 1).expand( - -1, 1 + self.nnei, -1, -1 - ) - + self.ntypes, - ], - dim=-1, - ) - # [(nframes x nloc) x (1 + nnei2) x 3] - coord_selected = torch.gather( - extended_coord.unsqueeze(1) - .expand(-1, nloc, -1, -1) - .reshape(nframes * nloc, nall, 3), - dim=1, - index=nlist2.reshape(nframes * nloc, 1 + self.nnei, 1).expand(-1, -1, 3), - ) - - # Update pair features (or and atomic features) with gbf features - # delta_pos: [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x 
3]. - atomic_feature, pair_feature, delta_pos = self.gaussian_encoder( - coord_selected, atom_feature, edge_type_2dim, edge_feature - ) - # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x pair_dim] - attn_bias = pair_feature - - # output: [(nframes x nloc) x (1 + nnei2) x tebd_dim] - # pair: [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x pair_dim] - output, pair = self.backbone( - atomic_feature, - pair=attn_bias, - attn_mask=attn_mask, - pair_mask=pair_mask, - atom_mask=nnei2_mask.reshape(nframes * nloc, 1 + self.nnei), - ) - - return output, pair, delta_pos, None diff --git a/deepmd/pt/model/descriptor/repformer_layer_old_impl.py b/deepmd/pt/model/descriptor/repformer_layer_old_impl.py deleted file mode 100644 index 47b20f7b03..0000000000 --- a/deepmd/pt/model/descriptor/repformer_layer_old_impl.py +++ /dev/null @@ -1,744 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Callable, -) - -import torch - -from deepmd.pt.model.network.network import ( - SimpleLinear, -) -from deepmd.pt.utils import ( - env, -) -from deepmd.pt.utils.utils import ( - ActivationFn, -) - - -def _make_nei_g1( - g1_ext: torch.Tensor, - nlist: torch.Tensor, -) -> torch.Tensor: - # nlist: nb x nloc x nnei - nb, nloc, nnei = nlist.shape - # g1_ext: nb x nall x ng1 - ng1 = g1_ext.shape[-1] - # index: nb x (nloc x nnei) x ng1 - index = nlist.reshape(nb, nloc * nnei).unsqueeze(-1).expand(-1, -1, ng1) - # gg1 : nb x (nloc x nnei) x ng1 - gg1 = torch.gather(g1_ext, dim=1, index=index) - # gg1 : nb x nloc x nnei x ng1 - gg1 = gg1.view(nb, nloc, nnei, ng1) - return gg1 - - -def _apply_nlist_mask( - gg: torch.Tensor, - nlist_mask: torch.Tensor, -) -> torch.Tensor: - # gg: nf x nloc x nnei x ng - # msk: nf x nloc x nnei - return gg.masked_fill(~nlist_mask.unsqueeze(-1), 0.0) - - -def _apply_switch(gg: torch.Tensor, sw: torch.Tensor) -> torch.Tensor: - # gg: nf x nloc x nnei x ng - # sw: nf x nloc x nnei - return gg * sw.unsqueeze(-1) - - -def _apply_h_norm( - hh: torch.Tensor, # nf x nloc x nnei x 3 -) -> torch.Tensor: - """Normalize h by the std of vector length. - do not have an idea if this is a good way. 
- """ - nf, nl, nnei, _ = hh.shape - # nf x nloc x nnei - normh = torch.linalg.norm(hh, dim=-1) - # nf x nloc - std = torch.std(normh, dim=-1) - # nf x nloc x nnei x 3 - hh = hh[:, :, :, :] / (1.0 + std[:, :, None, None]) - return hh - - -class Atten2Map(torch.nn.Module): - def __init__( - self, - ni: int, - nd: int, - nh: int, - has_gate: bool = False, # apply gate to attn map - smooth: bool = True, - attnw_shift: float = 20.0, - ): - super().__init__() - self.ni = ni - self.nd = nd - self.nh = nh - self.mapqk = SimpleLinear(ni, nd * 2 * nh, bias=False) # todo - self.has_gate = has_gate - self.smooth = smooth - self.attnw_shift = attnw_shift - - def forward( - self, - g2: torch.Tensor, # nb x nloc x nnei x ng2 - h2: torch.Tensor, # nb x nloc x nnei x 3 - nlist_mask: torch.Tensor, # nb x nloc x nnei - sw: torch.Tensor, # nb x nloc x nnei - ) -> torch.Tensor: - ( - nb, - nloc, - nnei, - _, - ) = g2.shape - nd, nh = self.nd, self.nh - # nb x nloc x nnei x nd x (nh x 2) - g2qk = self.mapqk(g2).view(nb, nloc, nnei, nd, nh * 2) - # nb x nloc x (nh x 2) x nnei x nd - g2qk = torch.permute(g2qk, (0, 1, 4, 2, 3)) - # nb x nloc x nh x nnei x nd - g2q, g2k = torch.split(g2qk, nh, dim=2) - # g2q = torch.nn.functional.normalize(g2q, dim=-1) - # g2k = torch.nn.functional.normalize(g2k, dim=-1) - # nb x nloc x nh x nnei x nnei - attnw = torch.matmul(g2q, torch.transpose(g2k, -1, -2)) / nd**0.5 - if self.has_gate: - gate = torch.matmul(h2, torch.transpose(h2, -1, -2)).unsqueeze(-3) - attnw = attnw * gate - # mask the attenmap, nb x nloc x 1 x 1 x nnei - attnw_mask = ~nlist_mask.unsqueeze(2).unsqueeze(2) - # mask the attenmap, nb x nloc x 1 x nnei x 1 - attnw_mask_c = ~nlist_mask.unsqueeze(2).unsqueeze(-1) - if self.smooth: - attnw = (attnw + self.attnw_shift) * sw[:, :, None, :, None] * sw[ - :, :, None, None, : - ] - self.attnw_shift - else: - attnw = attnw.masked_fill( - attnw_mask, - float("-inf"), - ) - attnw = torch.softmax(attnw, dim=-1) - attnw = attnw.masked_fill( - attnw_mask, - 0.0, - ) - # nb x nloc x nh x nnei x nnei - attnw = attnw.masked_fill( - attnw_mask_c, - 0.0, - ) - if self.smooth: - attnw = attnw * sw[:, :, None, :, None] * sw[:, :, None, None, :] - # nb x nloc x nnei x nnei - h2h2t = torch.matmul(h2, torch.transpose(h2, -1, -2)) / 3.0**0.5 - # nb x nloc x nh x nnei x nnei - ret = attnw * h2h2t[:, :, None, :, :] - # ret = torch.softmax(g2qk, dim=-1) - # nb x nloc x nnei x nnei x nh - ret = torch.permute(ret, (0, 1, 3, 4, 2)) - return ret - - -class Atten2MultiHeadApply(torch.nn.Module): - def __init__( - self, - ni: int, - nh: int, - ): - super().__init__() - self.ni = ni - self.nh = nh - self.mapv = SimpleLinear(ni, ni * nh, bias=False) - self.head_map = SimpleLinear(ni * nh, ni) - - def forward( - self, - AA: torch.Tensor, # nf x nloc x nnei x nnei x nh - g2: torch.Tensor, # nf x nloc x nnei x ng2 - ) -> torch.Tensor: - nf, nloc, nnei, ng2 = g2.shape - nh = self.nh - # nf x nloc x nnei x ng2 x nh - g2v = self.mapv(g2).view(nf, nloc, nnei, ng2, nh) - # nf x nloc x nh x nnei x ng2 - g2v = torch.permute(g2v, (0, 1, 4, 2, 3)) - # g2v = torch.nn.functional.normalize(g2v, dim=-1) - # nf x nloc x nh x nnei x nnei - AA = torch.permute(AA, (0, 1, 4, 2, 3)) - # nf x nloc x nh x nnei x ng2 - ret = torch.matmul(AA, g2v) - # nf x nloc x nnei x ng2 x nh - ret = torch.permute(ret, (0, 1, 3, 4, 2)).reshape(nf, nloc, nnei, (ng2 * nh)) - # nf x nloc x nnei x ng2 - return self.head_map(ret) - - -class Atten2EquiVarApply(torch.nn.Module): - def __init__( - self, - ni: int, - nh: int, - ): - 
super().__init__() - self.ni = ni - self.nh = nh - self.head_map = SimpleLinear(nh, 1, bias=False) - - def forward( - self, - AA: torch.Tensor, # nf x nloc x nnei x nnei x nh - h2: torch.Tensor, # nf x nloc x nnei x 3 - ) -> torch.Tensor: - nf, nloc, nnei, _ = h2.shape - nh = self.nh - # nf x nloc x nh x nnei x nnei - AA = torch.permute(AA, (0, 1, 4, 2, 3)) - h2m = torch.unsqueeze(h2, dim=2) - # nf x nloc x nh x nnei x 3 - h2m = torch.tile(h2m, [1, 1, nh, 1, 1]) - # nf x nloc x nh x nnei x 3 - ret = torch.matmul(AA, h2m) - # nf x nloc x nnei x 3 x nh - ret = torch.permute(ret, (0, 1, 3, 4, 2)).view(nf, nloc, nnei, 3, nh) - # nf x nloc x nnei x 3 - return torch.squeeze(self.head_map(ret), dim=-1) - - -class LocalAtten(torch.nn.Module): - def __init__( - self, - ni: int, - nd: int, - nh: int, - smooth: bool = True, - attnw_shift: float = 20.0, - ): - super().__init__() - self.ni = ni - self.nd = nd - self.nh = nh - self.mapq = SimpleLinear(ni, nd * 1 * nh, bias=False) - self.mapkv = SimpleLinear(ni, (nd + ni) * nh, bias=False) - self.head_map = SimpleLinear(ni * nh, ni) - self.smooth = smooth - self.attnw_shift = attnw_shift - - def forward( - self, - g1: torch.Tensor, # nb x nloc x ng1 - gg1: torch.Tensor, # nb x nloc x nnei x ng1 - nlist_mask: torch.Tensor, # nb x nloc x nnei - sw: torch.Tensor, # nb x nloc x nnei - ) -> torch.Tensor: - nb, nloc, nnei = nlist_mask.shape - ni, nd, nh = self.ni, self.nd, self.nh - assert ni == g1.shape[-1] - assert ni == gg1.shape[-1] - # nb x nloc x nd x nh - g1q = self.mapq(g1).view(nb, nloc, nd, nh) - # nb x nloc x nh x nd - g1q = torch.permute(g1q, (0, 1, 3, 2)) - # nb x nloc x nnei x (nd+ni) x nh - gg1kv = self.mapkv(gg1).view(nb, nloc, nnei, nd + ni, nh) - gg1kv = torch.permute(gg1kv, (0, 1, 4, 2, 3)) - # nb x nloc x nh x nnei x nd, nb x nloc x nh x nnei x ng1 - gg1k, gg1v = torch.split(gg1kv, [nd, ni], dim=-1) - - # nb x nloc x nh x 1 x nnei - attnw = torch.matmul(g1q.unsqueeze(-2), torch.transpose(gg1k, -1, -2)) / nd**0.5 - # nb x nloc x nh x nnei - attnw = attnw.squeeze(-2) - # mask the attenmap, nb x nloc x 1 x nnei - attnw_mask = ~nlist_mask.unsqueeze(-2) - # nb x nloc x nh x nnei - if self.smooth: - attnw = (attnw + self.attnw_shift) * sw.unsqueeze(-2) - self.attnw_shift - else: - attnw = attnw.masked_fill( - attnw_mask, - float("-inf"), - ) - attnw = torch.softmax(attnw, dim=-1) - attnw = attnw.masked_fill( - attnw_mask, - 0.0, - ) - if self.smooth: - attnw = attnw * sw.unsqueeze(-2) - - # nb x nloc x nh x ng1 - ret = ( - torch.matmul(attnw.unsqueeze(-2), gg1v).squeeze(-2).view(nb, nloc, nh * ni) - ) - # nb x nloc x ng1 - ret = self.head_map(ret) - return ret - - -class RepformerLayer(torch.nn.Module): - def __init__( - self, - rcut, - rcut_smth, - sel: int, - ntypes: int, - g1_dim=128, - g2_dim=16, - axis_neuron: int = 4, - update_chnnl_2: bool = True, - do_bn_mode: str = "no", - bn_momentum: float = 0.1, - update_g1_has_conv: bool = True, - update_g1_has_drrd: bool = True, - update_g1_has_grrg: bool = True, - update_g1_has_attn: bool = True, - update_g2_has_g1g1: bool = True, - update_g2_has_attn: bool = True, - update_h2: bool = False, - attn1_hidden: int = 64, - attn1_nhead: int = 4, - attn2_hidden: int = 16, - attn2_nhead: int = 4, - attn2_has_gate: bool = False, - activation_function: str = "tanh", - update_style: str = "res_avg", - set_davg_zero: bool = True, # TODO - smooth: bool = True, - ): - super().__init__() - self.epsilon = 1e-4 # protection of 1./nnei - self.rcut = rcut - self.rcut_smth = rcut_smth - self.ntypes = ntypes - sel = 
[sel] if isinstance(sel, int) else sel - self.nnei = sum(sel) - assert len(sel) == 1 - self.sel = torch.tensor(sel, device=env.DEVICE) # pylint: disable=no-explicit-dtype - self.sec = self.sel - self.axis_neuron = axis_neuron - self.set_davg_zero = set_davg_zero - self.do_bn_mode = do_bn_mode - self.bn_momentum = bn_momentum - self.act = ActivationFn(activation_function) - self.update_g1_has_grrg = update_g1_has_grrg - self.update_g1_has_drrd = update_g1_has_drrd - self.update_g1_has_conv = update_g1_has_conv - self.update_g1_has_attn = update_g1_has_attn - self.update_chnnl_2 = update_chnnl_2 - self.update_g2_has_g1g1 = update_g2_has_g1g1 if self.update_chnnl_2 else False - self.update_g2_has_attn = update_g2_has_attn if self.update_chnnl_2 else False - self.update_h2 = update_h2 if self.update_chnnl_2 else False - del update_g2_has_g1g1, update_g2_has_attn, update_h2 - self.update_style = update_style - self.smooth = smooth - self.g1_dim = g1_dim - self.g2_dim = g2_dim - - g1_in_dim = self.cal_1_dim(g1_dim, g2_dim, self.axis_neuron) - self.linear1 = SimpleLinear(g1_in_dim, g1_dim) - self.linear2 = None - self.proj_g1g2 = None - self.proj_g1g1g2 = None - self.attn2g_map = None - self.attn2_mh_apply = None - self.attn2_lm = None - self.attn2h_map = None - self.attn2_ev_apply = None - self.loc_attn = None - - if self.update_chnnl_2: - self.linear2 = SimpleLinear(g2_dim, g2_dim) - if self.update_g1_has_conv: - self.proj_g1g2 = SimpleLinear(g1_dim, g2_dim, bias=False) - if self.update_g2_has_g1g1: - self.proj_g1g1g2 = SimpleLinear(g1_dim, g2_dim, bias=False) - if self.update_g2_has_attn: - self.attn2g_map = Atten2Map( - g2_dim, attn2_hidden, attn2_nhead, attn2_has_gate, self.smooth - ) - self.attn2_mh_apply = Atten2MultiHeadApply(g2_dim, attn2_nhead) - self.attn2_lm = torch.nn.LayerNorm( - g2_dim, - elementwise_affine=True, - device=env.DEVICE, - dtype=env.GLOBAL_PT_FLOAT_PRECISION, - ) - if self.update_h2: - self.attn2h_map = Atten2Map( - g2_dim, attn2_hidden, attn2_nhead, attn2_has_gate, self.smooth - ) - self.attn2_ev_apply = Atten2EquiVarApply(g2_dim, attn2_nhead) - if self.update_g1_has_attn: - self.loc_attn = LocalAtten(g1_dim, attn1_hidden, attn1_nhead, self.smooth) - - if self.do_bn_mode == "uniform": - self.bn1 = self._bn_layer() - self.bn2 = self._bn_layer() - elif self.do_bn_mode == "component": - self.bn1 = self._bn_layer(nf=g1_dim) - self.bn2 = self._bn_layer(nf=g2_dim) - elif self.do_bn_mode == "no": - self.bn1, self.bn2 = None, None - else: - raise RuntimeError(f"unknown bn_mode {self.do_bn_mode}") - - def cal_1_dim(self, g1d: int, g2d: int, ax: int) -> int: - ret = g1d - if self.update_g1_has_grrg: - ret += g2d * ax - if self.update_g1_has_drrd: - ret += g1d * ax - if self.update_g1_has_conv: - ret += g2d - return ret - - def _update_h2( - self, - g2: torch.Tensor, - h2: torch.Tensor, - nlist_mask: torch.Tensor, - sw: torch.Tensor, - ) -> torch.Tensor: - assert self.attn2h_map is not None - assert self.attn2_ev_apply is not None - nb, nloc, nnei, _ = g2.shape - # # nb x nloc x nnei x nh2 - # h2_1 = self.attn2_ev_apply(AA, h2) - # h2_update.append(h2_1) - # nb x nloc x nnei x nnei x nh - AAh = self.attn2h_map(g2, h2, nlist_mask, sw) - # nb x nloc x nnei x nh2 - h2_1 = self.attn2_ev_apply(AAh, h2) - return h2_1 - - def _update_g1_conv( - self, - gg1: torch.Tensor, - g2: torch.Tensor, - nlist_mask: torch.Tensor, - sw: torch.Tensor, - ) -> torch.Tensor: - assert self.proj_g1g2 is not None - nb, nloc, nnei, _ = g2.shape - ng1 = gg1.shape[-1] - ng2 = g2.shape[-1] - # gg1 : nb x nloc 
x nnei x ng2 - gg1 = self.proj_g1g2(gg1).view(nb, nloc, nnei, ng2) - # nb x nloc x nnei x ng2 - gg1 = _apply_nlist_mask(gg1, nlist_mask) - if not self.smooth: - # normalized by number of neighbors, not smooth - # nb x nloc x 1 - invnnei = 1.0 / ( - self.epsilon + torch.sum(nlist_mask.type_as(gg1), dim=-1) - ).unsqueeze(-1) - else: - gg1 = _apply_switch(gg1, sw) - invnnei = (1.0 / float(nnei)) * torch.ones( - (nb, nloc, 1), dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=gg1.device - ) - # nb x nloc x ng2 - g1_11 = torch.sum(g2 * gg1, dim=2) * invnnei - return g1_11 - - def _cal_h2g2( - self, - g2: torch.Tensor, - h2: torch.Tensor, - nlist_mask: torch.Tensor, - sw: torch.Tensor, - ) -> torch.Tensor: - # g2: nf x nloc x nnei x ng2 - # h2: nf x nloc x nnei x 3 - # msk: nf x nloc x nnei - nb, nloc, nnei, _ = g2.shape - ng2 = g2.shape[-1] - # nb x nloc x nnei x ng2 - g2 = _apply_nlist_mask(g2, nlist_mask) - if not self.smooth: - # nb x nloc - invnnei = 1.0 / (self.epsilon + torch.sum(nlist_mask.type_as(g2), dim=-1)) - # nb x nloc x 1 x 1 - invnnei = invnnei.unsqueeze(-1).unsqueeze(-1) - else: - g2 = _apply_switch(g2, sw) - invnnei = (1.0 / float(nnei)) * torch.ones( - (nb, nloc, 1, 1), dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=g2.device - ) - # nb x nloc x 3 x ng2 - h2g2 = torch.matmul(torch.transpose(h2, -1, -2), g2) * invnnei - return h2g2 - - def _cal_grrg(self, h2g2: torch.Tensor) -> torch.Tensor: - # nb x nloc x 3 x ng2 - nb, nloc, _, ng2 = h2g2.shape - # nb x nloc x 3 x axis - h2g2m = torch.split(h2g2, self.axis_neuron, dim=-1)[0] - # nb x nloc x axis x ng2 - g1_13 = torch.matmul(torch.transpose(h2g2m, -1, -2), h2g2) / (3.0**1) - # nb x nloc x (axisxng2) - g1_13 = g1_13.view(nb, nloc, self.axis_neuron * ng2) - return g1_13 - - def _update_g1_grrg( - self, - g2: torch.Tensor, - h2: torch.Tensor, - nlist_mask: torch.Tensor, - sw: torch.Tensor, - ) -> torch.Tensor: - # g2: nf x nloc x nnei x ng2 - # h2: nf x nloc x nnei x 3 - # msk: nf x nloc x nnei - nb, nloc, nnei, _ = g2.shape - ng2 = g2.shape[-1] - # nb x nloc x 3 x ng2 - h2g2 = self._cal_h2g2(g2, h2, nlist_mask, sw) - # nb x nloc x (axisxng2) - g1_13 = self._cal_grrg(h2g2) - return g1_13 - - def _update_g2_g1g1( - self, - g1: torch.Tensor, # nb x nloc x ng1 - gg1: torch.Tensor, # nb x nloc x nnei x ng1 - nlist_mask: torch.Tensor, # nb x nloc x nnei - sw: torch.Tensor, # nb x nloc x nnei - ) -> torch.Tensor: - ret = g1.unsqueeze(-2) * gg1 - # nb x nloc x nnei x ng1 - ret = _apply_nlist_mask(ret, nlist_mask) - if self.smooth: - ret = _apply_switch(ret, sw) - return ret - - def _apply_bn( - self, - bn_number: int, - gg: torch.Tensor, - ): - if self.do_bn_mode == "uniform": - return self._apply_bn_uni(bn_number, gg) - elif self.do_bn_mode == "component": - return self._apply_bn_comp(bn_number, gg) - else: - return gg - - def _apply_nb_1(self, bn_number: int, gg: torch.Tensor) -> torch.Tensor: - nb, nl, nf = gg.shape - gg = gg.view([nb, 1, nl * nf]) - if bn_number == 1: - assert self.bn1 is not None - gg = self.bn1(gg) - else: - assert self.bn2 is not None - gg = self.bn2(gg) - return gg.view([nb, nl, nf]) - - def _apply_nb_2( - self, - bn_number: int, - gg: torch.Tensor, - ) -> torch.Tensor: - nb, nl, nnei, nf = gg.shape - gg = gg.view([nb, 1, nl * nnei * nf]) - if bn_number == 1: - assert self.bn1 is not None - gg = self.bn1(gg) - else: - assert self.bn2 is not None - gg = self.bn2(gg) - return gg.view([nb, nl, nnei, nf]) - - def _apply_bn_uni( - self, - bn_number: int, - gg: torch.Tensor, - mode: str = "1", - ) -> torch.Tensor: - if 
len(gg.shape) == 3: - return self._apply_nb_1(bn_number, gg) - elif len(gg.shape) == 4: - return self._apply_nb_2(bn_number, gg) - else: - raise RuntimeError(f"unsupported input shape {gg.shape}") - - def _apply_bn_comp( - self, - bn_number: int, - gg: torch.Tensor, - ) -> torch.Tensor: - ss = gg.shape - nf = ss[-1] - gg = gg.view([-1, nf]) - if bn_number == 1: - assert self.bn1 is not None - gg = self.bn1(gg).view(ss) - else: - assert self.bn2 is not None - gg = self.bn2(gg).view(ss) - return gg - - def forward( - self, - g1_ext: torch.Tensor, # nf x nall x ng1 - g2: torch.Tensor, # nf x nloc x nnei x ng2 - h2: torch.Tensor, # nf x nloc x nnei x 3 - nlist: torch.Tensor, # nf x nloc x nnei - nlist_mask: torch.Tensor, # nf x nloc x nnei - sw: torch.Tensor, # switch func, nf x nloc x nnei - ): - """ - Parameters - ---------- - g1_ext : nf x nall x ng1 extended single-atom chanel - g2 : nf x nloc x nnei x ng2 pair-atom channel, invariant - h2 : nf x nloc x nnei x 3 pair-atom channel, equivariant - nlist : nf x nloc x nnei neighbor list (padded neis are set to 0) - nlist_mask : nf x nloc x nnei masks of the neighbor list. real nei 1 otherwise 0 - sw : nf x nloc x nnei switch function - - Returns - ------- - g1: nf x nloc x ng1 updated single-atom chanel - g2: nf x nloc x nnei x ng2 updated pair-atom channel, invariant - h2: nf x nloc x nnei x 3 updated pair-atom channel, equivariant - """ - cal_gg1 = ( - self.update_g1_has_drrd - or self.update_g1_has_conv - or self.update_g1_has_attn - or self.update_g2_has_g1g1 - ) - - nb, nloc, nnei, _ = g2.shape - nall = g1_ext.shape[1] - g1, _ = torch.split(g1_ext, [nloc, nall - nloc], dim=1) - assert (nb, nloc) == g1.shape[:2] - assert (nb, nloc, nnei) == h2.shape[:3] - ng1 = g1.shape[-1] - ng2 = g2.shape[-1] - nh2 = h2.shape[-1] - - if self.bn1 is not None: - g1 = self._apply_bn(1, g1) - if self.bn2 is not None: - g2 = self._apply_bn(2, g2) - if self.update_h2: - h2 = _apply_h_norm(h2) - - g2_update: list[torch.Tensor] = [g2] - h2_update: list[torch.Tensor] = [h2] - g1_update: list[torch.Tensor] = [g1] - g1_mlp: list[torch.Tensor] = [g1] - - if cal_gg1: - gg1 = _make_nei_g1(g1_ext, nlist) - else: - gg1 = None - - if self.update_chnnl_2: - # nb x nloc x nnei x ng2 - assert self.linear2 is not None - g2_1 = self.act(self.linear2(g2)) - g2_update.append(g2_1) - - if self.update_g2_has_g1g1: - assert gg1 is not None - assert self.proj_g1g1g2 is not None - g2_update.append( - self.proj_g1g1g2(self._update_g2_g1g1(g1, gg1, nlist_mask, sw)) - ) - - if self.update_g2_has_attn: - assert self.attn2g_map is not None - assert self.attn2_mh_apply is not None - assert self.attn2_lm is not None - # nb x nloc x nnei x nnei x nh - AAg = self.attn2g_map(g2, h2, nlist_mask, sw) - # nb x nloc x nnei x ng2 - g2_2 = self.attn2_mh_apply(AAg, g2) - g2_2 = self.attn2_lm(g2_2) - g2_update.append(g2_2) - - if self.update_h2: - h2_update.append(self._update_h2(g2, h2, nlist_mask, sw)) - - if self.update_g1_has_conv: - assert gg1 is not None - g1_mlp.append(self._update_g1_conv(gg1, g2, nlist_mask, sw)) - - if self.update_g1_has_grrg: - g1_mlp.append(self._update_g1_grrg(g2, h2, nlist_mask, sw)) - - if self.update_g1_has_drrd: - assert gg1 is not None - g1_mlp.append(self._update_g1_grrg(gg1, h2, nlist_mask, sw)) - - # nb x nloc x [ng1+ng2+(axisxng2)+(axisxng1)] - # conv grrg drrd - g1_1 = self.act(self.linear1(torch.cat(g1_mlp, dim=-1))) - g1_update.append(g1_1) - - if self.update_g1_has_attn: - assert gg1 is not None - assert self.loc_attn is not None - 
g1_update.append(self.loc_attn(g1, gg1, nlist_mask, sw)) - - # update - if self.update_chnnl_2: - g2_new = self.list_update(g2_update) - h2_new = self.list_update(h2_update) - else: - g2_new, h2_new = g2, h2 - g1_new = self.list_update(g1_update) - return g1_new, g2_new, h2_new - - @torch.jit.export - def list_update_res_avg( - self, - update_list: list[torch.Tensor], - ) -> torch.Tensor: - nitem = len(update_list) - uu = update_list[0] - for ii in range(1, nitem): - uu = uu + update_list[ii] - return uu / (float(nitem) ** 0.5) - - @torch.jit.export - def list_update_res_incr(self, update_list: list[torch.Tensor]) -> torch.Tensor: - nitem = len(update_list) - uu = update_list[0] - scale = 1.0 / (float(nitem - 1) ** 0.5) if nitem > 1 else 0.0 - for ii in range(1, nitem): - uu = uu + scale * update_list[ii] - return uu - - @torch.jit.export - def list_update(self, update_list: list[torch.Tensor]) -> torch.Tensor: - if self.update_style == "res_avg": - return self.list_update_res_avg(update_list) - elif self.update_style == "res_incr": - return self.list_update_res_incr(update_list) - else: - raise RuntimeError(f"unknown update style {self.update_style}") - - def _bn_layer( - self, - nf: int = 1, - ) -> Callable: - return torch.nn.BatchNorm1d( - nf, - eps=1e-5, - momentum=self.bn_momentum, - affine=False, - track_running_stats=True, - device=env.DEVICE, - dtype=env.GLOBAL_PT_FLOAT_PRECISION, - ) diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py index 64965825a0..ad4ead4d74 100644 --- a/deepmd/pt/model/descriptor/repformers.py +++ b/deepmd/pt/model/descriptor/repformers.py @@ -41,7 +41,6 @@ from .repformer_layer import ( RepformerLayer, ) -from .repformer_layer_old_impl import RepformerLayer as RepformerLayerOld if not hasattr(torch.ops.deepmd, "border_op"): @@ -106,7 +105,6 @@ def __init__( use_sqrt_nnei: bool = True, g1_out_conv: bool = True, g1_out_mlp: bool = True, - old_impl: bool = False, ): r""" The repformer descriptor block. 
@@ -240,78 +238,48 @@ def __init__( self.ln_eps = ln_eps self.epsilon = 1e-4 self.seed = seed - self.old_impl = old_impl self.g2_embd = MLPLayer( 1, self.g2_dim, precision=precision, seed=child_seed(seed, 0) ) layers = [] for ii in range(nlayers): - if self.old_impl: - layers.append( - RepformerLayerOld( - self.rcut, - self.rcut_smth, - self.sel, - self.ntypes, - self.g1_dim, - self.g2_dim, - axis_neuron=self.axis_neuron, - update_chnnl_2=(ii != nlayers - 1), - update_g1_has_conv=self.update_g1_has_conv, - update_g1_has_drrd=self.update_g1_has_drrd, - update_g1_has_grrg=self.update_g1_has_grrg, - update_g1_has_attn=self.update_g1_has_attn, - update_g2_has_g1g1=self.update_g2_has_g1g1, - update_g2_has_attn=self.update_g2_has_attn, - update_h2=self.update_h2, - attn1_hidden=self.attn1_hidden, - attn1_nhead=self.attn1_nhead, - attn2_has_gate=self.attn2_has_gate, - attn2_hidden=self.attn2_hidden, - attn2_nhead=self.attn2_nhead, - activation_function=self.activation_function, - update_style=self.update_style, - smooth=self.smooth, - ) - ) - else: - layers.append( - RepformerLayer( - self.rcut, - self.rcut_smth, - self.sel, - self.ntypes, - self.g1_dim, - self.g2_dim, - axis_neuron=self.axis_neuron, - update_chnnl_2=(ii != nlayers - 1), - update_g1_has_conv=self.update_g1_has_conv, - update_g1_has_drrd=self.update_g1_has_drrd, - update_g1_has_grrg=self.update_g1_has_grrg, - update_g1_has_attn=self.update_g1_has_attn, - update_g2_has_g1g1=self.update_g2_has_g1g1, - update_g2_has_attn=self.update_g2_has_attn, - update_h2=self.update_h2, - attn1_hidden=self.attn1_hidden, - attn1_nhead=self.attn1_nhead, - attn2_has_gate=self.attn2_has_gate, - attn2_hidden=self.attn2_hidden, - attn2_nhead=self.attn2_nhead, - activation_function=self.activation_function, - update_style=self.update_style, - update_residual=self.update_residual, - update_residual_init=self.update_residual_init, - smooth=self.smooth, - trainable_ln=self.trainable_ln, - ln_eps=self.ln_eps, - precision=precision, - use_sqrt_nnei=self.use_sqrt_nnei, - g1_out_conv=self.g1_out_conv, - g1_out_mlp=self.g1_out_mlp, - seed=child_seed(child_seed(seed, 1), ii), - ) + layers.append( + RepformerLayer( + self.rcut, + self.rcut_smth, + self.sel, + self.ntypes, + self.g1_dim, + self.g2_dim, + axis_neuron=self.axis_neuron, + update_chnnl_2=(ii != nlayers - 1), + update_g1_has_conv=self.update_g1_has_conv, + update_g1_has_drrd=self.update_g1_has_drrd, + update_g1_has_grrg=self.update_g1_has_grrg, + update_g1_has_attn=self.update_g1_has_attn, + update_g2_has_g1g1=self.update_g2_has_g1g1, + update_g2_has_attn=self.update_g2_has_attn, + update_h2=self.update_h2, + attn1_hidden=self.attn1_hidden, + attn1_nhead=self.attn1_nhead, + attn2_has_gate=self.attn2_has_gate, + attn2_hidden=self.attn2_hidden, + attn2_nhead=self.attn2_nhead, + activation_function=self.activation_function, + update_style=self.update_style, + update_residual=self.update_residual, + update_residual_init=self.update_residual_init, + smooth=self.smooth, + trainable_ln=self.trainable_ln, + ln_eps=self.ln_eps, + precision=precision, + use_sqrt_nnei=self.use_sqrt_nnei, + g1_out_conv=self.g1_out_conv, + g1_out_mlp=self.g1_out_mlp, + seed=child_seed(child_seed(seed, 1), ii), ) + ) self.layers = torch.nn.ModuleList(layers) wanted_shape = (self.ntypes, self.nnei, 4) diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py index 1b51acfa21..e939a2541b 100644 --- a/deepmd/pt/model/descriptor/se_a.py +++ b/deepmd/pt/model/descriptor/se_a.py @@ -55,9 +55,6 @@ EmbeddingNet, 
NetworkCollection, ) -from deepmd.pt.model.network.network import ( - TypeFilter, -) from deepmd.pt.utils.exclude_mask import ( PairExcludeMask, ) @@ -83,7 +80,6 @@ def __init__( resnet_dt: bool = False, exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, - old_impl: bool = False, type_one_side: bool = True, trainable: bool = True, seed: Optional[Union[int, list[int]]] = None, @@ -109,7 +105,6 @@ def __init__( resnet_dt=resnet_dt, exclude_types=exclude_types, env_protection=env_protection, - old_impl=old_impl, type_one_side=type_one_side, trainable=trainable, seed=seed, @@ -385,7 +380,6 @@ def __init__( resnet_dt: bool = False, exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, - old_impl: bool = False, type_one_side: bool = True, trainable: bool = True, seed: Optional[Union[int, list[int]]] = None, @@ -411,7 +405,6 @@ def __init__( self.precision = precision self.prec = PRECISION_DICT[self.precision] self.resnet_dt = resnet_dt - self.old_impl = old_impl self.env_protection = env_protection self.ntypes = len(sel) self.type_one_side = type_one_side @@ -431,39 +424,23 @@ def __init__( stddev = torch.ones(wanted_shape, dtype=self.prec, device=env.DEVICE) self.register_buffer("mean", mean) self.register_buffer("stddev", stddev) - self.filter_layers_old = None - self.filter_layers = None - - if self.old_impl: - if not self.type_one_side: - raise ValueError( - "The old implementation does not support type_one_side=False." - ) - filter_layers = [] - # TODO: remove - start_index = 0 - for type_i in range(self.ntypes): - one = TypeFilter(start_index, sel[type_i], self.filter_neuron) - filter_layers.append(one) - start_index += sel[type_i] - self.filter_layers_old = torch.nn.ModuleList(filter_layers) - else: - ndim = 1 if self.type_one_side else 2 - filter_layers = NetworkCollection( - ndim=ndim, ntypes=len(sel), network_type="embedding_network" + + ndim = 1 if self.type_one_side else 2 + filter_layers = NetworkCollection( + ndim=ndim, ntypes=len(sel), network_type="embedding_network" + ) + for ii, embedding_idx in enumerate( + itertools.product(range(self.ntypes), repeat=ndim) + ): + filter_layers[embedding_idx] = EmbeddingNet( + 1, + self.filter_neuron, + activation_function=self.activation_function, + precision=self.precision, + resnet_dt=self.resnet_dt, + seed=child_seed(self.seed, ii), ) - for ii, embedding_idx in enumerate( - itertools.product(range(self.ntypes), repeat=ndim) - ): - filter_layers[embedding_idx] = EmbeddingNet( - 1, - self.filter_neuron, - activation_function=self.activation_function, - precision=self.precision, - resnet_dt=self.resnet_dt, - seed=child_seed(self.seed, ii), - ) - self.filter_layers = filter_layers + self.filter_layers = filter_layers self.stats = None # set trainable for param in self.parameters(): @@ -632,66 +609,49 @@ def forward( protection=self.env_protection, ) - if self.old_impl: - assert self.filter_layers_old is not None - dmatrix = dmatrix.view( - -1, self.ndescrpt - ) # shape is [nframes*nall, self.ndescrpt] - xyz_scatter = torch.empty( # pylint: disable=no-explicit-dtype - 1, - device=env.DEVICE, - ) - ret = self.filter_layers_old[0](dmatrix) - xyz_scatter = ret - for ii, transform in enumerate(self.filter_layers_old[1:]): - # shape is [nframes*nall, 4, self.filter_neuron[-1]] - ret = transform.forward(dmatrix) - xyz_scatter = xyz_scatter + ret - else: - assert self.filter_layers is not None - dmatrix = dmatrix.view(-1, self.nnei, 4) - dmatrix = dmatrix.to(dtype=self.prec) - nfnl = dmatrix.shape[0] - # 
pre-allocate a shape to pass jit - xyz_scatter = torch.zeros( - [nfnl, 4, self.filter_neuron[-1]], - dtype=self.prec, - device=extended_coord.device, - ) - # nfnl x nnei - exclude_mask = self.emask(nlist, extended_atype).view(nfnl, self.nnei) - for embedding_idx, ll in enumerate(self.filter_layers.networks): - if self.type_one_side: - ii = embedding_idx - # torch.jit is not happy with slice(None) - # ti_mask = torch.ones(nfnl, dtype=torch.bool, device=dmatrix.device) - # applying a mask seems to cause performance degradation - ti_mask = None - else: - # ti: center atom type, ii: neighbor type... - ii = embedding_idx // self.ntypes - ti = embedding_idx % self.ntypes - ti_mask = atype.ravel().eq(ti) - # nfnl x nt - if ti_mask is not None: - mm = exclude_mask[ti_mask, self.sec[ii] : self.sec[ii + 1]] - else: - mm = exclude_mask[:, self.sec[ii] : self.sec[ii + 1]] - # nfnl x nt x 4 - if ti_mask is not None: - rr = dmatrix[ti_mask, self.sec[ii] : self.sec[ii + 1], :] - else: - rr = dmatrix[:, self.sec[ii] : self.sec[ii + 1], :] - rr = rr * mm[:, :, None] - ss = rr[:, :, :1] - # nfnl x nt x ng - gg = ll.forward(ss) - # nfnl x 4 x ng - gr = torch.matmul(rr.permute(0, 2, 1), gg) - if ti_mask is not None: - xyz_scatter[ti_mask] += gr - else: - xyz_scatter += gr + dmatrix = dmatrix.view(-1, self.nnei, 4) + dmatrix = dmatrix.to(dtype=self.prec) + nfnl = dmatrix.shape[0] + # pre-allocate a shape to pass jit + xyz_scatter = torch.zeros( + [nfnl, 4, self.filter_neuron[-1]], + dtype=self.prec, + device=extended_coord.device, + ) + # nfnl x nnei + exclude_mask = self.emask(nlist, extended_atype).view(nfnl, self.nnei) + for embedding_idx, ll in enumerate(self.filter_layers.networks): + if self.type_one_side: + ii = embedding_idx + # torch.jit is not happy with slice(None) + # ti_mask = torch.ones(nfnl, dtype=torch.bool, device=dmatrix.device) + # applying a mask seems to cause performance degradation + ti_mask = None + else: + # ti: center atom type, ii: neighbor type... + ii = embedding_idx // self.ntypes + ti = embedding_idx % self.ntypes + ti_mask = atype.ravel().eq(ti) + # nfnl x nt + if ti_mask is not None: + mm = exclude_mask[ti_mask, self.sec[ii] : self.sec[ii + 1]] + else: + mm = exclude_mask[:, self.sec[ii] : self.sec[ii + 1]] + # nfnl x nt x 4 + if ti_mask is not None: + rr = dmatrix[ti_mask, self.sec[ii] : self.sec[ii + 1], :] + else: + rr = dmatrix[:, self.sec[ii] : self.sec[ii + 1], :] + rr = rr * mm[:, :, None] + ss = rr[:, :, :1] + # nfnl x nt x ng + gg = ll.forward(ss) + # nfnl x 4 x ng + gr = torch.matmul(rr.permute(0, 2, 1), gg) + if ti_mask is not None: + xyz_scatter[ti_mask] += gr + else: + xyz_scatter += gr xyz_scatter /= self.nnei xyz_scatter_1 = xyz_scatter.permute(0, 2, 1) diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py index c760f7330b..c028230e9b 100644 --- a/deepmd/pt/model/descriptor/se_atten.py +++ b/deepmd/pt/model/descriptor/se_atten.py @@ -26,10 +26,6 @@ MLPLayer, NetworkCollection, ) -from deepmd.pt.model.network.network import ( - NeighborWiseAttention, - TypeFilter, -) from deepmd.pt.utils import ( env, ) @@ -85,7 +81,6 @@ def __init__( ln_eps: Optional[float] = 1e-5, seed: Optional[Union[int, list[int]]] = None, type: Optional[str] = None, - old_impl: bool = False, ): r"""Construct an embedding net of type `se_atten`. 
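The retained `se_a` path above keys one `EmbeddingNet` per type tuple in a `NetworkCollection`, and `forward` decodes the flat `embedding_idx` back into types; the `se_atten` block below reuses the same pattern with `ndim=0` and a single shared network. A minimal sketch of the index convention, using a toy `ntypes` and plain Python only — no DeePMD classes — to show the two directions agree:

```python
import itertools

ntypes = 3
ndim = 2  # type_one_side=False: one embedding net per (neighbor, center) type pair

for embedding_idx, key in enumerate(itertools.product(range(ntypes), repeat=ndim)):
    # forward() recovers the same pair from the flat index:
    ii = embedding_idx // ntypes  # neighbor type
    ti = embedding_idx % ntypes  # center atom type
    assert key == (ii, ti)
```

With `type_one_side=True`, `ndim` is 1 and `embedding_idx` is simply the neighbor type, so no decoding is needed.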
@@ -182,7 +177,6 @@ def __init__( if ln_eps is None: ln_eps = 1e-5 self.ln_eps = ln_eps - self.old_impl = old_impl if isinstance(sel, int): sel = [sel] @@ -195,40 +189,22 @@ def __init__( self.ndescrpt = self.nnei * 4 # order matters, placed after the assignment of self.ntypes self.reinit_exclude(exclude_types) - if self.old_impl: - assert self.tebd_input_mode in [ - "concat" - ], "Old implementation does not support tebd_input_mode != 'concat'." - self.dpa1_attention = NeighborWiseAttention( - self.attn_layer, - self.nnei, - self.filter_neuron[-1], - self.attn_dim, - dotr=self.attn_dotr, - do_mask=self.attn_mask, - activation=self.activation_function, - scaling_factor=self.scaling_factor, - normalize=self.normalize, - temperature=self.temperature, - smooth=self.smooth, - ) - else: - self.dpa1_attention = NeighborGatedAttention( - self.attn_layer, - self.nnei, - self.filter_neuron[-1], - self.attn_dim, - dotr=self.attn_dotr, - do_mask=self.attn_mask, - scaling_factor=self.scaling_factor, - normalize=self.normalize, - temperature=self.temperature, - trainable_ln=self.trainable_ln, - ln_eps=self.ln_eps, - smooth=self.smooth, - precision=self.precision, - seed=child_seed(self.seed, 0), - ) + self.dpa1_attention = NeighborGatedAttention( + self.attn_layer, + self.nnei, + self.filter_neuron[-1], + self.attn_dim, + dotr=self.attn_dotr, + do_mask=self.attn_mask, + scaling_factor=self.scaling_factor, + normalize=self.normalize, + temperature=self.temperature, + trainable_ln=self.trainable_ln, + ln_eps=self.ln_eps, + smooth=self.smooth, + precision=self.precision, + seed=child_seed(self.seed, 0), + ) wanted_shape = (self.ntypes, self.nnei, 4) mean = torch.zeros( @@ -245,48 +221,32 @@ def __init__( else: self.embd_input_dim = 1 - self.filter_layers_old = None - self.filter_layers = None self.filter_layers_strip = None - if self.old_impl: - filter_layers = [] - one = TypeFilter( - 0, - self.nnei, - self.filter_neuron, - return_G=True, - tebd_dim=self.tebd_dim, - use_tebd=True, - tebd_mode=self.tebd_input_mode, - ) - filter_layers.append(one) - self.filter_layers_old = torch.nn.ModuleList(filter_layers) - else: - filter_layers = NetworkCollection( + filter_layers = NetworkCollection( + ndim=0, ntypes=self.ntypes, network_type="embedding_network" + ) + filter_layers[0] = EmbeddingNet( + self.embd_input_dim, + self.filter_neuron, + activation_function=self.activation_function, + precision=self.precision, + resnet_dt=self.resnet_dt, + seed=child_seed(self.seed, 1), + ) + self.filter_layers = filter_layers + if self.tebd_input_mode in ["strip"]: + filter_layers_strip = NetworkCollection( ndim=0, ntypes=self.ntypes, network_type="embedding_network" ) - filter_layers[0] = EmbeddingNet( - self.embd_input_dim, + filter_layers_strip[0] = EmbeddingNet( + self.tebd_dim_input, self.filter_neuron, activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=child_seed(self.seed, 1), + seed=child_seed(self.seed, 2), ) - self.filter_layers = filter_layers - if self.tebd_input_mode in ["strip"]: - filter_layers_strip = NetworkCollection( - ndim=0, ntypes=self.ntypes, network_type="embedding_network" - ) - filter_layers_strip[0] = EmbeddingNet( - self.tebd_dim_input, - self.filter_neuron, - activation_function=self.activation_function, - precision=self.precision, - resnet_dt=self.resnet_dt, - seed=child_seed(self.seed, 2), - ) - self.filter_layers_strip = filter_layers_strip + self.filter_layers_strip = filter_layers_strip self.stats = None def get_rcut(self) -> float: @@ 
-500,75 +460,51 @@ def forward( sw = sw.masked_fill(~nlist_mask, 0.0) # (nb x nloc) x nnei exclude_mask = exclude_mask.view(nb * nloc, nnei) - if self.old_impl: - assert self.filter_layers_old is not None - dmatrix = dmatrix.view( - -1, self.ndescrpt - ) # shape is [nframes*nall, self.ndescrpt] - gg = self.filter_layers_old[0]( - dmatrix, - atype_tebd=atype_tebd_nnei, - nlist_tebd=atype_tebd_nlist, - ) # shape is [nframes*nall, self.neei, out_size] - input_r = torch.nn.functional.normalize( - dmatrix.reshape(-1, self.nnei, 4)[:, :, 1:4], dim=-1 - ) - gg = self.dpa1_attention( - gg, nlist_mask, input_r=input_r, sw=sw - ) # shape is [nframes*nloc, self.neei, out_size] - inputs_reshape = dmatrix.view(-1, self.nnei, 4).permute( - 0, 2, 1 - ) # shape is [nframes*natoms[0], 4, self.neei] - xyz_scatter = torch.matmul( - inputs_reshape, gg - ) # shape is [nframes*natoms[0], 4, out_size] - else: - assert self.filter_layers is not None - # nfnl x nnei x 4 - dmatrix = dmatrix.view(-1, self.nnei, 4) - nfnl = dmatrix.shape[0] - # nfnl x nnei x 4 - rr = dmatrix - rr = rr * exclude_mask[:, :, None] - ss = rr[:, :, :1] - nlist_tebd = atype_tebd_nlist.reshape(nfnl, nnei, self.tebd_dim) - atype_tebd = atype_tebd_nnei.reshape(nfnl, nnei, self.tebd_dim) - if self.tebd_input_mode in ["concat"]: - if not self.type_one_side: - # nfnl x nnei x (1 + tebd_dim * 2) - ss = torch.concat([ss, nlist_tebd, atype_tebd], dim=2) - else: - # nfnl x nnei x (1 + tebd_dim) - ss = torch.concat([ss, nlist_tebd], dim=2) - # nfnl x nnei x ng - gg = self.filter_layers.networks[0](ss) - elif self.tebd_input_mode in ["strip"]: - # nfnl x nnei x ng - gg_s = self.filter_layers.networks[0](ss) - assert self.filter_layers_strip is not None - if not self.type_one_side: - # nfnl x nnei x (tebd_dim * 2) - tt = torch.concat([nlist_tebd, atype_tebd], dim=2) - else: - # nfnl x nnei x tebd_dim - tt = nlist_tebd - # nfnl x nnei x ng - gg_t = self.filter_layers_strip.networks[0](tt) - if self.smooth: - gg_t = gg_t * sw.reshape(-1, self.nnei, 1) - # nfnl x nnei x ng - gg = gg_s * gg_t + gg_s + # nfnl x nnei x 4 + dmatrix = dmatrix.view(-1, self.nnei, 4) + nfnl = dmatrix.shape[0] + # nfnl x nnei x 4 + rr = dmatrix + rr = rr * exclude_mask[:, :, None] + ss = rr[:, :, :1] + nlist_tebd = atype_tebd_nlist.reshape(nfnl, nnei, self.tebd_dim) + atype_tebd = atype_tebd_nnei.reshape(nfnl, nnei, self.tebd_dim) + if self.tebd_input_mode in ["concat"]: + if not self.type_one_side: + # nfnl x nnei x (1 + tebd_dim * 2) + ss = torch.concat([ss, nlist_tebd, atype_tebd], dim=2) + else: + # nfnl x nnei x (1 + tebd_dim) + ss = torch.concat([ss, nlist_tebd], dim=2) + # nfnl x nnei x ng + gg = self.filter_layers.networks[0](ss) + elif self.tebd_input_mode in ["strip"]: + # nfnl x nnei x ng + gg_s = self.filter_layers.networks[0](ss) + assert self.filter_layers_strip is not None + if not self.type_one_side: + # nfnl x nnei x (tebd_dim * 2) + tt = torch.concat([nlist_tebd, atype_tebd], dim=2) else: - raise NotImplementedError + # nfnl x nnei x tebd_dim + tt = nlist_tebd + # nfnl x nnei x ng + gg_t = self.filter_layers_strip.networks[0](tt) + if self.smooth: + gg_t = gg_t * sw.reshape(-1, self.nnei, 1) + # nfnl x nnei x ng + gg = gg_s * gg_t + gg_s + else: + raise NotImplementedError - input_r = torch.nn.functional.normalize( - rr.reshape(-1, self.nnei, 4)[:, :, 1:4], dim=-1 - ) - gg = self.dpa1_attention( - gg, nlist_mask, input_r=input_r, sw=sw - ) # shape is [nframes*nloc, self.neei, out_size] - # nfnl x 4 x ng - xyz_scatter = torch.matmul(rr.permute(0, 2, 1), gg) + 
input_r = torch.nn.functional.normalize( + rr.reshape(-1, self.nnei, 4)[:, :, 1:4], dim=-1 + ) + gg = self.dpa1_attention( + gg, nlist_mask, input_r=input_r, sw=sw + ) # shape is [nframes*nloc, self.neei, out_size] + # nfnl x 4 x ng + xyz_scatter = torch.matmul(rr.permute(0, 2, 1), gg) xyz_scatter = xyz_scatter / self.nnei xyz_scatter_1 = xyz_scatter.permute(0, 2, 1) rot_mat = xyz_scatter_1[:, :, 1:4] diff --git a/deepmd/pt/model/descriptor/se_atten_v2.py b/deepmd/pt/model/descriptor/se_atten_v2.py index f73ff255e6..11d783261e 100644 --- a/deepmd/pt/model/descriptor/se_atten_v2.py +++ b/deepmd/pt/model/descriptor/se_atten_v2.py @@ -71,7 +71,6 @@ def __init__( # not implemented spin=None, type: Optional[str] = None, - old_impl: bool = False, ) -> None: r"""Construct smooth version of embedding net of type `se_atten_v2`. @@ -191,7 +190,6 @@ def __init__( # not implemented spin=spin, type=type, - old_impl=old_impl, ) def serialize(self) -> dict: diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py index b873ee20b8..e82bb23dac 100644 --- a/deepmd/pt/model/descriptor/se_r.py +++ b/deepmd/pt/model/descriptor/se_r.py @@ -68,7 +68,6 @@ def __init__( resnet_dt: bool = False, exclude_types: list[tuple[int, int]] = [], env_protection: float = 0.0, - old_impl: bool = False, trainable: bool = True, seed: Optional[Union[int, list[int]]] = None, type_map: Optional[list[str]] = None, @@ -84,7 +83,6 @@ def __init__( self.precision = precision self.prec = PRECISION_DICT[self.precision] self.resnet_dt = resnet_dt - self.old_impl = False # this does not support old implementation. self.exclude_types = exclude_types self.ntypes = len(sel) self.type_map = type_map diff --git a/deepmd/pt/model/network/network.py b/deepmd/pt/model/network/network.py index ef50274b03..12e1eabf22 100644 --- a/deepmd/pt/model/network/network.py +++ b/deepmd/pt/model/network/network.py @@ -26,10 +26,6 @@ except ImportError: from torch.jit import Final -from functools import ( - partial, -) - import torch.utils.checkpoint from deepmd.dpmodel.utils.type_embed import ( @@ -48,247 +44,6 @@ def Tensor(*shape): return torch.empty(shape, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE) -class Dropout(nn.Module): - def __init__(self, p): - super().__init__() - self.p = p - - def forward(self, x, inplace: bool = False): - if self.p > 0 and self.training: - return F.dropout(x, p=self.p, training=True, inplace=inplace) - else: - return x - - -class Identity(nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return x - - -class DropPath(torch.nn.Module): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" - - def __init__(self, prob=None): - super().__init__() - self.drop_prob = prob - - def forward(self, x): - if self.drop_prob == 0.0 or not self.training: - return x - keep_prob = 1 - self.drop_prob - shape = (x.shape[0],) + (1,) * ( - x.ndim - 1 - ) # work with diff dim tensors, not just 2D ConvNets - random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) - random_tensor.floor_() # binarize - output = x.div(keep_prob) * random_tensor - return output - - def extra_repr(self) -> str: - return f"prob={self.drop_prob}" - - -def softmax_dropout( - input_x, dropout_prob, is_training=True, mask=None, bias=None, inplace=True -): - input_x = input_x.contiguous() - if not inplace: - input_x = input_x.clone() - if mask is not None: - input_x += mask - if bias is not None: - input_x += bias - return 
F.dropout(F.softmax(input_x, dim=-1), p=dropout_prob, training=is_training) - - -def checkpoint_sequential( - functions, - input_x, - enabled=True, -): - def wrap_tuple(a): - return (a,) if type(a) is not tuple else a - - def exec(func, a): - return wrap_tuple(func(*a)) - - def get_wrap_exec(func): - def wrap_exec(*a): - return exec(func, a) - - return wrap_exec - - input_x = wrap_tuple(input_x) - - is_grad_enabled = torch.is_grad_enabled() - - if enabled and is_grad_enabled: - for func in functions: - input_x = torch.utils.checkpoint.checkpoint(get_wrap_exec(func), *input_x) - else: - for func in functions: - input_x = exec(func, input_x) - return input_x - - -class ResidualLinear(nn.Module): - resnet: Final[int] - - def __init__(self, num_in, num_out, bavg=0.0, stddev=1.0, resnet_dt=False): - """Construct a residual linear layer. - - Args: - - num_in: Width of input tensor. - - num_out: Width of output tensor. - - resnet_dt: Using time-step in the ResNet construction. - """ - super().__init__() - self.num_in = num_in - self.num_out = num_out - self.resnet = resnet_dt - - self.matrix = nn.Parameter(data=Tensor(num_in, num_out)) - nn.init.normal_(self.matrix.data, std=stddev / np.sqrt(num_out + num_in)) - self.bias = nn.Parameter(data=Tensor(1, num_out)) - nn.init.normal_(self.bias.data, mean=bavg, std=stddev) - if self.resnet: - self.idt = nn.Parameter(data=Tensor(1, num_out)) - nn.init.normal_(self.idt.data, mean=1.0, std=0.001) - - def forward(self, inputs): - """Return X ?+ X*W+b.""" - xw_plus_b = torch.matmul(inputs, self.matrix) + self.bias - hidden = torch.tanh(xw_plus_b) - if self.resnet: - hidden = hidden * self.idt - if self.num_in == self.num_out: - return inputs + hidden - elif self.num_in * 2 == self.num_out: - return torch.cat([inputs, inputs], dim=1) + hidden - else: - return hidden - - -class TypeFilter(nn.Module): - use_tebd: Final[bool] - tebd_mode: Final[str] - - def __init__( - self, - offset, - length, - neuron, - return_G=False, - tebd_dim=0, - use_tebd=False, - tebd_mode="concat", - ): - """Construct a filter on the given element as neighbor. - - Args: - - offset: Element offset in the descriptor matrix. - - length: Atom count of this element. - - neuron: Number of neurons in each hidden layers of the embedding net. - """ - super().__init__() - self.offset = offset - self.length = length - self.tebd_dim = tebd_dim - self.use_tebd = use_tebd - self.tebd_mode = tebd_mode - supported_tebd_mode = ["concat", "dot", "dot_residual_s", "dot_residual_t"] - assert ( - tebd_mode in supported_tebd_mode - ), f"Unknown tebd_mode {tebd_mode}! Supported are {supported_tebd_mode}." - if use_tebd and tebd_mode == "concat": - self.neuron = [1 + tebd_dim * 2, *neuron] - else: - self.neuron = [1, *neuron] - - deep_layers = [] - for ii in range(1, len(self.neuron)): - one = ResidualLinear(self.neuron[ii - 1], self.neuron[ii]) - deep_layers.append(one) - self.deep_layers = nn.ModuleList(deep_layers) - - deep_layers_t = [] - if use_tebd and tebd_mode in ["dot", "dot_residual_s", "dot_residual_t"]: - self.neuron_t = [tebd_dim * 2, *neuron] - for ii in range(1, len(self.neuron_t)): - one = ResidualLinear(self.neuron_t[ii - 1], self.neuron_t[ii]) - deep_layers_t.append(one) - self.deep_layers_t = nn.ModuleList(deep_layers_t) - - self.return_G = return_G - - def forward( - self, - inputs, - atype_tebd: Optional[torch.Tensor] = None, - nlist_tebd: Optional[torch.Tensor] = None, - ): - """Calculate decoded embedding for each atom. - - Args: - - inputs: Descriptor matrix. 
Its shape is [nframes*natoms[0], len_descriptor]. - - Returns - ------- - - `torch.Tensor`: Embedding contributed by me. Its shape is [nframes*natoms[0], 4, self.neuron[-1]]. - """ - inputs_i = inputs[:, self.offset * 4 : (self.offset + self.length) * 4] - inputs_reshape = inputs_i.reshape( - -1, 4 - ) # shape is [nframes*natoms[0]*self.length, 4] - xyz_scatter = inputs_reshape[:, 0:1] - - # concat the tebd as input - if self.use_tebd and self.tebd_mode == "concat": - assert nlist_tebd is not None and atype_tebd is not None - nlist_tebd = nlist_tebd.reshape(-1, self.tebd_dim) - atype_tebd = atype_tebd.reshape(-1, self.tebd_dim) - # [nframes * nloc * nnei, 1 + tebd_dim * 2] - xyz_scatter = torch.concat([xyz_scatter, nlist_tebd, atype_tebd], dim=1) - - for linear in self.deep_layers: - xyz_scatter = linear(xyz_scatter) - # [nframes * nloc * nnei, out_size] - - # dot the tebd output - if self.use_tebd and self.tebd_mode in [ - "dot", - "dot_residual_s", - "dot_residual_t", - ]: - assert nlist_tebd is not None and atype_tebd is not None - nlist_tebd = nlist_tebd.reshape(-1, self.tebd_dim) - atype_tebd = atype_tebd.reshape(-1, self.tebd_dim) - # [nframes * nloc * nnei, tebd_dim * 2] - two_side_tebd = torch.concat([nlist_tebd, atype_tebd], dim=1) - for linear in self.deep_layers_t: - two_side_tebd = linear(two_side_tebd) - # [nframes * nloc * nnei, out_size] - if self.tebd_mode == "dot": - xyz_scatter = xyz_scatter * two_side_tebd - elif self.tebd_mode == "dot_residual_s": - xyz_scatter = xyz_scatter * two_side_tebd + xyz_scatter - elif self.tebd_mode == "dot_residual_t": - xyz_scatter = xyz_scatter * two_side_tebd + two_side_tebd - - xyz_scatter = xyz_scatter.view( - -1, self.length, self.neuron[-1] - ) # shape is [nframes*natoms[0], self.length, self.neuron[-1]] - if self.return_G: - return xyz_scatter - else: - # shape is [nframes*natoms[0], 4, self.length] - inputs_reshape = inputs_i.view(-1, self.length, 4).permute(0, 2, 1) - return torch.matmul(inputs_reshape, xyz_scatter) - - class SimpleLinear(nn.Module): use_timestep: Final[bool] @@ -396,53 +151,6 @@ def _normal_init(self): nn.init.kaiming_normal_(self.weight, nonlinearity="linear") -class Transition(nn.Module): - def __init__(self, d_in, n, dropout=0.0): - super().__init__() - - self.d_in = d_in - self.n = n - - self.linear_1 = Linear(self.d_in, self.n * self.d_in, init="relu") - self.act = nn.GELU() - self.linear_2 = Linear(self.n * self.d_in, d_in, init="final") - self.dropout = dropout - - def _transition(self, x): - x = self.linear_1(x) - x = self.act(x) - x = F.dropout(x, p=self.dropout, training=self.training) - x = self.linear_2(x) - return x - - def forward( - self, - x: torch.Tensor, - ) -> torch.Tensor: - x = self._transition(x=x) - return x - - -class Embedding(nn.Embedding): - def __init__( - self, - num_embeddings: int, - embedding_dim: int, - padding_idx: Optional[int] = None, - dtype=torch.float64, - ): - super().__init__( - num_embeddings, embedding_dim, padding_idx=padding_idx, dtype=dtype - ) - self._normal_init() - - if padding_idx is not None: - self.weight.data[self.padding_idx].zero_() - - def _normal_init(self, std=0.02): - nn.init.normal_(self.weight, mean=0.0, std=std) - - class NonLinearHead(nn.Module): def __init__(self, input_dim, out_dim, activation_fn, hidden=None): super().__init__() @@ -456,27 +164,6 @@ def forward(self, x): return x -class NonLinear(nn.Module): - def __init__(self, input, output_size, hidden=None): - super().__init__() - - if hidden is None: - hidden = input - self.layer1 = 
Linear(input, hidden, init="relu") - self.layer2 = Linear(hidden, output_size, init="final") - - def forward(self, x): - x = F.linear(x, self.layer1.weight) - # x = fused_ops.bias_torch_gelu(x, self.layer1.bias) - x = nn.GELU()(x) + self.layer1.bias - x = self.layer2(x) - return x - - def zero_init(self): - nn.init.zeros_(self.layer2.weight) - nn.init.zeros_(self.layer2.bias) - - class MaskLMHead(nn.Module): """Head for masked language modeling.""" @@ -844,1327 +531,3 @@ def serialize(self) -> dict: "type_map": self.type_map, "embedding": self.embedding_net.serialize(), } - - -@torch.jit.script -def gaussian(x, mean, std: float): - pi = 3.14159 - a = (2 * pi) ** 0.5 - return torch.exp(-0.5 * (((x - mean) / std) ** 2)) / (a * std) - - -class GaussianKernel(nn.Module): - def __init__(self, K=128, num_pair=512, std_width=1.0, start=0.0, stop=9.0): - super().__init__() - self.K = K - std_width = std_width - start = start - stop = stop - mean = torch.linspace(start, stop, K, dtype=env.GLOBAL_PT_FLOAT_PRECISION) # pylint: disable=no-explicit-device - self.std = (std_width * (mean[1] - mean[0])).item() - self.register_buffer("mean", mean) - self.mul = Embedding( - num_pair + 1, 1, padding_idx=num_pair, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - self.bias = Embedding( - num_pair + 1, 1, padding_idx=num_pair, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - nn.init.constant_(self.bias.weight, 0) - nn.init.constant_(self.mul.weight, 1.0) - - def forward(self, x, atom_pair): - mul = self.mul(atom_pair).abs().sum(dim=-2) - bias = self.bias(atom_pair).sum(dim=-2) - x = mul * x.unsqueeze(-1) + bias - # [nframes, nloc, nnei, K] - x = x.expand(-1, -1, -1, self.K) - mean = self.mean.view(-1) - return gaussian(x, mean, self.std) - - -class GaussianEmbedding(nn.Module): - def __init__( - self, - rcut, - kernel_num, - num_pair, - embed_dim, - pair_embed_dim, - sel, - ntypes, - atomic_sum_gbf, - ): - """Construct a gaussian kernel based embedding of pair representation. - - Args: - rcut: Radial cutoff. - kernel_num: Number of gaussian kernels. - num_pair: Number of different pairs. - embed_dim: Dimension of atomic representation. - pair_embed_dim: Dimension of pair representation. - sel: Number of neighbors. - ntypes: Number of atom types. - """ - super().__init__() - self.gbf = GaussianKernel(K=kernel_num, num_pair=num_pair, stop=rcut) - self.gbf_proj = NonLinear(kernel_num, pair_embed_dim) - self.embed_dim = embed_dim - self.pair_embed_dim = pair_embed_dim - self.atomic_sum_gbf = atomic_sum_gbf - if self.atomic_sum_gbf: - if kernel_num != self.embed_dim: - self.edge_proj = torch.nn.Linear( - kernel_num, self.embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - else: - self.edge_proj = None - self.ntypes = ntypes - self.nnei = sel - - def forward(self, coord_selected, atom_feature, edge_type_2dim, edge_feature): - ## local cluster forward - """Calculate decoded embedding for each atom. - Args: - coord_selected: Clustered atom coordinates with shape [nframes*nloc, natoms, 3]. - atom_feature: Previous calculated atomic features with shape [nframes*nloc, natoms, embed_dim]. - edge_type_2dim: Edge index for gbf calculation with shape [nframes*nloc, natoms, natoms, 2]. - edge_feature: Previous calculated edge features with shape [nframes*nloc, natoms, natoms, pair_dim]. - - Returns - ------- - atom_feature: Updated atomic features with shape [nframes*nloc, natoms, embed_dim]. - attn_bias: Updated edge features as attention bias with shape [nframes*nloc, natoms, natoms, pair_dim]. 
- delta_pos: Delta position for force/vector prediction with shape [nframes*nloc, natoms, natoms, 3]. - """ - ncluster, natoms, _ = coord_selected.shape - # ncluster x natoms x natoms x 3 - delta_pos = coord_selected.unsqueeze(1) - coord_selected.unsqueeze(2) - # (ncluster x natoms x natoms - dist = delta_pos.norm(dim=-1).view(-1, natoms, natoms) - # [ncluster, natoms, natoms, K] - gbf_feature = self.gbf(dist, edge_type_2dim) - if self.atomic_sum_gbf: - edge_features = gbf_feature - # [ncluster, natoms, K] - sum_edge_features = edge_features.sum(dim=-2) - if self.edge_proj is not None: - sum_edge_features = self.edge_proj(sum_edge_features) - # [ncluster, natoms, embed_dim] - atom_feature = atom_feature + sum_edge_features - - # [ncluster, natoms, natoms, pair_dim] - gbf_result = self.gbf_proj(gbf_feature) - - attn_bias = gbf_result + edge_feature - return atom_feature, attn_bias, delta_pos - - -class NeighborWiseAttention(nn.Module): - def __init__( - self, - layer_num, - nnei, - embed_dim, - hidden_dim, - dotr=False, - do_mask=False, - post_ln=True, - ffn=False, - ffn_embed_dim=1024, - activation="tanh", - scaling_factor=1.0, - head_num=1, - normalize=True, - temperature=None, - smooth=True, - ): - """Construct a neighbor-wise attention net.""" - super().__init__() - self.layer_num = layer_num - attention_layers = [] - for i in range(self.layer_num): - attention_layers.append( - NeighborWiseAttentionLayer( - nnei, - embed_dim, - hidden_dim, - dotr=dotr, - do_mask=do_mask, - post_ln=post_ln, - ffn=ffn, - ffn_embed_dim=ffn_embed_dim, - activation=activation, - scaling_factor=scaling_factor, - head_num=head_num, - normalize=normalize, - temperature=temperature, - smooth=smooth, - ) - ) - self.attention_layers = nn.ModuleList(attention_layers) - - def forward( - self, - input_G, - nei_mask, - input_r: Optional[torch.Tensor] = None, - sw: Optional[torch.Tensor] = None, - ): - """ - Args: - input_G: Input G, [nframes * nloc, nnei, embed_dim]. - nei_mask: neighbor mask, [nframes * nloc, nnei]. - input_r: normalized radial, [nframes, nloc, nei, 3]. 
- - Returns - ------- - out: Output G, [nframes * nloc, nnei, embed_dim] - - """ - out = input_G - # https://github.com/pytorch/pytorch/issues/39165#issuecomment-635472592 - for layer in self.attention_layers: - out = layer(out, nei_mask, input_r=input_r, sw=sw) - return out - - -class NeighborWiseAttentionLayer(nn.Module): - ffn: Final[bool] - - def __init__( - self, - nnei, - embed_dim, - hidden_dim, - dotr=False, - do_mask=False, - post_ln=True, - ffn=False, - ffn_embed_dim=1024, - activation="tanh", - scaling_factor=1.0, - head_num=1, - normalize=True, - temperature=None, - smooth=True, - ): - """Construct a neighbor-wise attention layer.""" - super().__init__() - self.nnei = nnei - self.embed_dim = embed_dim - self.hidden_dim = hidden_dim - self.dotr = dotr - self.do_mask = do_mask - self.post_ln = post_ln - self.ffn = ffn - self.smooth = smooth - self.attention_layer = GatedSelfAttetion( - nnei, - embed_dim, - hidden_dim, - dotr=dotr, - do_mask=do_mask, - scaling_factor=scaling_factor, - head_num=head_num, - normalize=normalize, - temperature=temperature, - smooth=smooth, - ) - self.attn_layer_norm = nn.LayerNorm( - self.embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE - ) - if self.ffn: - self.ffn_embed_dim = ffn_embed_dim - self.fc1 = nn.Linear( - self.embed_dim, self.ffn_embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - self.activation_fn = ActivationFn(activation) - self.fc2 = nn.Linear( - self.ffn_embed_dim, self.embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - self.final_layer_norm = nn.LayerNorm( - self.embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - - def forward( - self, - x, - nei_mask, - input_r: Optional[torch.Tensor] = None, - sw: Optional[torch.Tensor] = None, - ): - residual = x - if not self.post_ln: - x = self.attn_layer_norm(x) - x = self.attention_layer(x, nei_mask, input_r=input_r, sw=sw) - x = residual + x - if self.post_ln: - x = self.attn_layer_norm(x) - if self.ffn: - residual = x - if not self.post_ln: - x = self.final_layer_norm(x) - x = self.fc1(x) - x = self.activation_fn(x) - x = self.fc2(x) - x = residual + x - if self.post_ln: - x = self.final_layer_norm(x) - return x - - -class GatedSelfAttetion(nn.Module): - def __init__( - self, - nnei, - embed_dim, - hidden_dim, - dotr=False, - do_mask=False, - scaling_factor=1.0, - head_num=1, - normalize=True, - temperature=None, - bias=True, - smooth=True, - ): - """Construct a neighbor-wise attention net.""" - super().__init__() - self.nnei = nnei - self.embed_dim = embed_dim - self.hidden_dim = hidden_dim - self.head_num = head_num - self.dotr = dotr - self.do_mask = do_mask - if temperature is None: - self.scaling = (self.hidden_dim * scaling_factor) ** -0.5 - else: - self.scaling = temperature - self.normalize = normalize - self.in_proj = SimpleLinear( - embed_dim, - hidden_dim * 3, - bavg=0.0, - stddev=1.0, - use_timestep=False, - bias=bias, - ) - self.out_proj = SimpleLinear( - hidden_dim, embed_dim, bavg=0.0, stddev=1.0, use_timestep=False, bias=bias - ) - self.smooth = smooth - - def forward( - self, - query, - nei_mask, - input_r: Optional[torch.Tensor] = None, - sw: Optional[torch.Tensor] = None, - attnw_shift: float = 20.0, - ): - """ - Args: - query: input G, [nframes * nloc, nnei, embed_dim]. - nei_mask: neighbor mask, [nframes * nloc, nnei]. - input_r: normalized radial, [nframes, nloc, nei, 3]. 
- - Returns - ------- - type_embedding: - - """ - q, k, v = self.in_proj(query).chunk(3, dim=-1) - # [nframes * nloc, nnei, hidden_dim] - q = q.view(-1, self.nnei, self.hidden_dim) - k = k.view(-1, self.nnei, self.hidden_dim) - v = v.view(-1, self.nnei, self.hidden_dim) - if self.normalize: - q = F.normalize(q, dim=-1) - k = F.normalize(k, dim=-1) - v = F.normalize(v, dim=-1) - q = q * self.scaling - k = k.transpose(1, 2) - # [nframes * nloc, nnei, nnei] - attn_weights = torch.bmm(q, k) - # [nframes * nloc, nnei] - nei_mask = nei_mask.view(-1, self.nnei) - if self.smooth: - # [nframes * nloc, nnei] - assert sw is not None - sw = sw.view([-1, self.nnei]) - attn_weights = (attn_weights + attnw_shift) * sw[:, :, None] * sw[ - :, None, : - ] - attnw_shift - else: - attn_weights = attn_weights.masked_fill( - ~nei_mask.unsqueeze(1), float("-inf") - ) - attn_weights = F.softmax(attn_weights, dim=-1) - attn_weights = attn_weights.masked_fill(~nei_mask.unsqueeze(-1), 0.0) - if self.smooth: - assert sw is not None - attn_weights = attn_weights * sw[:, :, None] * sw[:, None, :] - if self.dotr: - assert input_r is not None, "input_r must be provided when dotr is True!" - angular_weight = torch.bmm(input_r, input_r.transpose(1, 2)) - attn_weights = attn_weights * angular_weight - o = torch.bmm(attn_weights, v) - output = self.out_proj(o) - return output - - -class LocalSelfMultiheadAttention(nn.Module): - def __init__(self, feature_dim, attn_head, scaling_factor=1.0): - super().__init__() - self.feature_dim = feature_dim - self.attn_head = attn_head - self.head_dim = feature_dim // attn_head - assert ( - feature_dim % attn_head == 0 - ), f"feature_dim {feature_dim} must be divided by attn_head {attn_head}!" - self.scaling = (self.head_dim * scaling_factor) ** -0.5 - self.in_proj = SimpleLinear(self.feature_dim, self.feature_dim * 3) - # TODO debug - # self.out_proj = SimpleLinear(self.feature_dim, self.feature_dim) - - def forward( - self, - query, - attn_bias: Optional[torch.Tensor] = None, - nlist_mask: Optional[torch.Tensor] = None, - nlist: Optional[torch.Tensor] = None, - return_attn=True, - ): - nframes, nloc, feature_dim = query.size() - _, _, nnei = nlist.size() - assert feature_dim == self.feature_dim - # [nframes, nloc, feature_dim] - q, k, v = self.in_proj(query).chunk(3, dim=-1) - # [nframes * attn_head * nloc, 1, head_dim] - q = ( - q.view(nframes, nloc, self.attn_head, self.head_dim) - .transpose(1, 2) - .contiguous() - .view(nframes * self.attn_head * nloc, 1, self.head_dim) - * self.scaling - ) - # [nframes, nloc, feature_dim] --> [nframes, nloc + 1, feature_dim] - # with nlist [nframes, nloc, nnei] --> [nframes, nloc, nnei, feature_dim] - # padding = torch.zeros(feature_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION).to(k.device) - # k = torch.concat([k, padding.unsqueeze(0).unsqueeze(1)], dim=1) - # v = torch.concat([v, padding.unsqueeze(0).unsqueeze(1)], dim=1) - - # [nframes, nloc * nnei, feature_dim] - index = nlist.view(nframes, -1).unsqueeze(-1).expand(-1, -1, feature_dim) - k = torch.gather(k, dim=1, index=index) - # [nframes, nloc * nnei, feature_dim] - v = torch.gather(v, dim=1, index=index) - # [nframes * attn_head * nloc, nnei, head_dim] - k = ( - k.view(nframes, nloc, nnei, self.attn_head, self.head_dim) - .permute(0, 3, 1, 2, 4) - .contiguous() - .view(nframes * self.attn_head * nloc, nnei, self.head_dim) - ) - v = ( - v.view(nframes, nloc, nnei, self.attn_head, self.head_dim) - .permute(0, 3, 1, 2, 4) - .contiguous() - .view(nframes * self.attn_head * nloc, nnei, self.head_dim) 
- ) - # [nframes * attn_head * nloc, 1, nnei] - attn_weights = torch.bmm(q, k.transpose(1, 2)) - # maskfill - # [nframes, attn_head, nloc, nnei] - attn_weights = attn_weights.view( - nframes, self.attn_head, nloc, nnei - ).masked_fill(~nlist_mask.unsqueeze(1), float("-inf")) - # add bias - if return_attn: - attn_weights = attn_weights + attn_bias - # softmax - # [nframes * attn_head * nloc, 1, nnei] - attn = F.softmax(attn_weights, dim=-1).view( - nframes * self.attn_head * nloc, 1, nnei - ) - # bmm - # [nframes * attn_head * nloc, 1, head_dim] - o = torch.bmm(attn, v) - assert list(o.size()) == [nframes * self.attn_head * nloc, 1, self.head_dim] - # [nframes, nloc, feature_dim] - o = ( - o.view(nframes, self.attn_head, nloc, self.head_dim) - .transpose(1, 2) - .contiguous() - .view(nframes, nloc, self.feature_dim) - ) - # out - ## TODO debug: - # o = self.out_proj(o) - if not return_attn: - return o - else: - return o, attn_weights, attn - - -class NodeTaskHead(nn.Module): - def __init__( - self, - embed_dim: int, - pair_dim: int, - num_head: int, - ): - super().__init__() - self.layer_norm = nn.LayerNorm(embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION) - self.pair_norm = nn.LayerNorm(pair_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION) - self.embed_dim = embed_dim - self.q_proj = Linear(embed_dim, embed_dim, bias=False, init="glorot") - self.k_proj = Linear(embed_dim, embed_dim, bias=False, init="glorot") - self.v_proj = Linear(embed_dim, embed_dim, bias=False, init="glorot") - self.num_heads = num_head - self.head_dim = embed_dim // num_head - self.scaling = self.head_dim**-0.5 - self.force_proj = Linear(embed_dim, 1, init="final", bias=False) - self.linear_bias = Linear(pair_dim, num_head) - self.dropout = 0.1 - - def zero_init(self): - nn.init.zeros_(self.force_proj.weight) - - def forward( - self, - query: Tensor, - pair: Tensor, - delta_pos: Tensor, - attn_mask: Tensor = None, - ) -> Tensor: - ncluster, natoms, _ = query.size() - query = self.layer_norm(query) - # [ncluster, natoms, natoms, pair_dim] - pair = self.pair_norm(pair) - - # [ncluster, attn_head, natoms, head_dim] - q = ( - self.q_proj(query) - .view(ncluster, natoms, self.num_heads, -1) - .transpose(1, 2) - * self.scaling - ) - # [ncluster, attn_head, natoms, head_dim] - k = ( - self.k_proj(query) - .view(ncluster, natoms, self.num_heads, -1) - .transpose(1, 2) - ) - v = ( - self.v_proj(query) - .view(ncluster, natoms, self.num_heads, -1) - .transpose(1, 2) - ) - # [ncluster, attn_head, natoms, natoms] - attn = q @ k.transpose(-1, -2) - del q, k - # [ncluster, attn_head, natoms, natoms] - bias = self.linear_bias(pair).permute(0, 3, 1, 2).contiguous() - - # [ncluster, attn_head, natoms, natoms] - attn_probs = softmax_dropout( - attn, - self.dropout, - self.training, - mask=attn_mask, - bias=bias.contiguous(), - ).view(ncluster, self.num_heads, natoms, natoms) - - # delta_pos: [ncluster, natoms, natoms, 3] - # [ncluster, attn_head, natoms, natoms, 3] - rot_attn_probs = attn_probs.unsqueeze(-1) * delta_pos.unsqueeze(1).type_as( - attn_probs - ) - # [ncluster, attn_head, 3, natoms, natoms] - rot_attn_probs = rot_attn_probs.permute(0, 1, 4, 2, 3) - # [ncluster, attn_head, 3, natoms, head_dim] - x = rot_attn_probs @ v.unsqueeze(2) - # [ncluster, natoms, 3, embed_dim] - x = x.permute(0, 3, 2, 1, 4).contiguous().view(ncluster, natoms, 3, -1) - cur_force = self.force_proj(x).view(ncluster, natoms, 3) - return cur_force - - -class EnergyHead(nn.Module): - def __init__( - self, - input_dim, - output_dim, - ): - super().__init__() - 
self.layer_norm = nn.LayerNorm(input_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION) - self.linear_in = Linear(input_dim, input_dim, init="relu") - - self.linear_out = Linear(input_dim, output_dim, bias=True, init="final") - - def forward(self, x): - x = x.type(self.linear_in.weight.dtype) - x = F.gelu(self.layer_norm(self.linear_in(x))) - x = self.linear_out(x) - return x - - -class OuterProduct(nn.Module): - def __init__(self, d_atom, d_pair, d_hid=32): - super().__init__() - - self.d_atom = d_atom - self.d_pair = d_pair - self.d_hid = d_hid - - self.linear_in = nn.Linear( - d_atom, d_hid * 2, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - self.linear_out = nn.Linear( - d_hid**2, d_pair, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - self.act = nn.GELU() - - def _opm(self, a, b): - # [nframes, nloc, d] - nframes, nloc, d = a.shape - a = a.view(nframes, nloc, 1, d, 1) - b = b.view(nframes, 1, nloc, 1, d) - # [nframes, nloc, nloc, d, d] - outer = a * b - outer = outer.view(outer.shape[:-2] + (-1,)) - outer = self.linear_out(outer) - return outer - - def forward( - self, - m: torch.Tensor, - nlist: torch.Tensor, - op_mask: float, - op_norm: float, - ) -> torch.Tensor: - ab = self.linear_in(m) - ab = ab * op_mask - a, b = ab.chunk(2, dim=-1) - # [ncluster, natoms, natoms, d_pair] - z = self._opm(a, b) - z *= op_norm - return z - - -class Attention(nn.Module): - def __init__( - self, - q_dim: int, - k_dim: int, - v_dim: int, - head_dim: int, - num_heads: int, - gating: bool = False, - dropout: float = 0.0, - ): - super().__init__() - - self.num_heads = num_heads - self.head_dim = head_dim - total_dim = head_dim * self.num_heads - self.total_dim = total_dim - self.q_dim = q_dim - self.gating = gating - self.linear_q = Linear(q_dim, total_dim, bias=False, init="glorot") - self.linear_k = Linear(k_dim, total_dim, bias=False, init="glorot") - self.linear_v = Linear(v_dim, total_dim, bias=False, init="glorot") - self.linear_o = Linear(total_dim, q_dim, init="final") - self.linear_g = None - if self.gating: - self.linear_g = Linear(q_dim, total_dim, init="gating") - # precompute the 1/sqrt(head_dim) - self.norm = head_dim**-0.5 - self.dropout = dropout - - def forward( - self, - q: torch.Tensor, - k: torch.Tensor, - v: torch.Tensor, - bias: torch.Tensor, - mask: torch.Tensor = None, - ) -> torch.Tensor: - nframes, nloc, embed_dim = q.size() - g = None - if self.linear_g is not None: - # gating, use raw query input - # [nframes, nloc, total_dim] - g = self.linear_g(q) - # [nframes, nloc, total_dim] - q = self.linear_q(q) - q *= self.norm - # [nframes, nloc, total_dim] - k = self.linear_k(k) - # [nframes, nloc, total_dim] - v = self.linear_v(v) - # global - # q [nframes, h, nloc, d] - # k [nframes, h, nloc, d] - # v [nframes, h, nloc, d] - # attn [nframes, h, nloc, nloc] - # o [nframes, h, nloc, d] - - # [nframes, h, nloc, d] - q = q.view(q.shape[:-1] + (self.num_heads, -1)).transpose(-2, -3).contiguous() - k = k.view(k.shape[:-1] + (self.num_heads, -1)).transpose(-2, -3).contiguous() - v = v.view(v.shape[:-1] + (self.num_heads, -1)).transpose(-2, -3) - # [nframes, h, nloc, nloc] - attn = torch.matmul(q, k.transpose(-1, -2)) - del q, k - # [nframes, h, nloc, nloc] - attn = softmax_dropout(attn, self.dropout, self.training, mask=mask, bias=bias) - # [nframes, h, nloc, d] - o = torch.matmul(attn, v) - del attn, v - - # local - # q [nframes, h, nloc, 1, d] - # k [nframes, h, nloc, nnei, d] - # v [nframes, h, nloc, nnei, d] - # attn [nframes, h, nloc, nnei] - # o [nframes, h, nloc, d] - - assert list(o.size()) == 
[nframes, self.num_heads, nloc, self.head_dim] - # [nframes, nloc, total_dim] - o = o.transpose(-2, -3).contiguous() - o = o.view(*o.shape[:-2], -1) - - if g is not None: - o = torch.sigmoid(g) * o - - # merge heads - o = self.linear_o(o) - return o - - -class AtomAttention(nn.Module): - def __init__( - self, - q_dim: int, - k_dim: int, - v_dim: int, - pair_dim: int, - head_dim: int, - num_heads: int, - gating: bool = False, - dropout: float = 0.0, - ): - super().__init__() - - self.mha = Attention( - q_dim, k_dim, v_dim, head_dim, num_heads, gating=gating, dropout=dropout - ) - self.layer_norm = nn.LayerNorm(pair_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION) - self.linear_bias = Linear(pair_dim, num_heads) - - def forward( - self, - q: torch.Tensor, - k: torch.Tensor, - v: torch.Tensor, - nlist: torch.Tensor, - pair: torch.Tensor, - mask: torch.Tensor = None, - ) -> torch.Tensor: - pair = self.layer_norm(pair) - bias = self.linear_bias(pair).permute(0, 3, 1, 2).contiguous() - return self.mha(q, k, v, bias=bias, mask=mask) - - -class TriangleMultiplication(nn.Module): - def __init__(self, d_pair, d_hid): - super().__init__() - - self.linear_ab_p = Linear(d_pair, d_hid * 2) - self.linear_ab_g = Linear(d_pair, d_hid * 2, init="gating") - - self.linear_g = Linear(d_pair, d_pair, init="gating") - self.linear_z = Linear(d_hid, d_pair, init="final") - - self.layer_norm_out = nn.LayerNorm(d_hid, dtype=env.GLOBAL_PT_FLOAT_PRECISION) - - def forward( - self, - z: torch.Tensor, - mask: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - # z : [nframes, nloc, nloc, pair_dim] - - # [nframes, nloc, nloc, pair_dim] - g = self.linear_g(z) - if self.training: - ab = self.linear_ab_p(z) * torch.sigmoid(self.linear_ab_g(z)) - else: - ab = self.linear_ab_p(z) - ab *= torch.sigmoid(self.linear_ab_g(z)) - # [nframes, nloc, nloc, d] - a, b = torch.chunk(ab, 2, dim=-1) - del z, ab - - # [nframes, d, nloc_i, nloc_k] row not trans - a1 = a.permute(0, 3, 1, 2) - # [nframes, d, nloc_k, nloc_j(i)] trans - b1 = b.transpose(-1, -3) - # [nframes, d, nloc_i, nloc_j] - x = torch.matmul(a1, b1) - del a1, b1 - - # [nframes, d, nloc_k, nloc_j(i)] not trans - b2 = b.permute(0, 3, 1, 2) - # [nframes, d, nloc_i, nloc_k] col trans # check TODO - a2 = a.transpose(-1, -3) - - # [nframes, d, nloc_i, nloc_j] - x = x + torch.matmul(a2, b2) - del a, b, a2, b2 - - # [nframes, nloc_i, nloc_j, d] - x = x.permute(0, 2, 3, 1) - - x = self.layer_norm_out(x) - x = self.linear_z(x) - return g * x - - -class EvoformerEncoderLayer(nn.Module): - def __init__( - self, - feature_dim: int = 768, - ffn_dim: int = 2048, - attn_head: int = 8, - activation_fn: str = "gelu", - post_ln: bool = False, - ): - super().__init__() - self.feature_dim = feature_dim - self.ffn_dim = ffn_dim - self.attn_head = attn_head - self.activation_fn = ( - ActivationFn(activation_fn) if activation_fn is not None else None - ) - self.post_ln = post_ln - self.self_attn_layer_norm = nn.LayerNorm( - self.feature_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - - self.self_attn = LocalSelfMultiheadAttention( - self.feature_dim, - self.attn_head, - ) - self.final_layer_norm = nn.LayerNorm( - self.feature_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - self.fc1 = SimpleLinear(self.feature_dim, self.ffn_dim) - self.fc2 = SimpleLinear(self.ffn_dim, self.feature_dim) - - def forward( - self, - x, - attn_bias: Optional[torch.Tensor] = None, - nlist_mask: Optional[torch.Tensor] = None, - nlist: Optional[torch.Tensor] = None, - return_attn=True, - ): - residual = x - if not self.post_ln: - 
x = self.self_attn_layer_norm(x) - x = self.self_attn( - query=x, - attn_bias=attn_bias, - nlist_mask=nlist_mask, - nlist=nlist, - return_attn=return_attn, - ) - if return_attn: - x, attn_weights, attn_probs = x - x = residual + x - if self.post_ln: - x = self.self_attn_layer_norm(x) - - residual = x - if not self.post_ln: - x = self.final_layer_norm(x) - x = self.fc1(x) - x = self.activation_fn(x) - x = self.fc2(x) - x = residual + x - if self.post_ln: - x = self.final_layer_norm(x) - if not return_attn: - return x - else: - return x, attn_weights, attn_probs - - -# output: atomic_rep, transformed_atomic_rep, pair_rep, delta_pair_rep, norm_x, norm_delta_pair_rep, -class Evoformer2bEncoder(nn.Module): - def __init__( - self, - nnei: int, - layer_num: int = 6, - attn_head: int = 8, - atomic_dim: int = 1024, - pair_dim: int = 100, - feature_dim: int = 1024, - ffn_dim: int = 2048, - post_ln: bool = False, - final_layer_norm: bool = True, - final_head_layer_norm: bool = False, - emb_layer_norm: bool = False, - atomic_residual: bool = False, - evo_residual: bool = False, - residual_factor: float = 1.0, - activation_function: str = "gelu", - ): - super().__init__() - self.nnei = nnei - self.layer_num = layer_num - self.attn_head = attn_head - self.atomic_dim = atomic_dim - self.pair_dim = pair_dim - self.feature_dim = feature_dim - self.ffn_dim = ffn_dim - self.post_ln = post_ln - self._final_layer_norm = final_layer_norm - self._final_head_layer_norm = final_head_layer_norm - self._emb_layer_norm = emb_layer_norm - self.activation_function = activation_function - self.evo_residual = evo_residual - self.residual_factor = residual_factor - if atomic_residual and atomic_dim == feature_dim: - self.atomic_residual = True - else: - self.atomic_residual = False - self.in_proj = SimpleLinear( - self.atomic_dim, - self.feature_dim, - bavg=0.0, - stddev=1.0, - use_timestep=False, - activate="tanh", - ) # TODO - self.out_proj = SimpleLinear( - self.feature_dim, - self.atomic_dim, - bavg=0.0, - stddev=1.0, - use_timestep=False, - activate="tanh", - ) - if self._emb_layer_norm: - self.emb_layer_norm = nn.LayerNorm( - self.feature_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - - ## TODO debug : self.in_proj_pair = NonLinearHead(self.pair_dim, self.attn_head, activation_fn=None) - self.in_proj_pair = SimpleLinear(self.pair_dim, self.attn_head, activate=None) - evoformer_encoder_layers = [] - for i in range(self.layer_num): - evoformer_encoder_layers.append( - EvoformerEncoderLayer( - feature_dim=self.feature_dim, - ffn_dim=self.ffn_dim, - attn_head=self.attn_head, - activation_fn=self.activation_function, - post_ln=self.post_ln, - ) - ) - self.evoformer_encoder_layers = nn.ModuleList(evoformer_encoder_layers) - if self._final_layer_norm: - self.final_layer_norm = nn.LayerNorm( - self.feature_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - if self._final_head_layer_norm: - self.final_head_layer_norm = nn.LayerNorm( - self.attn_head, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - - def forward(self, atomic_rep, pair_rep, nlist, nlist_type, nlist_mask): - """Encoder the atomic and pair representations. - - Args: - - atomic_rep: Atomic representation with shape [nframes, nloc, atomic_dim]. - - pair_rep: Pair representation with shape [nframes, nloc, nnei, pair_dim]. - - nlist: Neighbor list with shape [nframes, nloc, nnei]. - - nlist_type: Neighbor types with shape [nframes, nloc, nnei]. - - nlist_mask: Neighbor mask with shape [nframes, nloc, nnei], `False` if blank. 
- - Returns - ------- - - atomic_rep: Atomic representation after encoder with shape [nframes, nloc, feature_dim]. - - transformed_atomic_rep: Transformed atomic representation after encoder with shape [nframes, nloc, atomic_dim]. - - pair_rep: Pair representation after encoder with shape [nframes, nloc, nnei, attn_head]. - - delta_pair_rep: Delta pair representation after encoder with shape [nframes, nloc, nnei, attn_head]. - - norm_x: Normalization loss of atomic_rep. - - norm_delta_pair_rep: Normalization loss of delta_pair_rep. - """ - # Global branch - nframes, nloc, _ = atomic_rep.size() - nnei = pair_rep.shape[2] - input_atomic_rep = atomic_rep - # [nframes, nloc, feature_dim] - if self.atomic_residual: - atomic_rep = atomic_rep + self.in_proj(atomic_rep) - else: - atomic_rep = self.in_proj(atomic_rep) - - if self._emb_layer_norm: - atomic_rep = self.emb_layer_norm(atomic_rep) - - # Local branch - # [nframes, nloc, nnei, attn_head] - pair_rep = self.in_proj_pair(pair_rep) - # [nframes, attn_head, nloc, nnei] - pair_rep = pair_rep.permute(0, 3, 1, 2).contiguous() - input_pair_rep = pair_rep - pair_rep = pair_rep.masked_fill(~nlist_mask.unsqueeze(1), float("-inf")) - - for i in range(self.layer_num): - atomic_rep, pair_rep, _ = self.evoformer_encoder_layers[i]( - atomic_rep, - attn_bias=pair_rep, - nlist_mask=nlist_mask, - nlist=nlist, - return_attn=True, - ) - - def norm_loss(x, eps=1e-10, tolerance=1.0): - # x = x.float() - max_norm = x.shape[-1] ** 0.5 - norm = torch.sqrt(torch.sum(x**2, dim=-1) + eps) - error = F.relu((norm - max_norm).abs() - tolerance) - return error - - def masked_mean(mask, value, dim=-1, eps=1e-10): - return ( - torch.sum(mask * value, dim=dim) / (eps + torch.sum(mask, dim=dim)) - ).mean() - - # atomic_rep shape: [nframes, nloc, feature_dim] - # pair_rep shape: [nframes, attn_head, nloc, nnei] - - norm_x = torch.mean(norm_loss(atomic_rep)) - if self._final_layer_norm: - atomic_rep = self.final_layer_norm(atomic_rep) - - delta_pair_rep = pair_rep - input_pair_rep - delta_pair_rep = delta_pair_rep.masked_fill(~nlist_mask.unsqueeze(1), 0) - # [nframes, nloc, nnei, attn_head] - delta_pair_rep = ( - delta_pair_rep.view(nframes, self.attn_head, nloc, nnei) - .permute(0, 2, 3, 1) - .contiguous() - ) - - # [nframes, nloc, nnei] - norm_delta_pair_rep = norm_loss(delta_pair_rep) - norm_delta_pair_rep = masked_mean(mask=nlist_mask, value=norm_delta_pair_rep) - if self._final_head_layer_norm: - delta_pair_rep = self.final_head_layer_norm(delta_pair_rep) - - if self.atomic_residual: - transformed_atomic_rep = atomic_rep + self.out_proj(atomic_rep) - else: - transformed_atomic_rep = self.out_proj(atomic_rep) - - if self.evo_residual: - transformed_atomic_rep = ( - self.residual_factor * transformed_atomic_rep + input_atomic_rep - ) * (1 / np.sqrt(2)) - - return ( - atomic_rep, - transformed_atomic_rep, - pair_rep, - delta_pair_rep, - norm_x, - norm_delta_pair_rep, - ) - - -class Evoformer3bEncoderLayer(nn.Module): - def __init__( - self, - nnei, - embedding_dim: int = 768, - pair_dim: int = 64, - pair_hidden_dim: int = 32, - ffn_embedding_dim: int = 3072, - num_attention_heads: int = 8, - dropout: float = 0.1, - droppath_prob: float = 0.0, - pair_dropout: float = 0.25, - attention_dropout: float = 0.1, - activation_dropout: float = 0.1, - pre_ln: bool = True, - tri_update: bool = True, - ): - super().__init__() - # Initialize parameters - self.nnei = nnei - self.embedding_dim = embedding_dim - self.num_attention_heads = num_attention_heads - self.attention_dropout = 
attention_dropout - - # self.dropout = dropout - self.activation_dropout = activation_dropout - - if droppath_prob > 0.0: - self.dropout_module = DropPath(droppath_prob) - else: - self.dropout_module = Dropout(dropout) - - # self.self_attn = AtomAttentionLocal(embedding_dim, embedding_dim, embedding_dim, pair_dim, - # embedding_dim // num_attention_heads, num_attention_heads, - # gating=False, dropout=attention_dropout) - self.self_attn = AtomAttention( - embedding_dim, - embedding_dim, - embedding_dim, - pair_dim, - embedding_dim // num_attention_heads, - num_attention_heads, - gating=False, - dropout=attention_dropout, - ) - # layer norm associated with the self attention layer - self.pre_ln = pre_ln - self.self_attn_layer_norm = nn.LayerNorm( - self.embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - self.fc1 = nn.Linear( - self.embedding_dim, ffn_embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - self.fc2 = nn.Linear( - ffn_embedding_dim, self.embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - self.final_layer_norm = nn.LayerNorm( - self.embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - - self.x_layer_norm_opm = nn.LayerNorm( - self.embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - # self.opm = OuterProductLocal(self.embedding_dim, pair_dim, d_hid=pair_hidden_dim) - self.opm = OuterProduct(self.embedding_dim, pair_dim, d_hid=pair_hidden_dim) - # self.pair_layer_norm_opm = nn.LayerNorm(pair_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION) - self.pair_layer_norm_ffn = nn.LayerNorm( - pair_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - self.pair_ffn = Transition( - pair_dim, - 1, - dropout=activation_dropout, - ) - self.pair_dropout = pair_dropout - self.tri_update = tri_update - if self.tri_update: - self.pair_layer_norm_trimul = nn.LayerNorm( - pair_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - self.pair_tri_mul = TriangleMultiplication(pair_dim, pair_hidden_dim) - - def update_pair( - self, - x, - pair, - nlist, - op_mask, - op_norm, - ): - # local: - # [nframes, nloc, nnei, pair_dim] - # global: - # [nframes, nloc, nloc, pair_dim] - pair = pair + self.dropout_module( - self.opm(self.x_layer_norm_opm(x), nlist, op_mask, op_norm) - ) - if not self.pre_ln: - pair = self.pair_layer_norm_opm(pair) - return x, pair - - def shared_dropout(self, x, shared_dim, dropout): - shape = list(x.shape) - shape[shared_dim] = 1 - with torch.no_grad(): - mask = x.new_ones(shape) - return F.dropout(mask, p=dropout, training=self.training) * x - - def forward( - self, - x: torch.Tensor, - pair: torch.Tensor, - nlist: torch.Tensor = None, - attn_mask: Optional[torch.Tensor] = None, - pair_mask: Optional[torch.Tensor] = None, - op_mask: float = 1.0, - op_norm: float = 1.0, - ): - """Encoder the atomic and pair representations. - - Args: - - x: Atomic representation with shape [ncluster, natoms, embed_dim]. - - pair: Pair representation with shape [ncluster, natoms, natoms, pair_dim]. - - attn_mask: Attention mask with shape [ncluster, head, natoms, natoms]. - - pair_mask: Neighbor mask with shape [ncluster, natoms, natoms]. 
- - """ - # [ncluster, natoms, embed_dim] - residual = x - if self.pre_ln: - x = self.self_attn_layer_norm(x) - x = self.self_attn( - x, - x, - x, - nlist=nlist, - pair=pair, - mask=attn_mask, - ) - # x = F.dropout(x, p=self.dropout, training=self.training) - x = self.dropout_module(x) - x = residual + x - if not self.pre_ln: - x = self.self_attn_layer_norm(x) - - residual = x - if self.pre_ln: - x = self.final_layer_norm(x) - x = F.linear(x, self.fc1.weight) - # x = fused_ops.bias_torch_gelu(x, self.fc1.bias) - x = nn.GELU()(x) + self.fc1.bias - x = F.dropout(x, p=self.activation_dropout, training=self.training) - x = self.fc2(x) - # x = F.dropout(x, p=self.dropout, training=self.training) - x = self.dropout_module(x) - - x = residual + x - if not self.pre_ln: - x = self.final_layer_norm(x) - - block = [ - partial( - self.update_pair, - nlist=nlist, - op_mask=op_mask, - op_norm=op_norm, - ) - ] - - x, pair = checkpoint_sequential( - block, - input_x=(x, pair), - ) - - if self.tri_update: - residual_pair = pair - if self.pre_ln: - pair = self.pair_layer_norm_trimul(pair) - - pair = self.shared_dropout( - self.pair_tri_mul(pair, pair_mask), -3, self.pair_dropout - ) - pair = residual_pair + pair - if not self.pre_ln: - pair = self.pair_layer_norm_trimul(pair) - - residual_pair = pair - if self.pre_ln: - pair = self.pair_layer_norm_ffn(pair) - pair = self.dropout_module(self.pair_ffn(pair)) - pair = residual_pair + pair - if not self.pre_ln: - pair = self.pair_layer_norm_ffn(pair) - return x, pair - - -class Evoformer3bEncoder(nn.Module): - def __init__( - self, - nnei, - layer_num=6, - attn_head=8, - atomic_dim=768, - pair_dim=64, - pair_hidden_dim=32, - ffn_embedding_dim=3072, - dropout: float = 0.1, - droppath_prob: float = 0.0, - pair_dropout: float = 0.25, - attention_dropout: float = 0.1, - activation_dropout: float = 0.1, - pre_ln: bool = True, - tri_update: bool = True, - **kwargs, - ): - super().__init__() - self.nnei = nnei - if droppath_prob > 0: - droppath_probs = [ - x.item() - for x in torch.linspace(0, droppath_prob, layer_num) # pylint: disable=no-explicit-dtype,no-explicit-device - ] - else: - droppath_probs = None - - self.layers = nn.ModuleList( - [ - Evoformer3bEncoderLayer( - nnei, - atomic_dim, - pair_dim, - pair_hidden_dim, - ffn_embedding_dim, - num_attention_heads=attn_head, - dropout=dropout, - droppath_prob=droppath_probs[_], - pair_dropout=pair_dropout, - attention_dropout=attention_dropout, - activation_dropout=activation_dropout, - pre_ln=pre_ln, - tri_update=tri_update, - ) - for _ in range(layer_num) - ] - ) - - def forward(self, x, pair, attn_mask=None, pair_mask=None, atom_mask=None): - """Encoder the atomic and pair representations. - - Args: - x: Atomic representation with shape [ncluster, natoms, atomic_dim]. - pair: Pair representation with shape [ncluster, natoms, natoms, pair_dim]. - attn_mask: Attention mask (with -inf for softmax) with shape [ncluster, head, natoms, natoms]. - pair_mask: Pair mask (with 1 for real atom pair and 0 for padding) with shape [ncluster, natoms, natoms]. - atom_mask: Atom mask (with 1 for real atom and 0 for padding) with shape [ncluster, natoms]. - - Returns - ------- - x: Atomic representation with shape [ncluster, natoms, atomic_dim]. - pair: Pair representation with shape [ncluster, natoms, natoms, pair_dim]. 
- - """ - # [ncluster, natoms, 1] - op_mask = atom_mask.unsqueeze(-1) - op_mask = op_mask * (op_mask.size(-2) ** -0.5) - eps = 1e-3 - # [ncluster, natoms, natoms, 1] - op_norm = 1.0 / (eps + torch.einsum("...bc,...dc->...bdc", op_mask, op_mask)) - for layer in self.layers: - x, pair = layer( - x, - pair, - nlist=None, - attn_mask=attn_mask, - pair_mask=pair_mask, - op_mask=op_mask, - op_norm=op_norm, - ) - return x, pair diff --git a/deepmd/pt/model/task/__init__.py b/deepmd/pt/model/task/__init__.py index 572dc60d56..02d852eab7 100644 --- a/deepmd/pt/model/task/__init__.py +++ b/deepmd/pt/model/task/__init__.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from .atten_lcc import ( - FittingNetAttenLcc, -) from .base_fitting import ( BaseFitting, ) @@ -32,7 +29,6 @@ ) __all__ = [ - "FittingNetAttenLcc", "DenoiseNet", "DipoleFittingNet", "EnergyFittingNet", diff --git a/deepmd/pt/model/task/atten_lcc.py b/deepmd/pt/model/task/atten_lcc.py deleted file mode 100644 index 4f54038548..0000000000 --- a/deepmd/pt/model/task/atten_lcc.py +++ /dev/null @@ -1,55 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import torch -import torch.nn as nn - -from deepmd.pt.model.network.network import ( - EnergyHead, - NodeTaskHead, -) -from deepmd.pt.model.task.fitting import ( - Fitting, -) -from deepmd.pt.utils import ( - env, -) - - -class FittingNetAttenLcc(Fitting): - def __init__( - self, embedding_width, bias_atom_e, pair_embed_dim, attention_heads, **kwargs - ): - super().__init__() - self.embedding_width = embedding_width - self.engergy_proj = EnergyHead(self.embedding_width, 1) - self.energe_agg_factor = nn.Embedding(4, 1, dtype=env.GLOBAL_PT_FLOAT_PRECISION) - nn.init.normal_(self.energe_agg_factor.weight, 0, 0.01) - bias_atom_e = torch.tensor(bias_atom_e) # pylint: disable=no-explicit-dtype,no-explicit-device - self.register_buffer("bias_atom_e", bias_atom_e) - self.pair_embed_dim = pair_embed_dim - self.attention_heads = attention_heads - self.node_proc = NodeTaskHead( - self.embedding_width, self.pair_embed_dim, self.attention_heads - ) - self.node_proc.zero_init() - - def forward(self, output, pair, delta_pos, atype, nframes, nloc): - # [nframes x nloc x tebd_dim] - output_nloc = (output[:, 0, :]).reshape(nframes, nloc, self.embedding_width) - # Optional: GRRG or mean of gbf TODO - - # energy outut - # [nframes, nloc] - energy_out = self.engergy_proj(output_nloc).view(nframes, nloc) - # [nframes, nloc] - energy_factor = self.energe_agg_factor(torch.zeros_like(atype)).view( - nframes, nloc - ) - energy_out = (energy_out * energy_factor) + self.bias_atom_e[atype] - energy_out = energy_out.sum(dim=-1) - - # vector output - # predict_force: [(nframes x nloc) x (1 + nnei2) x 3] - predict_force = self.node_proc(output, pair, delta_pos=delta_pos) - # predict_force_nloc: [nframes x nloc x 3] - predict_force_nloc = (predict_force[:, 0, :]).reshape(nframes, nloc, 3) - return energy_out, predict_force_nloc diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py index 56b14677b9..79f9a0a86c 100644 --- a/deepmd/pt/model/task/dipole.py +++ b/deepmd/pt/model/task/dipole.py @@ -113,7 +113,6 @@ def __init__( type_map=type_map, **kwargs, ) - self.old_impl = False # this only supports the new implementation. 
def _net_out_dim(self): """Set the FittingNet output dim.""" @@ -123,7 +122,6 @@ def serialize(self) -> dict: data = super().serialize() data["type"] = "dipole" data["embedding_width"] = self.embedding_width - data["old_impl"] = self.old_impl data["r_differentiable"] = self.r_differentiable data["c_differentiable"] = self.c_differentiable return data diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 1827569a17..10f88519e1 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -19,9 +19,6 @@ FittingNet, NetworkCollection, ) -from deepmd.pt.model.network.network import ( - ResidualDeep, -) from deepmd.pt.model.task.base_fitting import ( BaseFitting, ) @@ -211,41 +208,24 @@ def __init__( in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam - self.old_impl = kwargs.get("old_impl", False) - if self.old_impl: - filter_layers = [] - for type_i in range(self.ntypes if not self.mixed_types else 1): - bias_type = 0.0 - one = ResidualDeep( - type_i, - self.dim_descrpt, + self.filter_layers = NetworkCollection( + 1 if not self.mixed_types else 0, + self.ntypes, + network_type="fitting_network", + networks=[ + FittingNet( + in_dim, + net_dim_out, self.neuron, - bias_type, - resnet_dt=self.resnet_dt, + self.activation_function, + self.resnet_dt, + self.precision, + bias_out=True, + seed=child_seed(self.seed, ii), ) - filter_layers.append(one) - self.filter_layers_old = torch.nn.ModuleList(filter_layers) - self.filter_layers = None - else: - self.filter_layers = NetworkCollection( - 1 if not self.mixed_types else 0, - self.ntypes, - network_type="fitting_network", - networks=[ - FittingNet( - in_dim, - net_dim_out, - self.neuron, - self.activation_function, - self.resnet_dt, - self.precision, - bias_out=True, - seed=child_seed(self.seed, ii), - ) - for ii in range(self.ntypes if not self.mixed_types else 1) - ], - ) - self.filter_layers_old = None + for ii in range(self.ntypes if not self.mixed_types else 1) + ], + ) # set trainable for param in self.parameters(): param.requires_grad = self.trainable @@ -488,47 +468,29 @@ def _forward_common( dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=descriptor.device, ) # jit assertion - if self.old_impl: - assert self.filter_layers_old is not None - assert xx_zeros is None - if self.mixed_types: - atom_property = self.filter_layers_old[0](xx) + self.bias_atom_e[atype] - outs = outs + atom_property # Shape is [nframes, natoms[0], 1] - else: - for type_i, filter_layer in enumerate(self.filter_layers_old): - mask = atype == type_i - atom_property = filter_layer(xx) - atom_property = atom_property + self.bias_atom_e[type_i] - atom_property = atom_property * mask.unsqueeze(-1) - outs = outs + atom_property # Shape is [nframes, natoms[0], 1] + if self.mixed_types: + atom_property = self.filter_layers.networks[0](xx) + self.bias_atom_e[atype] + if xx_zeros is not None: + atom_property -= self.filter_layers.networks[0](xx_zeros) + outs = outs + atom_property # Shape is [nframes, natoms[0], net_dim_out] else: - if self.mixed_types: - atom_property = ( - self.filter_layers.networks[0](xx) + self.bias_atom_e[atype] - ) + for type_i, ll in enumerate(self.filter_layers.networks): + mask = (atype == type_i).unsqueeze(-1) + mask = torch.tile(mask, (1, 1, net_dim_out)) + atom_property = ll(xx) if xx_zeros is not None: - atom_property -= self.filter_layers.networks[0](xx_zeros) + # must assert, otherwise jit is not happy + assert self.remove_vaccum_contribution is not None + if not ( + 
len(self.remove_vaccum_contribution) > type_i + and not self.remove_vaccum_contribution[type_i] + ): + atom_property -= ll(xx_zeros) + atom_property = atom_property + self.bias_atom_e[type_i] + atom_property = atom_property * mask outs = ( outs + atom_property ) # Shape is [nframes, natoms[0], net_dim_out] - else: - for type_i, ll in enumerate(self.filter_layers.networks): - mask = (atype == type_i).unsqueeze(-1) - mask = torch.tile(mask, (1, 1, net_dim_out)) - atom_property = ll(xx) - if xx_zeros is not None: - # must assert, otherwise jit is not happy - assert self.remove_vaccum_contribution is not None - if not ( - len(self.remove_vaccum_contribution) > type_i - and not self.remove_vaccum_contribution[type_i] - ): - atom_property -= ll(xx_zeros) - atom_property = atom_property + self.bias_atom_e[type_i] - atom_property = atom_property * mask - outs = ( - outs + atom_property - ) # Shape is [nframes, natoms[0], net_dim_out] # nf x nloc mask = self.emask(atype) # nf x nloc x nod diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py index a16ab886d4..512044efbd 100644 --- a/deepmd/pt/model/task/polarizability.py +++ b/deepmd/pt/model/task/polarizability.py @@ -138,7 +138,6 @@ def __init__( type_map=type_map, **kwargs, ) - self.old_impl = False # this only supports the new implementation. def _net_out_dim(self): """Set the FittingNet output dim.""" @@ -195,7 +194,6 @@ def serialize(self) -> dict: data["type"] = "polar" data["@version"] = 3 data["embedding_width"] = self.embedding_width - data["old_impl"] = self.old_impl data["fit_diag"] = self.fit_diag data["shift_diag"] = self.shift_diag data["@variables"]["scale"] = to_numpy_array(self.scale) diff --git a/source/tests/pt/model/test_descriptor_hybrid.py b/source/tests/pt/model/test_descriptor_hybrid.py index 5d03b28399..074af4da4e 100644 --- a/source/tests/pt/model/test_descriptor_hybrid.py +++ b/source/tests/pt/model/test_descriptor_hybrid.py @@ -41,7 +41,6 @@ def test_jit( self.rcut, self.rcut_smth, self.sel, - old_impl=False, ) ddsub1 = DescrptSeR( self.rcut, diff --git a/source/tests/pt/model/test_descriptor_se_r.py b/source/tests/pt/model/test_descriptor_se_r.py index f3692101c5..e4aa405dd8 100644 --- a/source/tests/pt/model/test_descriptor_se_r.py +++ b/source/tests/pt/model/test_descriptor_se_r.py @@ -61,7 +61,6 @@ def test_consistency( self.sel, precision=prec, resnet_dt=idt, - old_impl=False, exclude_mask=em, seed=GLOBAL_SEED, ).to(env.DEVICE) @@ -130,7 +129,6 @@ def test_load_stat(self): self.sel, precision=prec, resnet_dt=idt, - old_impl=False, seed=GLOBAL_SEED, ) dd0.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) @@ -181,7 +179,6 @@ def test_jit( self.sel, precision=prec, resnet_dt=idt, - old_impl=False, seed=GLOBAL_SEED, ) dd0.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) diff --git a/source/tests/pt/model/test_dpa1.py b/source/tests/pt/model/test_dpa1.py index b825885311..d168ceb2ae 100644 --- a/source/tests/pt/model/test_dpa1.py +++ b/source/tests/pt/model/test_dpa1.py @@ -70,7 +70,6 @@ def test_consistency( tebd_input_mode=tm, use_econf_tebd=ect, type_map=["O", "H"] if ect else None, - old_impl=False, seed=GLOBAL_SEED, ).to(env.DEVICE) dd0.se_atten.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) @@ -108,69 +107,6 @@ def test_consistency( atol=atol, err_msg=err_msg, ) - # old impl - if ( - idt is False - and prec == "float64" - and to is False - and tm == "concat" - and ect is False - ): - dd3 = DescrptDPA1( - self.rcut, - self.rcut_smth, - self.sel_mix, 
- self.nt, - attn_layer=2, - precision=prec, - resnet_dt=idt, - smooth_type_embedding=sm, - old_impl=True, - seed=GLOBAL_SEED, - ).to(env.DEVICE) - dd0_state_dict = dd0.se_atten.state_dict() - dd3_state_dict = dd3.se_atten.state_dict() - - dd0_state_dict_attn = dd0.se_atten.dpa1_attention.state_dict() - dd3_state_dict_attn = dd3.se_atten.dpa1_attention.state_dict() - for i in dd3_state_dict: - dd3_state_dict[i] = ( - dd0_state_dict[ - i.replace(".deep_layers.", ".layers.") - .replace("filter_layers_old.", "filter_layers._networks.") - .replace( - ".attn_layer_norm.weight", ".attn_layer_norm.matrix" - ) - ] - .detach() - .clone() - ) - if ".bias" in i and "attn_layer_norm" not in i: - dd3_state_dict[i] = dd3_state_dict[i].unsqueeze(0) - dd3.se_atten.load_state_dict(dd3_state_dict) - - dd0_state_dict_tebd = dd0.type_embedding.state_dict() - dd3_state_dict_tebd = dd3.type_embedding.state_dict() - for i in dd3_state_dict_tebd: - dd3_state_dict_tebd[i] = ( - dd0_state_dict_tebd[i.replace("embedding.weight", "matrix")] - .detach() - .clone() - ) - dd3.type_embedding.load_state_dict(dd3_state_dict_tebd) - - rd3, _, _, _, _ = dd3( - torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE), - torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE), - torch.tensor(self.nlist, dtype=int, device=env.DEVICE), - ) - np.testing.assert_allclose( - rd0.detach().cpu().numpy(), - rd3.detach().cpu().numpy(), - rtol=rtol, - atol=atol, - err_msg=err_msg, - ) def test_jit( self, @@ -211,7 +147,6 @@ def test_jit( tebd_input_mode=tm, use_econf_tebd=ect, type_map=["O", "H"] if ect else None, - old_impl=False, seed=GLOBAL_SEED, ) dd0.se_atten.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) diff --git a/source/tests/pt/model/test_dpa2.py b/source/tests/pt/model/test_dpa2.py index 0beb34c031..2eac49d573 100644 --- a/source/tests/pt/model/test_dpa2.py +++ b/source/tests/pt/model/test_dpa2.py @@ -154,7 +154,6 @@ def test_consistency( precision=prec, use_econf_tebd=ect, type_map=["O", "H"] if ect else None, - old_impl=False, seed=GLOBAL_SEED, ).to(env.DEVICE) @@ -193,45 +192,6 @@ def test_consistency( rtol=rtol, atol=atol, ) - # old impl - if prec == "float64" and rus == "res_avg" and ect is False and ns is False: - dd3 = DescrptDPA2( - self.nt, - repinit=repinit, - repformer=repformer, - # kwargs for descriptor - smooth=sm, - exclude_types=[], - add_tebd_to_repinit_out=False, - precision=prec, - old_impl=True, - seed=GLOBAL_SEED, - ).to(env.DEVICE) - dd0_state_dict = dd0.state_dict() - dd3_state_dict = dd3.state_dict() - for i in list(dd0_state_dict.keys()): - if ".bias" in i and ( - ".linear1." in i or ".linear2." in i or ".head_map." 
in i - ): - dd0_state_dict[i] = dd0_state_dict[i].unsqueeze(0) - if ".attn2_lm.matrix" in i: - dd0_state_dict[ - i.replace(".attn2_lm.matrix", ".attn2_lm.weight") - ] = dd0_state_dict.pop(i) - - dd3.load_state_dict(dd0_state_dict) - rd3, _, _, _, _ = dd3( - torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE), - torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE), - torch.tensor(self.nlist, dtype=int, device=env.DEVICE), - torch.tensor(self.mapping, dtype=int, device=env.DEVICE), - ) - np.testing.assert_allclose( - rd0.detach().cpu().numpy(), - rd3.detach().cpu().numpy(), - rtol=rtol, - atol=atol, - ) def test_jit( self, @@ -350,7 +310,6 @@ def test_jit( precision=prec, use_econf_tebd=ect, type_map=["O", "H"] if ect else None, - old_impl=False, seed=GLOBAL_SEED, ).to(env.DEVICE) diff --git a/source/tests/pt/model/test_embedding_net.py b/source/tests/pt/model/test_embedding_net.py index 3605316437..1566eb2416 100644 --- a/source/tests/pt/model/test_embedding_net.py +++ b/source/tests/pt/model/test_embedding_net.py @@ -167,20 +167,15 @@ def test_consistency(self): ) # Reproduced - old_impl = False descriptor = DescrptSeA( self.rcut, self.rcut_smth, self.sel, neuron=self.filter_neuron, axis_neuron=self.axis_neuron, - old_impl=old_impl, ).to(DEVICE) for name, param in descriptor.named_parameters(): - if old_impl: - ms = re.findall(r"(\d)\.deep_layers\.(\d)\.([a-z]+)", name) - else: - ms = re.findall(r"(\d)\.layers\.(\d)\.([a-z]+)", name) + ms = re.findall(r"(\d)\.layers\.(\d)\.([a-z]+)", name) if len(ms) == 1: m = ms[0] key = gen_key(worb=m[2], depth=int(m[1]) + 1, elemid=int(m[0])) diff --git a/source/tests/pt/model/test_ener_fitting.py b/source/tests/pt/model/test_ener_fitting.py index 3255db2784..5c55766455 100644 --- a/source/tests/pt/model/test_ener_fitting.py +++ b/source/tests/pt/model/test_ener_fitting.py @@ -10,7 +10,6 @@ DescrptSeA, ) from deepmd.pt.model.task.ener import ( - EnergyFittingNet, InvarFitting, ) from deepmd.pt.utils import ( @@ -103,53 +102,6 @@ def test_consistency( ) self.assertEqual(ft0.get_sel_type(), ft1.get_sel_type()) - def test_new_old( - self, - ): - nf, nloc, nnei = self.nlist.shape - dd = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) - rd0, _, _, _, _ = dd( - torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE), - torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE), - torch.tensor(self.nlist, dtype=int, device=env.DEVICE), - ) - atype = torch.tensor(self.atype_ext[:, :nloc], dtype=int, device=env.DEVICE) - - od = 1 - for foo, mixed_types in itertools.product( - [True], - [True, False], - ): - ft0 = EnergyFittingNet( - self.nt, - dd.dim_out, - mixed_types=mixed_types, - ).to(env.DEVICE) - ft1 = EnergyFittingNet( - self.nt, - dd.dim_out, - mixed_types=mixed_types, - old_impl=True, - ).to(env.DEVICE) - dd0 = ft0.state_dict() - dd1 = ft1.state_dict() - for kk, vv in dd1.items(): - new_kk = kk - new_kk = new_kk.replace("filter_layers_old", "filter_layers.networks") - new_kk = new_kk.replace("deep_layers", "layers") - new_kk = new_kk.replace("final_layer", "layers.3") - dd1[kk] = dd0[new_kk] - if kk.split(".")[-1] in ["idt", "bias"]: - dd1[kk] = dd1[kk].unsqueeze(0) - dd1["bias_atom_e"] = dd0["bias_atom_e"] - ft1.load_state_dict(dd1) - ret0 = ft0(rd0, atype) - ret1 = ft1(rd0, atype) - np.testing.assert_allclose( - to_numpy_array(ret0["energy"]), - to_numpy_array(ret1["energy"]), - ) - def test_jit( self, ): diff --git a/source/tests/pt/model/test_se_atten_v2.py b/source/tests/pt/model/test_se_atten_v2.py index 
f9857fc728..462b2aca34 100644 --- a/source/tests/pt/model/test_se_atten_v2.py +++ b/source/tests/pt/model/test_se_atten_v2.py @@ -66,7 +66,6 @@ def test_consistency( type_one_side=to, use_econf_tebd=ect, type_map=["O", "H"] if ect else None, - old_impl=False, seed=GLOBAL_SEED, ).to(env.DEVICE) dd0.se_atten.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) @@ -138,7 +137,6 @@ def test_jit( type_one_side=to, use_econf_tebd=ect, type_map=["O", "H"] if ect else None, - old_impl=False, seed=GLOBAL_SEED, ) dd0.se_atten.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) diff --git a/source/tests/pt/model/test_se_e2_a.py b/source/tests/pt/model/test_se_e2_a.py index abe13ce86e..da9e69243c 100644 --- a/source/tests/pt/model/test_se_e2_a.py +++ b/source/tests/pt/model/test_se_e2_a.py @@ -58,7 +58,6 @@ def test_consistency( self.sel, precision=prec, resnet_dt=idt, - old_impl=False, exclude_types=em, seed=GLOBAL_SEED, ).to(env.DEVICE) @@ -105,46 +104,6 @@ def test_consistency( atol=atol, err_msg=err_msg, ) - # old impl - if idt is False and prec == "float64" and em == []: - dd3 = DescrptSeA( - self.rcut, - self.rcut_smth, - self.sel, - precision=prec, - resnet_dt=idt, - old_impl=True, - seed=GLOBAL_SEED, - ).to(env.DEVICE) - dd0_state_dict = dd0.sea.state_dict() - dd3_state_dict = dd3.sea.state_dict() - for i in dd3_state_dict: - dd3_state_dict[i] = ( - dd0_state_dict[ - i.replace(".deep_layers.", ".layers.").replace( - "filter_layers_old.", "filter_layers.networks." - ) - ] - .detach() - .clone() - ) - if ".bias" in i: - dd3_state_dict[i] = dd3_state_dict[i].unsqueeze(0) - dd3.sea.load_state_dict(dd3_state_dict) - - rd3, gr3, _, _, sw3 = dd3( - torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE), - torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE), - torch.tensor(self.nlist, dtype=int, device=env.DEVICE), - ) - for aa, bb in zip([rd1, gr1, sw1], [rd3, gr3, sw3]): - np.testing.assert_allclose( - aa.detach().cpu().numpy(), - bb.detach().cpu().numpy(), - rtol=rtol, - atol=atol, - err_msg=err_msg, - ) def test_jit( self, @@ -169,7 +128,6 @@ def test_jit( self.sel, precision=prec, resnet_dt=idt, - old_impl=False, seed=GLOBAL_SEED, ) dd0.sea.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE) From 1e1090a302fa66b10a29cd255ce41881da57a9e8 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 17 Oct 2024 07:44:47 -0400 Subject: [PATCH 37/39] chore(lmp): add LAMMPS DPA-2 nopbc tests (#4220) Adding tests to see whether #4167 is resolved. The answer is no. Segfaults are thrown with MPI. ## Summary by CodeRabbit - **New Features** - Introduced a new command-line argument `--nopbc` to modify boundary conditions in LAMMPS simulations. - **Tests** - Added a comprehensive suite of unit tests for the DeepMD potential in LAMMPS, covering various configurations and scenarios to ensure accuracy and reliability. 
--------- Signed-off-by: Jinzhe Zeng --- source/lmp/tests/run_mpi_pair_deepmd.py | 6 +- source/lmp/tests/test_lammps_dpa_pt_nopbc.py | 728 +++++++++++++++++++ 2 files changed, 733 insertions(+), 1 deletion(-) create mode 100644 source/lmp/tests/test_lammps_dpa_pt_nopbc.py diff --git a/source/lmp/tests/run_mpi_pair_deepmd.py b/source/lmp/tests/run_mpi_pair_deepmd.py index 0c4291ab3a..7c0ff6edc0 100644 --- a/source/lmp/tests/run_mpi_pair_deepmd.py +++ b/source/lmp/tests/run_mpi_pair_deepmd.py @@ -21,6 +21,7 @@ parser.add_argument("MD_FILE", type=str) parser.add_argument("OUTPUT", type=str) parser.add_argument("--balance", action="store_true") +parser.add_argument("--nopbc", action="store_true") args = parser.parse_args() data_file = args.DATAFILE @@ -38,7 +39,10 @@ # 6 and 0 atoms lammps.processors("1 2 1") lammps.units("metal") -lammps.boundary("p p p") +if args.nopbc: + lammps.boundary("f f f") +else: + lammps.boundary("p p p") lammps.atom_style("atomic") lammps.neighbor("2.0 bin") lammps.neigh_modify("every 10 delay 0 check no") diff --git a/source/lmp/tests/test_lammps_dpa_pt_nopbc.py b/source/lmp/tests/test_lammps_dpa_pt_nopbc.py new file mode 100644 index 0000000000..15fe2c0bc2 --- /dev/null +++ b/source/lmp/tests/test_lammps_dpa_pt_nopbc.py @@ -0,0 +1,728 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import importlib +import os +import shutil +import subprocess as sp +import sys +import tempfile +from pathlib import ( + Path, +) + +import constants +import numpy as np +import pytest +from lammps import ( + PyLammps, +) +from write_lmp_data import ( + write_lmp_data, +) + +pbtxt_file2 = Path(__file__).parent.parent.parent / "tests" / "infer" / "deeppot.pbtxt" +pb_file = Path(__file__).parent.parent.parent / "tests" / "infer" / "deeppot_dpa.pth" +pb_file2 = Path(__file__).parent / "graph.pb" +system_file = Path(__file__).parent.parent.parent / "tests" +data_file = Path(__file__).parent / "data.lmp" +data_file_si = Path(__file__).parent / "data.si" +data_type_map_file = Path(__file__).parent / "data_type_map.lmp" +md_file = Path(__file__).parent / "md.out" + +# this is as the same as python and c++ tests, test_deeppot_a.py +expected_ae = np.array( + [ + -95.13216447995296, + -188.10146505781867, + -187.74742451023172, + -94.73864717001219, + -187.76956603003393, + -187.76904550434332, + ] +) +expected_e = np.sum(expected_ae) +expected_f = np.array( + [ + 0.7486830600282869, + -0.240322915088127, + -0.3943366458127905, + -0.1776248813665344, + 0.2359143394202788, + 0.4210018319063822, + -0.2368532809002255, + 0.0291156803500336, + -0.0219651427265617, + -1.407280069394403, + 0.4932116549421467, + -0.9482072853582465, + -0.1501958909452974, + -0.9720722611839484, + 1.5128172910814666, + 1.2232710625781733, + 0.4541535015596165, + -0.569310049090249, + ] +).reshape(6, 3) + +expected_f2 = np.array( + [ + -2.161037360255332107e00, + 9.052994347015581589e-01, + 1.635379623977007979e00, + 2.161037360255332107e00, + -9.052994347015581589e-01, + -1.635379623977007979e00, + -1.167128117249453811e-02, + 1.371975700096064992e-03, + -1.575265180249604477e-03, + 6.226508593971802341e-01, + -1.816734122009256991e-01, + 3.561766019664774907e-01, + -1.406075393906316626e-02, + 3.789140061530929526e-01, + -6.018777878642909140e-01, + -5.969188242856223736e-01, + -1.986125696522633155e-01, + 2.472764510780630642e-01, + ] +).reshape(6, 3) + +expected_v = -np.array( + [ + 1.4724482801774368e00, + -1.8952544175284314e-01, + -2.0502896614522359e-01, + -2.0361724110178425e-01, + 5.4221646102123211e-02, + 
8.7963957026666373e-02, + -1.3233356224791937e-01, + 8.3907068051133571e-02, + 1.6072164570432412e-01, + 2.2913216241740741e00, + -6.0712170533586352e-02, + 1.2802395909429765e-01, + 6.9581050483420448e-03, + 2.0894022035588655e-02, + 4.3408316864598340e-02, + -1.4144392402206662e-03, + 3.6852652738654124e-02, + 7.7149761552687490e-02, + 5.6814285976509526e-01, + -7.0738211182030164e-02, + 5.4514470128648518e-02, + -7.1339324275474125e-02, + 9.8158535704203354e-03, + -8.3431069537701560e-03, + 5.4072790262097083e-02, + -8.1976736911977682e-03, + 7.6505804915597275e-03, + 1.6869950835783332e-01, + 2.1880432930426963e-02, + 1.0308234746703970e-01, + 9.1015395953307099e-02, + 7.1788910181538768e-02, + -1.4119552688428305e-01, + -1.4977320631771729e-01, + -1.0982955047012899e-01, + 2.3324521962640055e-01, + 8.1569862372597679e-01, + 6.2848559999917952e-02, + -4.5341405643671506e-02, + -3.9134119664198064e-01, + 4.1651372430088562e-01, + -5.8173709994663803e-01, + 6.6155672230934037e-01, + -6.4774042800560672e-01, + 9.0924772156749301e-01, + 2.0503134548416586e00, + 1.9684008914564011e-01, + -3.1711040533580070e-01, + 5.2891751962511613e-01, + 8.7385258358844808e-02, + -1.5487618319904839e-01, + -7.1396830520028809e-01, + -1.0977171171532918e-01, + 1.9792085656111236e-01, + ] +).reshape(6, 9) +expected_v2 = -np.array( + [ + -7.042445481792056761e-01, + 2.950213647777754078e-01, + 5.329418202437231633e-01, + 2.950213647777752968e-01, + -1.235900311906896754e-01, + -2.232594111831812944e-01, + 5.329418202437232743e-01, + -2.232594111831813499e-01, + -4.033073234276823849e-01, + -8.949230984097404917e-01, + 3.749002169013777030e-01, + 6.772391014992630298e-01, + 3.749002169013777586e-01, + -1.570527935667933583e-01, + -2.837082722496912512e-01, + 6.772391014992631408e-01, + -2.837082722496912512e-01, + -5.125052659994422388e-01, + 4.858210330291591605e-02, + -6.902596153269104431e-03, + 6.682612642430500391e-03, + -5.612247004554610057e-03, + 9.767795567660207592e-04, + -9.773758942738038254e-04, + 5.638322117219018645e-03, + -9.483806049779926932e-04, + 8.493873281881353637e-04, + -2.941738570564985666e-01, + -4.482529909499673171e-02, + 4.091569840186781021e-02, + -4.509020615859140463e-02, + -1.013919988807244071e-01, + 1.551440772665269030e-01, + 4.181857726606644232e-02, + 1.547200233064863484e-01, + -2.398213304685777592e-01, + -3.218625798524068354e-02, + -1.012438450438508421e-02, + 1.271639330380921855e-02, + 3.072814938490859779e-03, + -9.556241797915024372e-02, + 1.512251983492413077e-01, + -8.277872384009607454e-03, + 1.505412040827929787e-01, + -2.386150620881526407e-01, + -2.312295470054945568e-01, + -6.631490213524345034e-02, + 7.932427266386249398e-02, + -8.053754366323923053e-02, + -3.294595881137418747e-02, + 4.342495071150231922e-02, + 1.004599500126941436e-01, + 4.450400364869536163e-02, + -5.951077548033092968e-02, + ] +).reshape(6, 9) + +box = np.array([0, 13, 0, 13, 0, 13, 0, 0, 0]) +coord = np.array( + [ + [12.83, 2.56, 2.18], + [12.09, 2.87, 2.74], + [0.25, 3.32, 1.68], + [3.36, 3.00, 1.81], + [3.51, 2.51, 2.60], + [4.27, 3.22, 1.56], + ] +) +type_OH = np.array([1, 2, 2, 1, 2, 2]) +type_HO = np.array([2, 1, 1, 2, 1, 1]) + + +sp.check_output( + f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file2.resolve()} -o {pb_file2.resolve()}".split() +) + + +def setup_module(): + write_lmp_data(box, coord, type_OH, data_file) + write_lmp_data(box, coord, type_HO, data_type_map_file) + write_lmp_data( + box * constants.dist_metal2si, + coord * constants.dist_metal2si, + 
type_OH, + data_file_si, + ) + + +def teardown_module(): + os.remove(data_file) + os.remove(data_type_map_file) + + +def _lammps(data_file, units="metal") -> PyLammps: + lammps = PyLammps() + lammps.units(units) + lammps.boundary("f f f") + lammps.atom_style("atomic") + if units == "metal" or units == "real": + lammps.neighbor("2.0 bin") + elif units == "si": + lammps.neighbor("2.0e-10 bin") + else: + raise ValueError("units should be metal, real, or si") + lammps.neigh_modify("every 10 delay 0 check no") + lammps.read_data(data_file.resolve()) + if units == "metal" or units == "real": + lammps.mass("1 16") + lammps.mass("2 2") + elif units == "si": + lammps.mass("1 %.10e" % (16 * constants.mass_metal2si)) + lammps.mass("2 %.10e" % (2 * constants.mass_metal2si)) + else: + raise ValueError("units should be metal, real, or si") + if units == "metal": + lammps.timestep(0.0005) + elif units == "real": + lammps.timestep(0.5) + elif units == "si": + lammps.timestep(5e-16) + else: + raise ValueError("units should be metal, real, or si") + lammps.fix("1 all nve") + return lammps + + +@pytest.fixture +def lammps(): + lmp = _lammps(data_file=data_file) + yield lmp + lmp.close() + + +@pytest.fixture +def lammps_type_map(): + lmp = _lammps(data_file=data_type_map_file) + yield lmp + lmp.close() + + +@pytest.fixture +def lammps_real(): + lmp = _lammps(data_file=data_file, units="real") + yield lmp + lmp.close() + + +@pytest.fixture +def lammps_si(): + lmp = _lammps(data_file=data_file_si, units="si") + yield lmp + lmp.close() + + +def test_pair_deepmd(lammps): + lammps.pair_style(f"deepmd {pb_file.resolve()}") + lammps.pair_coeff("* *") + lammps.run(0) + assert lammps.eval("pe") == pytest.approx(expected_e) + for ii in range(6): + assert lammps.atoms[ii].force == pytest.approx( + expected_f[lammps.atoms[ii].id - 1] + ) + lammps.run(1) + + +def test_pair_deepmd_virial(lammps): + lammps.pair_style(f"deepmd {pb_file.resolve()}") + lammps.pair_coeff("* *") + lammps.compute("virial all centroid/stress/atom NULL pair") + for ii in range(9): + jj = [0, 4, 8, 3, 6, 7, 1, 2, 5][ii] + lammps.variable(f"virial{jj} atom c_virial[{ii+1}]") + lammps.dump( + "1 all custom 1 dump id " + " ".join([f"v_virial{ii}" for ii in range(9)]) + ) + lammps.run(0) + assert lammps.eval("pe") == pytest.approx(expected_e) + for ii in range(6): + assert lammps.atoms[ii].force == pytest.approx( + expected_f[lammps.atoms[ii].id - 1] + ) + idx_map = lammps.lmp.numpy.extract_atom("id") - 1 + for ii in range(9): + assert np.array( + lammps.variables[f"virial{ii}"].value + ) / constants.nktv2p == pytest.approx(expected_v[idx_map, ii]) + + +def test_pair_deepmd_model_devi(lammps): + lammps.pair_style( + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic" + ) + lammps.pair_coeff("* *") + lammps.run(0) + assert lammps.eval("pe") == pytest.approx(expected_e) + for ii in range(6): + assert lammps.atoms[ii].force == pytest.approx( + expected_f[lammps.atoms[ii].id - 1] + ) + # load model devi + md = np.loadtxt(md_file.resolve()) + expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1) + assert md[7:] == pytest.approx(expected_md_f) + assert md[4] == pytest.approx(np.max(expected_md_f)) + assert md[5] == pytest.approx(np.min(expected_md_f)) + assert md[6] == pytest.approx(np.mean(expected_md_f)) + expected_md_v = ( + np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6 + ) + assert md[1] == pytest.approx(np.max(expected_md_v)) + assert md[2] == 
pytest.approx(np.min(expected_md_v)) + assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v)))) + + +def test_pair_deepmd_model_devi_virial(lammps): + lammps.pair_style( + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic" + ) + lammps.pair_coeff("* *") + lammps.compute("virial all centroid/stress/atom NULL pair") + for ii in range(9): + jj = [0, 4, 8, 3, 6, 7, 1, 2, 5][ii] + lammps.variable(f"virial{jj} atom c_virial[{ii+1}]") + lammps.dump( + "1 all custom 1 dump id " + " ".join([f"v_virial{ii}" for ii in range(9)]) + ) + lammps.run(0) + assert lammps.eval("pe") == pytest.approx(expected_e) + for ii in range(6): + assert lammps.atoms[ii].force == pytest.approx( + expected_f[lammps.atoms[ii].id - 1] + ) + idx_map = lammps.lmp.numpy.extract_atom("id") - 1 + for ii in range(9): + assert np.array( + lammps.variables[f"virial{ii}"].value + ) / constants.nktv2p == pytest.approx(expected_v[idx_map, ii]) + # load model devi + md = np.loadtxt(md_file.resolve()) + expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1) + assert md[7:] == pytest.approx(expected_md_f) + assert md[4] == pytest.approx(np.max(expected_md_f)) + assert md[5] == pytest.approx(np.min(expected_md_f)) + assert md[6] == pytest.approx(np.mean(expected_md_f)) + expected_md_v = ( + np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6 + ) + assert md[1] == pytest.approx(np.max(expected_md_v)) + assert md[2] == pytest.approx(np.min(expected_md_v)) + assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v)))) + + +def test_pair_deepmd_model_devi_atomic_relative(lammps): + relative = 1.0 + lammps.pair_style( + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative {relative}" + ) + lammps.pair_coeff("* *") + lammps.run(0) + assert lammps.eval("pe") == pytest.approx(expected_e) + for ii in range(6): + assert lammps.atoms[ii].force == pytest.approx( + expected_f[lammps.atoms[ii].id - 1] + ) + # load model devi + md = np.loadtxt(md_file.resolve()) + norm = np.linalg.norm(np.mean([expected_f, expected_f2], axis=0), axis=1) + expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1) + expected_md_f /= norm + relative + assert md[7:] == pytest.approx(expected_md_f) + assert md[4] == pytest.approx(np.max(expected_md_f)) + assert md[5] == pytest.approx(np.min(expected_md_f)) + assert md[6] == pytest.approx(np.mean(expected_md_f)) + expected_md_v = ( + np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6 + ) + assert md[1] == pytest.approx(np.max(expected_md_v)) + assert md[2] == pytest.approx(np.min(expected_md_v)) + assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v)))) + + +def test_pair_deepmd_model_devi_atomic_relative_v(lammps): + relative = 1.0 + lammps.pair_style( + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative_v {relative}" + ) + lammps.pair_coeff("* *") + lammps.run(0) + assert lammps.eval("pe") == pytest.approx(expected_e) + for ii in range(6): + assert lammps.atoms[ii].force == pytest.approx( + expected_f[lammps.atoms[ii].id - 1] + ) + md = np.loadtxt(md_file.resolve()) + expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1) + assert md[7:] == pytest.approx(expected_md_f) + assert md[4] == pytest.approx(np.max(expected_md_f)) + assert md[5] == pytest.approx(np.min(expected_md_f)) + assert md[6] == 
pytest.approx(np.mean(expected_md_f)) + expected_md_v = ( + np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6 + ) + norm = ( + np.abs( + np.mean([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) + ) + / 6 + ) + expected_md_v /= norm + relative + assert md[1] == pytest.approx(np.max(expected_md_v)) + assert md[2] == pytest.approx(np.min(expected_md_v)) + assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v)))) + + +def test_pair_deepmd_type_map(lammps_type_map): + lammps_type_map.pair_style(f"deepmd {pb_file.resolve()}") + lammps_type_map.pair_coeff("* * H O") + lammps_type_map.run(0) + assert lammps_type_map.eval("pe") == pytest.approx(expected_e) + for ii in range(6): + assert lammps_type_map.atoms[ii].force == pytest.approx( + expected_f[lammps_type_map.atoms[ii].id - 1] + ) + lammps_type_map.run(1) + + +def test_pair_deepmd_real(lammps_real): + lammps_real.pair_style(f"deepmd {pb_file.resolve()}") + lammps_real.pair_coeff("* *") + lammps_real.run(0) + assert lammps_real.eval("pe") == pytest.approx( + expected_e * constants.ener_metal2real + ) + for ii in range(6): + assert lammps_real.atoms[ii].force == pytest.approx( + expected_f[lammps_real.atoms[ii].id - 1] * constants.force_metal2real + ) + lammps_real.run(1) + + +def test_pair_deepmd_virial_real(lammps_real): + lammps_real.pair_style(f"deepmd {pb_file.resolve()}") + lammps_real.pair_coeff("* *") + lammps_real.compute("virial all centroid/stress/atom NULL pair") + for ii in range(9): + jj = [0, 4, 8, 3, 6, 7, 1, 2, 5][ii] + lammps_real.variable(f"virial{jj} atom c_virial[{ii+1}]") + lammps_real.dump( + "1 all custom 1 dump id " + " ".join([f"v_virial{ii}" for ii in range(9)]) + ) + lammps_real.run(0) + assert lammps_real.eval("pe") == pytest.approx( + expected_e * constants.ener_metal2real + ) + for ii in range(6): + assert lammps_real.atoms[ii].force == pytest.approx( + expected_f[lammps_real.atoms[ii].id - 1] * constants.force_metal2real + ) + idx_map = lammps_real.lmp.numpy.extract_atom("id") - 1 + for ii in range(9): + assert np.array( + lammps_real.variables[f"virial{ii}"].value + ) / constants.nktv2p_real == pytest.approx( + expected_v[idx_map, ii] * constants.ener_metal2real + ) + + +def test_pair_deepmd_model_devi_real(lammps_real): + lammps_real.pair_style( + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic" + ) + lammps_real.pair_coeff("* *") + lammps_real.run(0) + assert lammps_real.eval("pe") == pytest.approx( + expected_e * constants.ener_metal2real + ) + for ii in range(6): + assert lammps_real.atoms[ii].force == pytest.approx( + expected_f[lammps_real.atoms[ii].id - 1] * constants.force_metal2real + ) + # load model devi + md = np.loadtxt(md_file.resolve()) + expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1) + assert md[7:] == pytest.approx(expected_md_f * constants.force_metal2real) + assert md[4] == pytest.approx(np.max(expected_md_f) * constants.force_metal2real) + assert md[5] == pytest.approx(np.min(expected_md_f) * constants.force_metal2real) + assert md[6] == pytest.approx(np.mean(expected_md_f) * constants.force_metal2real) + expected_md_v = ( + np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6 + ) + assert md[1] == pytest.approx(np.max(expected_md_v) * constants.ener_metal2real) + assert md[2] == pytest.approx(np.min(expected_md_v) * constants.ener_metal2real) + assert md[3] == pytest.approx( + np.sqrt(np.mean(np.square(expected_md_v))) * 
constants.ener_metal2real + ) + + +def test_pair_deepmd_model_devi_virial_real(lammps_real): + lammps_real.pair_style( + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic" + ) + lammps_real.pair_coeff("* *") + lammps_real.compute("virial all centroid/stress/atom NULL pair") + for ii in range(9): + jj = [0, 4, 8, 3, 6, 7, 1, 2, 5][ii] + lammps_real.variable(f"virial{jj} atom c_virial[{ii+1}]") + lammps_real.dump( + "1 all custom 1 dump id " + " ".join([f"v_virial{ii}" for ii in range(9)]) + ) + lammps_real.run(0) + assert lammps_real.eval("pe") == pytest.approx( + expected_e * constants.ener_metal2real + ) + for ii in range(6): + assert lammps_real.atoms[ii].force == pytest.approx( + expected_f[lammps_real.atoms[ii].id - 1] * constants.force_metal2real + ) + idx_map = lammps_real.lmp.numpy.extract_atom("id") - 1 + for ii in range(9): + assert np.array( + lammps_real.variables[f"virial{ii}"].value + ) / constants.nktv2p_real == pytest.approx( + expected_v[idx_map, ii] * constants.ener_metal2real + ) + # load model devi + md = np.loadtxt(md_file.resolve()) + expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1) + assert md[7:] == pytest.approx(expected_md_f * constants.force_metal2real) + assert md[4] == pytest.approx(np.max(expected_md_f) * constants.force_metal2real) + assert md[5] == pytest.approx(np.min(expected_md_f) * constants.force_metal2real) + assert md[6] == pytest.approx(np.mean(expected_md_f) * constants.force_metal2real) + expected_md_v = ( + np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6 + ) + assert md[1] == pytest.approx(np.max(expected_md_v) * constants.ener_metal2real) + assert md[2] == pytest.approx(np.min(expected_md_v) * constants.ener_metal2real) + assert md[3] == pytest.approx( + np.sqrt(np.mean(np.square(expected_md_v))) * constants.ener_metal2real + ) + + +def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real): + relative = 1.0 + lammps_real.pair_style( + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative {relative * constants.force_metal2real}" + ) + lammps_real.pair_coeff("* *") + lammps_real.run(0) + assert lammps_real.eval("pe") == pytest.approx( + expected_e * constants.ener_metal2real + ) + for ii in range(6): + assert lammps_real.atoms[ii].force == pytest.approx( + expected_f[lammps_real.atoms[ii].id - 1] * constants.force_metal2real + ) + # load model devi + md = np.loadtxt(md_file.resolve()) + norm = np.linalg.norm(np.mean([expected_f, expected_f2], axis=0), axis=1) + expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1) + expected_md_f /= norm + relative + assert md[7:] == pytest.approx(expected_md_f * constants.force_metal2real) + assert md[4] == pytest.approx(np.max(expected_md_f) * constants.force_metal2real) + assert md[5] == pytest.approx(np.min(expected_md_f) * constants.force_metal2real) + assert md[6] == pytest.approx(np.mean(expected_md_f) * constants.force_metal2real) + expected_md_v = ( + np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6 + ) + assert md[1] == pytest.approx(np.max(expected_md_v) * constants.ener_metal2real) + assert md[2] == pytest.approx(np.min(expected_md_v) * constants.ener_metal2real) + assert md[3] == pytest.approx( + np.sqrt(np.mean(np.square(expected_md_v))) * constants.ener_metal2real + ) + + +def test_pair_deepmd_model_devi_atomic_relative_v_real(lammps_real): + relative = 1.0 + 
lammps_real.pair_style( + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative_v {relative * constants.ener_metal2real}" + ) + lammps_real.pair_coeff("* *") + lammps_real.run(0) + assert lammps_real.eval("pe") == pytest.approx( + expected_e * constants.ener_metal2real + ) + for ii in range(6): + assert lammps_real.atoms[ii].force == pytest.approx( + expected_f[lammps_real.atoms[ii].id - 1] * constants.force_metal2real + ) + md = np.loadtxt(md_file.resolve()) + expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1) + assert md[7:] == pytest.approx(expected_md_f * constants.force_metal2real) + assert md[4] == pytest.approx(np.max(expected_md_f) * constants.force_metal2real) + assert md[5] == pytest.approx(np.min(expected_md_f) * constants.force_metal2real) + assert md[6] == pytest.approx(np.mean(expected_md_f) * constants.force_metal2real) + expected_md_v = ( + np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6 + ) + norm = ( + np.abs( + np.mean([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) + ) + / 6 + ) + expected_md_v /= norm + relative + assert md[1] == pytest.approx(np.max(expected_md_v) * constants.ener_metal2real) + assert md[2] == pytest.approx(np.min(expected_md_v) * constants.ener_metal2real) + assert md[3] == pytest.approx( + np.sqrt(np.mean(np.square(expected_md_v))) * constants.ener_metal2real + ) + + +def test_pair_deepmd_si(lammps_si): + lammps_si.pair_style(f"deepmd {pb_file.resolve()}") + lammps_si.pair_coeff("* *") + lammps_si.run(0) + assert lammps_si.eval("pe") == pytest.approx(expected_e * constants.ener_metal2si) + for ii in range(6): + assert lammps_si.atoms[ii].force == pytest.approx( + expected_f[lammps_si.atoms[ii].id - 1] * constants.force_metal2si + ) + lammps_si.run(1) + + +@pytest.mark.skipif( + shutil.which("mpirun") is None, reason="MPI is not installed on this system" +) +@pytest.mark.skipif( + importlib.util.find_spec("mpi4py") is None, reason="mpi4py is not installed" +) +@pytest.mark.parametrize( + ("balance_args",), + [(["--balance"],), ([],)], +) +def test_pair_deepmd_mpi(balance_args: list): + if balance_args == []: + # python:5331 terminated with signal 11 at PC=7f3e940e3806 SP=7ffd5787edc0. 
Backtrace: + # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0x95806)[0x7f3e940e3806] + # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0x8f76e)[0x7f3e940dd76e] + # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0x9a38a)[0x7f3e940e838a] + # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(_Z9border_opRKN2at6TensorES2_S2_S2_S2_S2_S2_S2_S2_+0x8e)[0x7f3e940dda63] + # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0xaeac3)[0x7f3e940fcac3] + pytest.skip(reason="Known segfault, see comments for details") + with tempfile.NamedTemporaryFile() as f: + sp.check_call( + [ + "mpirun", + "-n", + "2", + sys.executable, + Path(__file__).parent / "run_mpi_pair_deepmd.py", + data_file, + pb_file, + pb_file2, + md_file, + f.name, + *balance_args, + "--nopbc", + ] + ) + arr = np.loadtxt(f.name, ndmin=1) + pe = arr[0] + + relative = 1.0 + assert pe == pytest.approx(expected_e) + # load model devi + md = np.loadtxt(md_file.resolve()) + norm = np.linalg.norm(np.mean([expected_f, expected_f2], axis=0), axis=1) + expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1) + expected_md_f /= norm + relative + assert md[7:] == pytest.approx(expected_md_f) + assert md[4] == pytest.approx(np.max(expected_md_f)) + assert md[5] == pytest.approx(np.min(expected_md_f)) + assert md[6] == pytest.approx(np.mean(expected_md_f)) + expected_md_v = ( + np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6 + ) + assert md[1] == pytest.approx(np.max(expected_md_v)) + assert md[2] == pytest.approx(np.min(expected_md_v)) + assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v)))) From cc4b23d40c15479c0ea47783333efa789bb9ca8f Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 17 Oct 2024 12:21:33 -0400 Subject: [PATCH 38/39] chore: move `LearningRateExp` to `deepmd.utils.learning_rate` (#4219) ## Summary by CodeRabbit - **New Features** - Introduced a new exponential decay learning rate scheduler to enhance training efficiency. - Added functionality to compute learning rates at specific training steps. - **Bug Fixes** - Removed the outdated `LearningRateExp` class from the previous module to avoid confusion. --------- Signed-off-by: Jinzhe Zeng --- deepmd/dpmodel/utils/learning_rate.py | 53 +++++++++++++++++++++++++ deepmd/pt/utils/learning_rate.py | 57 +++------------------------ 2 files changed, 59 insertions(+), 51 deletions(-) create mode 100644 deepmd/dpmodel/utils/learning_rate.py diff --git a/deepmd/dpmodel/utils/learning_rate.py b/deepmd/dpmodel/utils/learning_rate.py new file mode 100644 index 0000000000..5997b7d63a --- /dev/null +++ b/deepmd/dpmodel/utils/learning_rate.py @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import numpy as np + + +class LearningRateExp: + def __init__( + self, + start_lr, + stop_lr, + decay_steps, + stop_steps, + decay_rate=None, + **kwargs, + ): + """ + Construct an exponential-decayed learning rate. + + Parameters + ---------- + start_lr + The learning rate at the start of the training. + stop_lr + The desired learning rate at the end of the training. + When decay_rate is explicitly set, this value will serve as + the minimum learning rate during training. In other words, + if the learning rate decays below stop_lr, stop_lr will be applied instead. + decay_steps + The learning rate is decaying every this number of training steps. 
+        stop_steps
+            The total training steps for learning rate scheduler.
+        decay_rate
+            The decay rate for the learning rate.
+            If provided, the decay rate will be set instead of
+            calculating it through interpolation between start_lr and stop_lr.
+        """
+        self.start_lr = start_lr
+        default_ds = 100 if stop_steps // 10 > 100 else stop_steps // 100 + 1
+        self.decay_steps = decay_steps
+        if self.decay_steps >= stop_steps:
+            self.decay_steps = default_ds
+        self.decay_rate = np.exp(
+            np.log(stop_lr / self.start_lr) / (stop_steps / self.decay_steps)
+        )
+        if decay_rate is not None:
+            self.decay_rate = decay_rate
+        self.min_lr = stop_lr
+
+    def value(self, step) -> np.float64:
+        """Get the learning rate at the given step."""
+        step_lr = self.start_lr * np.power(self.decay_rate, step // self.decay_steps)
+        if step_lr < self.min_lr:
+            step_lr = self.min_lr
+        return step_lr
diff --git a/deepmd/pt/utils/learning_rate.py b/deepmd/pt/utils/learning_rate.py
index 94c657abd4..3502434bc0 100644
--- a/deepmd/pt/utils/learning_rate.py
+++ b/deepmd/pt/utils/learning_rate.py
@@ -1,53 +1,8 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import numpy as np
+from deepmd.dpmodel.utils.learning_rate import (
+    LearningRateExp,
+)
 
-
-class LearningRateExp:
-    def __init__(
-        self,
-        start_lr,
-        stop_lr,
-        decay_steps,
-        stop_steps,
-        decay_rate=None,
-        **kwargs,
-    ):
-        """
-        Construct an exponential-decayed learning rate.
-
-        Parameters
-        ----------
-        start_lr
-            The learning rate at the start of the training.
-        stop_lr
-            The desired learning rate at the end of the training.
-            When decay_rate is explicitly set, this value will serve as
-            the minimum learning rate during training. In other words,
-            if the learning rate decays below stop_lr, stop_lr will be applied instead.
-        decay_steps
-            The learning rate is decaying every this number of training steps.
-        stop_steps
-            The total training steps for learning rate scheduler.
-        decay_rate
-            The decay rate for the learning rate.
-            If provided, the decay rate will be set instead of
-            calculating it through interpolation between start_lr and stop_lr.
-        """
-        self.start_lr = start_lr
-        default_ds = 100 if stop_steps // 10 > 100 else stop_steps // 100 + 1
-        self.decay_steps = decay_steps
-        if self.decay_steps >= stop_steps:
-            self.decay_steps = default_ds
-        self.decay_rate = np.exp(
-            np.log(stop_lr / self.start_lr) / (stop_steps / self.decay_steps)
-        )
-        if decay_rate is not None:
-            self.decay_rate = decay_rate
-        self.min_lr = stop_lr
-
-    def value(self, step):
-        """Get the learning rate at the given step."""
-        step_lr = self.start_lr * np.power(self.decay_rate, step // self.decay_steps)
-        if step_lr < self.min_lr:
-            step_lr = self.min_lr
-        return step_lr
+__all__ = [
+    "LearningRateExp",
+]

From c2944eb37a736ffd4c51b697fefe5518d3653c2a Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Sun, 20 Oct 2024 13:50:54 +0800
Subject: [PATCH 39/39] fix(pt): remove deprecated torch.norm (#4233)

## Summary by CodeRabbit

- **Chores**
	- Updated the normalization method for improved consistency and potential optimization.
	- Minor code formatting adjustments for enhanced readability.
	- Ensured compatibility with existing function calls by preserving parameters and return types.
Co-authored-by: Han Wang --- deepmd/pt/utils/nlist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/pt/utils/nlist.py b/deepmd/pt/utils/nlist.py index a4f81a23a5..1060b40ce1 100644 --- a/deepmd/pt/utils/nlist.py +++ b/deepmd/pt/utils/nlist.py @@ -457,7 +457,7 @@ def extend_coord_with_ghosts( xyz = xyz.view(-1, 3) xyz = xyz.to(device=device, non_blocking=True) # ns x 3 - shift_idx = xyz[torch.argsort(torch.norm(xyz, dim=1))] + shift_idx = xyz[torch.argsort(torch.linalg.norm(xyz, dim=-1))] ns, _ = shift_idx.shape nall = ns * nloc # nf x ns x 3
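
A minimal usage sketch of the `LearningRateExp` scheduler relocated by PATCH 38; the hyperparameter values below are illustrative, not taken from any configuration above, and only the constructor and `value()` semantics shown in that patch are assumed:

    import numpy as np

    from deepmd.dpmodel.utils.learning_rate import LearningRateExp

    # Illustrative schedule: decay every 5000 steps across a 1M-step run.
    lr = LearningRateExp(start_lr=1e-3, stop_lr=1e-8, decay_steps=5000, stop_steps=1000000)
    # With decay_rate left unset, it is interpolated so the schedule reaches
    # stop_lr at stop_steps: exp(log(stop_lr / start_lr) / (stop_steps / decay_steps)).
    assert np.isclose(lr.decay_rate, np.exp(np.log(1e-8 / 1e-3) / (1000000 / 5000)))
    lr.value(0)  # start_lr, i.e. 1e-3
    lr.value(10000)  # start_lr * decay_rate**2 (two decay intervals elapsed)
    lr.value(10**9)  # clamped at stop_lr, which doubles as min_lr

And a quick equivalence check for the `torch.norm` -> `torch.linalg.norm` swap in PATCH 39, using random data as a stand-in for the `ns x 3` shift vectors sorted in `extend_coord_with_ghosts`:

    import torch

    xyz = torch.randn(8, 3)  # stand-in for the ns x 3 shift vectors
    # Both calls reduce over the length-3 axis to per-row Euclidean norms,
    # so torch.argsort over either result yields the same ordering.
    assert torch.allclose(torch.norm(xyz, dim=1), torch.linalg.norm(xyz, dim=-1))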