From 5394854d80d1d0baae8fb6ed65b70ff055888a11 Mon Sep 17 00:00:00 2001 From: Jia-Xin Zhu <53895049+ChiahsinChu@users.noreply.github.com> Date: Sun, 27 Oct 2024 01:08:06 +0800 Subject: [PATCH 01/14] feat(pt): support `use_aparam_as_mask` for pt backend (#4246) support `use_aparam_as_mask` for pt backend ## Summary by CodeRabbit ## Release Notes - **New Features** - Introduced `use_aparam_as_mask` parameter in `GeneralFitting`, `InvarFitting`, and `EnerFitting` classes, allowing users to conditionally exclude atomic parameters from fitting processes. - Added `seed` parameter to `InvarFitting` for enhanced control over randomness. - New test method `test_use_aparam_as_mask` in `TestInvarFitting` to validate behavior based on the new parameter. - **Bug Fixes** - Improved error handling for `use_aparam_as_mask` in various classes. - **Tests** - Enhanced parameterization in multiple test classes to accommodate new features related to atomic parameters. - Updated test methods in `TestInvarFitting` to include `use_aparam_as_mask` for comprehensive testing. --- deepmd/dpmodel/fitting/general_fitting.py | 8 +++- deepmd/dpmodel/fitting/invar_fitting.py | 4 -- deepmd/pt/model/task/fitting.py | 15 +++++-- deepmd/pt/model/task/invar_fitting.py | 5 ++- deepmd/tf/fit/ener.py | 32 +++++++++----- source/tests/consistent/fitting/common.py | 8 +++- source/tests/consistent/fitting/test_dos.py | 22 ++++++++++ source/tests/consistent/fitting/test_ener.py | 34 ++++++++++++-- .../tests/consistent/fitting/test_property.py | 18 ++++++++ source/tests/pt/model/test_ener_fitting.py | 44 ++++++++++++++++++- 10 files changed, 162 insertions(+), 28 deletions(-) diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py index 62aafc6207..25d15b2e75 100644 --- a/deepmd/dpmodel/fitting/general_fitting.py +++ b/deepmd/dpmodel/fitting/general_fitting.py @@ -173,7 +173,11 @@ def __init__( else: self.aparam_avg, self.aparam_inv_std = None, None # init networks - in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam + in_dim = ( + self.dim_descrpt + + self.numb_fparam + + (0 if self.use_aparam_as_mask else self.numb_aparam) + ) self.nets = NetworkCollection( 1 if not self.mixed_types else 0, self.ntypes, @@ -401,7 +405,7 @@ def _call_common( axis=-1, ) # check aparam dim, concate to input descriptor - if self.numb_aparam > 0: + if self.numb_aparam > 0 and not self.use_aparam_as_mask: assert aparam is not None, "aparam should not be None" if aparam.shape[-1] != self.numb_aparam: raise ValueError( diff --git a/deepmd/dpmodel/fitting/invar_fitting.py b/deepmd/dpmodel/fitting/invar_fitting.py index 893853bb38..2a251834fe 100644 --- a/deepmd/dpmodel/fitting/invar_fitting.py +++ b/deepmd/dpmodel/fitting/invar_fitting.py @@ -139,10 +139,6 @@ def __init__( raise NotImplementedError("tot_ener_zero is not implemented") if spin is not None: raise NotImplementedError("spin is not implemented") - if use_aparam_as_mask: - raise NotImplementedError("use_aparam_as_mask is not implemented") - if use_aparam_as_mask: - raise NotImplementedError("use_aparam_as_mask is not implemented") if layer_name is not None: raise NotImplementedError("layer_name is not implemented") diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 6e9829e4b6..15837aca98 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -126,6 +126,8 @@ class GeneralFitting(Fitting): length as `ntypes` signaling if or not removing the vaccum contribution for the atom types in 
the list. type_map: list[str], Optional A list of strings. Give the name to each type of atoms. + use_aparam_as_mask: bool + If True, the aparam will not be used in fitting net for embedding. """ def __init__( @@ -147,6 +149,7 @@ def __init__( trainable: Union[bool, list[bool]] = True, remove_vaccum_contribution: Optional[list[bool]] = None, type_map: Optional[list[str]] = None, + use_aparam_as_mask: bool = False, **kwargs, ): super().__init__() @@ -164,6 +167,7 @@ def __init__( self.rcond = rcond self.seed = seed self.type_map = type_map + self.use_aparam_as_mask = use_aparam_as_mask # order matters, should be place after the assignment of ntypes self.reinit_exclude(exclude_types) self.trainable = trainable @@ -208,7 +212,11 @@ def __init__( else: self.aparam_avg, self.aparam_inv_std = None, None - in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam + in_dim = ( + self.dim_descrpt + + self.numb_fparam + + (0 if self.use_aparam_as_mask else self.numb_aparam) + ) self.filter_layers = NetworkCollection( 1 if not self.mixed_types else 0, @@ -293,13 +301,12 @@ def serialize(self) -> dict: # "trainable": self.trainable , # "atom_ener": self.atom_ener , # "layer_name": self.layer_name , - # "use_aparam_as_mask": self.use_aparam_as_mask , # "spin": self.spin , ## NOTICE: not supported by far "tot_ener_zero": False, "trainable": [self.trainable] * (len(self.neuron) + 1), "layer_name": None, - "use_aparam_as_mask": False, + "use_aparam_as_mask": self.use_aparam_as_mask, "spin": None, } @@ -441,7 +448,7 @@ def _forward_common( dim=-1, ) # check aparam dim, concate to input descriptor - if self.numb_aparam > 0: + if self.numb_aparam > 0 and not self.use_aparam_as_mask: assert aparam is not None, "aparam should not be None" assert self.aparam_avg is not None assert self.aparam_inv_std is not None diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py index 230046b74b..e76e1d2063 100644 --- a/deepmd/pt/model/task/invar_fitting.py +++ b/deepmd/pt/model/task/invar_fitting.py @@ -77,7 +77,8 @@ class InvarFitting(GeneralFitting): The `set_davg_zero` key in the descrptor should be set. type_map: list[str], Optional A list of strings. Give the name to each type of atoms. - + use_aparam_as_mask: bool + If True, the aparam will not be used in fitting net for embedding. 
""" def __init__( @@ -99,6 +100,7 @@ def __init__( exclude_types: list[int] = [], atom_ener: Optional[list[Optional[torch.Tensor]]] = None, type_map: Optional[list[str]] = None, + use_aparam_as_mask: bool = False, **kwargs, ): self.dim_out = dim_out @@ -122,6 +124,7 @@ def __init__( if atom_ener is None or len([x for x in atom_ener if x is not None]) == 0 else [x is not None for x in atom_ener], type_map=type_map, + use_aparam_as_mask=use_aparam_as_mask, **kwargs, ) diff --git a/deepmd/tf/fit/ener.py b/deepmd/tf/fit/ener.py index b01574cf87..330ea57179 100644 --- a/deepmd/tf/fit/ener.py +++ b/deepmd/tf/fit/ener.py @@ -384,7 +384,7 @@ def _build_lower( ext_fparam = tf.reshape(ext_fparam, [-1, self.numb_fparam]) ext_fparam = tf.cast(ext_fparam, self.fitting_precision) layer = tf.concat([layer, ext_fparam], axis=1) - if aparam is not None: + if aparam is not None and not self.use_aparam_as_mask: ext_aparam = tf.slice( aparam, [0, start_index * self.numb_aparam], @@ -561,7 +561,7 @@ def build( trainable=False, initializer=tf.constant_initializer(self.fparam_inv_std), ) - if self.numb_aparam > 0: + if self.numb_aparam > 0 and not self.use_aparam_as_mask: t_aparam_avg = tf.get_variable( "t_aparam_avg", self.numb_aparam, @@ -576,6 +576,13 @@ def build( trainable=False, initializer=tf.constant_initializer(self.aparam_inv_std), ) + else: + t_aparam_avg = tf.zeros( + self.numb_aparam, dtype=GLOBAL_TF_FLOAT_PRECISION + ) + t_aparam_istd = tf.ones( + self.numb_aparam, dtype=GLOBAL_TF_FLOAT_PRECISION + ) inputs = tf.reshape(inputs, [-1, natoms[0], self.dim_descrpt]) if len(self.atom_ener): @@ -602,12 +609,11 @@ def build( fparam = (fparam - t_fparam_avg) * t_fparam_istd aparam = None - if not self.use_aparam_as_mask: - if self.numb_aparam > 0: - aparam = input_dict["aparam"] - aparam = tf.reshape(aparam, [-1, self.numb_aparam]) - aparam = (aparam - t_aparam_avg) * t_aparam_istd - aparam = tf.reshape(aparam, [-1, self.numb_aparam * natoms[0]]) + if self.numb_aparam > 0 and not self.use_aparam_as_mask: + aparam = input_dict["aparam"] + aparam = tf.reshape(aparam, [-1, self.numb_aparam]) + aparam = (aparam - t_aparam_avg) * t_aparam_istd + aparam = tf.reshape(aparam, [-1, self.numb_aparam * natoms[0]]) atype_nall = tf.reshape(atype, [-1, natoms[1]]) self.atype_nloc = tf.slice( @@ -783,7 +789,7 @@ def init_variables( self.fparam_inv_std = get_tensor_by_name_from_graph( graph, f"fitting_attr{suffix}/t_fparam_istd" ) - if self.numb_aparam > 0: + if self.numb_aparam > 0 and not self.use_aparam_as_mask: self.aparam_avg = get_tensor_by_name_from_graph( graph, f"fitting_attr{suffix}/t_aparam_avg" ) @@ -883,7 +889,7 @@ def deserialize(cls, data: dict, suffix: str = ""): if fitting.numb_fparam > 0: fitting.fparam_avg = data["@variables"]["fparam_avg"] fitting.fparam_inv_std = data["@variables"]["fparam_inv_std"] - if fitting.numb_aparam > 0: + if fitting.numb_aparam > 0 and not fitting.use_aparam_as_mask: fitting.aparam_avg = data["@variables"]["aparam_avg"] fitting.aparam_inv_std = data["@variables"]["aparam_inv_std"] return fitting @@ -922,7 +928,11 @@ def serialize(self, suffix: str = "") -> dict: "nets": self.serialize_network( ntypes=self.ntypes, ndim=0 if self.mixed_types else 1, - in_dim=self.dim_descrpt + self.numb_fparam + self.numb_aparam, + in_dim=( + self.dim_descrpt + + self.numb_fparam + + (0 if self.use_aparam_as_mask else self.numb_aparam) + ), neuron=self.n_neuron, activation_function=self.activation_function_name, resnet_dt=self.resnet_dt, diff --git a/source/tests/consistent/fitting/common.py 
b/source/tests/consistent/fitting/common.py index bdd4b7cf81..95557d9ab8 100644 --- a/source/tests/consistent/fitting/common.py +++ b/source/tests/consistent/fitting/common.py @@ -18,7 +18,7 @@ class FittingTest: """Useful utilities for descriptor tests.""" - def build_tf_fitting(self, obj, inputs, natoms, atype, fparam, suffix): + def build_tf_fitting(self, obj, inputs, natoms, atype, fparam, aparam, suffix): t_inputs = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_inputs") t_natoms = tf.placeholder(tf.int32, natoms.shape, name="i_natoms") t_atype = tf.placeholder(tf.int32, [None], name="i_atype") @@ -30,6 +30,12 @@ def build_tf_fitting(self, obj, inputs, natoms, atype, fparam, suffix): ) extras["fparam"] = t_fparam feed_dict[t_fparam] = fparam + if aparam is not None: + t_aparam = tf.placeholder( + GLOBAL_TF_FLOAT_PRECISION, [None, None], name="i_aparam" + ) + extras["aparam"] = t_aparam + feed_dict[t_aparam] = aparam t_out = obj.build( t_inputs, t_natoms, diff --git a/source/tests/consistent/fitting/test_dos.py b/source/tests/consistent/fitting/test_dos.py index 4a78b69341..774e3f655e 100644 --- a/source/tests/consistent/fitting/test_dos.py +++ b/source/tests/consistent/fitting/test_dos.py @@ -58,6 +58,7 @@ ("float64", "float32"), # precision (True, False), # mixed_types (0, 1), # numb_fparam + (0, 1), # numb_aparam (10, 20), # numb_dos ) class TestDOS(CommonTest, FittingTest, unittest.TestCase): @@ -68,6 +69,7 @@ def data(self) -> dict: precision, mixed_types, numb_fparam, + numb_aparam, numb_dos, ) = self.param return { @@ -75,6 +77,7 @@ def data(self) -> dict: "resnet_dt": resnet_dt, "precision": precision, "numb_fparam": numb_fparam, + "numb_aparam": numb_aparam, "seed": 20240217, "numb_dos": numb_dos, } @@ -86,6 +89,7 @@ def skip_pt(self) -> bool: precision, mixed_types, numb_fparam, + numb_aparam, numb_dos, ) = self.param return CommonTest.skip_pt @@ -115,6 +119,9 @@ def setUp(self): # inconsistent if not sorted self.atype.sort() self.fparam = -np.ones((1,), dtype=GLOBAL_NP_FLOAT_PRECISION) + self.aparam = np.zeros_like( + self.atype, dtype=GLOBAL_NP_FLOAT_PRECISION + ).reshape(-1, 1) @property def addtional_data(self) -> dict: @@ -123,6 +130,7 @@ def addtional_data(self) -> dict: precision, mixed_types, numb_fparam, + numb_aparam, numb_dos, ) = self.param return { @@ -137,6 +145,7 @@ def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: precision, mixed_types, numb_fparam, + numb_aparam, numb_dos, ) = self.param return self.build_tf_fitting( @@ -145,6 +154,7 @@ def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: self.natoms, self.atype, self.fparam if numb_fparam else None, + self.aparam if numb_aparam else None, suffix, ) @@ -154,6 +164,7 @@ def eval_pt(self, pt_obj: Any) -> Any: precision, mixed_types, numb_fparam, + numb_aparam, numb_dos, ) = self.param return ( @@ -163,6 +174,9 @@ def eval_pt(self, pt_obj: Any) -> Any: fparam=torch.from_numpy(self.fparam).to(device=PT_DEVICE) if numb_fparam else None, + aparam=torch.from_numpy(self.aparam).to(device=PT_DEVICE) + if numb_aparam + else None, )["dos"] .detach() .cpu() @@ -175,12 +189,14 @@ def eval_dp(self, dp_obj: Any) -> Any: precision, mixed_types, numb_fparam, + numb_aparam, numb_dos, ) = self.param return dp_obj( self.inputs, self.atype.reshape(1, -1), fparam=self.fparam if numb_fparam else None, + aparam=self.aparam if numb_aparam else None, )["dos"] def eval_jax(self, jax_obj: Any) -> Any: @@ -189,6 +205,7 @@ def eval_jax(self, jax_obj: Any) -> Any: precision, mixed_types, numb_fparam, + 
numb_aparam, numb_dos, ) = self.param return np.asarray( @@ -196,6 +213,7 @@ def eval_jax(self, jax_obj: Any) -> Any: jnp.asarray(self.inputs), jnp.asarray(self.atype.reshape(1, -1)), fparam=jnp.asarray(self.fparam) if numb_fparam else None, + aparam=jnp.asarray(self.aparam) if numb_aparam else None, )["dos"] ) @@ -206,6 +224,7 @@ def eval_array_api_strict(self, array_api_strict_obj: Any) -> Any: precision, mixed_types, numb_fparam, + numb_aparam, numb_dos, ) = self.param return np.asarray( @@ -213,6 +232,7 @@ def eval_array_api_strict(self, array_api_strict_obj: Any) -> Any: array_api_strict.asarray(self.inputs), array_api_strict.asarray(self.atype.reshape(1, -1)), fparam=array_api_strict.asarray(self.fparam) if numb_fparam else None, + aparam=array_api_strict.asarray(self.aparam) if numb_aparam else None, )["dos"] ) @@ -230,6 +250,7 @@ def rtol(self) -> float: precision, mixed_types, numb_fparam, + numb_aparam, numb_dos, ) = self.param if precision == "float64": @@ -247,6 +268,7 @@ def atol(self) -> float: precision, mixed_types, numb_fparam, + numb_aparam, numb_dos, ) = self.param if precision == "float64": diff --git a/source/tests/consistent/fitting/test_ener.py b/source/tests/consistent/fitting/test_ener.py index ba2be1d86b..e32410a0ec 100644 --- a/source/tests/consistent/fitting/test_ener.py +++ b/source/tests/consistent/fitting/test_ener.py @@ -60,6 +60,7 @@ ("float64", "float32", "bfloat16"), # precision (True, False), # mixed_types (0, 1), # numb_fparam + ((0, False), (1, False), (1, True)), # (numb_aparam, use_aparam_as_mask) ([], [-12345.6, None]), # atom_ener ) class TestEner(CommonTest, FittingTest, unittest.TestCase): @@ -70,6 +71,7 @@ def data(self) -> dict: precision, mixed_types, numb_fparam, + (numb_aparam, use_aparam_as_mask), atom_ener, ) = self.param return { @@ -77,8 +79,10 @@ def data(self) -> dict: "resnet_dt": resnet_dt, "precision": precision, "numb_fparam": numb_fparam, + "numb_aparam": numb_aparam, "seed": 20240217, "atom_ener": atom_ener, + "use_aparam_as_mask": use_aparam_as_mask, } @property @@ -88,6 +92,7 @@ def skip_pt(self) -> bool: precision, mixed_types, numb_fparam, + (numb_aparam, use_aparam_as_mask), atom_ener, ) = self.param return CommonTest.skip_pt @@ -101,6 +106,7 @@ def skip_array_api_strict(self) -> bool: precision, mixed_types, numb_fparam, + (numb_aparam, use_aparam_as_mask), atom_ener, ) = self.param # TypeError: The array_api_strict namespace does not support the dtype 'bfloat16' @@ -123,6 +129,9 @@ def setUp(self): # inconsistent if not sorted self.atype.sort() self.fparam = -np.ones((1,), dtype=GLOBAL_NP_FLOAT_PRECISION) + self.aparam = np.zeros_like( + self.atype, dtype=GLOBAL_NP_FLOAT_PRECISION + ).reshape(-1, 1) @property def addtional_data(self) -> dict: @@ -131,6 +140,7 @@ def addtional_data(self) -> dict: precision, mixed_types, numb_fparam, + (numb_aparam, use_aparam_as_mask), atom_ener, ) = self.param return { @@ -145,6 +155,7 @@ def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: precision, mixed_types, numb_fparam, + (numb_aparam, use_aparam_as_mask), atom_ener, ) = self.param return self.build_tf_fitting( @@ -153,6 +164,7 @@ def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: self.natoms, self.atype, self.fparam if numb_fparam else None, + self.aparam if numb_aparam else None, suffix, ) @@ -162,15 +174,23 @@ def eval_pt(self, pt_obj: Any) -> Any: precision, mixed_types, numb_fparam, + (numb_aparam, use_aparam_as_mask), atom_ener, ) = self.param return ( pt_obj( 
torch.from_numpy(self.inputs).to(device=PT_DEVICE), torch.from_numpy(self.atype.reshape(1, -1)).to(device=PT_DEVICE), - fparam=torch.from_numpy(self.fparam).to(device=PT_DEVICE) - if numb_fparam - else None, + fparam=( + torch.from_numpy(self.fparam).to(device=PT_DEVICE) + if numb_fparam + else None + ), + aparam=( + torch.from_numpy(self.aparam).to(device=PT_DEVICE) + if numb_aparam + else None + ), )["energy"] .detach() .cpu() @@ -183,12 +203,14 @@ def eval_dp(self, dp_obj: Any) -> Any: precision, mixed_types, numb_fparam, + (numb_aparam, use_aparam_as_mask), atom_ener, ) = self.param return dp_obj( self.inputs, self.atype.reshape(1, -1), fparam=self.fparam if numb_fparam else None, + aparam=self.aparam if numb_aparam else None, )["energy"] def eval_jax(self, jax_obj: Any) -> Any: @@ -197,6 +219,7 @@ def eval_jax(self, jax_obj: Any) -> Any: precision, mixed_types, numb_fparam, + (numb_aparam, use_aparam_as_mask), atom_ener, ) = self.param return np.asarray( @@ -204,6 +227,7 @@ def eval_jax(self, jax_obj: Any) -> Any: jnp.asarray(self.inputs), jnp.asarray(self.atype.reshape(1, -1)), fparam=jnp.asarray(self.fparam) if numb_fparam else None, + aparam=jnp.asarray(self.aparam) if numb_aparam else None, )["energy"] ) @@ -214,6 +238,7 @@ def eval_array_api_strict(self, array_api_strict_obj: Any) -> Any: precision, mixed_types, numb_fparam, + (numb_aparam, use_aparam_as_mask), atom_ener, ) = self.param return np.asarray( @@ -221,6 +246,7 @@ def eval_array_api_strict(self, array_api_strict_obj: Any) -> Any: array_api_strict.asarray(self.inputs), array_api_strict.asarray(self.atype.reshape(1, -1)), fparam=array_api_strict.asarray(self.fparam) if numb_fparam else None, + aparam=array_api_strict.asarray(self.aparam) if numb_aparam else None, )["energy"] ) @@ -238,6 +264,7 @@ def rtol(self) -> float: precision, mixed_types, numb_fparam, + (numb_aparam, use_aparam_as_mask), atom_ener, ) = self.param if precision == "float64": @@ -257,6 +284,7 @@ def atol(self) -> float: precision, mixed_types, numb_fparam, + (numb_aparam, use_aparam_as_mask), atom_ener, ) = self.param if precision == "float64": diff --git a/source/tests/consistent/fitting/test_property.py b/source/tests/consistent/fitting/test_property.py index a9fb6b694a..beb21d9c04 100644 --- a/source/tests/consistent/fitting/test_property.py +++ b/source/tests/consistent/fitting/test_property.py @@ -40,6 +40,7 @@ ("float64", "float32"), # precision (True, False), # mixed_types (0, 1), # numb_fparam + (0, 1), # numb_aparam (1, 3), # task_dim (True, False), # intensive ) @@ -51,6 +52,7 @@ def data(self) -> dict: precision, mixed_types, numb_fparam, + numb_aparam, task_dim, intensive, ) = self.param @@ -59,6 +61,7 @@ def data(self) -> dict: "resnet_dt": resnet_dt, "precision": precision, "numb_fparam": numb_fparam, + "numb_aparam": numb_aparam, "seed": 20240217, "task_dim": task_dim, "intensive": intensive, @@ -71,6 +74,7 @@ def skip_pt(self) -> bool: precision, mixed_types, numb_fparam, + numb_aparam, task_dim, intensive, ) = self.param @@ -95,6 +99,9 @@ def setUp(self): # inconsistent if not sorted self.atype.sort() self.fparam = -np.ones((1,), dtype=GLOBAL_NP_FLOAT_PRECISION) + self.aparam = np.zeros_like( + self.atype, dtype=GLOBAL_NP_FLOAT_PRECISION + ).reshape(-1, 1) @property def addtional_data(self) -> dict: @@ -103,6 +110,7 @@ def addtional_data(self) -> dict: precision, mixed_types, numb_fparam, + numb_aparam, task_dim, intensive, ) = self.param @@ -118,6 +126,7 @@ def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: precision, 
mixed_types, numb_fparam, + numb_aparam, task_dim, intensive, ) = self.param @@ -127,6 +136,7 @@ def build_tf(self, obj: Any, suffix: str) -> tuple[list, dict]: self.natoms, self.atype, self.fparam if numb_fparam else None, + self.aparam if numb_aparam else None, suffix, ) @@ -136,6 +146,7 @@ def eval_pt(self, pt_obj: Any) -> Any: precision, mixed_types, numb_fparam, + numb_aparam, task_dim, intensive, ) = self.param @@ -146,6 +157,9 @@ def eval_pt(self, pt_obj: Any) -> Any: fparam=torch.from_numpy(self.fparam).to(device=PT_DEVICE) if numb_fparam else None, + aparam=torch.from_numpy(self.aparam).to(device=PT_DEVICE) + if numb_aparam + else None, )["property"] .detach() .cpu() @@ -158,6 +172,7 @@ def eval_dp(self, dp_obj: Any) -> Any: precision, mixed_types, numb_fparam, + numb_aparam, task_dim, intensive, ) = self.param @@ -165,6 +180,7 @@ def eval_dp(self, dp_obj: Any) -> Any: self.inputs, self.atype.reshape(1, -1), fparam=self.fparam if numb_fparam else None, + aparam=self.aparam if numb_aparam else None, )["property"] def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: @@ -181,6 +197,7 @@ def rtol(self) -> float: precision, mixed_types, numb_fparam, + numb_aparam, task_dim, intensive, ) = self.param @@ -199,6 +216,7 @@ def atol(self) -> float: precision, mixed_types, numb_fparam, + numb_aparam, task_dim, intensive, ) = self.param diff --git a/source/tests/pt/model/test_ener_fitting.py b/source/tests/pt/model/test_ener_fitting.py index 5c55766455..acf0a47769 100644 --- a/source/tests/pt/model/test_ener_fitting.py +++ b/source/tests/pt/model/test_ener_fitting.py @@ -36,6 +36,7 @@ def setUp(self): def test_consistency( self, ): + # ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) 
(size 1600 is different from 1604) rng = np.random.default_rng(GLOBAL_SEED) nf, nloc, nnei = self.nlist.shape dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) @@ -46,13 +47,14 @@ def test_consistency( ) atype = torch.tensor(self.atype_ext[:, :nloc], dtype=int, device=env.DEVICE) - for od, mixed_types, nfp, nap, et, nn in itertools.product( + for od, mixed_types, nfp, nap, et, nn, use_aparam_as_mask in itertools.product( [1, 3], [True, False], [0, 3], [0, 4], [[], [0], [1]], [[4, 4, 4], []], + [True, False], ): ft0 = InvarFitting( "foo", @@ -65,6 +67,7 @@ def test_consistency( exclude_types=et, neuron=nn, seed=GLOBAL_SEED, + use_aparam_as_mask=use_aparam_as_mask, ).to(env.DEVICE) ft1 = DPInvarFitting.deserialize(ft0.serialize()) ft2 = InvarFitting.deserialize(ft0.serialize()) @@ -105,12 +108,13 @@ def test_consistency( def test_jit( self, ): - for od, mixed_types, nfp, nap, et in itertools.product( + for od, mixed_types, nfp, nap, et, use_aparam_as_mask in itertools.product( [1, 3], [True, False], [0, 3], [0, 4], [[], [0]], + [True, False], ): ft0 = InvarFitting( "foo", @@ -122,6 +126,7 @@ def test_jit( mixed_types=mixed_types, exclude_types=et, seed=GLOBAL_SEED, + use_aparam_as_mask=use_aparam_as_mask, ).to(env.DEVICE) torch.jit.script(ft0) @@ -146,3 +151,38 @@ def test_get_set(self): np.testing.assert_allclose( foo, np.reshape(ifn0[ii].detach().cpu().numpy(), foo.shape) ) + + def test_use_aparam_as_mask(self): + nap = 4 + dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) + + for od, mixed_types, nfp, et, nn in itertools.product( + [1, 3], + [True, False], + [0, 3], + [[], [0], [1]], + [[4, 4, 4], []], + ): + ft0 = InvarFitting( + "foo", + self.nt, + dd0.dim_out, + od, + numb_fparam=nfp, + numb_aparam=nap, + mixed_types=mixed_types, + exclude_types=et, + neuron=nn, + seed=GLOBAL_SEED, + use_aparam_as_mask=True, + ).to(env.DEVICE) + in_dim = ft0.dim_descrpt + ft0.numb_fparam + assert ft0.filter_layers[0].in_dim == in_dim + + ft1 = DPInvarFitting.deserialize(ft0.serialize()) + in_dim = ft1.dim_descrpt + ft1.numb_fparam + assert ft1.nets[0].in_dim == in_dim + + ft2 = InvarFitting.deserialize(ft0.serialize()) + in_dim = ft2.dim_descrpt + ft2.numb_fparam + assert ft2.filter_layers[0].in_dim == in_dim From 13e247ecb528d78ae8443c2a98bca37fa8459940 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Sun, 27 Oct 2024 02:25:18 +0800 Subject: [PATCH 02/14] fix(tf): fix compress suffix in DescrptDPA1Compat (#4243) Fix #4114 . ## Summary by CodeRabbit - **New Features** - Enhanced compression capabilities in descriptor models with new optional parameters for improved flexibility. - Improved serialization processes for attention layers, allowing for better handling of scaling factors and normalization. - Dynamic tensor name construction in utility functions to accommodate varying suffixes. - **Bug Fixes** - Adjusted method parameters to ensure compatibility and functionality with new suffix options. - **Tests** - Introduced a new test suite to validate the functionality of the TensorFlow-based descriptor model, ensuring consistent output with the updated features. 
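As a hedged usage sketch of the workflow this patch unblocks (the names `descrpt`, `graph`, and `graph_def` are assumed from the frozen-model test added below, not a fixed API): compressing a `DescrptDPA1Compat` that was built under a non-empty scope suffix. Previously the type-embedding lookup was hard-coded to `t_typeebd`, so the suffixed tensor named by `f"t_typeebd{suffix}"` could not be found.

```python
# Hedged sketch mirroring the new test: compress a DescrptDPA1Compat that was
# built and frozen under scope suffix "test". The override added in this patch
# forwards tebd_suffix=suffix internally, so the suffixed type-embedding
# tensor f"t_typeebd{suffix}" can be located in the frozen graph.
descrpt.init_variables(graph, graph_def, suffix="test")
descrpt.enable_compression(
    1.0,  # min_nbor_dist, normally taken from training-data statistics
    graph,
    graph_def,
    suffix="test",
)
```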
--------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- deepmd/tf/descriptor/se_atten.py | 57 ++++++- deepmd/tf/utils/compress.py | 4 +- deepmd/tf/utils/tabulate.py | 2 +- ...del_compression_dpa1_compat_suffix_only.py | 153 ++++++++++++++++++ 4 files changed, 212 insertions(+), 4 deletions(-) create mode 100644 source/tests/tf/test_model_compression_dpa1_compat_suffix_only.py diff --git a/deepmd/tf/descriptor/se_atten.py b/deepmd/tf/descriptor/se_atten.py index 963e81ecf0..8d101f151c 100644 --- a/deepmd/tf/descriptor/se_atten.py +++ b/deepmd/tf/descriptor/se_atten.py @@ -423,6 +423,7 @@ def enable_compression( table_stride_2: float = 0.1, check_frequency: int = -1, suffix: str = "", + tebd_suffix: str = "", ) -> None: """Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the training data. @@ -444,6 +445,8 @@ def enable_compression( The overflow check frequency suffix : str, optional The suffix of the scope + tebd_suffix : str, optional + The suffix of the type embedding scope, only for DescrptDPA1Compat """ # do some checks before the mocel compression process assert ( @@ -496,7 +499,9 @@ def enable_compression( min_nbor_dist, table_extrapolate, table_stride_1, table_stride_2 ) - self.final_type_embedding = get_two_side_type_embedding(self, graph) + self.final_type_embedding = get_two_side_type_embedding( + self, graph, suffix=tebd_suffix + ) type_side_suffix = get_extra_embedding_net_suffix(type_one_side=False) self.matrix = get_extra_side_embedding_net_variable( self, graph_def, type_side_suffix, "matrix", suffix @@ -2248,6 +2253,56 @@ def build( self.dout = tf.concat([self.dout, atom_embed], axis=-1) return self.dout + def enable_compression( + self, + min_nbor_dist: float, + graph: tf.Graph, + graph_def: tf.GraphDef, + table_extrapolate: float = 5, + table_stride_1: float = 0.01, + table_stride_2: float = 0.1, + check_frequency: int = -1, + suffix: str = "", + tebd_suffix: str = "", + ) -> None: + """Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the training data. + + Parameters + ---------- + min_nbor_dist + The nearest distance between atoms + graph : tf.Graph + The graph of the model + graph_def : tf.GraphDef + The graph_def of the model + table_extrapolate + The scale of model extrapolation + table_stride_1 + The uniform stride of the first table + table_stride_2 + The uniform stride of the second table + check_frequency + The overflow check frequency + suffix : str, optional + The suffix of the scope + tebd_suffix : str, optional + Same as suffix. + """ + assert ( + tebd_suffix == "" + ), "DescrptDPA1Compat must use the same tebd_suffix as suffix!" 
+ super().enable_compression( + min_nbor_dist, + graph, + graph_def, + table_extrapolate=table_extrapolate, + table_stride_1=table_stride_1, + table_stride_2=table_stride_2, + check_frequency=check_frequency, + suffix=suffix, + tebd_suffix=suffix, + ) + def init_variables( self, graph: tf.Graph, diff --git a/deepmd/tf/utils/compress.py b/deepmd/tf/utils/compress.py index 0bce633573..f96b59920f 100644 --- a/deepmd/tf/utils/compress.py +++ b/deepmd/tf/utils/compress.py @@ -20,8 +20,8 @@ def get_type_embedding(self, graph): return type_embedding -def get_two_side_type_embedding(self, graph): - type_embedding = get_tensor_by_name_from_graph(graph, "t_typeebd") +def get_two_side_type_embedding(self, graph, suffix=""): + type_embedding = get_tensor_by_name_from_graph(graph, f"t_typeebd{suffix}") type_embedding = type_embedding.astype(self.filter_np_precision) type_embedding_shape = type_embedding.shape diff --git a/deepmd/tf/utils/tabulate.py b/deepmd/tf/utils/tabulate.py index 1dc6128f62..d68f5cadf7 100644 --- a/deepmd/tf/utils/tabulate.py +++ b/deepmd/tf/utils/tabulate.py @@ -126,7 +126,7 @@ def __init__( self.dstd = get_tensor_by_name_from_graph( self.graph, f"descrpt_attr{self.suffix}/t_std" ) - self.ntypes = get_tensor_by_name_from_graph(self.graph, "descrpt_attr/ntypes") + self.ntypes = self.descrpt.get_ntypes() self.embedding_net_nodes = get_embedding_net_nodes_from_graph_def( self.graph_def, suffix=self.suffix diff --git a/source/tests/tf/test_model_compression_dpa1_compat_suffix_only.py b/source/tests/tf/test_model_compression_dpa1_compat_suffix_only.py new file mode 100644 index 0000000000..5557305a7a --- /dev/null +++ b/source/tests/tf/test_model_compression_dpa1_compat_suffix_only.py @@ -0,0 +1,153 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np + +from deepmd.common import ( + make_default_mesh, +) +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, +) +from deepmd.tf.descriptor.se_atten import DescrptDPA1Compat as tf_SeAtten +from deepmd.tf.env import ( + GLOBAL_TF_FLOAT_PRECISION, + default_tf_session_config, + tf, +) +from deepmd.tf.utils.sess import ( + run_sess, +) + + +def build_tf_descriptor(obj, natoms, coords, atype, box, suffix): + t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord") + t_type = tf.placeholder(tf.int32, [None], name="i_type") + t_natoms = tf.placeholder(tf.int32, natoms.shape, name="i_natoms") + t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [9], name="i_box") + t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh") + t_des = obj.build( + t_coord, + t_type, + t_natoms, + t_box, + t_mesh, + {}, + suffix=suffix, + ) + return [t_des], { + t_coord: coords, + t_type: atype, + t_natoms: natoms, + t_box: box, + t_mesh: make_default_mesh(True, False), + } + + +def build_eval_tf(sess, obj, natoms, coords, atype, box, suffix): + t_out, feed_dict = build_tf_descriptor(obj, natoms, coords, atype, box, suffix) + + t_out_indentity = [ + tf.identity(tt, name=f"o_{ii}_{suffix}") for ii, tt in enumerate(t_out) + ] + run_sess(sess, tf.global_variables_initializer()) + return run_sess( + sess, + t_out_indentity, + feed_dict=feed_dict, + ) + + +class TestDescriptorSeA(unittest.TestCase): + def setUp(self): + self.device = "cpu" + self.seed = 21 + self.sel = [9, 10] + self.rcut_smth = 5.80 + self.rcut = 6.00 + self.neuron = [6, 12, 24] + self.axis_neuron = 3 + self.ntypes = 2 + self.coords = np.array( + [ + 12.83, + 2.56, + 2.18, + 12.09, + 2.87, + 2.74, + 00.25, + 3.32, + 1.68, + 3.36, + 3.00, + 1.81, + 
3.51, + 2.51, + 2.60, + 4.27, + 3.22, + 1.56, + ], + dtype=GLOBAL_NP_FLOAT_PRECISION, + ) + self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32) + # self.atype = np.array([0, 0, 1, 1, 1, 1], dtype=np.int32) + self.box = np.array( + [13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0], + dtype=GLOBAL_NP_FLOAT_PRECISION, + ) + self.natoms = np.array([6, 6, 2, 4], dtype=np.int32) + self.suffix = "test" + self.type_one_side = False + self.se_a_tf = tf_SeAtten( + self.rcut, + self.rcut_smth, + self.sel, + self.ntypes, + self.neuron, + self.axis_neuron, + type_one_side=self.type_one_side, + seed=21, + precision="float32", + tebd_input_mode="strip", + temperature=1.0, + attn_layer=0, + ) + + def test_tf_pt_consistent( + self, + ): + with tf.Session(config=default_tf_session_config) as sess: + graph = tf.get_default_graph() + ret = build_eval_tf( + sess, + self.se_a_tf, + self.natoms, + self.coords, + self.atype, + self.box, + self.suffix, + ) + output_graph_def = tf.graph_util.convert_variables_to_constants( + sess, + graph.as_graph_def(), + [f"o_{ii}_{self.suffix}" for ii, _ in enumerate(ret)], + ) + with tf.Graph().as_default() as new_graph: + tf.import_graph_def(output_graph_def, name="") + self.se_a_tf.init_variables( + new_graph, + output_graph_def, + suffix=self.suffix, + ) + self.se_a_tf.enable_compression( + 1.0, + new_graph, + output_graph_def, + suffix=self.suffix, + ) + + +if __name__ == "__main__": + unittest.main() From aba932c78169804360c9bc4a4d1f39e3c81149eb Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 28 Oct 2024 00:56:57 -0400 Subject: [PATCH 03/14] ci: skip `test_data_equal` on the GPU machine (#4260) This test crashes on the machine iZ0xih0eykcp6eddga4w5iZ with exit code 1: https://github.com/deepmodeling/deepmd-kit/actions/runs/11533273426/job/32106001782 ## Summary by CodeRabbit - **Bug Fixes** - Enhanced test execution control to ensure compatibility with CPU environments during continuous integration. - **Tests** - Updated the `test_data_equal` method to conditionally skip tests based on the testing device and CI status. - Retained cleanup procedures in the `tearDown` method to ensure proper test environment management. Signed-off-by: Jinzhe Zeng --- source/tests/consistent/io/test_io.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/source/tests/consistent/io/test_io.py b/source/tests/consistent/io/test_io.py index feafde234d..df81c24ff5 100644 --- a/source/tests/consistent/io/test_io.py +++ b/source/tests/consistent/io/test_io.py @@ -21,6 +21,11 @@ DeepEval, ) +from ...utils import ( + CI, + TEST_DEVICE, +) + infer_path = Path(__file__).parent.parent.parent / "infer" @@ -66,6 +71,7 @@ def tearDown(self): elif Path(ii).is_dir(): shutil.rmtree(ii) + @unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.") def test_data_equal(self): prefix = "test_consistent_io_" + self.__class__.__name__.lower() for backend_name in ("tensorflow", "pytorch", "dpmodel", "jax"): From 39cddd4832873ca89c3b22e01e6edc1101f1de54 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 28 Oct 2024 01:02:10 -0400 Subject: [PATCH 04/14] feat(dev): setup devcontainer for developers (#4263) See `.devcontainer/READMD.md` for details. ## Summary by CodeRabbit ## Release Notes - **New Features** - Introduced a development container setup with a new Dockerfile. - Added scripts for building C++ and Python components, downloading the LibTorch library, and setting up the environment for LAMMPS simulations. - New README documentation for the development environment setup. 
- New configuration file for the development container to streamline setup processes. - **Bug Fixes** - Expanded `.gitignore` to prevent unnecessary files from being tracked. - **Chores** - Enhanced dependency management in `pyproject.toml` for improved organization and clarity. --------- Signed-off-by: GitHub Signed-off-by: Jinzhe Zeng --- .devcontainer/Dockerfile | 3 +++ .devcontainer/READMD.md | 35 ++++++++++++++++++++++++++++++ .devcontainer/build_cxx.sh | 21 ++++++++++++++++++ .devcontainer/build_py.sh | 8 +++++++ .devcontainer/devcontainer.json | 17 +++++++++++++++ .devcontainer/download_libtorch.sh | 8 +++++++ .devcontainer/gdb_lmp | 9 ++++++++ .devcontainer/lmp | 9 ++++++++ .gitignore | 5 +++++ pyproject.toml | 20 +++++++++++++++++ 10 files changed, 135 insertions(+) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/READMD.md create mode 100755 .devcontainer/build_cxx.sh create mode 100755 .devcontainer/build_py.sh create mode 100644 .devcontainer/devcontainer.json create mode 100755 .devcontainer/download_libtorch.sh create mode 100755 .devcontainer/gdb_lmp create mode 100755 .devcontainer/lmp diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000000..18a2acda7f --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,3 @@ +FROM mcr.microsoft.com/devcontainers/cpp:1-ubuntu-24.04 + +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ diff --git a/.devcontainer/READMD.md b/.devcontainer/READMD.md new file mode 100644 index 0000000000..8e600a143f --- /dev/null +++ b/.devcontainer/READMD.md @@ -0,0 +1,35 @@ +# DeePMD-kit devcontainer environment + +This [devcontainer](https://vscode.js.cn/docs/devcontainers/devcontainer-cli) environment setups Python and C++ environment to develop DeePMD-kit. +One can setup locally or use [GitHub Codespaces](https://docs.github.com/en/codespaces) by clicking the Code button on the DeePMD-kit repository page. +The whole setup process requires about 10 minutes, so one needs to be patient. + +## Python environment + +The following packages are installed into the Python environment `.venv`: + +- DeePMD-kit (in edit mode) +- Backends including TensorFlow, PyTorch, JAX +- LAMMPS +- MPICH +- CMake +- pre-commit (including hooks) +- Test packages including pytest +- Doc packages including sphinx + +## C++ interface + +The C++ interface with TensorFlow and PyTorch support is installed into `dp` directory. + +When calling and debuging LAMMPS with DeePMD-kit, use the following scripts instead of the regular `lmp`: + +- `.devcontainer/lmp` +- `.devcontainer/gdb_lmp` + +## Rebuild + +Usually the Python package does not need to reinstall. +But when one wants to recompile the C++ code, the following scripts can be executed. 
+ +- `.devcontainer/build_cxx.sh` +- `.devcontainer/build_py.sh` diff --git a/.devcontainer/build_cxx.sh b/.devcontainer/build_cxx.sh new file mode 100755 index 0000000000..442539301e --- /dev/null +++ b/.devcontainer/build_cxx.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -ev + +NPROC=$(nproc --all) +SCRIPT_PATH=$(dirname $(realpath -s $0)) + +export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch +TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + +mkdir -p ${SCRIPT_PATH}/../buildcxx/ +cd ${SCRIPT_PATH}/../buildcxx/ +cmake -D ENABLE_TENSORFLOW=ON \ + -D ENABLE_PYTORCH=ON \ + -D CMAKE_INSTALL_PREFIX=${SCRIPT_PATH}/../dp/ \ + -D LAMMPS_VERSION=stable_29Aug2024_update1 \ + -D CMAKE_BUILD_TYPE=Debug \ + -D BUILD_TESTING:BOOL=TRUE \ + -D TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \ + ${SCRIPT_PATH}/../source +cmake --build . -j${NPROC} +cmake --install . diff --git a/.devcontainer/build_py.sh b/.devcontainer/build_py.sh new file mode 100755 index 0000000000..8e9a006a4f --- /dev/null +++ b/.devcontainer/build_py.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -ev + +SCRIPT_PATH=$(dirname $(realpath -s $0)) +cd ${SCRIPT_PATH}/.. + +uv sync --dev --python 3.12 --extra cpu --extra torch --extra jax --extra lmp --extra test --extra docs +pre-commit install diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000000..27c40bbe6a --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,17 @@ +{ + "name": "DeePMD-kit", + "build": { + "dockerfile": "Dockerfile" + }, + "features": { + "ghcr.io/devcontainers/features/github-cli:1": {} + }, + "postCreateCommand": ".devcontainer/build_py.sh && .devcontainer/download_libtorch.sh && .devcontainer/build_cxx.sh && pre-commit install-hooks", + "remoteEnv": { + "PATH": "${containerEnv:PATH}:${containerWorkspaceFolder}/.venv/bin", + "DP_ENABLE_PYTORCH": "1", + "DP_VARIANT": "cpu", + "LMP_CXX11_ABI_0": "1", + "UV_EXTRA_INDEX_URL": "https://download.pytorch.org/whl/cpu" + } +} diff --git a/.devcontainer/download_libtorch.sh b/.devcontainer/download_libtorch.sh new file mode 100755 index 0000000000..d78b559997 --- /dev/null +++ b/.devcontainer/download_libtorch.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -ev + +SCRIPT_PATH=$(dirname $(realpath -s $0)) +cd ${SCRIPT_PATH}/.. 
+ +wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.5.0%2Bcpu.zip -O ~/libtorch.zip +unzip ~/libtorch.zip diff --git a/.devcontainer/gdb_lmp b/.devcontainer/gdb_lmp new file mode 100755 index 0000000000..33e883780b --- /dev/null +++ b/.devcontainer/gdb_lmp @@ -0,0 +1,9 @@ +#!/bin/bash +SCRIPT_PATH=$(dirname $(realpath -s $0)) + +export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch +TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + +env LAMMPS_PLUGIN_PATH=${SCRIPT_PATH}/../dp/lib/deepmd_lmp \ + LD_LIBRARY_PATH=${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT} \ + gdb ${SCRIPT_PATH}/../.venv/lib/python3.12/site-packages/lammps/lmp "$@" diff --git a/.devcontainer/lmp b/.devcontainer/lmp new file mode 100755 index 0000000000..c8e781aa57 --- /dev/null +++ b/.devcontainer/lmp @@ -0,0 +1,9 @@ +#!/bin/bash +SCRIPT_PATH=$(dirname $(realpath -s $0)) + +export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch +TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + +env LAMMPS_PLUGIN_PATH=${SCRIPT_PATH}/../dp/lib/deepmd_lmp \ + LD_LIBRARY_PATH=${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT} \ + ${SCRIPT_PATH}/../.venv/bin/lmp "$@" diff --git a/.gitignore b/.gitignore index c531a76177..c574da757a 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,8 @@ build_c_tests build_c/ libdeepmd_c/ .uv/ +libtorch/ +uv.lock +buildcxx/ +node_modules/ +*.bib.original diff --git a/pyproject.toml b/pyproject.toml index 6f0404174d..0a1b2e6731 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -136,9 +136,14 @@ cu12 = [ "nvidia-cuda-nvcc-cu12", ] jax = [ + # below is a funny workaround for + # https://github.com/astral-sh/uv/issues/8601 'jax>=0.4.33;python_version>="3.10"', + 'jax>=0.4.33;python_version>="3.10"', + 'flax>=0.10.0;python_version>="3.10"', 'flax>=0.10.0;python_version>="3.10"', 'orbax-checkpoint;python_version>="3.10"', + 'orbax-checkpoint;python_version>="3.10"', # The pinning of ml_dtypes may conflict with TF # 'jax-ai-stack;python_version>="3.10"', ] @@ -146,6 +151,13 @@ jax = [ [tool.deepmd_build_backend.scripts] dp = "deepmd.main:main" +[dependency-groups] +dev = [ + "pre-commit", + "cmake", + "mpich", +] + [tool.setuptools_scm] [tool.scikit-build] @@ -428,3 +440,11 @@ select = [ "TOR1", "TOR2", ] + +[tool.uv.sources] +mpich = { index = "mpi4py" } + +[[tool.uv.index]] +name = "mpi4py" +url = "https://pypi.anaconda.org/mpi4py/simple" +explicit = true From 04e1159b3f9b3bccd82ab91f0204f65c86cda914 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 28 Oct 2024 15:39:25 -0400 Subject: [PATCH 05/14] fix(pt): set device for PT C++ (#4261) Fix #4171. ## Summary by CodeRabbit - **New Features** - Improved GPU initialization to ensure the correct device is utilized. - Enhanced error handling for clearer context on exceptions. - **Bug Fixes** - Updated error handling in multiple methods to catch and rethrow specific exceptions. - Added logic to handle communication-related tensors during computation. 
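The actual fix below is a three-line C++ hunk; as a rough Python analogue (an illustration only, not the `api_cc` implementation), the point is the ordering: pin the target GPU before loading the TorchScript model, so its buffers land on the intended device rather than the default one.

```python
# Rough Python analogue (assumption for illustration) of the ordering the C++
# fix enforces: select the GPU *before* the model load, mirroring the added
# DPErrcheck(DPSetDevice(gpu_id)) call that runs ahead of loading on the GPU
# code path.
import torch

def load_jit_model(model_path: str, gpu_id: int) -> torch.jit.ScriptModule:
    if torch.cuda.is_available():
        torch.cuda.set_device(gpu_id)  # analogue of DPSetDevice(gpu_id)
        device = torch.device(f"cuda:{gpu_id}")
    else:
        device = torch.device("cpu")
    return torch.jit.load(model_path, map_location=device)
```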
--------- Signed-off-by: Jinzhe Zeng --- source/api_cc/src/DeepPotPT.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/api_cc/src/DeepPotPT.cc b/source/api_cc/src/DeepPotPT.cc index 4c7aac19b8..780a8007f3 100644 --- a/source/api_cc/src/DeepPotPT.cc +++ b/source/api_cc/src/DeepPotPT.cc @@ -80,6 +80,9 @@ void DeepPotPT::init(const std::string& model, device = torch::Device(torch::kCPU); std::cout << "load model from: " << model << " to cpu " << std::endl; } else { +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM + DPErrcheck(DPSetDevice(gpu_id)); +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM std::cout << "load model from: " << model << " to gpu " << gpu_id << std::endl; } From 8f546cf262a15aa33032941162f8c6561423b33d Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 28 Oct 2024 19:54:48 -0400 Subject: [PATCH 06/14] docs: fix word spellings (#4264) Signed-off-by: Jinzhe Zeng Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> --- backend/read_env.py | 2 +- deepmd/__init__.py | 2 +- deepmd/backend/suffix.py | 2 +- deepmd/calculator.py | 2 +- deepmd/common.py | 6 +-- .../dpmodel/atomic_model/base_atomic_model.py | 2 +- .../dpmodel/atomic_model/dp_atomic_model.py | 4 +- .../atomic_model/linear_atomic_model.py | 4 +- .../atomic_model/make_base_atomic_model.py | 4 +- deepmd/dpmodel/common.py | 2 +- deepmd/dpmodel/descriptor/descriptor.py | 2 +- deepmd/dpmodel/descriptor/dpa1.py | 14 +++--- deepmd/dpmodel/descriptor/dpa2.py | 8 ++-- deepmd/dpmodel/descriptor/hybrid.py | 6 +-- .../descriptor/make_base_descriptor.py | 4 +- deepmd/dpmodel/descriptor/repformers.py | 8 ++-- deepmd/dpmodel/descriptor/se_e2_a.py | 8 ++-- deepmd/dpmodel/descriptor/se_r.py | 8 ++-- deepmd/dpmodel/descriptor/se_t.py | 6 +-- deepmd/dpmodel/descriptor/se_t_tebd.py | 14 +++--- deepmd/dpmodel/fitting/general_fitting.py | 14 +++--- deepmd/dpmodel/fitting/invar_fitting.py | 4 +- deepmd/dpmodel/fitting/property_fitting.py | 2 +- deepmd/dpmodel/infer/deep_eval.py | 4 +- deepmd/dpmodel/model/base_model.py | 2 +- deepmd/dpmodel/model/dp_model.py | 2 +- deepmd/dpmodel/model/make_model.py | 8 ++-- deepmd/dpmodel/model/transform_output.py | 2 +- deepmd/dpmodel/output_def.py | 4 +- deepmd/dpmodel/utils/neighbor_stat.py | 2 +- deepmd/dpmodel/utils/network.py | 6 +-- deepmd/dpmodel/utils/nlist.py | 10 ++-- deepmd/dpmodel/utils/region.py | 2 +- deepmd/driver.py | 2 +- deepmd/entrypoints/test.py | 4 +- deepmd/env.py | 2 +- deepmd/infer/__init__.py | 2 +- deepmd/infer/deep_eval.py | 4 +- deepmd/infer/model_devi.py | 2 +- deepmd/loggers/loggers.py | 8 ++-- deepmd/pt/cxx_op.py | 2 +- deepmd/pt/infer/deep_eval.py | 4 +- .../model/atomic_model/base_atomic_model.py | 8 ++-- .../pt/model/atomic_model/dp_atomic_model.py | 2 +- .../model/atomic_model/linear_atomic_model.py | 4 +- .../atomic_model/pairtab_atomic_model.py | 4 +- deepmd/pt/model/descriptor/descriptor.py | 2 +- deepmd/pt/model/descriptor/dpa1.py | 8 ++-- deepmd/pt/model/descriptor/dpa2.py | 8 ++-- deepmd/pt/model/descriptor/hybrid.py | 6 +-- deepmd/pt/model/descriptor/repformer_layer.py | 4 +- deepmd/pt/model/descriptor/repformers.py | 6 +-- deepmd/pt/model/descriptor/se_a.py | 8 ++-- deepmd/pt/model/descriptor/se_atten.py | 4 +- deepmd/pt/model/descriptor/se_r.py | 8 ++-- deepmd/pt/model/descriptor/se_t.py | 8 ++-- deepmd/pt/model/descriptor/se_t_tebd.py | 12 ++--- deepmd/pt/model/model/dp_linear_model.py | 2 +- deepmd/pt/model/model/dp_model.py | 2 +- deepmd/pt/model/model/dp_zbl_model.py | 2 +- deepmd/pt/model/model/frozen.py | 2 +- 
deepmd/pt/model/model/make_model.py | 8 ++-- deepmd/pt/model/model/spin_model.py | 8 ++-- deepmd/pt/model/network/init.py | 2 +- deepmd/pt/model/network/network.py | 2 +- deepmd/pt/model/task/denoise.py | 2 +- deepmd/pt/model/task/ener.py | 2 +- deepmd/pt/model/task/fitting.py | 12 ++--- deepmd/pt/model/task/invar_fitting.py | 4 +- deepmd/pt/model/task/property.py | 2 +- deepmd/pt/model/task/type_predict.py | 2 +- deepmd/pt/train/wrapper.py | 2 +- deepmd/pt/utils/dataloader.py | 2 +- deepmd/pt/utils/env_mat_stat.py | 2 +- deepmd/pt/utils/neighbor_stat.py | 2 +- deepmd/pt/utils/nlist.py | 6 +-- deepmd/pt/utils/region.py | 2 +- deepmd/pt/utils/stat.py | 2 +- deepmd/tf/cluster/local.py | 2 +- deepmd/tf/descriptor/descriptor.py | 6 +-- deepmd/tf/descriptor/hybrid.py | 8 ++-- deepmd/tf/descriptor/loc_frame.py | 4 +- deepmd/tf/descriptor/se.py | 4 +- deepmd/tf/descriptor/se_a.py | 8 ++-- deepmd/tf/descriptor/se_a_ebd_v2.py | 2 +- deepmd/tf/descriptor/se_a_ef.py | 2 +- deepmd/tf/descriptor/se_a_mask.py | 4 +- deepmd/tf/descriptor/se_atten.py | 10 ++-- deepmd/tf/descriptor/se_r.py | 4 +- deepmd/tf/descriptor/se_t.py | 4 +- deepmd/tf/entrypoints/freeze.py | 6 +-- deepmd/tf/entrypoints/ipi.py | 2 +- deepmd/tf/entrypoints/main.py | 2 +- deepmd/tf/entrypoints/train.py | 2 +- deepmd/tf/entrypoints/transfer.py | 12 ++--- deepmd/tf/env.py | 8 ++-- deepmd/tf/fit/dipole.py | 8 ++-- deepmd/tf/fit/dos.py | 8 ++-- deepmd/tf/fit/ener.py | 12 ++--- deepmd/tf/fit/polar.py | 14 +++--- deepmd/tf/infer/deep_dipole.py | 2 +- deepmd/tf/infer/deep_eval.py | 18 ++++---- deepmd/tf/infer/deep_tensor.py | 4 +- deepmd/tf/loss/ener.py | 2 +- deepmd/tf/model/ener.py | 4 +- deepmd/tf/model/frozen.py | 2 +- deepmd/tf/model/linear.py | 2 +- deepmd/tf/model/model.py | 8 ++-- deepmd/tf/model/pairtab.py | 4 +- deepmd/tf/model/pairwise_dprc.py | 2 +- deepmd/tf/nvnmd/data/data.py | 4 +- deepmd/tf/nvnmd/entrypoints/mapt.py | 6 +-- deepmd/tf/nvnmd/utils/encode.py | 2 +- deepmd/tf/nvnmd/utils/network.py | 2 +- deepmd/tf/op/__init__.py | 2 +- deepmd/tf/op/_dotmul_flt_nvnmd_grad.py | 2 +- deepmd/tf/op/_matmul_flt2fix_nvnmd.py | 2 +- deepmd/tf/op/_matmul_flt_nvnmd_grad.py | 2 +- deepmd/tf/train/run_options.py | 8 ++-- deepmd/tf/train/trainer.py | 6 +-- deepmd/tf/utils/learning_rate.py | 2 +- deepmd/tf/utils/neighbor_stat.py | 2 +- deepmd/tf/utils/network.py | 2 +- deepmd/tf/utils/nlist.py | 2 +- deepmd/tf/utils/sess.py | 2 +- deepmd/tf/utils/tabulate.py | 2 +- deepmd/utils/argcheck.py | 46 +++++++++---------- deepmd/utils/batch_size.py | 2 +- deepmd/utils/data.py | 6 +-- deepmd/utils/data_system.py | 10 ++-- deepmd/utils/econf_embd.py | 2 +- deepmd/utils/out_stat.py | 4 +- deepmd/utils/summary.py | 2 +- deepmd/utils/weight_avg.py | 2 +- doc/README | 2 +- doc/development/coding-conventions.rst | 2 +- doc/development/create-a-model-pt.md | 2 +- doc/development/create-a-model-tf.md | 2 +- doc/getting-started/quick_start.ipynb | 8 ++-- doc/install/install-from-source.md | 4 +- doc/install/install-tf.2.12.md | 2 +- doc/install/install-tf.2.8.md | 2 +- doc/model/dprc.md | 2 +- doc/model/train-energy-spin.md | 2 +- doc/model/train-se-a-mask.md | 2 +- doc/nvnmd/nvnmd.md | 6 +-- doc/third-party/lammps-command.md | 4 +- doc/train/finetuning.md | 2 +- doc/troubleshooting/precision.md | 4 +- pyproject.toml | 6 +-- source/CMakeLists.txt | 4 +- source/api_c/include/deepmd.hpp | 8 ++-- source/api_c/src/c_api.cc | 2 +- source/api_cc/include/DeepTensor.h | 18 ++++---- source/api_cc/include/DeepTensorTF.h | 14 +++--- source/api_cc/include/commonTF.h | 6 
+-- source/api_cc/src/DataModifierTF.cc | 2 +- source/api_cc/src/DeepPotTF.cc | 4 +- source/api_cc/src/DeepTensorTF.cc | 4 +- source/api_cc/src/common.cc | 2 +- source/cmake/Findtensorflow.cmake | 2 +- source/cmake/tf_version.cpp | 2 +- source/gmx/dp_gmx_patch | 2 +- source/gmx/src/gmx_plugin.cpp | 2 +- source/install/build_tf.py | 6 +-- source/lib/include/ComputeDescriptor.h | 2 +- source/lib/include/coord.h | 4 +- source/lib/include/neighbor_list.h | 4 +- source/lib/include/prod_force.h | 2 +- source/lib/src/gpu/tabulate.cu | 6 +-- source/lib/tests/test_fmt_nlist.cc | 8 ++-- source/lmp/pppm_dplr.cpp | 4 +- source/op/tf/descrpt.cc | 2 +- source/op/tf/descrpt_se_a_ef.cc | 2 +- source/op/tf/descrpt_se_a_ef_para.cc | 2 +- source/op/tf/descrpt_se_a_ef_vert.cc | 2 +- source/op/tf/descrpt_se_a_mask.cc | 4 +- source/op/tf/neighbor_stat.cc | 2 +- source/op/tf/pairwise.cc | 2 +- source/op/tf/prod_env_mat_multi_device.cc | 6 +-- .../op/tf/prod_env_mat_multi_device_nvnmd.cc | 8 ++-- .../common/dpmodel/test_pairtab_preprocess.py | 2 +- source/tests/common/test_argument_parser.py | 4 +- source/tests/consistent/common.py | 2 +- .../tests/consistent/test_type_embedding.py | 2 +- source/tests/pt/model/test_descriptor_dpa1.py | 2 +- source/tests/pt/model/test_embedding_net.py | 2 +- source/tests/pt/model/test_fitting_net.py | 2 +- .../tests/pt/model/test_make_hessian_model.py | 2 +- source/tests/pt/model/test_model.py | 4 +- source/tests/pt/model/test_nlist.py | 2 +- source/tests/pt/model/test_unused_params.py | 4 +- source/tests/pt/test_training.py | 2 +- source/tests/tf/common.py | 2 +- source/tests/tf/test_model_pairtab.py | 2 +- .../common/cases/atomic_model/utils.py | 2 +- 196 files changed, 448 insertions(+), 442 deletions(-) diff --git a/backend/read_env.py b/backend/read_env.py index edc3600115..3b217926d6 100644 --- a/backend/read_env.py +++ b/backend/read_env.py @@ -43,7 +43,7 @@ def get_argument_from_env() -> tuple[str, list, list, dict, str, str]: """ cmake_args = [] extra_scripts = {} - # get variant option from the environment varibles, available: cpu, cuda, rocm + # get variant option from the environment variables, available: cpu, cuda, rocm dp_variant = os.environ.get("DP_VARIANT", "cpu").lower() if dp_variant == "cpu" or dp_variant == "": cmake_minimum_required_version = "3.16" diff --git a/deepmd/__init__.py b/deepmd/__init__.py index 1ce4beb723..6f2b65ba63 100644 --- a/deepmd/__init__.py +++ b/deepmd/__init__.py @@ -17,7 +17,7 @@ def DeepPotential(*args, **kwargs): - """Factory function that forwards to DeepEval (for compatbility + """Factory function that forwards to DeepEval (for compatibility and performance). Parameters diff --git a/deepmd/backend/suffix.py b/deepmd/backend/suffix.py index d694b43488..e77aecb5d9 100644 --- a/deepmd/backend/suffix.py +++ b/deepmd/backend/suffix.py @@ -23,7 +23,7 @@ def format_model_suffix( """Check and format the suffixes of a filename. When preferred_backend is not given, this method checks the suffix of the filename - is within the suffixes of the any backends (with the given feature) and doesn't do formating. + is within the suffixes of the any backends (with the given feature) and doesn't do formatting. When preferred_backend is given, strict_prefer must be given. 
If strict_prefer is True and the suffix is not within the suffixes of the preferred backend, or strict_prefer is False and the suffix is not within the suffixes of the any backend with the given feature, diff --git a/deepmd/calculator.py b/deepmd/calculator.py index 032fa2bcfa..6f863ab09b 100644 --- a/deepmd/calculator.py +++ b/deepmd/calculator.py @@ -32,7 +32,7 @@ class DP(Calculator): """Implementation of ASE deepmd calculator. - Implemented propertie are `energy`, `forces` and `stress` + Implemented properties are `energy`, `forces` and `stress` Parameters ---------- diff --git a/deepmd/common.py b/deepmd/common.py index fdfeef0e6d..185722f4a8 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -77,7 +77,7 @@ def select_idx_map(atom_types: np.ndarray, select_types: np.ndarray) -> np.ndarr Parameters ---------- atom_types : np.ndarray - array specifing type for each atoms as integer + array specifying type for each atoms as integer select_types : np.ndarray types of atoms you want to find indices for @@ -126,7 +126,7 @@ def make_default_mesh(pbc: bool, mixed_type: bool) -> np.ndarray: def j_deprecated( jdata: dict[str, "_DICT_VAL"], key: str, deprecated_key: list[str] = [] ) -> "_DICT_VAL": - """Assert that supplied dictionary conaines specified key. + """Assert that supplied dictionary contains specified key. Parameters ---------- @@ -218,7 +218,7 @@ def get_np_precision(precision: "_PRECISION") -> np.dtype: Returns ------- np.dtype - numpy presicion constant + numpy precision constant Raises ------ diff --git a/deepmd/dpmodel/atomic_model/base_atomic_model.py b/deepmd/dpmodel/atomic_model/base_atomic_model.py index b615c81d1f..4e7620bdda 100644 --- a/deepmd/dpmodel/atomic_model/base_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/base_atomic_model.py @@ -158,7 +158,7 @@ def forward_common_atomic( Parameters ---------- extended_coord - extended coodinates, shape: nf x (nall x 3) + extended coordinates, shape: nf x (nall x 3) extended_atype extended atom typs, shape: nf x nall for a type < 0 indicating the atomic is virtual. diff --git a/deepmd/dpmodel/atomic_model/dp_atomic_model.py b/deepmd/dpmodel/atomic_model/dp_atomic_model.py index fe049021fe..a621ece27e 100644 --- a/deepmd/dpmodel/atomic_model/dp_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/dp_atomic_model.py @@ -100,7 +100,7 @@ def forward_atomic( Parameters ---------- extended_coord - coodinates in extended region + coordinates in extended region extended_atype atomic type in extended region nlist @@ -169,7 +169,7 @@ def serialize(self) -> dict: ) return dd - # for subclass overriden + # for subclass overridden base_descriptor_cls = BaseDescriptor """The base descriptor class.""" base_fitting_cls = BaseFitting diff --git a/deepmd/dpmodel/atomic_model/linear_atomic_model.py b/deepmd/dpmodel/atomic_model/linear_atomic_model.py index 880c92f504..5d86472674 100644 --- a/deepmd/dpmodel/atomic_model/linear_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/linear_atomic_model.py @@ -162,7 +162,7 @@ def forward_atomic( Parameters ---------- extended_coord - coodinates in extended region, (nframes, nall * 3) + coordinates in extended region, (nframes, nall * 3) extended_atype atomic type in extended region, (nframes, nall) nlist @@ -341,7 +341,7 @@ class DPZBLLinearEnergyAtomicModel(LinearEnergyAtomicModel): Mapping atom type to the name (str) of the type. For example `type_map[1]` gives the name of the type 1. 
smin_alpha - The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. + The short-range tabulated interaction will be switched according to the distance of the nearest neighbor. This distance is calculated by softmin. """ diff --git a/deepmd/dpmodel/atomic_model/make_base_atomic_model.py b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py index 6c0fc88e2c..99a92c23a4 100644 --- a/deepmd/dpmodel/atomic_model/make_base_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py @@ -152,7 +152,7 @@ def make_atom_mask( self, atype: t_tensor, ) -> t_tensor: - """The atoms with type < 0 are treated as virutal atoms, + """The atoms with type < 0 are treated as virtual atoms, which serves as place-holders for multi-frame calculations with different number of atoms in different frames. @@ -164,7 +164,7 @@ def make_atom_mask( Returns ------- mask - True for real atoms and False for virutal atoms. + True for real atoms and False for virtual atoms. """ # supposed to be supported by all backends diff --git a/deepmd/dpmodel/common.py b/deepmd/dpmodel/common.py index 5c75229e49..f834754195 100644 --- a/deepmd/dpmodel/common.py +++ b/deepmd/dpmodel/common.py @@ -30,7 +30,7 @@ "int64": np.int64, "bool": bool, "default": GLOBAL_NP_FLOAT_PRECISION, - # NumPy doesn't have bfloat16 (and does't plan to add) + # NumPy doesn't have bfloat16 (and doesn't plan to add) # ml_dtypes is a solution, but it seems not supporting np.save/np.load # hdf5 hasn't supported bfloat16 as well (see https://forum.hdfgroup.org/t/11975) "bfloat16": ml_dtypes.bfloat16, diff --git a/deepmd/dpmodel/descriptor/descriptor.py b/deepmd/dpmodel/descriptor/descriptor.py index 6d0644f856..746c02eb68 100644 --- a/deepmd/dpmodel/descriptor/descriptor.py +++ b/deepmd/dpmodel/descriptor/descriptor.py @@ -110,7 +110,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ raise NotImplementedError diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py index add9cb9f71..2f2b12e03c 100644 --- a/deepmd/dpmodel/descriptor/dpa1.py +++ b/deepmd/dpmodel/descriptor/dpa1.py @@ -358,11 +358,11 @@ def get_dim_emb(self) -> int: return self.se_atten.dim_emb def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. - If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. @@ -385,7 +385,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ raise NotImplementedError @@ -459,7 +459,7 @@ def call( nlist The neighbor list. 
shape: nf x nloc x nnei mapping - The index mapping from extended to lcoal region. not used by this descriptor. + The index mapping from extended to local region. not used by this descriptor. Returns ------- @@ -602,7 +602,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict @@ -793,11 +793,11 @@ def __getitem__(self, key): raise KeyError(key) def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. - If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. diff --git a/deepmd/dpmodel/descriptor/dpa2.py b/deepmd/dpmodel/descriptor/dpa2.py index 285dc724a7..1dbb14961e 100644 --- a/deepmd/dpmodel/descriptor/dpa2.py +++ b/deepmd/dpmodel/descriptor/dpa2.py @@ -624,11 +624,11 @@ def get_dim_emb(self) -> int: return self.repformers.dim_emb def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. - If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. @@ -653,7 +653,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ raise NotImplementedError @@ -1021,7 +1021,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/dpmodel/descriptor/hybrid.py b/deepmd/dpmodel/descriptor/hybrid.py index 3aa8882db1..4eb14f29cf 100644 --- a/deepmd/dpmodel/descriptor/hybrid.py +++ b/deepmd/dpmodel/descriptor/hybrid.py @@ -63,7 +63,7 @@ def __init__( for ii in range(1, self.numb_descrpt): assert ( self.descrpt_list[ii].get_ntypes() == self.descrpt_list[0].get_ntypes() - ), f"number of atom types in {ii}th descrptor {self.descrpt_list[0].__class__.__name__} does not match others" + ), f"number of atom types in {ii}th descriptor {self.descrpt_list[0].__class__.__name__} does not match others" # if hybrid sel is larger than sub sel, the nlist needs to be cut for each type hybrid_sel = self.get_sel() self.nlist_cut_idx: list[np.ndarray] = [] @@ -161,7 +161,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. 
""" raise NotImplementedError @@ -284,7 +284,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/dpmodel/descriptor/make_base_descriptor.py b/deepmd/dpmodel/descriptor/make_base_descriptor.py index a9b434d5f5..b9c1e93387 100644 --- a/deepmd/dpmodel/descriptor/make_base_descriptor.py +++ b/deepmd/dpmodel/descriptor/make_base_descriptor.py @@ -116,7 +116,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ pass @@ -194,7 +194,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/dpmodel/descriptor/repformers.py b/deepmd/dpmodel/descriptor/repformers.py index ec8be21a53..ef79ecdd28 100644 --- a/deepmd/dpmodel/descriptor/repformers.py +++ b/deepmd/dpmodel/descriptor/repformers.py @@ -307,11 +307,11 @@ def __getitem__(self, key): raise KeyError(key) def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. - If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. @@ -1480,7 +1480,7 @@ def call( """ Parameters ---------- - g1_ext : nf x nall x ng1 extended single-atom chanel + g1_ext : nf x nall x ng1 extended single-atom channel g2 : nf x nloc x nnei x ng2 pair-atom channel, invariant h2 : nf x nloc x nnei x 3 pair-atom channel, equivariant nlist : nf x nloc x nnei neighbor list (padded neis are set to 0) @@ -1489,7 +1489,7 @@ def call( Returns ------- - g1: nf x nloc x ng1 updated single-atom chanel + g1: nf x nloc x ng1 updated single-atom channel g2: nf x nloc x nnei x ng2 updated pair-atom channel, invariant h2: nf x nloc x nnei x 3 updated pair-atom channel, equivariant """ diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py index d29ce8862e..feebe57af7 100644 --- a/deepmd/dpmodel/descriptor/se_e2_a.py +++ b/deepmd/dpmodel/descriptor/se_e2_a.py @@ -281,7 +281,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ raise NotImplementedError @@ -359,7 +359,7 @@ def call( nlist The neighbor list. shape: nf x nloc x nnei mapping - The index mapping from extended to lcoal region. not used by this descriptor. + The index mapping from extended to local region. not used by this descriptor. 
Returns ------- @@ -486,7 +486,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict @@ -525,7 +525,7 @@ def call( nlist The neighbor list. shape: nf x nloc x nnei mapping - The index mapping from extended to lcoal region. not used by this descriptor. + The index mapping from extended to local region. not used by this descriptor. Returns ------- diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py index 6d0ddc5621..0f646e143c 100644 --- a/deepmd/dpmodel/descriptor/se_r.py +++ b/deepmd/dpmodel/descriptor/se_r.py @@ -48,7 +48,7 @@ @BaseDescriptor.register("se_e2_r") @BaseDescriptor.register("se_r") class DescrptSeR(NativeOP, BaseDescriptor): - r"""DeepPot-SE_R constructed from only the radial imformation of atomic configurations. + r"""DeepPot-SE_R constructed from only the radial information of atomic configurations. Parameters @@ -237,7 +237,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ raise NotImplementedError @@ -308,7 +308,7 @@ def call( nlist The neighbor list. shape: nf x nloc x nnei mapping - The index mapping from extended to lcoal region. not used by this descriptor. + The index mapping from extended to local region. not used by this descriptor. Returns ------- @@ -414,7 +414,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/dpmodel/descriptor/se_t.py b/deepmd/dpmodel/descriptor/se_t.py index f2ea751c50..4dc4c965fb 100644 --- a/deepmd/dpmodel/descriptor/se_t.py +++ b/deepmd/dpmodel/descriptor/se_t.py @@ -225,7 +225,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ raise NotImplementedError @@ -279,7 +279,7 @@ def call( nlist The neighbor list. shape: nf x nloc x nnei mapping - The index mapping from extended to lcoal region. not used by this descriptor. + The index mapping from extended to local region. not used by this descriptor. 
Returns ------- @@ -405,7 +405,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/dpmodel/descriptor/se_t_tebd.py b/deepmd/dpmodel/descriptor/se_t_tebd.py index 147a335926..ca89c23968 100644 --- a/deepmd/dpmodel/descriptor/se_t_tebd.py +++ b/deepmd/dpmodel/descriptor/se_t_tebd.py @@ -199,11 +199,11 @@ def get_dim_emb(self) -> int: return self.se_ttebd.dim_emb def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. - If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. @@ -226,7 +226,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ raise NotImplementedError @@ -300,7 +300,7 @@ def call( nlist The neighbor list. shape: nf x nloc x nnei mapping - The index mapping from extended to lcoal region. not used by this descriptor. + The index mapping from extended to local region. not used by this descriptor. Returns ------- @@ -418,7 +418,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict @@ -576,11 +576,11 @@ def __getitem__(self, key): raise KeyError(key) def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. - If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py index 25d15b2e75..e55f57c774 100644 --- a/deepmd/dpmodel/fitting/general_fitting.py +++ b/deepmd/dpmodel/fitting/general_fitting.py @@ -56,7 +56,7 @@ class GeneralFitting(NativeOP, BaseFitting): neuron Number of neurons :math:`N` in each hidden layer of the fitting net bias_atom_e - Average enery per atom for each element. + Average energy per atom for each element. resnet_dt Time-step `dt` in the resnet construction: :math:`y = x + dt * \phi (Wx + b)` @@ -88,9 +88,9 @@ class GeneralFitting(NativeOP, BaseFitting): exclude_types: list[int] Atomic contributions of the excluded atom types are set zero. remove_vaccum_contribution: list[bool], optional - Remove vaccum contribution before the bias is added. The list assigned each + Remove vacuum contribution before the bias is added. The list assigned each type. For `mixed_types` provide `[True]`, otherwise it should be a list of the same - length as `ntypes` signaling if or not removing the vaccum contribution for the atom types in the list. 
+ length as `ntypes` signaling whether or not to remove the vacuum contribution for the atom types in the list. type_map: list[str], Optional A list of strings. Give the name to each type of atoms. seed: Optional[Union[int, list[int]]] @@ -375,10 +375,10 @@ def _call_common( ) xx = descriptor if self.remove_vaccum_contribution is not None: - # TODO: comput the input for vaccum when setting remove_vaccum_contribution - # Idealy, the input for vaccum should be computed; + # TODO: compute the input for vacuum when setting remove_vaccum_contribution + # Ideally, the input for vacuum should be computed; # we consider it as always zero for convenience. - # Needs a compute_input_stats for vaccum passed from the + # Needs a compute_input_stats for vacuum passed from the # descriptor. xx_zeros = xp.zeros_like(xx) else: @@ -424,7 +424,7 @@ def _call_common( axis=-1, ) - # calcualte the prediction + # calculate the prediction if not self.mixed_types: outs = xp.zeros( [nf, nloc, net_dim_out], dtype=get_xp_precision(xp, self.precision) diff --git a/deepmd/dpmodel/fitting/invar_fitting.py b/deepmd/dpmodel/fitting/invar_fitting.py index 2a251834fe..3f8607109b 100644 --- a/deepmd/dpmodel/fitting/invar_fitting.py +++ b/deepmd/dpmodel/fitting/invar_fitting.py @@ -28,7 +28,7 @@ @GeneralFitting.register("invar") @fitting_check_output class InvarFitting(GeneralFitting): - r"""Fitting the energy (or a rotationally invariant porperty of `dim_out`) of the system. The force and the virial can also be trained. + r"""Fitting the energy (or a rotationally invariant property of `dim_out`) of the system. The force and the virial can also be trained. Lets take the energy fitting task as an example. The potential energy :math:`E` is a fitting network function of the descriptor :math:`\mathcal{D}`: @@ -90,7 +90,7 @@ class InvarFitting(GeneralFitting): Suppose that we have :math:`N_l` hidden layers in the fitting net, this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable. atom_ener - Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set. + Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descriptor should be set. activation_function The activation function :math:`\boldsymbol{\phi}` in the embedding net. Supported options are |ACTIVATION_FN| precision diff --git a/deepmd/dpmodel/fitting/property_fitting.py b/deepmd/dpmodel/fitting/property_fitting.py index 1a8fe44aae..a1b6fe7638 100644 --- a/deepmd/dpmodel/fitting/property_fitting.py +++ b/deepmd/dpmodel/fitting/property_fitting.py @@ -20,7 +20,7 @@ @InvarFitting.register("property") class PropertyFittingNet(InvarFitting): - r"""Fitting the rotationally invariant porperties of `task_dim` of the system. + r"""Fitting the rotationally invariant properties of `task_dim` of the system. Parameters ---------- diff --git a/deepmd/dpmodel/infer/deep_eval.py b/deepmd/dpmodel/infer/deep_eval.py index 2b1e74c8de..c1f3e4630b 100644 --- a/deepmd/dpmodel/infer/deep_eval.py +++ b/deepmd/dpmodel/infer/deep_eval.py @@ -52,7 +52,7 @@ class DeepEval(DeepEvalBackend): - """NumPy backend implementaion of DeepEval. + """NumPy backend implementation of DeepEval.
Parameters ---------- @@ -374,5 +374,5 @@ def _get_output_shape(self, odef, nframes, natoms): raise RuntimeError("unknown category") def get_model_def_script(self) -> dict: - """Get model defination script.""" + """Get model definition script.""" return json.loads(self.model.get_model_def_script()) diff --git a/deepmd/dpmodel/model/base_model.py b/deepmd/dpmodel/model/base_model.py index 3f71003bad..777697b4b7 100644 --- a/deepmd/dpmodel/model/base_model.py +++ b/deepmd/dpmodel/model/base_model.py @@ -171,7 +171,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/dpmodel/model/dp_model.py b/deepmd/dpmodel/model/dp_model.py index eda0414398..769bba0b20 100644 --- a/deepmd/dpmodel/model/dp_model.py +++ b/deepmd/dpmodel/model/dp_model.py @@ -27,7 +27,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/dpmodel/model/make_model.py b/deepmd/dpmodel/model/make_model.py index dc90f10da7..afe2eaffb6 100644 --- a/deepmd/dpmodel/model/make_model.py +++ b/deepmd/dpmodel/model/make_model.py @@ -190,7 +190,7 @@ def call_lower( Parameters ---------- extended_coord - coodinates in extended region. nf x (nall x 3). + coordinates in extended region. nf x (nall x 3). extended_atype atomic type in extended region. nf x nall. nlist @@ -319,7 +319,7 @@ def format_nlist( the `nlist` is pad with -1. 3. If the number of neighbors in the `nlist` is larger than sum(self.sel), - the nearest sum(sel) neighbors will be preseved. + the nearest sum(sel) neighbors will be preserved. Known limitations: @@ -329,7 +329,7 @@ def format_nlist( Parameters ---------- extended_coord - coodinates in extended region. nf x nall x 3 + coordinates in extended region. nf x nall x 3 extended_atype atomic type in extended region. nf x nall nlist @@ -340,7 +340,7 @@ def format_nlist( Returns ------- formated_nlist - the formated nlist. + the formatted nlist. """ n_nf, n_nloc, n_nnei = nlist.shape diff --git a/deepmd/dpmodel/model/transform_output.py b/deepmd/dpmodel/model/transform_output.py index 928c33f3bd..107455a6d5 100644 --- a/deepmd/dpmodel/model/transform_output.py +++ b/deepmd/dpmodel/model/transform_output.py @@ -32,7 +32,7 @@ def fit_output_to_model_output( atom_axis = -(len(shap) + 1) if vdef.reducible: kk_redu = get_reduce_name(kk) - # cast to energy prec brefore reduction + # cast to energy prec before reduction model_ret[kk_redu] = xp.sum( vv.astype(GLOBAL_ENER_FLOAT_PRECISION), axis=atom_axis ) diff --git a/deepmd/dpmodel/output_def.py b/deepmd/dpmodel/output_def.py index 2ceb4f412a..bfee338d64 100644 --- a/deepmd/dpmodel/output_def.py +++ b/deepmd/dpmodel/output_def.py @@ -166,7 +166,7 @@ class OutputVariableDef: r_differentiable If the variable is differentiated with respect to coordinates of atoms. Only reducible variable are differentiable. - Negative derivative w.r.t. coordinates will be calcualted. (e.g. force) + Negative derivative w.r.t. coordinates will be calculated. (e.g. force) c_differentiable If the variable is differentiated with respect to the cell tensor (pbc case). Only reducible variable @@ -178,7 +178,7 @@ class OutputVariableDef: category : int The category of the output variable. 
r_hessian : bool - If hessian is requred + If hessian is required magnetic : bool If the derivatives of variable have magnetic parts. intensive : bool diff --git a/deepmd/dpmodel/utils/neighbor_stat.py b/deepmd/dpmodel/utils/neighbor_stat.py index 744a4476cd..43ca2cadd1 100644 --- a/deepmd/dpmodel/utils/neighbor_stat.py +++ b/deepmd/dpmodel/utils/neighbor_stat.py @@ -21,7 +21,7 @@ class NeighborStatOP(NativeOP): - """Class for getting neighbor statics data information. + """Class for getting neighbor statistics data information. Parameters ---------- diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py index 339035ff4e..5140a88c97 100644 --- a/deepmd/dpmodel/utils/network.py +++ b/deepmd/dpmodel/utils/network.py @@ -600,7 +600,7 @@ class EN(T_Network): resnet_dt Use time step at the resnet architecture. precision - Floating point precision for the model paramters. + Floating point precision for the model parameters. seed : int, optional Random seed. bias : bool, Optional @@ -704,7 +704,7 @@ class FN(T_EmbeddingNet): resnet_dt Use time step at the resnet architecture. precision - Floating point precision for the model paramters. + Floating point precision for the model parameters. bias_out The last linear layer has bias. seed : int, optional @@ -794,7 +794,7 @@ def deserialize(cls, data: dict) -> "FittingNet": class NetworkCollection: """A collection of networks for multiple elements. - The number of dimesions for types might be 0, 1, or 2. + The number of dimensions for types might be 0, 1, or 2. - 0: embedding or fitting with type embedding, in () - 1: embedding with type_one_side, or fitting, in (type_i) - 2: embedding without type_one_side, in (type_i, type_j) diff --git a/deepmd/dpmodel/utils/nlist.py b/deepmd/dpmodel/utils/nlist.py index 3ef17fc6b9..b827032588 100644 --- a/deepmd/dpmodel/utils/nlist.py +++ b/deepmd/dpmodel/utils/nlist.py @@ -48,7 +48,7 @@ def extend_input_and_build_neighbor_list( return extended_coord, extended_atype, mapping, nlist -## translated from torch implemantation by chatgpt +## translated from torch implementation by chatgpt def build_neighbor_list( coord: np.ndarray, atype: np.ndarray, @@ -57,7 +57,7 @@ def build_neighbor_list( sel: Union[int, list[int]], distinguish_types: bool = True, ) -> np.ndarray: - """Build neightbor list for a single frame. keeps nsel neighbors. + """Build neighbor list for a single frame. keeps nsel neighbors. Parameters ---------- @@ -185,7 +185,7 @@ def get_multiple_nlist_key(rcut: float, nsel: int) -> str: return str(rcut) + "_" + str(nsel) -## translated from torch implemantation by chatgpt +## translated from torch implementation by chatgpt def build_multiple_neighbor_list( coord: np.ndarray, nlist: np.ndarray, @@ -243,7 +243,7 @@ def build_multiple_neighbor_list( return ret -## translated from torch implemantation by chatgpt +## translated from torch implementation by chatgpt def extend_coord_with_ghosts( coord: np.ndarray, atype: np.ndarray, @@ -272,7 +272,7 @@ def extend_coord_with_ghosts( extended_atype: np.ndarray extended atom type of shape [-1, nall]. 
index_mapping: np.ndarray - maping extended index to the local index + mapping extended index to the local index """ xp = array_api_compat.array_namespace(coord, atype) diff --git a/deepmd/dpmodel/utils/region.py b/deepmd/dpmodel/utils/region.py index 8102020827..8b24cbf948 100644 --- a/deepmd/dpmodel/utils/region.py +++ b/deepmd/dpmodel/utils/region.py @@ -59,7 +59,7 @@ def normalize_coord( Parameters ---------- coord : np.ndarray - orignal coordinates of shape [*, na, 3]. + original coordinates of shape [*, na, 3]. cell : np.ndarray simulation cell shape [*, 3, 3]. diff --git a/deepmd/driver.py b/deepmd/driver.py index 998edcbc18..30916259aa 100644 --- a/deepmd/driver.py +++ b/deepmd/driver.py @@ -3,7 +3,7 @@ # Derived from https://github.com/deepmodeling/dpdata/blob/18a0ed5ebced8b1f6887038883d46f31ae9990a4/dpdata/plugins/deepmd.py#L361-L443 # under LGPL-3.0-or-later license. -# The original deepmd driver maintained in the dpdata package will be overriden. +# The original deepmd driver maintained in the dpdata package will be overridden. # The class in the dpdata package needs to handle different situations for v1 and v2 interface, # which is too complex with the development of deepmd-kit. # So, it will be a good idea to ship it with DeePMD-kit itself. diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py index ad445fdea1..d9ccf392f5 100644 --- a/deepmd/entrypoints/test.py +++ b/deepmd/entrypoints/test.py @@ -248,7 +248,7 @@ def save_txt_file( header : str, optional header string to use in file, by default "" append : bool, optional - if true file will be appended insted of overwriting, by default False + if true file will be appended instead of overwriting, by default False """ flags = "ab" if append else "w" with fname.open(flags) as fp: @@ -1015,7 +1015,7 @@ def test_polar( detail_file : Optional[str] file where test details will be output atomic : bool - wheter to use glovbal version of polar potential + whether to use global version of polar potential Returns ------- diff --git a/deepmd/env.py b/deepmd/env.py index 605dfeed99..50e52fd719 100644 --- a/deepmd/env.py +++ b/deepmd/env.py @@ -102,7 +102,7 @@ def set_default_nthreads(): def get_default_nthreads() -> tuple[int, int]: - """Get paralellism settings. + """Get parallelism settings. The method will first read the environment variables with the prefix `DP_`. If not found, it will read the environment variables with the prefix `TF_` diff --git a/deepmd/infer/__init__.py b/deepmd/infer/__init__.py index 5678494023..8a8afb165a 100644 --- a/deepmd/infer/__init__.py +++ b/deepmd/infer/__init__.py @@ -18,7 +18,7 @@ def DeepPotential(*args, **kwargs) -> "DeepEval": - """Factory function that forwards to DeepEval (for compatbility). + """Factory function that forwards to DeepEval (for compatibility). Parameters ---------- diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py index 4d0134c37c..e08dc88674 100644 --- a/deepmd/infer/deep_eval.py +++ b/deepmd/infer/deep_eval.py @@ -281,7 +281,7 @@ def get_ntypes_spin(self) -> int: """Get the number of spin atom types of this model.
Only used in old implement.""" def get_model_def_script(self) -> dict: - """Get model defination script.""" + """Get model definition script.""" raise NotImplementedError("Not implemented in this backend.") @@ -548,5 +548,5 @@ def get_ntypes_spin(self) -> int: return self.deep_eval.get_ntypes_spin() def get_model_def_script(self) -> dict: - """Get model defination script.""" + """Get model definition script.""" return self.deep_eval.get_model_def_script() diff --git a/deepmd/infer/model_devi.py b/deepmd/infer/model_devi.py index 68100ba739..304aabdadc 100644 --- a/deepmd/infer/model_devi.py +++ b/deepmd/infer/model_devi.py @@ -378,7 +378,7 @@ def make_model_devi( frequency : int The number of steps that elapse between writing coordinates in a trajectory by a MD engine (such as Gromacs / LAMMPS). - This paramter is used to determine the index in the output file. + This parameter is used to determine the index in the output file. real_error : bool, default: False If True, calculate the RMS real error instead of model deviation. atomic : bool, default: False diff --git a/deepmd/loggers/loggers.py b/deepmd/loggers/loggers.py index 33b9497507..f42c032cfa 100644 --- a/deepmd/loggers/loggers.py +++ b/deepmd/loggers/loggers.py @@ -23,7 +23,7 @@ __all__ = ["set_log_handles"] -# logger formater +# logger formatter FFORMATTER = logging.Formatter( "[%(asctime)s] %(app_name)s %(levelname)-7s %(name)-45s %(message)s" ) @@ -61,7 +61,7 @@ def filter(self, record): class _MPIMasterFilter(logging.Filter): - """Filter that lets through only messages emited from rank==0.""" + """Filter that lets through only messages emitted from rank==0.""" def __init__(self, rank: int) -> None: super().__init__(name="MPI_master_log") @@ -138,7 +138,7 @@ def _open(self): return _MPIFileStream(self.baseFilename, self.MPI, self.mode) def setStream(self, stream): - """Stream canot be reasigned in MPI mode.""" + """Stream cannot be reassigned in MPI mode.""" raise NotImplementedError("Unable to do for MPI file handler!") @@ -254,7 +254,7 @@ def set_log_handles( fh.setFormatter(FFORMATTER_MPI) elif mpi_log == "workers": rank = MPI.COMM_WORLD.Get_rank() - # if file has suffix than inser rank number before suffix + # if file has suffix then insert rank number before suffix # e.g deepmd.log -> deepmd_.log # if no suffix is present, insert rank as suffix # e.g. deepmdlog -> deepmdlog. diff --git a/deepmd/pt/cxx_op.py b/deepmd/pt/cxx_op.py index d46f20a0bc..b0653522b2 100644 --- a/deepmd/pt/cxx_op.py +++ b/deepmd/pt/cxx_op.py @@ -76,7 +76,7 @@ def load_library(module_name: str) -> bool: "instead." ) from e error_message = ( - "This deepmd-kit package is inconsitent with PyTorch " + "This deepmd-kit package is inconsistent with PyTorch " f"Runtime, thus an error is raised when loading {module_name}. " "You need to rebuild deepmd-kit against this PyTorch " "runtime." ) diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py index 8f0b686e7b..934cafdb47 100644 --- a/deepmd/pt/infer/deep_eval.py +++ b/deepmd/pt/infer/deep_eval.py @@ -70,7 +70,7 @@ class DeepEval(DeepEvalBackend): - """PyTorch backend implementaion of DeepEval. + """PyTorch backend implementation of DeepEval.
Parameters ---------- @@ -601,7 +601,7 @@ def eval_typeebd(self) -> np.ndarray: return to_numpy_array(typeebd) def get_model_def_script(self) -> str: - """Get model defination script.""" + """Get model definition script.""" return self.model_def_script def eval_descriptor( diff --git a/deepmd/pt/model/atomic_model/base_atomic_model.py b/deepmd/pt/model/atomic_model/base_atomic_model.py index bd3c2b49ab..e26549581e 100644 --- a/deepmd/pt/model/atomic_model/base_atomic_model.py +++ b/deepmd/pt/model/atomic_model/base_atomic_model.py @@ -68,7 +68,7 @@ class BaseAtomicModel(torch.nn.Module, BaseAtomicModel_): Specifying atomic energy contribution in vacuum. Given by key:value pairs. The value is a list specifying the bias. the elements can be None or np.ndarray of output shape. For example: [None, [2.]] means type 0 is not set, type 1 is set to [2.] - The `set_davg_zero` key in the descrptor should be set. + The `set_davg_zero` key in the descriptor should be set. """ @@ -150,7 +150,7 @@ def make_atom_mask( self, atype: torch.Tensor, ) -> torch.Tensor: - """The atoms with type < 0 are treated as virutal atoms, + """The atoms with type < 0 are treated as virtual atoms, which serves as place-holders for multi-frame calculations with different number of atoms in different frames. @@ -162,7 +162,7 @@ def make_atom_mask( Returns ------- mask - True for real atoms and False for virutal atoms. + True for real atoms and False for virtual atoms. """ # supposed to be supported by all backends @@ -202,7 +202,7 @@ def forward_common_atomic( Parameters ---------- extended_coord - extended coodinates, shape: nf x (nall x 3) + extended coordinates, shape: nf x (nall x 3) extended_atype extended atom typs, shape: nf x nall for a type < 0 indicating the atomic is virtual. diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index edb1253234..48c8d0d859 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -175,7 +175,7 @@ def forward_atomic( Parameters ---------- extended_coord - coodinates in extended region + coordinates in extended region extended_atype atomic type in extended region nlist diff --git a/deepmd/pt/model/atomic_model/linear_atomic_model.py b/deepmd/pt/model/atomic_model/linear_atomic_model.py index 0aa5afc67f..570fcdcc43 100644 --- a/deepmd/pt/model/atomic_model/linear_atomic_model.py +++ b/deepmd/pt/model/atomic_model/linear_atomic_model.py @@ -199,7 +199,7 @@ def forward_atomic( Parameters ---------- extended_coord - coodinates in extended region, (nframes, nall * 3) + coordinates in extended region, (nframes, nall * 3) extended_atype atomic type in extended region, (nframes, nall) nlist @@ -489,7 +489,7 @@ class DPZBLLinearEnergyAtomicModel(LinearEnergyAtomicModel): Mapping atom type to the name (str) of the type. For example `type_map[1]` gives the name of the type 1. smin_alpha - The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. + The short-range tabulated interaction will be switched according to the distance of the nearest neighbor. This distance is calculated by softmin. 
""" diff --git a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py index 28a165d501..87e3027bc8 100644 --- a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py +++ b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py @@ -59,7 +59,7 @@ class PairTabAtomicModel(BaseAtomicModel): rcond : float, optional The condition number for the regression of atomic energy. atom_ener - Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set. + Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descriptor should be set. """ @@ -104,7 +104,7 @@ def __init__( ) # self.model_type = "ener" - # self.model_version = MODEL_VERSION ## this shoud be in the parent class + # self.model_version = MODEL_VERSION ## this should be in the parent class if isinstance(sel, int): self.sel = sel diff --git a/deepmd/pt/model/descriptor/descriptor.py b/deepmd/pt/model/descriptor/descriptor.py index 03173a7693..5d36606760 100644 --- a/deepmd/pt/model/descriptor/descriptor.py +++ b/deepmd/pt/model/descriptor/descriptor.py @@ -129,7 +129,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ assert ( self.__class__ == base_class.__class__ diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index 322fa3a12d..d3156f7c84 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -344,11 +344,11 @@ def get_dim_emb(self) -> int: return self.se_atten.dim_emb def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. - If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. @@ -371,7 +371,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ assert ( self.__class__ == base_class.__class__ @@ -620,7 +620,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index 632efe5dbf..277aa4917f 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -343,11 +343,11 @@ def get_dim_emb(self) -> int: return self.repformers.dim_emb def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. 
- If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. @@ -373,7 +373,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ assert ( self.__class__ == base_class.__class__ @@ -819,7 +819,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/pt/model/descriptor/hybrid.py b/deepmd/pt/model/descriptor/hybrid.py index c8730e3465..ba64f53ef7 100644 --- a/deepmd/pt/model/descriptor/hybrid.py +++ b/deepmd/pt/model/descriptor/hybrid.py @@ -70,7 +70,7 @@ def __init__( for ii in range(1, self.numb_descrpt): assert ( self.descrpt_list[ii].get_ntypes() == self.descrpt_list[0].get_ntypes() - ), f"number of atom types in {ii}th descrptor does not match others" + ), f"number of atom types in {ii}th descriptor does not match others" # if hybrid sel is larger than sub sel, the nlist needs to be cut for each type self.nlist_cut_idx: list[torch.Tensor] = [] if self.mixed_types() and not all( @@ -168,7 +168,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ assert ( self.__class__ == base_class.__class__ @@ -308,7 +308,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/pt/model/descriptor/repformer_layer.py b/deepmd/pt/model/descriptor/repformer_layer.py index 5270c94112..31132f365e 100644 --- a/deepmd/pt/model/descriptor/repformer_layer.py +++ b/deepmd/pt/model/descriptor/repformer_layer.py @@ -1105,7 +1105,7 @@ def forward( """ Parameters ---------- - g1_ext : nf x nall x ng1 extended single-atom chanel + g1_ext : nf x nall x ng1 extended single-atom channel g2 : nf x nloc x nnei x ng2 pair-atom channel, invariant h2 : nf x nloc x nnei x 3 pair-atom channel, equivariant nlist : nf x nloc x nnei neighbor list (padded neis are set to 0) @@ -1114,7 +1114,7 @@ def forward( Returns ------- - g1: nf x nloc x ng1 updated single-atom chanel + g1: nf x nloc x ng1 updated single-atom channel g2: nf x nloc x nnei x ng2 updated pair-atom channel, invariant h2: nf x nloc x nnei x 3 updated pair-atom channel, equivariant """ diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py index 023a84b3ee..81d96d4372 100644 --- a/deepmd/pt/model/descriptor/repformers.py +++ b/deepmd/pt/model/descriptor/repformers.py @@ -60,7 +60,7 @@ def border_op( "See documentation for DPA-2 for details." 
) - # Note: this hack cannot actually save a model that can be runned using LAMMPS. + # Note: this hack cannot actually save a model that can be run using LAMMPS. torch.ops.deepmd.border_op = border_op @@ -342,11 +342,11 @@ def __getitem__(self, key): raise KeyError(key) def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. - If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py index 8f3c7605d5..56cb1f5bc6 100644 --- a/deepmd/pt/model/descriptor/se_a.py +++ b/deepmd/pt/model/descriptor/se_a.py @@ -164,7 +164,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ assert ( self.__class__ == base_class.__class__ @@ -342,7 +342,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict @@ -479,11 +479,11 @@ def get_dim_in(self) -> int: return self.dim_in def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. - If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py index 8f418c28f9..aab72f7e98 100644 --- a/deepmd/pt/model/descriptor/se_atten.py +++ b/deepmd/pt/model/descriptor/se_atten.py @@ -298,11 +298,11 @@ def __getitem__(self, key): raise KeyError(key) def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. - If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py index 12677a3daf..0aa50c613f 100644 --- a/deepmd/pt/model/descriptor/se_r.py +++ b/deepmd/pt/model/descriptor/se_r.py @@ -163,11 +163,11 @@ def get_dim_in(self) -> int: return 0 def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. - If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. 
@@ -190,7 +190,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ assert ( self.__class__ == base_class.__class__ @@ -473,7 +473,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/pt/model/descriptor/se_t.py b/deepmd/pt/model/descriptor/se_t.py index 666eba6baf..7b83bcbd69 100644 --- a/deepmd/pt/model/descriptor/se_t.py +++ b/deepmd/pt/model/descriptor/se_t.py @@ -198,7 +198,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ assert ( self.__class__ == base_class.__class__ @@ -372,7 +372,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict @@ -529,11 +529,11 @@ def get_dim_in(self) -> int: return self.dim_in def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. - If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. diff --git a/deepmd/pt/model/descriptor/se_t_tebd.py b/deepmd/pt/model/descriptor/se_t_tebd.py index 9ee9b4dc0b..82ccb06f32 100644 --- a/deepmd/pt/model/descriptor/se_t_tebd.py +++ b/deepmd/pt/model/descriptor/se_t_tebd.py @@ -215,11 +215,11 @@ def get_dim_emb(self) -> int: return self.se_ttebd.dim_emb def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. - If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. @@ -242,7 +242,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. 
""" assert ( self.__class__ == base_class.__class__ @@ -470,7 +470,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict @@ -632,11 +632,11 @@ def __getitem__(self, key): raise KeyError(key) def mixed_types(self) -> bool: - """If true, the discriptor + """If true, the descriptor 1. assumes total number of atoms aligned across frames; 2. requires a neighbor list that does not distinguish different atomic types. - If false, the discriptor + If false, the descriptor 1. assumes total number of atoms of each atom type aligned across frames; 2. requires a neighbor list that distinguishes different atomic types. diff --git a/deepmd/pt/model/model/dp_linear_model.py b/deepmd/pt/model/model/dp_linear_model.py index ef2e84bd19..d19070fc5b 100644 --- a/deepmd/pt/model/model/dp_linear_model.py +++ b/deepmd/pt/model/model/dp_linear_model.py @@ -140,7 +140,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/pt/model/model/dp_model.py b/deepmd/pt/model/model/dp_model.py index bd278ed787..e71c5e08de 100644 --- a/deepmd/pt/model/model/dp_model.py +++ b/deepmd/pt/model/model/dp_model.py @@ -28,7 +28,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/pt/model/model/dp_zbl_model.py b/deepmd/pt/model/model/dp_zbl_model.py index 59147e1d4c..e1ef00f5fe 100644 --- a/deepmd/pt/model/model/dp_zbl_model.py +++ b/deepmd/pt/model/model/dp_zbl_model.py @@ -140,7 +140,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/pt/model/model/frozen.py b/deepmd/pt/model/model/frozen.py index 431c035339..37149303d4 100644 --- a/deepmd/pt/model/model/frozen.py +++ b/deepmd/pt/model/model/frozen.py @@ -182,7 +182,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py index 46b7e51109..a9d5e26060 100644 --- a/deepmd/pt/model/model/make_model.py +++ b/deepmd/pt/model/model/make_model.py @@ -221,7 +221,7 @@ def forward_common_lower( Parameters ---------- extended_coord - coodinates in extended region. nf x (nall x 3) + coordinates in extended region. nf x (nall x 3) extended_atype atomic type in extended region. nf x nall nlist @@ -362,7 +362,7 @@ def format_nlist( the `nlist` is pad with -1. 3. If the number of neighbors in the `nlist` is larger than sum(self.sel), - the nearest sum(sel) neighbors will be preseved. + the nearest sum(sel) neighbors will be preserved. Known limitations: @@ -372,7 +372,7 @@ def format_nlist( Parameters ---------- extended_coord - coodinates in extended region. nf x nall x 3 + coordinates in extended region. nf x nall x 3 extended_atype atomic type in extended region. 
nf x nall nlist @@ -383,7 +383,7 @@ def format_nlist( Returns ------- formated_nlist - the formated nlist. + the formatted nlist. """ mixed_types = self.mixed_types() diff --git a/deepmd/pt/model/model/spin_model.py b/deepmd/pt/model/model/spin_model.py index a9f6e4d75a..bc1bc81a74 100644 --- a/deepmd/pt/model/model/spin_model.py +++ b/deepmd/pt/model/model/spin_model.py @@ -105,9 +105,9 @@ def process_spin_output( """ Split the output both real and virtual atoms, and scale the latter. add_mag: whether to add magnetic tensor onto the real tensor. - Default: True. e.g. Ture for forces and False for atomic virials on real atoms. + Default: True. e.g. True for forces and False for atomic virials on real atoms. virtual_scale: whether to scale the magnetic tensor with virtual scale factor. - Default: True. e.g. Ture for forces and False for atomic virials on virtual atoms. + Default: True. e.g. True for forces and False for atomic virials on virtual atoms. """ nframes, nloc_double = out_tensor.shape[:2] nloc = nloc_double // 2 @@ -138,9 +138,9 @@ def process_spin_output_lower( """ Split the extended output of both real and virtual atoms with switch, and scale the latter. add_mag: whether to add magnetic tensor onto the real tensor. - Default: True. e.g. Ture for forces and False for atomic virials on real atoms. + Default: True. e.g. True for forces and False for atomic virials on real atoms. virtual_scale: whether to scale the magnetic tensor with virtual scale factor. - Default: True. e.g. Ture for forces and False for atomic virials on virtual atoms. + Default: True. e.g. True for forces and False for atomic virials on virtual atoms. """ nframes, nall_double = extended_out_tensor.shape[:2] nall = nall_double // 2 diff --git a/deepmd/pt/model/network/init.py b/deepmd/pt/model/network/init.py index 0bab6b66bd..fe3c034637 100644 --- a/deepmd/pt/model/network/init.py +++ b/deepmd/pt/model/network/init.py @@ -17,7 +17,7 @@ # These no_grad_* functions are necessary as wrappers around the parts of these # functions that use `with torch.no_grad()`. The JIT doesn't support context # managers, so these need to be implemented as builtins. Using these wrappers -# lets us keep those builtins small and re-usable. +# lets us keep those builtins small and reusable. def _no_grad_uniform_(tensor, a, b, generator=None): with torch.no_grad(): return tensor.uniform_(a, b, generator=generator) diff --git a/deepmd/pt/model/network/network.py b/deepmd/pt/model/network/network.py index 12e1eabf22..88ea108ce7 100644 --- a/deepmd/pt/model/network/network.py +++ b/deepmd/pt/model/network/network.py @@ -300,7 +300,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ assert ( self.__class__ == base_class.__class__ diff --git a/deepmd/pt/model/task/denoise.py b/deepmd/pt/model/task/denoise.py index dd32042564..df65f1cd18 100644 --- a/deepmd/pt/model/task/denoise.py +++ b/deepmd/pt/model/task/denoise.py @@ -39,7 +39,7 @@ def __init__( - ntypes: Element count. - embedding_width: Embedding width per atom. - neuron: Number of neurons in each hidden layers of the fitting net. - - bias_atom_e: Average enery per atom for each element. 
+ - bias_atom_e: Average energy per atom for each element. - resnet_dt: Using time-step in the ResNet construction. """ super().__init__() diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index e0c5b0951e..ee8372c3ac 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -117,7 +117,7 @@ def __init__( - ntypes: Element count. - embedding_width: Embedding width per atom. - neuron: Number of neurons in each hidden layers of the fitting net. - - bias_atom_e: Average enery per atom for each element. + - bias_atom_e: Average energy per atom for each element. - resnet_dt: Using time-step in the ResNet construction. """ super().__init__() diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 15837aca98..bae46c2adb 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -59,7 +59,7 @@ def share_params(self, base_class, shared_level, resume=False): """ Share the parameters of self to the base_class with shared_level during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ assert ( self.__class__ == base_class.__class__ @@ -96,7 +96,7 @@ class GeneralFitting(Fitting): neuron : list[int] Number of neurons in each hidden layers of the fitting net. bias_atom_e : torch.Tensor, optional - Average enery per atom for each element. + Average energy per atom for each element. resnet_dt : bool Using time-step in the ResNet construction. numb_fparam : int @@ -121,9 +121,9 @@ class GeneralFitting(Fitting): Now this only supports setting all the parameters in the fitting net at one state. When in list[bool], the trainable will be True only if all the boolean parameters are True. remove_vaccum_contribution: list[bool], optional - Remove vaccum contribution before the bias is added. The list assigned each + Remove vacuum contribution before the bias is added. The list assigned each type. For `mixed_types` provide `[True]`, otherwise it should be a list of the same - length as `ntypes` signaling if or not removing the vaccum contribution for the atom types in the list. + length as `ntypes` signaling if or not removing the vacuum contribution for the atom types in the list. type_map: list[str], Optional A list of strings. Give the name to each type of atoms. use_aparam_as_mask: bool @@ -407,9 +407,9 @@ def _forward_common( xx = descriptor if self.remove_vaccum_contribution is not None: # TODO: compute the input for vaccm when remove_vaccum_contribution is set - # Idealy, the input for vaccum should be computed; + # Ideally, the input for vacuum should be computed; # we consider it as always zero for convenience. - # Needs a compute_input_stats for vaccum passed from the + # Needs a compute_input_stats for vacuum passed from the # descriptor. xx_zeros = torch.zeros_like(xx) else: diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py index e76e1d2063..b9be26cfdc 100644 --- a/deepmd/pt/model/task/invar_fitting.py +++ b/deepmd/pt/model/task/invar_fitting.py @@ -50,7 +50,7 @@ class InvarFitting(GeneralFitting): neuron : list[int] Number of neurons in each hidden layers of the fitting net. bias_atom_e : torch.Tensor, optional - Average enery per atom for each element. + Average energy per atom for each element. 
resnet_dt : bool Using time-step in the ResNet construction. numb_fparam : int @@ -74,7 +74,7 @@ class InvarFitting(GeneralFitting): Specifying atomic energy contribution in vacuum. The value is a list specifying the bias. the elements can be None or np.array of output shape. For example: [None, [2.]] means type 0 is not set, type 1 is set to [2.] - The `set_davg_zero` key in the descrptor should be set. + The `set_davg_zero` key in the descriptor should be set. type_map: list[str], Optional A list of strings. Give the name to each type of atoms. use_aparam_as_mask: bool diff --git a/deepmd/pt/model/task/property.py b/deepmd/pt/model/task/property.py index cc6a4e8745..4017f51468 100644 --- a/deepmd/pt/model/task/property.py +++ b/deepmd/pt/model/task/property.py @@ -35,7 +35,7 @@ @Fitting.register("property") class PropertyFittingNet(InvarFitting): - """Fitting the rotationally invariant porperties of `task_dim` of the system. + """Fitting the rotationally invariant properties of `task_dim` of the system. Parameters ---------- diff --git a/deepmd/pt/model/task/type_predict.py b/deepmd/pt/model/task/type_predict.py index c696590043..e8a5db62b5 100644 --- a/deepmd/pt/model/task/type_predict.py +++ b/deepmd/pt/model/task/type_predict.py @@ -19,7 +19,7 @@ def __init__(self, feature_dim, ntypes, activation_function="gelu", **kwargs): Args: - feature_dim: Input dm. - - ntypes: Numer of types to predict. + - ntypes: Number of types to predict. - activation_function: Activate function. """ super().__init__() diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py index 922ac296ea..17fb8477a5 100644 --- a/deepmd/pt/train/wrapper.py +++ b/deepmd/pt/train/wrapper.py @@ -63,7 +63,7 @@ def share_params(self, shared_links, resume=False): """ Share the parameters of classes following rules defined in shared_links during multitask training. If not start from checkpoint (resume is False), - some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. """ supported_types = ["descriptor", "fitting_net"] for shared_item in shared_links: diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index c7f44cfb70..581f67196c 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -301,7 +301,7 @@ def get_weighted_sampler(training_data, prob_style, sys_prob=False): else: probs = process_sys_probs(prob_style, training_data.index) log.debug("Generated weighted sampler with prob array: " + str(probs)) - # training_data.total_batch is the size of one epoch, you can increase it to avoid too many rebuilding of iteraters + # training_data.total_batch is the size of one epoch, you can increase it to avoid too many rebuilding of iterators len_sampler = training_data.total_batch * max(env.NUM_WORKERS, 1) with torch.device("cpu"): sampler = WeightedRandomSampler(probs, len_sampler, replacement=True) diff --git a/deepmd/pt/utils/env_mat_stat.py b/deepmd/pt/utils/env_mat_stat.py index cc30bd5155..b253a1b55e 100644 --- a/deepmd/pt/utils/env_mat_stat.py +++ b/deepmd/pt/utils/env_mat_stat.py @@ -61,7 +61,7 @@ def compute_stat(self, env_mat: dict[str, torch.Tensor]) -> dict[str, StatItem]: class EnvMatStatSe(EnvMatStat): - """Environmental matrix statistics for the se_a/se_r environemntal matrix. + """Environmental matrix statistics for the se_a/se_r environmental matrix. 
Parameters ---------- diff --git a/deepmd/pt/utils/neighbor_stat.py b/deepmd/pt/utils/neighbor_stat.py index 7d52bfaae1..64ad695827 100644 --- a/deepmd/pt/utils/neighbor_stat.py +++ b/deepmd/pt/utils/neighbor_stat.py @@ -25,7 +25,7 @@ class NeighborStatOP(torch.nn.Module): - """Class for getting neighbor statics data information. + """Class for getting neighbor statistics data information. Parameters ---------- diff --git a/deepmd/pt/utils/nlist.py b/deepmd/pt/utils/nlist.py index c30ec6dd02..db1e87785b 100644 --- a/deepmd/pt/utils/nlist.py +++ b/deepmd/pt/utils/nlist.py @@ -56,7 +56,7 @@ def build_neighbor_list( sel: Union[int, list[int]], distinguish_types: bool = True, ) -> torch.Tensor: - """Build neightbor list for a single frame. keeps nsel neighbors. + """Build neighbor list for a single frame. keeps nsel neighbors. Parameters ---------- @@ -264,7 +264,7 @@ def build_directional_neighbor_list( rr = torch.linalg.norm(diff, dim=-1) rr, nlist = torch.sort(rr, dim=-1) - # We assume that the central and neighbor atoms are diffferent, + # We assume that the central and neighbor atoms are different, # thus we do not need to exclude self-neighbors. # # if central atom has two zero distances, sorting sometimes can not exclude itself # rr -= torch.eye(nloc_cntl, nall_neig, dtype=rr.dtype, device=rr.device).unsqueeze(0) @@ -429,7 +429,7 @@ def extend_coord_with_ghosts( extended_atype: torch.Tensor extended atom type of shape [-1, nall]. index_mapping: torch.Tensor - maping extended index to the local index + mapping extended index to the local index """ device = coord.device diff --git a/deepmd/pt/utils/region.py b/deepmd/pt/utils/region.py index 6fa77125aa..3272434995 100644 --- a/deepmd/pt/utils/region.py +++ b/deepmd/pt/utils/region.py @@ -92,7 +92,7 @@ def normalize_coord( Parameters ---------- coord : torch.Tensor - orignal coordinates of shape [*, na, 3]. + original coordinates of shape [*, na, 3]. Returns ------- diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 831d2bef76..4028d89fc9 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -266,7 +266,7 @@ def compute_output_stats( Specifying atomic energy contribution in vacuum. Given by key:value pairs. The value is a list specifying the bias. the elements can be None or np.ndarray of output shape. For example: [None, [2.]] means type 0 is not set, type 1 is set to [2.] - The `set_davg_zero` key in the descrptor should be set. + The `set_davg_zero` key in the descriptor should be set. model_forward : Callable[..., torch.Tensor], optional The wrapped forward function of atomic model. 
         If not None, the model will be utilized to generate the original energy prediction,
diff --git a/deepmd/tf/cluster/local.py b/deepmd/tf/cluster/local.py
index a9392bd326..25fb1cc645 100644
--- a/deepmd/tf/cluster/local.py
+++ b/deepmd/tf/cluster/local.py
@@ -43,7 +43,7 @@ def get_gpus():
     stdout, stderr = p.communicate()
     if p.returncode != 0:
         decoded = stderr.decode("UTF-8")
-        raise RuntimeError(f"Failed to detect availbe GPUs due to:\n{decoded}")
+        raise RuntimeError(f"Failed to detect available GPUs due to:\n{decoded}")
     decoded = stdout.decode("UTF-8").strip()
     num_gpus = int(decoded)
     return list(range(num_gpus)) if num_gpus > 0 else None
diff --git a/deepmd/tf/descriptor/descriptor.py b/deepmd/tf/descriptor/descriptor.py
index ba54ca1309..dd86beb21e 100644
--- a/deepmd/tf/descriptor/descriptor.py
+++ b/deepmd/tf/descriptor/descriptor.py
@@ -222,7 +222,7 @@ def enable_compression(
         check_frequency: int = -1,
         suffix: str = "",
     ) -> None:
-        """Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the
+        """Receive the statistics (distance, max_nbor_size and env_mat_range) of the
         training data.

         Parameters
@@ -253,7 +253,7 @@ def enable_compression(
         )

     def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None:
-        """Reveive the mixed precision setting.
+        """Receive the mixed precision setting.

         Parameters
         ----------
@@ -473,7 +473,7 @@ def update_sel(
         Parameters
         ----------
         train_data : DeepmdDataSystem
-            data used to do neighbor statictics
+            data used to do neighbor statistics
         type_map : list[str], optional
             The name of each type of atoms
         local_jdata : dict
diff --git a/deepmd/tf/descriptor/hybrid.py b/deepmd/tf/descriptor/hybrid.py
index e4458476c8..3f20e7d856 100644
--- a/deepmd/tf/descriptor/hybrid.py
+++ b/deepmd/tf/descriptor/hybrid.py
@@ -72,7 +72,7 @@ def __init__(
         for ii in range(1, self.numb_descrpt):
             assert (
                 self.descrpt_list[ii].get_ntypes() == self.descrpt_list[0].get_ntypes()
-            ), f"number of atom types in {ii}th descrptor does not match others"
+            ), f"number of atom types in {ii}th descriptor does not match others"

     def get_rcut(self) -> float:
         """Returns the cut-off radius."""
@@ -317,7 +317,7 @@ def enable_compression(
         check_frequency: int = -1,
         suffix: str = "",
     ) -> None:
-        """Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the
+        """Receive the statistics (distance, max_nbor_size and env_mat_range) of the
         training data.

         Parameters
@@ -352,7 +352,7 @@ def enable_compression(
         )

     def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None:
-        """Reveive the mixed precision setting.
+        """Receive the mixed precision setting.

         Parameters
         ----------
@@ -434,7 +434,7 @@ def update_sel(
         Parameters
         ----------
         train_data : DeepmdDataSystem
-            data used to do neighbor statictics
+            data used to do neighbor statistics
         type_map : list[str], optional
             The name of each type of atoms
         local_jdata : dict
diff --git a/deepmd/tf/descriptor/loc_frame.py b/deepmd/tf/descriptor/loc_frame.py
index 74ba755b4c..9b338a5d25 100644
--- a/deepmd/tf/descriptor/loc_frame.py
+++ b/deepmd/tf/descriptor/loc_frame.py
@@ -72,7 +72,7 @@ def __init__(
         self.ntypes = len(self.sel_a)
         assert self.ntypes == len(self.sel_r)
         self.rcut_a = -1
-        # numb of neighbors and numb of descrptors
+        # numb of neighbors and numb of descriptors
         self.nnei_a = np.cumsum(self.sel_a)[-1]
         self.nnei_r = np.cumsum(self.sel_r)[-1]
         self.nnei = self.nnei_a + self.nnei_r
@@ -443,7 +443,7 @@ def update_sel(
         Parameters
         ----------
         train_data : DeepmdDataSystem
-            data used to do neighbor statictics
+            data used to do neighbor statistics
         type_map : list[str], optional
             The name of each type of atoms
         local_jdata : dict
diff --git a/deepmd/tf/descriptor/se.py b/deepmd/tf/descriptor/se.py
index 319a65f6da..746ea8c628 100644
--- a/deepmd/tf/descriptor/se.py
+++ b/deepmd/tf/descriptor/se.py
@@ -35,7 +35,7 @@ class DescrptSe(Descriptor):
     -----
     All of these descriptors have an environmental matrix and an
     embedding network (:meth:`deepmd.tf.utils.network.embedding_net`), so
-    they can share some similiar methods without defining them twice.
+    they can share some similar methods without defining them twice.

     Attributes
     ----------
@@ -162,7 +162,7 @@ def update_sel(
         Parameters
         ----------
         train_data : DeepmdDataSystem
-            data used to do neighbor statictics
+            data used to do neighbor statistics
         type_map : list[str], optional
             The name of each type of atoms
         local_jdata : dict
diff --git a/deepmd/tf/descriptor/se_a.py b/deepmd/tf/descriptor/se_a.py
index d5a8ed6815..a0b6b810e4 100644
--- a/deepmd/tf/descriptor/se_a.py
+++ b/deepmd/tf/descriptor/se_a.py
@@ -237,7 +237,7 @@ def __init__(
         self.ntypes = len(self.sel_a)
         assert self.ntypes == len(self.sel_r)
         self.rcut_a = -1
-        # numb of neighbors and numb of descrptors
+        # numb of neighbors and numb of descriptors
         self.nnei_a = np.cumsum(self.sel_a)[-1]
         self.nnei_r = np.cumsum(self.sel_r)[-1]
         self.nnei = self.nnei_a + self.nnei_r
@@ -448,7 +448,7 @@ def enable_compression(
         check_frequency: int = -1,
         suffix: str = "",
     ) -> None:
-        """Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the training data.
+        """Receive the statistics (distance, max_nbor_size and env_mat_range) of the training data.

         Parameters
         ----------
@@ -502,7 +502,7 @@ def enable_compression(
             )
         elif len(ret_one_side) != 0 and len(ret_two_side) != 0:
             raise RuntimeError(
-                "both one side and two side embedding net varaibles are detected, it is a wrong model."
+                "both one side and two side embedding net variables are detected, it is a wrong model."
             )
         elif len(ret_two_side) != 0:
             self.final_type_embedding = get_two_side_type_embedding(self, graph)
@@ -548,7 +548,7 @@ def enable_compression(
         self.dstd = get_tensor_by_name_from_graph(graph, f"descrpt_attr{suffix}/t_std")

     def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None:
-        """Reveive the mixed precision setting.
+        """Receive the mixed precision setting.

         Parameters
         ----------
diff --git a/deepmd/tf/descriptor/se_a_ebd_v2.py b/deepmd/tf/descriptor/se_a_ebd_v2.py
index af43eedbbc..035fc6509c 100644
--- a/deepmd/tf/descriptor/se_a_ebd_v2.py
+++ b/deepmd/tf/descriptor/se_a_ebd_v2.py
@@ -23,7 +23,7 @@
 class DescrptSeAEbdV2(DescrptSeA):
     r"""A compressible se_a_ebd model.

-    This model is a warpper for DescriptorSeA, which set tebd_input_mode='strip'.
+    This model is a wrapper for DescriptorSeA, which set tebd_input_mode='strip'.
     """

     def __init__(
diff --git a/deepmd/tf/descriptor/se_a_ef.py b/deepmd/tf/descriptor/se_a_ef.py
index 9f70464c56..bf891e6032 100644
--- a/deepmd/tf/descriptor/se_a_ef.py
+++ b/deepmd/tf/descriptor/se_a_ef.py
@@ -348,7 +348,7 @@ def __init__(
         self.ntypes = len(self.sel_a)
         assert self.ntypes == len(self.sel_r)
         self.rcut_a = -1
-        # numb of neighbors and numb of descrptors
+        # numb of neighbors and numb of descriptors
         self.nnei_a = np.cumsum(self.sel_a)[-1]
         self.nnei_r = np.cumsum(self.sel_r)[-1]
         self.nnei = self.nnei_a + self.nnei_r
diff --git a/deepmd/tf/descriptor/se_a_mask.py b/deepmd/tf/descriptor/se_a_mask.py
index e12f6a0fff..5667122809 100644
--- a/deepmd/tf/descriptor/se_a_mask.py
+++ b/deepmd/tf/descriptor/se_a_mask.py
@@ -157,7 +157,7 @@ def __init__(
         self.ntypes = len(self.sel_a)
         assert self.ntypes == len(self.sel_r)
         self.rcut_a = -1
-        # numb of neighbors and numb of descrptors
+        # numb of neighbors and numb of descriptors
         self.nnei_a = np.cumsum(self.sel_a)[-1]
         self.nnei = self.nnei_a
         # to be compat with old option of `stripped_type_embedding`
@@ -435,7 +435,7 @@ def update_sel(
         Parameters
         ----------
         train_data : DeepmdDataSystem
-            data used to do neighbor statictics
+            data used to do neighbor statistics
         type_map : list[str], optional
             The name of each type of atoms
         local_jdata : dict
diff --git a/deepmd/tf/descriptor/se_atten.py b/deepmd/tf/descriptor/se_atten.py
index 8d101f151c..7bfb784419 100644
--- a/deepmd/tf/descriptor/se_atten.py
+++ b/deepmd/tf/descriptor/se_atten.py
@@ -425,7 +425,7 @@ def enable_compression(
         suffix: str = "",
         tebd_suffix: str = "",
     ) -> None:
-        """Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the training data.
+        """Receive the statistics (distance, max_nbor_size and env_mat_range) of the training data.

         Parameters
         ----------
@@ -707,7 +707,7 @@ def _pass_filter(
             assert (
                 input_dict is not None
                 and input_dict.get("type_embedding", None) is not None
-            ), "se_atten desctiptor must use type_embedding"
+            ), "se_atten descriptor must use type_embedding"
             type_embedding = input_dict.get("type_embedding", None)
         inputs = tf.reshape(inputs, [-1, natoms[0], self.ndescrpt])
         output = []
@@ -1434,9 +1434,9 @@ def build_type_exclude_mask_mixed(

         Notes
         -----
-        This method has the similiar way to build the type exclude mask as
+        This method has the similar way to build the type exclude mask as
         :meth:`deepmd.tf.descriptor.descriptor.Descriptor.build_type_exclude_mask`.
-        The mathmatical expression has been explained in that method.
+        The mathematical expression has been explained in that method.
         The difference is that the attention descriptor has provided the type of
         the neighbors (idx_j) that is not in order, so we use it from an extra
         input.
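[Editor's note: for readers unfamiliar with the masking scheme referenced in the docstring above, here is a minimal NumPy sketch of the idea. The function name and arguments are illustrative only and do not mirror the actual TF implementation in se_atten.py.]

```python
import numpy as np

def type_exclude_mask_mixed(exclude_types, atype, nei_type):
    """Sketch: zero out neighbors whose unordered type pair is excluded.

    atype:    (nloc,)      types of the central atoms i
    nei_type: (nloc, nnei) types of the neighbors j (the idx_j above),
                           passed as an extra input because the attention
                           descriptor does not keep neighbors sorted by type
    returns:  (nloc, nnei) 1.0 = keep, 0.0 = exclude
    """
    mask = np.ones(nei_type.shape, dtype=np.float64)
    for ti, tj in exclude_types:
        # match the pair in either order (i, j) or (j, i)
        hit = ((atype[:, None] == ti) & (nei_type == tj)) | (
            (atype[:, None] == tj) & (nei_type == ti)
        )
        mask[hit] = 0.0
    return mask
```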
@@ -1521,7 +1521,7 @@ def update_sel(
         Parameters
         ----------
         train_data : DeepmdDataSystem
-            data used to do neighbor statictics
+            data used to do neighbor statistics
         type_map : list[str], optional
             The name of each type of atoms
         local_jdata : dict
diff --git a/deepmd/tf/descriptor/se_r.py b/deepmd/tf/descriptor/se_r.py
index 8096ef7c96..752642c1d5 100644
--- a/deepmd/tf/descriptor/se_r.py
+++ b/deepmd/tf/descriptor/se_r.py
@@ -149,7 +149,7 @@ def __init__(
         # descrpt config
         self.sel_a = [0 for ii in range(len(self.sel_r))]
         self.ntypes = len(self.sel_r)
-        # numb of neighbors and numb of descrptors
+        # numb of neighbors and numb of descriptors
         self.nnei_a = np.cumsum(self.sel_a)[-1]
         self.nnei_r = np.cumsum(self.sel_r)[-1]
         self.nnei = self.nnei_a + self.nnei_r
@@ -325,7 +325,7 @@ def enable_compression(
         check_frequency: int = -1,
         suffix: str = "",
     ) -> None:
-        """Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the training data.
+        """Receive the statistics (distance, max_nbor_size and env_mat_range) of the training data.

         Parameters
         ----------
diff --git a/deepmd/tf/descriptor/se_t.py b/deepmd/tf/descriptor/se_t.py
index f96b1ba778..464839aeac 100644
--- a/deepmd/tf/descriptor/se_t.py
+++ b/deepmd/tf/descriptor/se_t.py
@@ -145,7 +145,7 @@ def __init__(
         self.ntypes = len(self.sel_a)
         assert self.ntypes == len(self.sel_r)
         self.rcut_a = -1
-        # numb of neighbors and numb of descrptors
+        # numb of neighbors and numb of descriptors
         self.nnei_a = np.cumsum(self.sel_a)[-1]
         self.nnei_r = np.cumsum(self.sel_r)[-1]
         self.nnei = self.nnei_a + self.nnei_r
@@ -332,7 +332,7 @@ def enable_compression(
         check_frequency: int = -1,
         suffix: str = "",
     ) -> None:
-        """Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the training data.
+        """Receive the statistics (distance, max_nbor_size and env_mat_range) of the training data.

         Parameters
         ----------
diff --git a/deepmd/tf/entrypoints/freeze.py b/deepmd/tf/entrypoints/freeze.py
index cee6615abc..2658f565a6 100755
--- a/deepmd/tf/entrypoints/freeze.py
+++ b/deepmd/tf/entrypoints/freeze.py
@@ -59,7 +59,7 @@ def _transfer_fitting_net_trainable_variables(sess, old_graph_def, raw_graph_def
             raw_graph_def,  # The graph_def is used to retrieve the nodes
             [
                 n + "_1" for n in old_graph_nodes
-            ],  # The output node names are used to select the usefull nodes
+            ],  # The output node names are used to select the useful nodes
         )
     except AssertionError:
         # if there's no additional nodes
@@ -275,7 +275,7 @@ def freeze_graph(
     output_graph_def = tf.graph_util.convert_variables_to_constants(
         sess,  # The session is used to retrieve the weights
         input_graph,  # The graph_def is used to retrieve the nodes
-        output_node,  # The output node names are used to select the usefull nodes
+        output_node,  # The output node names are used to select the useful nodes
     )

     # If we need to transfer the fitting net variables
@@ -334,7 +334,7 @@ def freeze(

     # We import the meta graph and retrieve a Saver
     try:
-        # In case paralle training
+        # In case parallel training
         import horovod.tensorflow as HVD
     except ImportError:
         pass
diff --git a/deepmd/tf/entrypoints/ipi.py b/deepmd/tf/entrypoints/ipi.py
index 1183375119..a08a2293a9 100644
--- a/deepmd/tf/entrypoints/ipi.py
+++ b/deepmd/tf/entrypoints/ipi.py
@@ -13,7 +13,7 @@


 def _program(name: str, args: list[str]):
-    """Execuate a program.
+    """Execute a program.

     Parameters
     ----------
diff --git a/deepmd/tf/entrypoints/main.py b/deepmd/tf/entrypoints/main.py
index d9dff4eb4a..b8bfdef6d8 100644
--- a/deepmd/tf/entrypoints/main.py
+++ b/deepmd/tf/entrypoints/main.py
@@ -60,7 +60,7 @@ def main(args: Optional[Union[list[str], argparse.Namespace]] = None):
         args = parse_args(args=args)

     # do not set log handles for None, it is useless
-    # log handles for train will be set separatelly
+    # log handles for train will be set separately
     # when the use of MPI will be determined in `RunOptions`
     if args.command not in (None, "train"):
         set_log_handles(args.log_level, Path(args.log_path) if args.log_path else None)
diff --git a/deepmd/tf/entrypoints/train.py b/deepmd/tf/entrypoints/train.py
index 66622b3182..3d965ea71c 100755
--- a/deepmd/tf/entrypoints/train.py
+++ b/deepmd/tf/entrypoints/train.py
@@ -114,7 +114,7 @@ def train(
         mpi_log=mpi_log,
     )
     if run_opt.is_distrib and len(run_opt.gpus or []) > 1:
-        # avoid conflict of visible gpus among multipe tf sessions in one process
+        # avoid conflict of visible gpus among multiple tf sessions in one process
         reset_default_tf_session_config(cpu_only=True)

     # load json database
diff --git a/deepmd/tf/entrypoints/transfer.py b/deepmd/tf/entrypoints/transfer.py
index b93caf3cac..52bf56c4fd 100644
--- a/deepmd/tf/entrypoints/transfer.py
+++ b/deepmd/tf/entrypoints/transfer.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Module used for transfering parameters between models."""
+"""Module used for transferring parameters between models."""

 import logging
 import re
@@ -43,7 +43,7 @@ def convert_matrix(
     shape : Sequence[int]
         shape to cast resulting array to
     dtype : Optional[type]
-        type that finall array will be cast to, If None no casting will take place
+        type that final array will be cast to, If None no casting will take place

     Returns
     -------
@@ -58,7 +58,7 @@ def convert_matrix(


 def transfer(*, old_model: str, raw_model: str, output: str, **kwargs):
-    """Transfer operation from old fron graph to new prepared raw graph.
+    """Transfer operation from old frozen graph to new prepared raw graph.

     Parameters
     ----------
@@ -67,7 +67,7 @@ def transfer(*, old_model: str, raw_model: str, output: str, **kwargs):
     raw_model : str
         new model that will accept ops from old model
     output : str
-        new model with transfered parameters will be saved to this location
+        new model with transferred parameters will be saved to this location
     **kwargs
         additional arguments
     """
@@ -104,7 +104,7 @@ def load_graph(graph_name: str) -> tf.Graph:


 def transform_graph(raw_graph: tf.Graph, old_graph: tf.Graph) -> tf.Graph:
-    """Trasform old graph into new.
+    """Transform old graph into new.

     Parameters
     ----------
@@ -116,7 +116,7 @@ def transform_graph(raw_graph: tf.Graph, old_graph: tf.Graph) -> tf.Graph:
     Returns
     -------
     tf.Graph
-        new graph with parameters transfered form the old one
+        new graph with parameters transferred from the old one
     """
     old_graph_def = old_graph.as_graph_def()
     raw_graph_def = raw_graph.as_graph_def()
diff --git a/deepmd/tf/env.py b/deepmd/tf/env.py
index 5a66498dba..16ad4735fd 100644
--- a/deepmd/tf/env.py
+++ b/deepmd/tf/env.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Module that sets tensorflow working environment and exports inportant constants."""
+"""Module that sets tensorflow working environment and exports important constants."""

 import ctypes
 import logging
@@ -92,7 +92,7 @@ def filter(self, record):
 # https://keras.io/getting_started/#tensorflow--keras-2-backwards-compatibility
 # 2024/04/24: deepmd.tf doesn't import tf.keras any more

-# import tensorflow v1 compatability
+# import tensorflow v1 compatibility
 import tensorflow.compat.v1 as tf

 tf.get_logger().addFilter(TFWarningFilter())
@@ -339,7 +339,7 @@ def get_module(module_name: str) -> "ModuleType":
         try:
             module = tf.load_op_library(str(module_file))
         except tf.errors.NotFoundError as e:
-            # check CXX11_ABI_FLAG is compatiblity
+            # check CXX11_ABI_FLAG is compatibility
             # see https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html
             # ABI should be the same
             if "CXX11_ABI_FLAG" in tf.__dict__:
@@ -377,7 +377,7 @@ def get_module(module_name: str) -> "ModuleType":
                     "instead."
                 ) from e
             error_message = (
-                "This deepmd-kit package is inconsitent with TensorFlow "
+                "This deepmd-kit package is inconsistent with TensorFlow "
                 f"Runtime, thus an error is raised when loading {module_name}. "
                 "You need to rebuild deepmd-kit against this TensorFlow "
                 "runtime."
diff --git a/deepmd/tf/fit/dipole.py b/deepmd/tf/fit/dipole.py
index 0e5b860fa2..fa8a5b680c 100644
--- a/deepmd/tf/fit/dipole.py
+++ b/deepmd/tf/fit/dipole.py
@@ -41,11 +41,11 @@ class DipoleFittingSeA(Fitting):
     Parameters
     ----------
     ntypes
-        The ntypes of the descrptor :math:`\mathcal{D}`
+        The ntypes of the descriptor :math:`\mathcal{D}`
     dim_descrpt
-        The dimension of the descrptor :math:`\mathcal{D}`
+        The dimension of the descriptor :math:`\mathcal{D}`
     embedding_width
-        The rotation matrix dimension of the descrptor :math:`\mathcal{D}`
+        The rotation matrix dimension of the descriptor :math:`\mathcal{D}`
     neuron : list[int]
         Number of neurons in each hidden layer of the fitting net
     resnet_dt : bool
@@ -320,7 +320,7 @@ def init_variables(
         )

     def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None:
-        """Reveive the mixed precision setting.
+        """Receive the mixed precision setting.

         Parameters
         ----------
diff --git a/deepmd/tf/fit/dos.py b/deepmd/tf/fit/dos.py
index ebc347c2fd..099cba0d12 100644
--- a/deepmd/tf/fit/dos.py
+++ b/deepmd/tf/fit/dos.py
@@ -62,9 +62,9 @@ class DOSFitting(Fitting):
     Parameters
     ----------
     ntypes
-        The ntypes of the descrptor :math:`\mathcal{D}`
+        The ntypes of the descriptor :math:`\mathcal{D}`
     dim_descrpt
-        The dimension of the descrptor :math:`\mathcal{D}`
+        The dimension of the descriptor :math:`\mathcal{D}`
     neuron
         Number of neurons :math:`N` in each hidden layer of the fitting net
     resnet_dt
@@ -187,7 +187,7 @@ def get_numb_dos(self) -> int:

     # not used
     def compute_output_stats(self, all_stat: dict, mixed_type: bool = False) -> None:
-        """Compute the ouput statistics.
+        """Compute the output statistics.
         Parameters
         ----------
@@ -628,7 +628,7 @@ def init_variables(
         pass

     def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None:
-        """Reveive the mixed precision setting.
+        """Receive the mixed precision setting.

         Parameters
         ----------
diff --git a/deepmd/tf/fit/ener.py b/deepmd/tf/fit/ener.py
index 330ea57179..1ba0fe3dfb 100644
--- a/deepmd/tf/fit/ener.py
+++ b/deepmd/tf/fit/ener.py
@@ -109,9 +109,9 @@ class EnerFitting(Fitting):
     Parameters
     ----------
     ntypes
-        The ntypes of the descrptor :math:`\mathcal{D}`
+        The ntypes of the descriptor :math:`\mathcal{D}`
     dim_descrpt
-        The dimension of the descrptor :math:`\mathcal{D}`
+        The dimension of the descriptor :math:`\mathcal{D}`
     neuron
         Number of neurons :math:`N` in each hidden layer of the fitting net
     resnet_dt
@@ -132,7 +132,7 @@ class EnerFitting(Fitting):
     seed
         Random seed for initializing the network parameters.
     atom_ener
-        Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set.
+        Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descriptor should be set.
     activation_function
         The activation function :math:`\boldsymbol{\phi}` in the embedding net. Supported options are |ACTIVATION_FN|
     precision
@@ -252,7 +252,7 @@ def get_numb_aparam(self) -> int:
         return self.numb_aparam

     def compute_output_stats(self, all_stat: dict, mixed_type: bool = False) -> None:
-        """Compute the ouput statistics.
+        """Compute the output statistics.

         Parameters
         ----------
@@ -828,7 +828,7 @@ def change_energy_bias(
         )

     def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None:
-        """Reveive the mixed precision setting.
+        """Receive the mixed precision setting.

         Parameters
         ----------
@@ -995,7 +995,7 @@ def change_energy_bias_lower(
     bias_adjust_mode : str
         The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic']
         'change-by-statistic' : perform predictions on energies of target dataset,
-        and do least sqaure on the errors to obtain the target shift as bias.
+        and do least square on the errors to obtain the target shift as bias.
         'set-by-statistic' : directly use the statistic energy bias in the target dataset.
     ntest : int
         The number of test samples in a system to change the energy bias.
diff --git a/deepmd/tf/fit/polar.py b/deepmd/tf/fit/polar.py
index cc79e3402a..b5a21012bd 100644
--- a/deepmd/tf/fit/polar.py
+++ b/deepmd/tf/fit/polar.py
@@ -46,11 +46,11 @@ class PolarFittingSeA(Fitting):
     Parameters
     ----------
     ntypes
-        The ntypes of the descrptor :math:`\mathcal{D}`
+        The ntypes of the descriptor :math:`\mathcal{D}`
     dim_descrpt
-        The dimension of the descrptor :math:`\mathcal{D}`
+        The dimension of the descriptor :math:`\mathcal{D}`
     embedding_width
-        The rotation matrix dimension of the descrptor :math:`\mathcal{D}`
+        The rotation matrix dimension of the descriptor :math:`\mathcal{D}`
     neuron : list[int]
         Number of neurons in each hidden layer of the fitting net
     resnet_dt : bool
@@ -221,7 +221,7 @@ def compute_output_stats(self, all_stat):
             else:  # No atomic polar in this system, so it should have global polar
                 if (
                     not all_stat["find_polarizability"][ss] > 0.0
-                ):  # This system is jsut a joke?
+                ):  # This system is just a joke?
                     continue
                 # Till here, we have global polar
                 sys_matrix.append(
@@ -526,7 +526,7 @@ def init_variables(
         )

     def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None:
-        """Reveive the mixed precision setting.
+        """Receive the mixed precision setting.

         Parameters
         ----------
@@ -618,7 +618,7 @@ class GlobalPolarFittingSeA:
     Parameters
     ----------
     descrpt : tf.Tensor
-        The descrptor
+        The descriptor
     neuron : list[int]
         Number of neurons in each hidden layer of the fitting net
     resnet_dt : bool
@@ -745,7 +745,7 @@ def init_variables(
         )

     def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None:
-        """Reveive the mixed precision setting.
+        """Receive the mixed precision setting.

         Parameters
         ----------
diff --git a/deepmd/tf/infer/deep_dipole.py b/deepmd/tf/infer/deep_dipole.py
index e10d09564d..b493af8552 100644
--- a/deepmd/tf/infer/deep_dipole.py
+++ b/deepmd/tf/infer/deep_dipole.py
@@ -39,7 +39,7 @@ class DeepDipoleOld(DeepTensor):
     --------
     For developers: `DeepTensor` initializer must be called at the end after
     `self.tensors` are modified because it uses the data in `self.tensors` dict.
-    Do not chanage the order!
+    Do not change the order!
     """

     def __init__(
diff --git a/deepmd/tf/infer/deep_eval.py b/deepmd/tf/infer/deep_eval.py
index 56df7f782f..9527cb2ae8 100644
--- a/deepmd/tf/infer/deep_eval.py
+++ b/deepmd/tf/infer/deep_eval.py
@@ -111,7 +111,7 @@ def __init__(
                 raise RuntimeError(
                     f"model in graph (version {self.model_version}) is incompatible"
                     f"with the model (version {MODEL_VERSION}) supported by the current code."
-                    "See https://deepmd.rtfd.io/compatability/ for details."
+                    "See https://deepmd.rtfd.io/compatibility/ for details."
                 )

         # set default to False, as subclasses may not support
@@ -190,7 +190,7 @@ def _init_tensors(self):
             "numb_dos": "fitting_attr/numb_dos:0",
             # model attrs
             "sel_type": "model_attr/sel_type:0",
-            # additonal inputs
+            # additional inputs
             "efield": "t_efield:0",
             "fparam": "t_fparam:0",
             "aparam": "t_aparam:0",
@@ -312,12 +312,12 @@ def sess(self) -> tf.Session:
         return tf.Session(graph=self.graph, config=default_tf_session_config)

     def _graph_compatable(self) -> bool:
-        """Check the model compatability.
+        """Check the model compatibility.

         Returns
         -------
         bool
-            If the model stored in the graph file is compatable with the current code
+            If the model stored in the graph file is compatible with the current code
         """
         model_version_major = int(self.model_version.split(".")[0])
         model_version_minor = int(self.model_version.split(".")[1])
@@ -781,7 +781,7 @@ def _prepare_feed_dict(
         aparam=None,
         efield=None,
     ):
-        # standarize the shape of inputs
+        # standardize the shape of inputs
         natoms, nframes = self._get_natoms_and_nframes(
             coords,
             atom_types,
@@ -1118,7 +1118,7 @@ def get_has_efield(self) -> bool:
         return self.has_efield

     def get_model_def_script(self) -> dict:
-        """Get model defination script."""
+        """Get model definition script."""
         t_script = self._get_tensor("train_attr/training_script:0")
         [script] = run_sess(self.sess, [t_script], feed_dict={})
         model_def_script = script.decode("utf-8")
@@ -1171,7 +1171,7 @@ def __init__(
                 raise RuntimeError(
                     f"model in graph (version {self.model_version}) is incompatible"
                     f"with the model (version {MODEL_VERSION}) supported by the current code."
-                    "See https://deepmd.rtfd.io/compatability/ for details."
+                    "See https://deepmd.rtfd.io/compatibility/ for details."
                 )

         # set default to False, as subclasses may not support
@@ -1224,12 +1224,12 @@ def sess(self) -> tf.Session:
         return tf.Session(graph=self.graph, config=default_tf_session_config)

     def _graph_compatable(self) -> bool:
-        """Check the model compatability.
+        """Check the model compatibility.

         Returns
         -------
         bool
-            If the model stored in the graph file is compatable with the current code
+            If the model stored in the graph file is compatible with the current code
         """
         model_version_major = int(self.model_version.split(".")[0])
         model_version_minor = int(self.model_version.split(".")[1])
diff --git a/deepmd/tf/infer/deep_tensor.py b/deepmd/tf/infer/deep_tensor.py
index a20bbfe513..a1edaaa409 100644
--- a/deepmd/tf/infer/deep_tensor.py
+++ b/deepmd/tf/infer/deep_tensor.py
@@ -186,7 +186,7 @@ def eval(
             If atomic == False then of size nframes x output_dim
             else of size nframes x natoms x output_dim
         """
-        # standarize the shape of inputs
+        # standardize the shape of inputs
         if mixed_type:
             natoms = atom_types[0].size
             atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms])
@@ -330,7 +330,7 @@ def eval_full(
         """
         assert self._support_gfv, "do not support eval_full with old tensor model"

-        # standarize the shape of inputs
+        # standardize the shape of inputs
         if mixed_type:
             natoms = atom_types[0].size
             atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms])
diff --git a/deepmd/tf/loss/ener.py b/deepmd/tf/loss/ener.py
index 337046836b..95cc8adafb 100644
--- a/deepmd/tf/loss/ener.py
+++ b/deepmd/tf/loss/ener.py
@@ -673,7 +673,7 @@ def print_on_training(
             error_ae_train,
         ) = train_out

-        # than test data, if tensorboard log writter is present, commpute summary
+        # than test data, if tensorboard log writer is present, compute summary
         # and write tensorboard logs
         if tb_writer:
             summary_merged_op = tf.summary.merge(
diff --git a/deepmd/tf/model/ener.py b/deepmd/tf/model/ener.py
index b21c920d9c..57aaa2acf4 100644
--- a/deepmd/tf/model/ener.py
+++ b/deepmd/tf/model/ener.py
@@ -56,7 +56,7 @@ class EnerModel(StandardModel):
     use_srtab
         The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly.
     smin_alpha
-        The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided.
+        The short-range tabulated interaction will be switched according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided.
     sw_rmin
         The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided.
     sw_rmin
@@ -516,7 +516,7 @@ def change_energy_bias(
         bias_adjust_mode : str
             The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic']
             'change-by-statistic' : perform predictions on energies of target dataset,
-            and do least sqaure on the errors to obtain the target shift as bias.
+            and do least square on the errors to obtain the target shift as bias.
             'set-by-statistic' : directly use the statistic energy bias in the target dataset.
         """
         self.fitting.change_energy_bias(
diff --git a/deepmd/tf/model/frozen.py b/deepmd/tf/model/frozen.py
index 05700dc64e..7501a5cbd1 100644
--- a/deepmd/tf/model/frozen.py
+++ b/deepmd/tf/model/frozen.py
@@ -250,7 +250,7 @@ def update_sel(
         Parameters
         ----------
         train_data : DeepmdDataSystem
-            data used to do neighbor statictics
+            data used to do neighbor statistics
         type_map : list[str], optional
             The name of each type of atoms
         local_jdata : dict
diff --git a/deepmd/tf/model/linear.py b/deepmd/tf/model/linear.py
index 4c75c2a1d5..7cf3c5194d 100644
--- a/deepmd/tf/model/linear.py
+++ b/deepmd/tf/model/linear.py
@@ -146,7 +146,7 @@ def update_sel(
         Parameters
         ----------
         train_data : DeepmdDataSystem
-            data used to do neighbor statictics
+            data used to do neighbor statistics
         type_map : list[str], optional
             The name of each type of atoms
         local_jdata : dict
diff --git a/deepmd/tf/model/model.py b/deepmd/tf/model/model.py
index 833f8364ae..03211d49d5 100644
--- a/deepmd/tf/model/model.py
+++ b/deepmd/tf/model/model.py
@@ -87,7 +87,7 @@ class Model(ABC, make_plugin_registry("model")):
     use_srtab
         The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly.
     smin_alpha
-        The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided.
+        The short-range tabulated interaction will be switched according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided.
     sw_rmin
         The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided.
     sw_rmin
@@ -411,7 +411,7 @@ def change_energy_bias(
         bias_adjust_mode : str
             The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic']
             'change-by-statistic' : perform predictions on energies of target dataset,
-            and do least sqaure on the errors to obtain the target shift as bias.
+            and do least square on the errors to obtain the target shift as bias.
             'set-by-statistic' : directly use the statistic energy bias in the target dataset.
        """
""" raise RuntimeError("Not supported") @@ -524,7 +524,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict @@ -766,7 +766,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/tf/model/pairtab.py b/deepmd/tf/model/pairtab.py index d54940fec6..80e68d7825 100644 --- a/deepmd/tf/model/pairtab.py +++ b/deepmd/tf/model/pairtab.py @@ -244,7 +244,7 @@ def get_fitting(self) -> Union[Fitting, dict]: def get_loss(self, loss: dict, lr) -> Optional[Union[Loss, dict]]: """Get the loss function(s).""" - # nothing nees to do + # nothing needs to do return def get_rcut(self) -> float: @@ -285,7 +285,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/tf/model/pairwise_dprc.py b/deepmd/tf/model/pairwise_dprc.py index c8a57d90b3..a0eaa1385f 100644 --- a/deepmd/tf/model/pairwise_dprc.py +++ b/deepmd/tf/model/pairwise_dprc.py @@ -421,7 +421,7 @@ def update_sel( Parameters ---------- train_data : DeepmdDataSystem - data used to do neighbor statictics + data used to do neighbor statistics type_map : list[str], optional The name of each type of atoms local_jdata : dict diff --git a/deepmd/tf/nvnmd/data/data.py b/deepmd/tf/nvnmd/data/data.py index 55e7c51bc7..e1fcaac9f2 100644 --- a/deepmd/tf/nvnmd/data/data.py +++ b/deepmd/tf/nvnmd/data/data.py @@ -118,7 +118,7 @@ "end": "", } -# change the configuration accordng to the max_nnei +# change the configuration according to the max_nnei jdata_config_v0_ni128 = jdata_config_v0.copy() jdata_config_v0_ni256 = jdata_config_v0.copy() jdata_config_v0_ni256["ctrl"] = { @@ -250,7 +250,7 @@ "end": "", } -# change the configuration accordng to the max_nnei +# change the configuration according to the max_nnei jdata_config_v1_ni128 = jdata_config_v1.copy() jdata_config_v1_ni256 = jdata_config_v1.copy() jdata_config_v1_ni256["ctrl"] = { diff --git a/deepmd/tf/nvnmd/entrypoints/mapt.py b/deepmd/tf/nvnmd/entrypoints/mapt.py index 8ee1967854..7a50ceae30 100644 --- a/deepmd/tf/nvnmd/entrypoints/mapt.py +++ b/deepmd/tf/nvnmd/entrypoints/mapt.py @@ -50,7 +50,7 @@ class MapTable: :math:`h_{ji} = \frac{s(r_{ji})}{r_{ji}}`, and :math:`\mathcal{G}_{ji}` is embedding matrix. 
- The mapping funciton can be define as: + The mapping function can be define as: | :math:`y = f(x) = y_{k} + (x - x_{k}) * dy_{k}` | :math:`y_{k} = f(x_{k})` @@ -436,7 +436,7 @@ def run_u2s(self): # N = NUM_MAPT N = 512 N2 = int(rc_max**2) - # N+1 ranther than N for calculating defference + # N+1 ranther than N for calculating difference keys = list(dic_ph.keys()) vals = list(dic_ph.values()) @@ -446,7 +446,7 @@ def run_u2s(self): u2 = N2 * np.reshape(np.arange(0, N * 16 + 1) / (N * 16), [-1, 1]) # pylint: disable=no-explicit-dtype res_lst2 = run_sess(sess, vals, feed_dict={dic_ph["u"]: u2}) - res_dic2 = dict(zip(keys, res_lst2)) # reference for commpare + res_dic2 = dict(zip(keys, res_lst2)) # reference for compare # change value for tt in range(ndim): diff --git a/deepmd/tf/nvnmd/utils/encode.py b/deepmd/tf/nvnmd/utils/encode.py index 21398fbf23..46209e5230 100644 --- a/deepmd/tf/nvnmd/utils/encode.py +++ b/deepmd/tf/nvnmd/utils/encode.py @@ -122,7 +122,7 @@ def check_dec(self, idec, nbit, signed=False, name=""): def extend_list(self, slbin, nfull): r"""Extend the list (slbin) to the length (nfull) - the attched element of list is 0. + the attached element of list is 0. such as, when diff --git a/deepmd/tf/nvnmd/utils/network.py b/deepmd/tf/nvnmd/utils/network.py index 76c80ed4e7..c0572a7fa7 100644 --- a/deepmd/tf/nvnmd/utils/network.py +++ b/deepmd/tf/nvnmd/utils/network.py @@ -240,7 +240,7 @@ def one_layer( x = op_module.quantize_nvnmd(inputs, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1) inputs = tf.ensure_shape(x, [None, shape[1]]) # wx - # normlize weight mode: 0 all | 1 column + # normalize weight mode: 0 all | 1 column norm_mode = 0 if final_layer else 1 wx = op_module.matmul_fitnet_nvnmd( inputs, w, NBIT_DATA_FL, NBIT_SHORT_FL, norm_mode diff --git a/deepmd/tf/op/__init__.py b/deepmd/tf/op/__init__.py index 421ef0b123..805dc148a7 100644 --- a/deepmd/tf/op/__init__.py +++ b/deepmd/tf/op/__init__.py @@ -18,7 +18,7 @@ def import_ops(): Notes ----- - Initialy this subdir is unpopulated. CMake will install all the op module python + Initially this subdir is unpopulated. CMake will install all the op module python files and shared libs. 
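[Editor's note: the MapTable docstring earlier in this hunk defines the piecewise-linear mapping y = f(x) = y_k + (x - x_k) * dy_k. A small illustrative sketch follows; a uniform grid is assumed here for simplicity, and none of these names come from the NVNMD code.]

```python
import numpy as np

def eval_map_table(x0, dx, yk, dyk, x):
    """Sketch: evaluate y = y_k + (x - x_k) * dy_k on a uniform grid
    x_k = x0 + k * dx, where k is the table bin containing x."""
    k = np.clip(np.floor((x - x0) / dx).astype(int), 0, len(yk) - 1)
    xk = x0 + k * dx
    return yk[k] + (x - xk) * dyk[k]
```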
""" for module_file in Path(__file__).parent.glob("*.py"): diff --git a/deepmd/tf/op/_dotmul_flt_nvnmd_grad.py b/deepmd/tf/op/_dotmul_flt_nvnmd_grad.py index 8a4ffb2d0c..b6aae52519 100644 --- a/deepmd/tf/op/_dotmul_flt_nvnmd_grad.py +++ b/deepmd/tf/op/_dotmul_flt_nvnmd_grad.py @@ -15,7 +15,7 @@ def _DotmulFltNvnmdGrad(op, grad): x = op.inputs[0] w = op.inputs[1] - # calcualte + # calculate dx = op_module.mul_flt_nvnmd(grad, w) dw = op_module.mul_flt_nvnmd(grad, x) # add shape for output of matmul_nvnmd diff --git a/deepmd/tf/op/_matmul_flt2fix_nvnmd.py b/deepmd/tf/op/_matmul_flt2fix_nvnmd.py index 319fb90ec8..3b802ec56a 100644 --- a/deepmd/tf/op/_matmul_flt2fix_nvnmd.py +++ b/deepmd/tf/op/_matmul_flt2fix_nvnmd.py @@ -22,7 +22,7 @@ def _MatmulFlt2fixNvnmdGrad(op, grad): else: x_T = tf.transpose(x) w_T = tf.transpose(w) - # calcualte + # calculate # dx = tf.matmul(grad, w_T) # dw = tf.matmul(x_T, grad) dx = op_module.matmul_flt_nvnmd(grad, w_T, 1, 1) diff --git a/deepmd/tf/op/_matmul_flt_nvnmd_grad.py b/deepmd/tf/op/_matmul_flt_nvnmd_grad.py index 6493794b00..94e0dc2d67 100644 --- a/deepmd/tf/op/_matmul_flt_nvnmd_grad.py +++ b/deepmd/tf/op/_matmul_flt_nvnmd_grad.py @@ -24,7 +24,7 @@ def _MatmulFltNvnmdGrad(op, grad): else: x_T = tf.transpose(x) w_T = tf.transpose(w) - # calcualte + # calculate modex = (normx >> 4) & 15 modew = (normw >> 4) & 15 if modex: diff --git a/deepmd/tf/train/run_options.py b/deepmd/tf/train/run_options.py index c36b42e194..c7f7b92674 100644 --- a/deepmd/tf/train/run_options.py +++ b/deepmd/tf/train/run_options.py @@ -82,7 +82,7 @@ class RunOptions: gpus: Optional[list[int]] list of GPUs if any are present else None is_chief: bool - in distribured training it is true for tha main MPI process in serail it is + in distribured training it is true for the main MPI process in serail it is always true world_size: int total worker count @@ -93,7 +93,7 @@ class RunOptions: node_list_ : list[str] the list of nodes of the current mpirun my_device: str - deviice type - gpu or cpu + device type - gpu or cpu """ gpus: Optional[list[int]] @@ -180,7 +180,7 @@ def _setup_logger( else: log.warning( f"Log handles have already been set. It is not advisable to " - f"reset them{', especially when runnig with MPI!' if self._HVD else ''}" + f"reset them{', especially when running with MPI!' if self._HVD else ''}" ) def _try_init_distrib(self): @@ -193,7 +193,7 @@ def _try_init_distrib(self): log.warning("Switch to serial execution due to lack of horovod module.") self.is_distrib = False - # Do real intialization + # Do real initialization if self.is_distrib: self._init_distributed(HVD) self._HVD = HVD diff --git a/deepmd/tf/train/trainer.py b/deepmd/tf/train/trainer.py index 9f353f2e32..58be9e8176 100644 --- a/deepmd/tf/train/trainer.py +++ b/deepmd/tf/train/trainer.py @@ -409,7 +409,7 @@ def train(self, train_data=None, valid_data=None): stop_batch = self.stop_batch self._init_session() - # Before data shard is enabled, only cheif do evaluation and record it + # Before data shard is enabled, only chief do evaluation and record it # self.print_head() fp = None if self.run_opt.is_chief: @@ -846,7 +846,7 @@ def _init_from_pretrained_model( bias_adjust_mode : str The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic'] 'change-by-statistic' : perform predictions on energies of target dataset, - and do least sqaure on the errors to obtain the target shift as bias. + and do least square on the errors to obtain the target shift as bias. 
'set-by-statistic' : directly use the statistic energy bias in the target dataset. """ try: @@ -940,7 +940,7 @@ def build(self) -> list[tf.Tensor]: def get_train_batch() -> list[np.ndarray]: batch_data = train_data.get_batch() - # convert dict to list of arryas + # convert dict to list of arrays batch_data = tuple([batch_data[kk] for kk in self.data_keys]) return batch_data diff --git a/deepmd/tf/utils/learning_rate.py b/deepmd/tf/utils/learning_rate.py index 519bf20bd0..fee73ca9a3 100644 --- a/deepmd/tf/utils/learning_rate.py +++ b/deepmd/tf/utils/learning_rate.py @@ -58,7 +58,7 @@ def build( Parameters ---------- global_step - The tf Tensor prividing the global training step + The tf Tensor providing the global training step stop_step The stop step. If provided, the decay_rate will be determined automatically and overwritten. diff --git a/deepmd/tf/utils/neighbor_stat.py b/deepmd/tf/utils/neighbor_stat.py index 4052c89821..37028b23bc 100644 --- a/deepmd/tf/utils/neighbor_stat.py +++ b/deepmd/tf/utils/neighbor_stat.py @@ -33,7 +33,7 @@ class NeighborStatOP: - """Class for getting neighbor statics data information. + """Class for getting neighbor statistics data information. Parameters ---------- diff --git a/deepmd/tf/utils/network.py b/deepmd/tf/utils/network.py index 7941b451af..c4a0646705 100644 --- a/deepmd/tf/utils/network.py +++ b/deepmd/tf/utils/network.py @@ -264,7 +264,7 @@ def embedding_net( stddev : float Standard deviation of initializing network parameters bavg : float - Mean of network intial bias + Mean of network initial bias seed : int Random seed for initializing network parameters trainable : boolean diff --git a/deepmd/tf/utils/nlist.py b/deepmd/tf/utils/nlist.py index 0f33ec883b..6e405e9adb 100644 --- a/deepmd/tf/utils/nlist.py +++ b/deepmd/tf/utils/nlist.py @@ -39,7 +39,7 @@ def extend_coord_with_ghosts( extended_atype: tf.Tensor extended atom type of shape [-1, nall]. index_mapping: tf.Tensor - maping extended index to the local index + mapping extended index to the local index """ # generated by GitHub Copilot, converted from PT codes diff --git a/deepmd/tf/utils/sess.py b/deepmd/tf/utils/sess.py index ca98980f89..3c179d6b96 100644 --- a/deepmd/tf/utils/sess.py +++ b/deepmd/tf/utils/sess.py @@ -10,7 +10,7 @@ def run_sess(sess: tf.Session, *args, **kwargs): - """Run session with erorrs caught. + """Run session with errors caught. Parameters ---------- diff --git a/deepmd/tf/utils/tabulate.py b/deepmd/tf/utils/tabulate.py index d68f5cadf7..588ebdd55e 100644 --- a/deepmd/tf/utils/tabulate.py +++ b/deepmd/tf/utils/tabulate.py @@ -97,7 +97,7 @@ def __init__( elif activation_fn == ACTIVATION_FN_DICT["sigmoid"]: self.functype = 6 else: - raise RuntimeError("Unknown actication function type!") + raise RuntimeError("Unknown activation function type!") self.activation_fn = activation_fn # self.sess = tf.Session(graph = self.graph) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index b3f3b26fd0..916e4de1b0 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -254,7 +254,7 @@ def descrpt_local_frame_args(): def descrpt_se_a_args(): doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. 
It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." @@ -322,7 +322,7 @@ def descrpt_se_a_args(): def descrpt_se_t_args(): doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." @@ -391,7 +391,7 @@ def descrpt_se_a_tpe_args(): def descrpt_se_r_args(): doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. 
In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." @@ -468,7 +468,7 @@ def descrpt_se_atten_common_args(): doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." @@ -565,7 +565,7 @@ def descrpt_se_atten_args(): "The input mode of the type embedding. Supported modes are ['concat', 'strip']." "- 'concat': Concatenate the type embedding with the smoothed radial information as the union input for the embedding network. " "When `type_one_side` is False, the input is `input_ij = concat([r_ij, tebd_j, tebd_i])`. When `type_one_side` is True, the input is `input_ij = concat([r_ij, tebd_j])`. " - "The output is `out_ij = embeding(input_ij)` for the pair-wise representation of atom i with neighbor j." + "The output is `out_ij = embedding(input_ij)` for the pair-wise representation of atom i with neighbor j." 
"- 'strip': Use a separated embedding network for the type embedding and combine the output with the radial embedding network output. " f"When `type_one_side` is False, the input is `input_t = concat([tebd_j, tebd_i])`. {doc_only_pt_supported} When `type_one_side` is True, the input is `input_t = tebd_j`. " "The output is `out_ij = embeding_t(input_t) * embeding_s(r_ij) + embeding_s(r_ij)` for the pair-wise representation of atom i with neighbor j." @@ -665,7 +665,7 @@ def descrpt_se_e3_tebd_args(): doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." @@ -687,7 +687,7 @@ def descrpt_se_e3_tebd_args(): "The input mode of the type embedding. Supported modes are ['concat', 'strip']." "- 'concat': Concatenate the type embedding with the smoothed angular information as the union input for the embedding network. " "The input is `input_jk = concat([angle_jk, tebd_j, tebd_k])`. " - "The output is `out_jk = embeding(input_jk)` for the three-body representation of atom i with neighbors j and k." + "The output is `out_jk = embedding(input_jk)` for the three-body representation of atom i with neighbors j and k." "- 'strip': Use a separated embedding network for the type embedding and combine the output with the angular embedding network output. " "The input is `input_t = concat([tebd_j, tebd_k])`." "The output is `out_jk = embeding_t(input_t) * embeding_s(angle_jk) + embeding_s(angle_jk)` for the three-body representation of atom i with neighbors j and k." @@ -952,7 +952,7 @@ def dpa2_repinit_args(): "The input mode of the type embedding. Supported modes are ['concat', 'strip']." "- 'concat': Concatenate the type embedding with the smoothed radial information as the union input for the embedding network. 
" "When `type_one_side` is False, the input is `input_ij = concat([r_ij, tebd_j, tebd_i])`. When `type_one_side` is True, the input is `input_ij = concat([r_ij, tebd_j])`. " - "The output is `out_ij = embeding(input_ij)` for the pair-wise representation of atom i with neighbor j." + "The output is `out_ij = embedding(input_ij)` for the pair-wise representation of atom i with neighbor j." "- 'strip': Use a separated embedding network for the type embedding and combine the output with the radial embedding network output. " f"When `type_one_side` is False, the input is `input_t = concat([tebd_j, tebd_i])`. {doc_only_pt_supported} When `type_one_side` is True, the input is `input_t = tebd_j`. " "The output is `out_ij = embeding_t(input_t) * embeding_s(r_ij) + embeding_s(r_ij)` for the pair-wise representation of atom i with neighbor j." @@ -1337,7 +1337,7 @@ def descrpt_se_a_ebd_v2_args(): def descrpt_se_a_mask_args(): doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." @@ -1398,7 +1398,7 @@ def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant: "se_atten_v2", "model[standard]/descriptor[se_atten_v2]" ) link_se_a_mask = make_link("se_a_mask", "model[standard]/descriptor[se_a_mask]") - doc_descrpt_type = f"The type of the descritpor. See explanation below. \n\n\ + doc_descrpt_type = f"The type of the descriptor. See explanation below. \n\n\ - {link_lf}: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.\n\n\ - {link_se_e2_a}: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.\n\n\ - {link_se_e2_r}: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.\n\n\ @@ -1431,7 +1431,7 @@ def fitting_ener(): doc_trainable = f"Whether the parameters in the fitting net are trainable. This option can be\n\n\ - bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\ - list of bool{doc_only_tf_supported}: Specifies if each layer is trainable. 
Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1." -   doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details." +   doc_rcond = "The condition number used to determine the initial energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details."     doc_seed = "Random seed for parameter initialization of the fitting net"     doc_atom_ener = "Specify the atomic energy in vacuum for each type"     doc_layer_name = ( @@ -1506,8 +1506,8 @@ def fitting_dos():     doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'     doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\ - bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\ -- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of tihs list should be equal to len(`neuron`)+1." -   doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details." +- list of bool: Specifies if each layer is trainable. Since the fitting net is composed of hidden layers followed by an output layer, the length of this list should be equal to len(`neuron`)+1." +   doc_rcond = "The condition number used to determine the initial energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details."     doc_seed = "Random seed for parameter initialization of the fitting net"     doc_numb_dos = (         "The number of gridpoints on which the DOS is evaluated (NEDOS in VASP)" @@ -1681,7 +1681,7 @@ def fitting_variant_type_args(): - `ener`: Fit an energy model (potential energy surface).\n\n\ - `dos` : Fit a density of states model. The total density of states / site-projected density of states labels should be provided by `dos.npy` or `atom_dos.npy` in each data system. The file has number of frames lines and number of energy grid columns (times number of atoms in `atom_dos.npy`). See `loss` parameter. \n\n\ - `dipole`: Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all the selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file either has number of frames lines and 3 times of number of selected atoms columns, or has number of frames lines and 3 columns. See `loss` parameter.\n\n\ -- `polar`: Fit an atomic polarizability model. Global polarizazbility labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file eith has number of frames lines and 9 times of number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter.\n\n" +- `polar`: Fit an atomic polarizability model. Global polarizability labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file either has number of frames lines and 9 times of number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter.\n\n"     return Variant(        "type", @@ -1765,7 +1765,7 @@ def model_args(exclude_hybrid=False):     doc_type_embedding = "The type embedding."     doc_modifier = "The modifier of model output."
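The `rcond` documentation above points at :py:meth:`numpy.linalg.lstsq`; as a sketch of the least-squares fit it alludes to, with made-up numbers (the variable names are illustrative, not the library's internals):

```python
import numpy as np

# per-frame atom counts for each type (nframes, ntypes) and total energies
natoms_per_type = np.array([[30.0, 2.0], [28.0, 4.0], [32.0, 0.0]])
total_energy = np.array([-310.5, -295.2, -320.1])

# solve natoms_per_type @ shift ~= total_energy; rcond truncates small
# singular values and thereby conditions the fitted per-type energy shift
shift, *_ = np.linalg.lstsq(natoms_per_type, total_energy, rcond=1e-3)
```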
doc_use_srtab = "The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly." - doc_smin_alpha = "The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided." + doc_smin_alpha = "The short-range tabulated interaction will be switched according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided." doc_sw_rmin = "The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided." doc_sw_rmax = "The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided." doc_srtab_add_bias = "Whether add energy bias from the statistics of the data to short-range tabulated atomic energy. It only takes effect when `use_srtab` is provided." @@ -1917,7 +1917,7 @@ def standard_model_args() -> Argument: doc=doc_fitting, ), ], - doc="Stardard model, which contains a descriptor and a fitting.", + doc="Standard model, which contains a descriptor and a fitting.", ) return ca @@ -1962,7 +1962,7 @@ def pairtab_model_args() -> Argument: doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' ca = Argument( "pairtab", dict, @@ -2053,7 +2053,7 @@ def learning_rate_variant_type_args(): def learning_rate_args(fold_subdoc: bool = False) -> Argument: doc_scale_by_worker = "When parallel training or batch size scaled, how to alter learning rate. Valid values are `linear`(default), `sqrt` or `none`." 
- doc_lr = "The definitio of learning rate" + doc_lr = "The definition of learning rate" return Argument( "learning_rate", dict, @@ -2328,10 +2328,10 @@ def loss_dos(): doc_start_pref_dos = start_pref("Density of State (DOS)") doc_limit_pref_dos = limit_pref("Density of State (DOS)") doc_start_pref_cdf = start_pref( - "Cumulative Distribution Function (cumulative intergral of DOS)" + "Cumulative Distribution Function (cumulative integral of DOS)" ) doc_limit_pref_cdf = limit_pref( - "Cumulative Distribution Function (cumulative intergral of DOS)" + "Cumulative Distribution Function (cumulative integral of DOS)" ) doc_start_pref_ados = start_pref("atomic DOS (site-projected DOS)") doc_limit_pref_ados = limit_pref("atomic DOS (site-projected DOS)") @@ -2486,7 +2486,7 @@ def training_data_args(): # ! added by Ziyao: new specification style for data doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\ - "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\ - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\ -- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.' +- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is divided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.' doc_sys_probs = ( "A list of float if specified. " "Should be of the same length as `systems`, " @@ -2551,7 +2551,7 @@ def validation_data_args(): # ! added by Ziyao: new specification style for dat doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\ - "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\ - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\ -- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.' +- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is divided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.' 
doc_sys_probs = ( "A list of float if specified. " "Should be of the same length as `systems`, " @@ -2664,7 +2664,7 @@ def training_args( "doing least square on the errors to add the target shift on the bias." ) doc_disp_training = "Displaying verbose information during training." - doc_time_training = "Timing durining training." + doc_time_training = "Timing during training." doc_profiling = "Export the profiling results to the Chrome JSON file for performance analysis, driven by the legacy TensorFlow profiling API or PyTorch Profiler. The output file will be saved to `profiling_file`." doc_profiling_file = "Output file for profiling." doc_enable_profiler = "Export the profiling results to the TensorBoard log for performance analysis, driven by TensorFlow Profiler (available in TensorFlow 2.3) or PyTorch Profiler. The log will be saved to `tensorboard_log_dir`." diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py index 0394993854..259fe93bdb 100644 --- a/deepmd/utils/batch_size.py +++ b/deepmd/utils/batch_size.py @@ -160,7 +160,7 @@ def execute_all( Parameters ---------- callable : Callable - The method should accept *args and **kwargs as input and return the similiar array. + The method should accept *args and **kwargs as input and return the similar array. total_size : int Total size natoms : int diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index 72e3d58660..7d58d65578 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -24,7 +24,7 @@ class DeepmdData: """Class for a data system. - It loads data from hard disk, and mantains the data as a `data_dict` + It loads data from hard disk, and maintains the data as a `data_dict` Parameters ---------- @@ -43,7 +43,7 @@ class DeepmdData: trn_all_set [DEPRECATED] Deprecated. Now all sets are trained and tested. sort_atoms : bool - Sort atoms by atom types. Required to enable when the data is directly feeded to + Sort atoms by atom types. Required to enable when the data is directly fed to descriptors except mixed types. """ @@ -196,7 +196,7 @@ def reduce(self, key_out: str, key_in: str): assert key_out not in self.data_dict, "output key should not have been added" assert ( self.data_dict[key_in]["repeat"] == 1 - ), "reduced proerties should not have been repeated" + ), "reduced properties should not have been repeated" self.data_dict[key_out] = { "ndof": self.data_dict[key_in]["ndof"], diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py index 2b5fb6e6db..03a399106f 100644 --- a/deepmd/utils/data_system.py +++ b/deepmd/utils/data_system.py @@ -91,12 +91,12 @@ def __init__( - "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems() - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : - the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, + the list of systems is divided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system. sort_atoms : bool - Sort atoms by atom types. Required to enable when the data is directly feeded to + Sort atoms by atom types. 
Required to enable when the data is directly fed to descriptors except mixed types.     """     # init data @@ -184,7 +184,7 @@ def __init__(         # ! altered by Marián Rynik         # test size         # now test size can be set as a percentage of systems data or test size -        # can be set for each system individualy in the same manner as batch +        # can be set for each system individually in the same manner as batch         # size. This enables one to use systems with diverse number of         # structures and different number of atoms.         self.test_size = test_size @@ -277,7 +277,7 @@ def add_dict(self, adict: dict[str, dict[str, Any]]) -> None:                 "repeat": repeat,             } -        For the explaination of the keys see `add` +        For the explanation of the keys see `add`         """         for kk in adict:             self.add( @@ -759,7 +759,7 @@ def process_systems(systems: Union[str, list[str]]) -> list[str]:             msg = "cannot find valid a data system"             log.fatal(msg)             raise OSError(msg, help_msg) -    # rougly check all items in systems are valid +    # roughly check all items in systems are valid     for ii in systems:         ii = DPPath(ii)         if not ii.is_dir(): diff --git a/deepmd/utils/econf_embd.py b/deepmd/utils/econf_embd.py index 99c7edf284..e33e07cee7 100644 --- a/deepmd/utils/econf_embd.py +++ b/deepmd/utils/econf_embd.py @@ -237,7 +237,7 @@ def make_econf_embedding(  def transform_to_spin_rep(res: dict[str, np.ndarray]) -> dict[str, np.ndarray]: -    """Tranform electron occupation of 0/1/2 to -1,-1/-1,1/1,1.""" +    """Transform electron occupation of 0/1/2 to -1,-1/-1,1/1,1."""     ret = {}     def transform(ii): diff --git a/deepmd/utils/out_stat.py b/deepmd/utils/out_stat.py index bc765645dc..4d0d788f8b 100644 --- a/deepmd/utils/out_stat.py +++ b/deepmd/utils/out_stat.py @@ -21,7 +21,7 @@ def compute_stats_from_redu(     """Compute the output statistics.     Given the reduced output value and the number of atoms for each atom, -    compute the least-squares solution as the atomic output bais and std. +    compute the least-squares solution as the atomic output bias and std.     Parameters    ---------- @@ -93,7 +93,7 @@ def compute_stats_from_atomic(     """Compute the output statistics.     Given the output value and the type of atoms, -    compute the atomic output bais and std. +    compute the atomic output bias and std.     Parameters    ---------- diff --git a/deepmd/utils/summary.py b/deepmd/utils/summary.py index e2118bf7e0..a35dd4db93 100644 --- a/deepmd/utils/summary.py +++ b/deepmd/utils/summary.py @@ -48,7 +48,7 @@ class SummaryPrinter(ABC):     BUILD: ClassVar = {         "installed to": "\n".join(deepmd.__path__),         "source": GLOBAL_CONFIG["git_summ"], -        "source brach": GLOBAL_CONFIG["git_branch"], +        "source branch": GLOBAL_CONFIG["git_branch"],         "source commit": GLOBAL_CONFIG["git_hash"],         "source commit at": GLOBAL_CONFIG["git_date"],         "use float prec": global_float_prec, diff --git a/deepmd/utils/weight_avg.py b/deepmd/utils/weight_avg.py index 7c75d18e68..8328be5fcf 100644 --- a/deepmd/utils/weight_avg.py +++ b/deepmd/utils/weight_avg.py @@ -7,7 +7,7 @@  def weighted_average(errors: list[dict[str, tuple[float, float]]]) -> dict: -    """Compute wighted average of prediction errors (MAE or RMSE) for model. +    """Compute weighted average of prediction errors (MAE or RMSE) for model.     Parameters    ---------- diff --git a/doc/README b/doc/README index 2f4ce66792..728481df15 100644 --- a/doc/README +++ b/doc/README @@ -1 +1 @@ -To run the HTML documention build, doxygen have to be installed. +To run the HTML documentation build, doxygen has to be installed.
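The `transform_to_spin_rep` docstring corrected above maps occupations 0/1/2 to spin pairs; here is a toy re-implementation of just that mapping, inferred from the docstring rather than copied from the original function:

```python
# occupation -> spin-channel pair, exactly as the docstring states:
# 0 -> -1,-1    1 -> -1,1    2 -> 1,1
SPIN_REP = {0: (-1, -1), 1: (-1, 1), 2: (1, 1)}

def to_spin_rep(occupations):
    out = []
    for occ in occupations:
        out.extend(SPIN_REP[occ])
    return out

# to_spin_rep([2, 1, 0]) -> [1, 1, -1, 1, -1, -1]
```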
diff --git a/doc/development/coding-conventions.rst b/doc/development/coding-conventions.rst index bf186d1231..4f82b34a60 100644 --- a/doc/development/coding-conventions.rst +++ b/doc/development/coding-conventions.rst @@ -72,7 +72,7 @@ Conventions`_ and `Typing Conventions`_ PEPs, clarified and extended as follows: f"something {'this' if x else 'that'}" -* Use f-strings ``s = f"{x:.2f}"`` instead of old style formating with ``"%f" % x``. +* Use f-strings ``s = f"{x:.2f}"`` instead of old style formatting with ``"%f" % x``. string format method ``"{x:.2f}".format()`` may be used sparsely where it is more convenient than f-strings. diff --git a/doc/development/create-a-model-pt.md b/doc/development/create-a-model-pt.md index 257dd8a25d..875067e2b8 100644 --- a/doc/development/create-a-model-pt.md +++ b/doc/development/create-a-model-pt.md @@ -6,7 +6,7 @@ In the following context, we use the PyTorch backend as the example, while it also applies to other backends listed above. ::: -If you'd like to create a new model that isn't covered by the existing DeePMD-kit library, but reuse DeePMD-kit's other efficient modules such as data processing, trainner, etc, you may want to read this section. +If you'd like to create a new model that isn't covered by the existing DeePMD-kit library, but reuse DeePMD-kit's other efficient modules such as data processing, trainer, etc, you may want to read this section. To incorporate your custom model you'll need to: diff --git a/doc/development/create-a-model-tf.md b/doc/development/create-a-model-tf.md index 95a2f66f23..cc7ad1999d 100644 --- a/doc/development/create-a-model-tf.md +++ b/doc/development/create-a-model-tf.md @@ -1,6 +1,6 @@ # Create a model in TensorFlow {{ tensorflow_icon }} -If you'd like to create a new model that isn't covered by the existing DeePMD-kit library, but reuse DeePMD-kit's other efficient modules such as data processing, trainner, etc, you may want to read this section. +If you'd like to create a new model that isn't covered by the existing DeePMD-kit library, but reuse DeePMD-kit's other efficient modules such as data processing, trainer, etc, you may want to read this section. To incorporate your custom model you'll need to: diff --git a/doc/getting-started/quick_start.ipynb b/doc/getting-started/quick_start.ipynb index 0c9563b9e9..1ddb6f5fce 100644 --- a/doc/getting-started/quick_start.ipynb +++ b/doc/getting-started/quick_start.ipynb @@ -454,7 +454,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Checke dargs version and Install\n", + "# Check dargs version and Install\n", "!pip show dargs || pip install --upgrade dargs" ] }, @@ -523,7 +523,7 @@ " color: #bbbbff;\n", "}\n", "\n", - "
{
  \"_comment\": \"that's all\",
  \"model\"model:
type: dict
: {
    \"type_map\"type_map:
type: typing.list[str], optional
A list of strings. Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment. If not given, type.raw in each system should use the same type indexes, and type_map.raw will take no effect.
: [
     \"H\",
     \"C\"
    ],

    \"descriptor\"descriptor:
type: dict
The descriptor of atomic environment.
: {
      \"type\"type:
type: str
The type of the descritpor. See explanation below.
- loc_frame: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.
- se_e2_a: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.
- se_e2_r: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.
- se_e3: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.
- se_a_tpe: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.
- se_atten: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.
- se_atten_v2: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.
- se_a_mask: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). aparam are required as an indicator matrix for the real/virtual sign of input atoms.
- hybrid: Concatenate of a list of descriptors as a new descriptor.
: \"se_e2_a\",
      \"sel\"sel:
type: str | typing.list[int], optional, default: auto
This parameter set the number of selected neighbors for each type of atom. It can be:
- list[int]. The length of the list should be the same as the number of atom types in the system. sel[i] gives the selected number of type-i neighbors. sel[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.
- str. Can be \"auto:factor\" or \"auto\". \"factor\" is a float number larger than 1. This option will automatically determine the sel. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the \"factor\". Finally the number is wraped up to 4 divisible. The option \"auto\" is equivalent to \"auto:1.1\".
: \"auto\",
      \"rcut_smth\"rcut_smth:
type: float, optional, default: 0.5
Where to start smoothing. For example the 1/r term is smoothed from rcut to rcut_smth
: 0.5,
      \"rcut\"rcut:
type: float, optional, default: 6.0
The cut-off radius.
: 6.0,
      \"neuron\"neuron:
type: typing.list[int], optional, default: [10, 20, 40]
Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
: [
       25,
       50,
       100
      ],

      \"resnet_dt\"resnet_dt:
type: bool, optional, default: False
Whether to use a \"Timestep\" in the skip connection
: false,
      \"axis_neuron\"axis_neuron:
type: int, optional, default: 4, alias: n_axis_neuron
Size of the submatrix of G (embedding matrix).
: 16,
      \"seed\"seed:
type: NoneType | int, optional
Random seed for parameter initialization
: 1,
      \"_comment\": \" that's all\"
    },
    \"fitting_net\"fitting_net:
type: dict
The fitting of physical properties.
: {
      \"neuron\"neuron:
type: typing.list[int], optional, default: [120, 120, 120], alias: n_neuron
The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.
: [
       240,
       240,
       240
      ],

      \"resnet_dt\"resnet_dt:
type: bool, optional, default: True
Whether to use a \"Timestep\" in the skip connection
: true,
      \"seed\"seed:
type: NoneType | int, optional
Random seed for parameter initialization of the fitting net
: 1,
      \"_comment\": \" that's all\"
    },
    \"_comment\": \" that's all\"
  },
  \"learning_rate\"learning_rate:
type: dict, optional
The definitio of learning rate
: {
    \"type\"type:
type: str, default: exp
The type of the learning rate.
: \"exp\",
    \"decay_steps\"decay_steps:
type: int, optional, default: 5000
The learning rate is decaying every this number of training steps.
: 50,
    \"start_lr\"start_lr:
type: float, optional, default: 0.001
The learning rate at the start of the training.
: 0.001,
    \"stop_lr\"stop_lr:
type: float, optional, default: 1e-08
The desired learning rate at the end of the training.
: 3.51e-08,
    \"_comment\": \"that's all\"
  },
  \"loss\"loss:
type: dict, optional
The definition of loss function. The loss type should be set to tensor, ener or left unset.
: {
    \"type\"type:
type: str, default: ener
The type of the loss. When the fitting type is ener, the loss type should be set to ener or left unset. When the fitting type is dipole or polar, the loss type should be set to tensor.
: \"ener\",
    \"start_pref_e\"start_pref_e:
type: float | int, optional, default: 0.02
The prefactor of energy loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the energy label should be provided by file energy.npy in each data system. If both start_pref_e and limit_pref_e are set to 0, then the energy will be ignored.
: 0.02,
    \"limit_pref_e\"limit_pref_e:
type: float | int, optional, default: 1.0
The prefactor of energy loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.
: 1,
    \"start_pref_f\"start_pref_f:
type: float | int, optional, default: 1000
The prefactor of force loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the force label should be provided by file force.npy in each data system. If both start_pref_f and limit_pref_f are set to 0, then the force will be ignored.
: 1000,
    \"limit_pref_f\"limit_pref_f:
type: float | int, optional, default: 1.0
The prefactor of force loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.
: 1,
    \"start_pref_v\"start_pref_v:
type: float | int, optional, default: 0.0
The prefactor of virial loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the virial label should be provided by file virial.npy in each data system. If both start_pref_v and limit_pref_v are set to 0, then the virial will be ignored.
: 0,
    \"limit_pref_v\"limit_pref_v:
type: float | int, optional, default: 0.0
The prefactor of virial loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.
: 0,
    \"_comment\": \" that's all\"
  },
  \"training\"training:
type: dict
The training options.
: {
    \"training_data\"training_data:
type: dict, optional
Configurations of training data.
: {
      \"systems\"systems:
type: str | typing.list[str]
The data systems for training. This key can be provided with a list that specifies the systems, or be provided with a string by which the prefix of all systems are given and the list of the systems is automatically generated.
: [
       \"../00.data/training_data\"
      ],

      \"batch_size\"batch_size:
type: str | typing.list[int] | int, optional, default: auto
This key can be
- list: the length of which is the same as the systems _. The batch size of each system is given by the elements of the list.
- int: all systems _ use the same batch size.
- string \"auto\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.
- string \"auto:N\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
- string \"mixed:N\": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. Only support the se_atten descriptor.
If MPI is used, the value should be considered as the batch size per task.
: \"auto\",
      \"_comment\": \"that's all\"
    },
    \"validation_data\"validation_data:
type: NoneType | dict, optional, default: None
Configurations of validation data. Similar to that of training data, except that a numb_btch argument may be configured
: {
      \"systems\"systems:
type: str | typing.list[str]
The data systems for validation. This key can be provided with a list that specifies the systems, or be provided with a string by which the prefix of all systems are given and the list of the systems is automatically generated.
: [
       \"../00.data/validation_data\"
      ],

      \"batch_size\"batch_size:
type: str | typing.list[int] | int, optional, default: auto
This key can be
- list: the length of which is the same as the systems _. The batch size of each system is given by the elements of the list.
- int: all systems _ use the same batch size.
- string \"auto\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.
- string \"auto:N\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
: \"auto\",
      \"numb_btch\"numb_btch:
type: int, optional, default: 1, alias: numb_batch
An integer that specifies the number of batches to be sampled for each validation period.
: 1,
      \"_comment\": \"that's all\"
    },
    \"numb_steps\"numb_steps:
type: int, alias: stop_batch
Number of training batch. Each training uses one batch of data.
: 10000,
    \"seed\"seed:
type: NoneType | int, optional
The random seed for getting frames from the training data set.
: 10,
    \"disp_file\"disp_file:
type: str, optional, default: lcurve.out
The file for printing learning curve.
: \"lcurve.out\",
    \"disp_freq\"disp_freq:
type: int, optional, default: 1000
The frequency of printing learning curve.
: 200,
    \"save_freq\"save_freq:
type: int, optional, default: 1000
The frequency of saving check point.
: 1000,
    \"_comment\": \"that's all\"
  }
}
" + "
{
  \"_comment\": \"that's all\",
  \"model\"model:
type: dict
: {
    \"type_map\"type_map:
type: typing.list[str], optional
A list of strings. Give the name to each type of atoms. It is noted that the number of atom types of the training system must be less than 128 in a GPU environment. If not given, type.raw in each system should use the same type indexes, and type_map.raw will take no effect.
: [
     \"H\",
     \"C\"
    ],

    \"descriptor\"descriptor:
type: dict
The descriptor of atomic environment.
: {
      \"type\"type:
type: str
The type of the descriptor. See explanation below.
- loc_frame: Defines a local frame at each atom, and then computes the descriptor as local coordinates under this frame.
- se_e2_a: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.
- se_e2_r: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.
- se_e3: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.
- se_a_tpe: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.
- se_atten: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.
- se_atten_v2: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.
- se_a_mask: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). aparam are required as an indicator matrix for the real/virtual sign of input atoms.
- hybrid: Concatenate a list of descriptors into a new descriptor.
: \"se_e2_a\",
      \"sel\"sel:
type: str | typing.list[int], optional, default: auto
This parameter sets the number of selected neighbors for each type of atom. It can be:
- list[int]. The length of the list should be the same as the number of atom types in the system. sel[i] gives the selected number of type-i neighbors. sel[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.
- str. Can be \"auto:factor\" or \"auto\". \"factor\" is a float number larger than 1. This option will automatically determine the sel. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the \"factor\". Finally the number is wraped up to 4 divisible. The option \"auto\" is equivalent to \"auto:1.1\".
: \"auto\",
      \"rcut_smth\"rcut_smth:
type: float, optional, default: 0.5
Where to start smoothing. For example the 1/r term is smoothed from rcut to rcut_smth
: 0.5,
      \"rcut\"rcut:
type: float, optional, default: 6.0
The cut-off radius.
: 6.0,
      \"neuron\"neuron:
type: typing.list[int], optional, default: [10, 20, 40]
Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
: [
       25,
       50,
       100
      ],

      \"resnet_dt\"resnet_dt:
type: bool, optional, default: False
Whether to use a \"Timestep\" in the skip connection
: false,
      \"axis_neuron\"axis_neuron:
type: int, optional, default: 4, alias: n_axis_neuron
Size of the submatrix of G (embedding matrix).
: 16,
      \"seed\"seed:
type: NoneType | int, optional
Random seed for parameter initialization
: 1,
      \"_comment\": \" that's all\"
    },
    \"fitting_net\"fitting_net:
type: dict
The fitting of physical properties.
: {
      \"neuron\"neuron:
type: typing.list[int], optional, default: [120, 120, 120], alias: n_neuron
The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.
: [
       240,
       240,
       240
      ],

      \"resnet_dt\"resnet_dt:
type: bool, optional, default: True
Whether to use a \"Timestep\" in the skip connection
: true,
      \"seed\"seed:
type: NoneType | int, optional
Random seed for parameter initialization of the fitting net
: 1,
      \"_comment\": \" that's all\"
    },
    \"_comment\": \" that's all\"
  },
  \"learning_rate\"learning_rate:
type: dict, optional
The definition of learning rate
: {
    \"type\"type:
type: str, default: exp
The type of the learning rate.
: \"exp\",
    \"decay_steps\"decay_steps:
type: int, optional, default: 5000
The learning rate is decaying every this number of training steps.
: 50,
    \"start_lr\"start_lr:
type: float, optional, default: 0.001
The learning rate at the start of the training.
: 0.001,
    \"stop_lr\"stop_lr:
type: float, optional, default: 1e-08
The desired learning rate at the end of the training.
: 3.51e-08,
    \"_comment\": \"that's all\"
  },
  \"loss\"loss:
type: dict, optional
The definition of loss function. The loss type should be set to tensor, ener or left unset.
: {
    \"type\"type:
type: str, default: ener
The type of the loss. When the fitting type is ener, the loss type should be set to ener or left unset. When the fitting type is dipole or polar, the loss type should be set to tensor.
: \"ener\",
    \"start_pref_e\"start_pref_e:
type: float | int, optional, default: 0.02
The prefactor of energy loss at the start of the training. Should be larger than or equal to 0. If set to a non-zero value, the energy label should be provided by file energy.npy in each data system. If both start_pref_e and limit_pref_e are set to 0, then the energy will be ignored.
: 0.02,
    \"limit_pref_e\"limit_pref_e:
type: float | int, optional, default: 1.0
The prefactor of energy loss at the limit of the training, i.e. as the training step goes to infinity. Should be larger than or equal to 0.
: 1,
    \"start_pref_f\"start_pref_f:
type: float | int, optional, default: 1000
The prefactor of force loss at the start of the training. Should be larger than or equal to 0. If set to a non-zero value, the force label should be provided by file force.npy in each data system. If both start_pref_f and limit_pref_f are set to 0, then the force will be ignored.
: 1000,
    \"limit_pref_f\"limit_pref_f:
type: float | int, optional, default: 1.0
The prefactor of force loss at the limit of the training, i.e. as the training step goes to infinity. Should be larger than or equal to 0.
: 1,
    \"start_pref_v\"start_pref_v:
type: float | int, optional, default: 0.0
The prefactor of virial loss at the start of the training. Should be larger than or equal to 0. If set to a non-zero value, the virial label should be provided by file virial.npy in each data system. If both start_pref_v and limit_pref_v are set to 0, then the virial will be ignored.
: 0,
    \"limit_pref_v\"limit_pref_v:
type: float | int, optional, default: 0.0
The prefactor of virial loss at the limit of the training, i.e. as the training step goes to infinity. Should be larger than or equal to 0.
: 0,
    \"_comment\": \" that's all\"
  },
  \"training\"training:
type: dict
The training options.
: {
    \"training_data\"training_data:
type: dict, optional
Configurations of training data.
: {
      \"systems\"systems:
type: str | typing.list[str]
The data systems for training. This key can be provided with a list that specifies the systems, or be provided with a string by which the prefix of all systems are given and the list of the systems is automatically generated.
: [
       \"../00.data/training_data\"
      ],

      \"batch_size\"batch_size:
type: str | typing.list[int] | int, optional, default: auto
This key can be
- list: the length of which is the same as the number of systems. The batch size of each system is given by the elements of the list.
- int: all systems use the same batch size.
- string \"auto\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.
- string \"auto:N\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
- string \"mixed:N\": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. Only support the se_atten descriptor.
If MPI is used, the value should be considered as the batch size per task.
: \"auto\",
      \"_comment\": \"that's all\"
    },
    \"validation_data\"validation_data:
type: NoneType | dict, optional, default: None
Configurations of validation data. Similar to that of training data, except that a numb_btch argument may be configured
: {
      \"systems\"systems:
type: str | typing.list[str]
The data systems for validation. This key can be provided with a list that specifies the systems, or be provided with a string by which the prefix of all systems are given and the list of the systems is automatically generated.
: [
       \"../00.data/validation_data\"
      ],

      \"batch_size\"batch_size:
type: str | typing.list[int] | int, optional, default: auto
This key can be
- list: the length of which is the same as the number of systems. The batch size of each system is given by the elements of the list.
- int: all systems use the same batch size.
- string \"auto\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.
- string \"auto:N\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
: \"auto\",
      \"numb_btch\"numb_btch:
type: int, optional, default: 1, alias: numb_batch
An integer that specifies the number of batches to be sampled for each validation period.
: 1,
      \"_comment\": \"that's all\"
    },
    \"numb_steps\"numb_steps:
type: int, alias: stop_batch
Number of training batches. Each training step uses one batch of data.
: 10000,
    \"seed\"seed:
type: NoneType | int, optional
The random seed for getting frames from the training data set.
: 10,
    \"disp_file\"disp_file:
type: str, optional, default: lcurve.out
The file for printing learning curve.
: \"lcurve.out\",
    \"disp_freq\"disp_freq:
type: int, optional, default: 1000
The frequency of printing learning curve.
: 200,
    \"save_freq\"save_freq:
type: int, optional, default: 1000
The frequency of saving checkpoints.
: 1000,
    \"_comment\": \"that's all\"
  }
}
" ], "text/plain": [ "" @@ -682,7 +682,7 @@ "DEEPMD INFO See https://deepmd.rtfd.io/credits/ for details.\n", "DEEPMD INFO installed to: /root/miniconda3/envs/deepmd\n", "DEEPMD INFO source : v2.2.7\n", - "DEEPMD INFO source brach: HEAD\n", + "DEEPMD INFO source branch: HEAD\n", "DEEPMD INFO source commit: 839f4fe7\n", "DEEPMD INFO source commit at: 2023-10-27 21:10:24 +0800\n", "DEEPMD INFO build float prec: double\n", @@ -1050,7 +1050,7 @@ "DEEPMD INFO See https://deepmd.rtfd.io/credits/ for details.\n", "DEEPMD INFO installed to: /root/miniconda3/envs/deepmd\n", "DEEPMD INFO source : v2.2.7\n", - "DEEPMD INFO source brach: HEAD\n", + "DEEPMD INFO source branch: HEAD\n", "DEEPMD INFO source commit: 839f4fe7\n", "DEEPMD INFO source commit at: 2023-10-27 21:10:24 +0800\n", "DEEPMD INFO build float prec: double\n", diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md index 3f65375865..07239cd3b7 100644 --- a/doc/install/install-from-source.md +++ b/doc/install/install-from-source.md @@ -319,7 +319,7 @@ pip install -U cmake You must enable at least one backend. If you enable two or more backends, these backend libraries must be built in a compatible way, e.g. using the same `_GLIBCXX_USE_CXX11_ABI` flag. -We recommend using [conda pacakges](https://docs.deepmodeling.org/faq/conda.html) from [conda-forge](https://conda-forge.org), which are usually compatible to each other. +We recommend using [conda packages](https://docs.deepmodeling.org/faq/conda.html) from [conda-forge](https://conda-forge.org), which are usually compatible to each other. ::::{tab-set} @@ -427,7 +427,7 @@ See also [ROCm documentation](https://rocm.docs.amd.com/en/latest/conceptual/cma **Type**: `PATH` -Only neccessary for using [LAMMPS plugin mode](./install-lammps.md#install-lammps-plugin-mode). +Only necessary for using [LAMMPS plugin mode](./install-lammps.md#install-lammps-plugin-mode). The path to the [LAMMPS source code](install-lammps.md). LAMMPS 8Apr2021 or later is supported. If not assigned, the plugin mode will not be enabled. diff --git a/doc/install/install-tf.2.12.md b/doc/install/install-tf.2.12.md index 8523345d3d..ab6a9ed00a 100644 --- a/doc/install/install-tf.2.12.md +++ b/doc/install/install-tf.2.12.md @@ -2,7 +2,7 @@ TensorFlow's C++ interface will be compiled from the source code. In this manual, we install TensorFlow 2.12.0. It is noted that the source code of TensorFlow 2.12.0 uses C++ 17, so one needs a C++ compiler that supports C++ 17. -Firstly one installs Bazel. [bazelisk](https://github.com/bazelbuild/bazelisk) can be lanuched to use [bazel](https://github.com/bazelbuild/bazel). +Firstly one installs Bazel. [bazelisk](https://github.com/bazelbuild/bazelisk) can be launched to use [bazel](https://github.com/bazelbuild/bazel). ```bash wget https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-linux-amd64 -O /some/workspace/bazel/bin/bazel diff --git a/doc/install/install-tf.2.8.md b/doc/install/install-tf.2.8.md index 4145ba01d1..5e9057492b 100644 --- a/doc/install/install-tf.2.8.md +++ b/doc/install/install-tf.2.8.md @@ -1,6 +1,6 @@ # Install TensorFlow's C++ interface -TensorFlow's C++ interface will be compiled from the source code. Firstly one installs Bazel. [bazelisk](https://github.com/bazelbuild/bazelisk) can be lanuched to use [bazel](https://github.com/bazelbuild/bazel). +TensorFlow's C++ interface will be compiled from the source code. Firstly one installs Bazel. 
[bazelisk](https://github.com/bazelbuild/bazelisk) can be launched to use [bazel](https://github.com/bazelbuild/bazel). ```bash wget https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-linux-amd64 -O /some/workspace/bazel/bin/bazel diff --git a/doc/model/dprc.md b/doc/model/dprc.md index d9ce24b600..9f3eee244d 100644 --- a/doc/model/dprc.md +++ b/doc/model/dprc.md @@ -66,7 +66,7 @@ In a DPRc model, QM atoms and MM atoms have different atom types. Assuming we ha "type_map": ["C", "H", "HW", "O", "OW", "P"] ``` -As described in the paper, the DPRc model only corrects $E_\text{QM}$ and $E_\text{QM/MM}$ within the cutoff, so we use a hybrid descriptor to describe them separatedly: +As described in the paper, the DPRc model only corrects $E_\text{QM}$ and $E_\text{QM/MM}$ within the cutoff, so we use a hybrid descriptor to describe them separately: ::::{tab-set} diff --git a/doc/model/train-energy-spin.md b/doc/model/train-energy-spin.md index ec169892f2..eda4ffa835 100644 --- a/doc/model/train-energy-spin.md +++ b/doc/model/train-energy-spin.md @@ -145,7 +145,7 @@ We list the details about spin system data format in TensorFlow backend: ### Spin data format in PyTorch/DP -In the PyTorch backend, spin and magnetic forces are listed in seperate files, and the data format may contain the following files: +In the PyTorch backend, spin and magnetic forces are listed in separate files, and the data format may contain the following files: ``` type.raw diff --git a/doc/model/train-se-a-mask.md b/doc/model/train-se-a-mask.md index 69f344b138..93edfc999e 100644 --- a/doc/model/train-se-a-mask.md +++ b/doc/model/train-se-a-mask.md @@ -64,7 +64,7 @@ To make the `aparam.npy` used for descriptor `se_a_mask`, two variables in `fitt ``` - `neuron`, `resnet_dt` and `seed` are the same as the {ref}`fitting_net ` section for fitting energy. -- {ref}`numb_aparam ` gives the dimesion of the `aparam.npy` file. In this example, it is set to 1 and stores the real/virtual sign of the atoms. For real/virtual atoms, the corresponding sign in `aparam.npy` is set to 1/0. +- {ref}`numb_aparam ` gives the dimension of the `aparam.npy` file. In this example, it is set to 1 and stores the real/virtual sign of the atoms. For real/virtual atoms, the corresponding sign in `aparam.npy` is set to 1/0. - {ref}`use_aparam_as_mask ` is set to `true` to use the `aparam.npy` as the mask of the atoms in the descriptor `se_a_mask`. Finally, to make a reasonable fitting task with `se_a_mask` descriptor for DP/MM simulations, the loss function with `se_a_mask` is designed to include the atomic forces difference in specific atoms of the input particles only. 
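As a concrete companion to the `se_a_mask` recipe above, a minimal sketch of writing an `aparam.npy` real/virtual mask with `numb_aparam` set to 1; the frame and atom counts here are made up for illustration:

```python
import numpy as np

nframes, natoms, n_real = 5, 64, 48
# one aparam column per atom: 1 marks a real atom, 0 a virtual one
aparam = np.zeros((nframes, natoms), dtype=np.float64)
aparam[:, :n_real] = 1.0  # assume the first n_real atoms are real in every frame
np.save("aparam.npy", aparam)
```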
diff --git a/doc/nvnmd/nvnmd.md b/doc/nvnmd/nvnmd.md index c415b275ec..279236ec96 100644 --- a/doc/nvnmd/nvnmd.md +++ b/doc/nvnmd/nvnmd.md @@ -78,7 +78,7 @@ where items are defined as:  | Item | Mean | Optional Value | | --------- | --------------------------------------------------------------------- | ---------------------------------------------------------------------------- | | version | the version of network structure | 0 or 1 | | max_nnei | the maximum number of neighbors that do not distinguish element types | 128 or 256 | -| net_size | the size of nueral network | 128 | +| net_size | the size of neural network | 128 | | sel | the number of neighbors | version 0: integer list of lengths 1 to 4 are acceptable; version 1: integer | | rcut | the cutoff radial | (0, 8.0] | | rcut_smth | the smooth cutoff parameter | (0, 8.0] | @@ -162,7 +162,7 @@ where items are defined as:  | Item | Mean | Optional Value | | ---------- | --------------------------------------------------- | ------------------ | -| seed | the randome seed | a integer | +| seed | the random seed | an integer | | stop_batch | the total training steps | a positive integer | | numb_test | the accuracy is test by using {numb_test} sample | a positive integer | | disp_file | the log file where the training message display | a string | @@ -213,7 +213,7 @@ where the frozen model file to import is given via the `-m` command line flag, t  # Running MD in Bohrium  -After CNN and QNN training, you can upload the ML model to our online NVNMD system and run MD there through Bohrium (https://bohrium.dp.tech). Bohrium is a research platfrom designed for AI for Science Era. For more information, please refer to [Bohrium Introduction](https://bohrium-doc.dp.tech/en/docs/WhatIsBohrium/). +After CNN and QNN training, you can upload the ML model to our online NVNMD system and run MD there through Bohrium (https://bohrium.dp.tech). Bohrium is a research platform designed for the AI for Science era. For more information, please refer to [Bohrium Introduction](https://bohrium-doc.dp.tech/en/docs/WhatIsBohrium/).  ## Registration  diff --git a/doc/third-party/lammps-command.md b/doc/third-party/lammps-command.md index 6a16605bfc..4af3fe5096 100644 --- a/doc/third-party/lammps-command.md +++ b/doc/third-party/lammps-command.md @@ -15,7 +15,7 @@  All units in LAMMPS except `lj` are supported. `lj` is not supported.  The most commonly used units are `metal`, since the internal units of distance, energy, force, and charge in DeePMD-kit are `\AA`, `eV`, `eV / \AA`, and `proton charge`, respectively. These units are consistent with the `metal` units in LAMMPS. -If one wants to use other units like `real` or `si`, it is welcome to do so. There is no need to do the unit conversion mannualy. The unit conversion is done automatically by LAMMPS. +If one wants to use other units like `real` or `si`, one is welcome to do so. There is no need to do the unit conversion manually. The unit conversion is done automatically by LAMMPS.  The only thing that one needs to take care is the unit of the output of `compute deeptensor/atom`. Working with `metal` units for `compute deeptensor/atom` is totally fine, since there is no unit conversion. For other unit styles, we currently assume that the output of the `compute deeptensor/atom` command has the unit of distance and have applied the unit conversion factor of distance. If a user wants to infer quantities with units other than distance, the user is encouraged to open a GitHub feature request, so that the unit conversion factor can be added. 
@@ -95,7 +95,7 @@ Evaluate the interaction of the system by using [Deep Potential][DP] or [Deep Po This pair style takes the deep potential defined in a model file that usually has the .pb extension. The model can be trained and frozen by package [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit), which can have either double or single float precision interface. -The model deviation evalulates the consistency of the force predictions from multiple models. By default, only the maximal, minimal and average model deviations are output. If the key `atomic` is set, then the model deviation of force prediction of each atom will be output. +The model deviation evaluates the consistency of the force predictions from multiple models. By default, only the maximal, minimal and average model deviations are output. If the key `atomic` is set, then the model deviation of force prediction of each atom will be output. The unit follows [LAMMPS units](#units) and the [scale factor](https://docs.lammps.org/pair_hybrid.html) is not applied. By default, the model deviation is output in absolute value. If the keyword `relative` is set, then the relative model deviation of the force will be output, including values output by the keyword `atomic`. The relative model deviation of the force on atom $i$ is defined by diff --git a/doc/train/finetuning.md b/doc/train/finetuning.md index 669d1319bd..e50109318d 100644 --- a/doc/train/finetuning.md +++ b/doc/train/finetuning.md @@ -106,7 +106,7 @@ $ dp --pt train input.json --finetune multitask_pretrained.pt --model-branch CHO ``` :::{note} -One can check the available model branches in multi-task pre-trained model by refering to the documentation of the pre-trained model or by using the following command: +One can check the available model branches in a multi-task pre-trained model by referring to the documentation of the pre-trained model or by using the following command: ```bash $ dp --pt show multitask_pretrained.pt model-branch diff --git a/doc/troubleshooting/precision.md b/doc/troubleshooting/precision.md index 56dbd51958..5ebef97122 100644 --- a/doc/troubleshooting/precision.md +++ b/doc/troubleshooting/precision.md @@ -14,7 +14,7 @@ Some common reasons are listed below. The unit of training data should follow what is listed in [data section](../data/system.md). Usually, the package to calculate the training data has different units from those of the DeePMD-kit. It is noted that some software label the energy gradient as forces, instead of the negative energy gradient. -It is neccessary to check them carefully to avoid inconsistent data. +It is necessary to check them carefully to avoid inconsistent data. ### SCF coverage and data accuracy @@ -29,7 +29,7 @@ Here is a checklist for the accuracy of data: ### Enough data If the model performs good on the training data, but has bad accuracy on another data, this means some data space is not covered by the training data. -It can be validated by evaluting the [model deviation](../test/model-deviation.md) with multiple models. +It can be validated by evaluating the [model deviation](../test/model-deviation.md) with multiple models. If the model deviation of these data is high for some data, try to collect more data using [DP-GEN](../third-party/out-of-deepmd-kit.md#dp-gen).
### Values of data diff --git a/pyproject.toml b/pyproject.toml index 0a1b2e6731..f4f399156a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -331,14 +331,14 @@ legacy_tox_ini = """ # be silenced # W504 - line break after binary operator - there is conflict between W503 and W504 in -# some lintners. One recomends line bread after and one before binary operator so we -# swith W504 off and recomend this coding style: +# some linters. One recommends line break after and one before binary operator so we +# switch W504 off and recommend this coding style: # a = (b + -> instead of -> a = (b # c) + c) [tool.autopep8] ignore = "W504" -# D413 - Missing blank line after last section - makes no sense only adds empy lines in +# D413 - Missing blank line after last section - makes no sense only adds empty lines in # docstrings # D416 - Section name should end with a colon - only applicable to RST type docstrings, # we are using numpy style diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 71b3dca1ea..805c6514e0 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -4,7 +4,7 @@ project(DeePMD) option(ENABLE_TENSORFLOW "Enable TensorFlow interface" OFF) option(ENABLE_PYTORCH "Enable PyTorch interface" OFF) -option(BUILD_TESTING "Build test and enable converage" OFF) +option(BUILD_TESTING "Build test and enable coverage" OFF) set(DEEPMD_C_ROOT "" CACHE PATH "Path to imported DeePMD-kit C library") @@ -272,7 +272,7 @@ endif() # set op prec set(HIGH_PREC_DEF "HIGH_PREC") -# this defination doesn't work, but leaving it empty will cause error +# this definition doesn't work, but leaving it empty will cause an error set(LOW_PREC_DEF "LOW_PREC") set(HIGH_PREC_VARIANT "") set(LOW_PREC_VARIANT "_low") diff --git a/source/api_c/include/deepmd.hpp b/source/api_c/include/deepmd.hpp index 9d0310d99a..270bc94cc5 100644 --- a/source/api_c/include/deepmd.hpp +++ b/source/api_c/include/deepmd.hpp @@ -1286,7 +1286,8 @@ class DeepPotModelDevi { const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr; const int *atype_ = &atype[0]; - // memory will be continous for std::vector but not std::vector + // memory will be continuous for std::vector but not + // std::vector std::vector energy_flat(numb_models); std::vector force_flat(static_cast(numb_models) * natoms * 3); @@ -1464,7 +1465,8 @@ class DeepPotModelDevi { const VALUETYPE *box_ = !box.empty() ?
&box[0] : nullptr; const int *atype_ = &atype[0]; - // memory will be continous for std::vector but not std::vector + // memory will be continuous for std::vector but not + // std::vector std::vector energy_flat(numb_models); std::vector force_flat(static_cast(numb_models) * natoms * 3); @@ -2326,7 +2328,7 @@ void inline read_file_to_string(std::string model, std::string &file_content) { int size; const char *c_file_content = DP_ReadFileToChar2(model.c_str(), &size); if (size < 0) { - // negtive size indicates error + // negative size indicates error std::string error_message = std::string(c_file_content, -size); DP_DeleteChar(c_file_content); throw deepmd::hpp::deepmd_exception(error_message); diff --git a/source/api_c/src/c_api.cc b/source/api_c/src/c_api.cc index 9ed37d04aa..56c5f9720f 100644 --- a/source/api_c/src/c_api.cc +++ b/source/api_c/src/c_api.cc @@ -1586,7 +1586,7 @@ const char* DP_ReadFileToChar2(const char* c_model, int* size) { try { deepmd::read_file_to_string(model, file_content); } catch (deepmd::deepmd_exception& ex) { - // use negtive size to indicate error + // use negative size to indicate error std::string error_message = std::string(ex.what()); *size = -error_message.size(); return string_to_char(error_message); diff --git a/source/api_cc/include/DeepTensor.h b/source/api_cc/include/DeepTensor.h index f355413d80..1ec14e3e7f 100644 --- a/source/api_cc/include/DeepTensor.h +++ b/source/api_cc/include/DeepTensor.h @@ -37,7 +37,7 @@ class DeepTensorBase { const std::string& name_scope = "") = 0; /** * @brief Evaluate the global tensor and component-wise force and virial. - * @param[out] global_tensor The global tensor to evalute. + * @param[out] global_tensor The global tensor to evaluate. * @param[out] force The component-wise force of the global tensor, size odim *x natoms x 3. * @param[out] virial The component-wise virial of the global tensor, size @@ -75,7 +75,7 @@ class DeepTensorBase { /** @} */ /** * @brief Evaluate the global tensor and component-wise force and virial. - * @param[out] global_tensor The global tensor to evalute. + * @param[out] global_tensor The global tensor to evaluate. * @param[out] force The component-wise force of the global tensor, size odim *x natoms x 3. * @param[out] virial The component-wise virial of the global tensor, size @@ -180,7 +180,8 @@ class DeepTensor { /** * @brief Evaluate the value by using this model. - * @param[out] value The value to evalute, usually would be the atomic tensor. + * @param[out] value The value to evaluate, usually would be the atomic + *tensor. * @param[in] coord The coordinates of atoms. The array should be of size *natoms x 3. * @param[in] atype The atom types. The list should contain natoms ints. @@ -193,7 +194,8 @@ class DeepTensor { const std::vector& box); /** * @brief Evaluate the value by using this model. - * @param[out] value The value to evalute, usually would be the atomic tensor. + * @param[out] value The value to evaluate, usually would be the atomic + *tensor. * @param[in] coord The coordinates of atoms. The array should be of size *natoms x 3. * @param[in] atype The atom types. The list should contain natoms ints. @@ -210,7 +212,7 @@ class DeepTensor { const InputNlist& inlist); /** * @brief Evaluate the global tensor and component-wise force and virial. - * @param[out] global_tensor The global tensor to evalute. + * @param[out] global_tensor The global tensor to evaluate. * @param[out] force The component-wise force of the global tensor, size odim *x natoms x 3. 
* @param[out] virial The component-wise virial of the global tensor, size @@ -229,7 +231,7 @@ class DeepTensor { const std::vector& box); /** * @brief Evaluate the global tensor and component-wise force and virial. - * @param[out] global_tensor The global tensor to evalute. + * @param[out] global_tensor The global tensor to evaluate. * @param[out] force The component-wise force of the global tensor, size odim *x natoms x 3. * @param[out] virial The component-wise virial of the global tensor, size @@ -252,7 +254,7 @@ class DeepTensor { const InputNlist& inlist); /** * @brief Evaluate the global tensor and component-wise force and virial. - * @param[out] global_tensor The global tensor to evalute. + * @param[out] global_tensor The global tensor to evaluate. * @param[out] force The component-wise force of the global tensor, size odim *x natoms x 3. * @param[out] virial The component-wise virial of the global tensor, size @@ -277,7 +279,7 @@ class DeepTensor { const std::vector& box); /** * @brief Evaluate the global tensor and component-wise force and virial. - * @param[out] global_tensor The global tensor to evalute. + * @param[out] global_tensor The global tensor to evaluate. * @param[out] force The component-wise force of the global tensor, size odim *x natoms x 3. * @param[out] virial The component-wise virial of the global tensor, size diff --git a/source/api_cc/include/DeepTensorTF.h b/source/api_cc/include/DeepTensorTF.h index 3ca316a29f..3fd8338b1f 100644 --- a/source/api_cc/include/DeepTensorTF.h +++ b/source/api_cc/include/DeepTensorTF.h @@ -39,7 +39,8 @@ class DeepTensorTF : public DeepTensorBase { private: /** * @brief Evaluate the value by using this model. - * @param[out] value The value to evalute, usually would be the atomic tensor. + * @param[out] value The value to evaluate, usually would be the atomic + *tensor. * @param[in] coord The coordinates of atoms. The array should be of size *natoms x 3. * @param[in] atype The atom types. The list should contain natoms ints. @@ -52,7 +53,8 @@ class DeepTensorTF : public DeepTensorBase { const std::vector& box); /** * @brief Evaluate the value by using this model. - * @param[out] value The value to evalute, usually would be the atomic tensor. + * @param[out] value The value to evaluate, usually would be the atomic + *tensor. * @param[in] coord The coordinates of atoms. The array should be of size *natoms x 3. * @param[in] atype The atom types. The list should contain natoms ints. @@ -69,7 +71,7 @@ class DeepTensorTF : public DeepTensorBase { const InputNlist& inlist); /** * @brief Evaluate the global tensor and component-wise force and virial. - * @param[out] global_tensor The global tensor to evalute. + * @param[out] global_tensor The global tensor to evaluate. * @param[out] force The component-wise force of the global tensor, size odim *x natoms x 3. * @param[out] virial The component-wise virial of the global tensor, size @@ -94,7 +96,7 @@ class DeepTensorTF : public DeepTensorBase { const std::vector& box); /** * @brief Evaluate the global tensor and component-wise force and virial. - * @param[out] global_tensor The global tensor to evalute. + * @param[out] global_tensor The global tensor to evaluate. * @param[out] force The component-wise force of the global tensor, size odim *x natoms x 3. * @param[out] virial The component-wise virial of the global tensor, size @@ -163,7 +165,7 @@ class DeepTensorTF : public DeepTensorBase { /** * @brief Evaluate the global tensor and component-wise force and virial. 
- * @param[out] global_tensor The global tensor to evalute. + * @param[out] global_tensor The global tensor to evaluate. * @param[out] force The component-wise force of the global tensor, size odim *x natoms x 3. * @param[out] virial The component-wise virial of the global tensor, size @@ -201,7 +203,7 @@ class DeepTensorTF : public DeepTensorBase { /** @} */ /** * @brief Evaluate the global tensor and component-wise force and virial. - * @param[out] global_tensor The global tensor to evalute. + * @param[out] global_tensor The global tensor to evaluate. * @param[out] force The component-wise force of the global tensor, size odim *x natoms x 3. * @param[out] virial The component-wise virial of the global tensor, size diff --git a/source/api_cc/include/commonTF.h b/source/api_cc/include/commonTF.h index 0c14597e30..003b330308 100644 --- a/source/api_cc/include/commonTF.h +++ b/source/api_cc/include/commonTF.h @@ -63,7 +63,7 @@ int session_get_dtype(tensorflow::Session* session, * @param[in] aparam_ Atom parameters. * @param[in] atommap Atom map. * @param[in] scope The scope of the tensors. - * @param[in] aparam_nall Whether the atomic dimesion of atomic parameters is + * @param[in] aparam_nall Whether the atomic dimension of atomic parameters is * nall. */ template @@ -93,7 +93,7 @@ int session_input_tensors( * @param[in] nghost Number of ghost atoms. * @param[in] ago Update the internal neighbour list if ago is 0. * @param[in] scope The scope of the tensors. - * @param[in] aparam_nall Whether the atomic dimesion of atomic parameters is + * @param[in] aparam_nall Whether the atomic dimension of atomic parameters is * nall. */ template @@ -126,7 +126,7 @@ int session_input_tensors( * @param[in] nghost Number of ghost atoms. * @param[in] ago Update the internal neighbour list if ago is 0. * @param[in] scope The scope of the tensors. - * @param[in] aparam_nall Whether the atomic dimesion of atomic parameters is + * @param[in] aparam_nall Whether the atomic dimension of atomic parameters is * nall. 
*/ template diff --git a/source/api_cc/src/DataModifierTF.cc b/source/api_cc/src/DataModifierTF.cc index aaa2252955..80cf6120a3 100644 --- a/source/api_cc/src/DataModifierTF.cc +++ b/source/api_cc/src/DataModifierTF.cc @@ -306,7 +306,7 @@ void DipoleChargeModifierTF::compute( dfcorr_2[pairs[ii].first * 3 + dd] += delef_[pairs[ii].second * 3 + dd]; } } - // add ele contrinution + // add ele contribution dfcorr_ = dfcorr_2; for (int ii = 0; ii < nloc_real; ++ii) { int oii = real_bkw_map[ii]; diff --git a/source/api_cc/src/DeepPotTF.cc b/source/api_cc/src/DeepPotTF.cc index d7a7edfb60..a990cecf8d 100644 --- a/source/api_cc/src/DeepPotTF.cc +++ b/source/api_cc/src/DeepPotTF.cc @@ -465,10 +465,10 @@ void DeepPotTF::init(const std::string& model, } if (!model_compatable(model_version)) { throw deepmd::deepmd_exception( - "incompatable model: version " + model_version + + "incompatible model: version " + model_version + " in graph, but version " + global_model_version + " supported " - "See https://deepmd.rtfd.io/compatability/ for details."); + "See https://deepmd.rtfd.io/compatibility/ for details."); } dtype = session_get_dtype(session, "descrpt_attr/rcut"); if (dtype == tensorflow::DT_DOUBLE) { diff --git a/source/api_cc/src/DeepTensorTF.cc b/source/api_cc/src/DeepTensorTF.cc index c69b7c018e..1081473f25 100644 --- a/source/api_cc/src/DeepTensorTF.cc +++ b/source/api_cc/src/DeepTensorTF.cc @@ -65,10 +65,10 @@ void DeepTensorTF::init(const std::string &model, } if (!model_compatable(model_version)) { throw deepmd::deepmd_exception( - "incompatable model: version " + model_version + + "incompatible model: version " + model_version + " in graph, but version " + global_model_version + " supported " - "See https://deepmd.rtfd.io/compatability/ for details."); + "See https://deepmd.rtfd.io/compatibility/ for details."); } dtype = session_get_dtype(session, "descrpt_attr/rcut"); if (dtype == tensorflow::DT_DOUBLE) { diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index baa257d60e..e84517ea7a 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -934,7 +934,7 @@ void deepmd::select_map(std::vector& out, for (int ii = 0; ii < in.size() / stride / nframes; ++ii) { #ifdef DEBUG assert(ii < idx_map.size() && "idx goes over the idx map size"); - assert(idx_map[ii] < out.size() && "mappped idx goes over the out size"); + assert(idx_map[ii] < out.size() && "mapped idx goes over the out size"); #endif if (idx_map[ii] >= 0) { int to_ii = idx_map[ii]; diff --git a/source/cmake/Findtensorflow.cmake b/source/cmake/Findtensorflow.cmake index 6321d4872b..d579af7679 100644 --- a/source/cmake/Findtensorflow.cmake +++ b/source/cmake/Findtensorflow.cmake @@ -366,7 +366,7 @@ elseif(NOT DEFINED OP_CXX_ABI) AND ${CPP_CXX_ABI_COMPILE_RESULT_VAR1}) message( WARNING - "Both _GLIBCXX_USE_CXX11_ABI=0 and 1 work. The reason may be that your C++ compiler (e.g. Red Hat Developer Toolset) does not support the custom cxx11 abi flag. For convience, we set _GLIBCXX_USE_CXX11_ABI=1." + "Both _GLIBCXX_USE_CXX11_ABI=0 and 1 work. The reason may be that your C++ compiler (e.g. Red Hat Developer Toolset) does not support the custom cxx11 abi flag. For convenience, we set _GLIBCXX_USE_CXX11_ABI=1." 
) set(OP_CXX_ABI 1) else() diff --git a/source/cmake/tf_version.cpp b/source/cmake/tf_version.cpp index 390bd4c375..6d09e33493 100644 --- a/source/cmake/tf_version.cpp +++ b/source/cmake/tf_version.cpp @@ -6,7 +6,7 @@ int main(int argc, char* argv[]) { // See // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h - // TF_VERSION_STRING has been avaiable since TensorFlow v0.6 + // TF_VERSION_STRING has been available since TensorFlow v0.6 std::cout << TF_VERSION_STRING; return 0; } diff --git a/source/gmx/dp_gmx_patch b/source/gmx/dp_gmx_patch index 4dacaea835..8df3f12cc6 100644 --- a/source/gmx/dp_gmx_patch +++ b/source/gmx/dp_gmx_patch @@ -128,6 +128,6 @@ do v) VERSION=${OPTARG} && DEEPMD_PATCH_ROOT=${DEEPMD_PATCH_ROOT}/${VERSION} ;; p) check_version ${VERSION} && dp_gmx_patch ${GMX_ROOT} ;; r) check_version ${VERSION} && dp_gmx_revert ${GMX_ROOT} ;; - *) echo "- ERROR: Invaild option ${opt}" && exit 1 ;; + *) echo "- ERROR: Invalid option ${opt}" && exit 1 ;; esac done diff --git a/source/gmx/src/gmx_plugin.cpp b/source/gmx/src/gmx_plugin.cpp index 15c4fa84ae..53f02f1fbe 100644 --- a/source/gmx/src/gmx_plugin.cpp +++ b/source/gmx/src/gmx_plugin.cpp @@ -103,7 +103,7 @@ void DeepmdPlugin::init_from_json(char* json_file) { std::cout << "Successfully init plugin!" << std::endl; } else { - std::cerr << "Invaild json file: " << json_file << std::endl; + std::cerr << "Invalid json file: " << json_file << std::endl; exit(1); } } diff --git a/source/install/build_tf.py b/source/install/build_tf.py index a9e1e247cd..0239ebfa46 100755 --- a/source/install/build_tf.py +++ b/source/install/build_tf.py @@ -19,7 +19,7 @@ if sys.version_info[0] < 3: # noqa: UP036 raise Exception("Python 3 or a more recent version is required.") -# The script should only rely on the stardard Python libraries. +# The script should only rely on the standard Python libraries. import argparse import hashlib @@ -333,7 +333,7 @@ def copytree2(src: Path, dst: Path, *args, **kwargs): call( [ "/bin/cp", - # archieve, recursive, force, do not create one inside + # archive, recursive, force, do not create one inside # https://stackoverflow.com/a/24486142/9567349 "-arfT", str(tmpdst), @@ -386,7 +386,7 @@ def call(commands: list[str], env={}, **kwargs): # online resources to download RESOURCES = { - # bazelisk is used to warpper bazel + # bazelisk is used to wrap bazel "bazelisk-1.11.0": OnlineResource( "bazel-linux-amd64-1.11.0", "https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-linux-amd64", diff --git a/source/lib/include/ComputeDescriptor.h b/source/lib/include/ComputeDescriptor.h index 7c3eaf4cd2..733cb1ee0c 100644 --- a/source/lib/include/ComputeDescriptor.h +++ b/source/lib/include/ComputeDescriptor.h @@ -501,7 +501,7 @@ void compute_descriptor(std::vector &descrpt_a, if (fmt_nlist_a[nei_iter] < 0) { break; } - // drdS, stored in tranposed form + // drdS, stored in transposed form double dtrdST[4][3]; double *rr = &sel_a_diff[nei_iter][0]; double tr[3]; diff --git a/source/lib/include/coord.h b/source/lib/include/coord.h index 699a90898c..6621d714a5 100644 --- a/source/lib/include/coord.h +++ b/source/lib/include/coord.h @@ -18,7 +18,7 @@ void normalize_coord_cpu(FPTYPE* coord, // in_c, in_t, nloc, mem_nall, rc, region // mem_nall is the size of allocated memory for out_c, out_t, mapping // returns -// 0: succssful +// 0: successful // 1: the memory is not large enough to hold all copied coords and types. // i.e.
nall > mem_nall template @@ -66,7 +66,7 @@ void normalize_coord_gpu(FPTYPE* coord, // box_info mem_nall is the size of allocated memory for out_c, out_t, // mapping // returns -// 0: succssful +// 0: successful // 1: the memory is not large enough to hold all copied coords and types. // i.e. nall > mem_nall template diff --git a/source/lib/include/neighbor_list.h b/source/lib/include/neighbor_list.h index b99827b552..95f5cb6174 100644 --- a/source/lib/include/neighbor_list.h +++ b/source/lib/include/neighbor_list.h @@ -126,7 +126,7 @@ int max_numneigh(const InputNlist& to_nlist); // c_cpy, nloc, nall, mem_size, rcut, region // mem_size is the size of allocated memory for jlist. // returns -// 0: succssful +// 0: successful // 1: the memory is not large enough to hold all neighbors. // i.e. max_list_size > mem_nall template @@ -190,7 +190,7 @@ void use_nlist_map(int* nlist, // c_cpy, nloc, nall, mem_size, rcut, region // mem_size is the size of allocated memory for jlist. // returns -// 0: succssful +// 0: successful // 1: the memory is not large enough to hold all neighbors. // i.e. max_list_size > mem_nall template diff --git a/source/lib/include/prod_force.h b/source/lib/include/prod_force.h index b5ae68bdce..2d88607131 100644 --- a/source/lib/include/prod_force.h +++ b/source/lib/include/prod_force.h @@ -29,7 +29,7 @@ void prod_force_a_cpu(FPTYPE* force, /** * @brief Produce force from net_deriv and in_deriv. * @details This function is used for multi-threading. Only part of atoms - * are computed in this thread. They will be comptued in parallel. + * are computed in this thread. They will be computed in parallel. * * @tparam FPTYPE float or double * @param[out] force Atomic forces. diff --git a/source/lib/src/gpu/tabulate.cu b/source/lib/src/gpu/tabulate.cu index 71ea17ced5..e0723b81af 100644 --- a/source/lib/src/gpu/tabulate.cu +++ b/source/lib/src/gpu/tabulate.cu @@ -272,7 +272,7 @@ __global__ void tabulate_fusion_se_a_grad_fifth_order_polynomial( bool enable_se_atten = two_embed != nullptr; GPU_DYNAMIC_SHARED_MEM_DECL(int, _data); const int_64 block_idx = blockIdx.x; // nloc - const int thread_idx = threadIdx.x; // KTILE * WARP_SIZE, usally 128 here~ + const int thread_idx = threadIdx.x; // KTILE * WARP_SIZE, usually 128 here~ int warp_idx = GpuShuffleSync(0xffffffff, threadIdx.x / WARP_SIZE, 0); int lane_idx = threadIdx.x % WARP_SIZE; int breakpoint = nnei - 1; @@ -531,7 +531,7 @@ __global__ void tabulate_fusion_se_t_grad_fifth_order_polynomial( const int last_layer_size) { GPU_DYNAMIC_SHARED_MEM_DECL(int, _data); const int_64 block_idx = blockIdx.x; // nloc - const int thread_idx = threadIdx.x; // KTILE * WARP_SIZE, usally 128 here~ + const int thread_idx = threadIdx.x; // KTILE * WARP_SIZE, usually 128 here~ int warp_idx = GpuShuffleSync(0xffffffff, threadIdx.x / WARP_SIZE, 0); int lane_idx = threadIdx.x % WARP_SIZE; FPTYPE* iteratorA = (FPTYPE*)&_data[0]; // dy @@ -678,7 +678,7 @@ __global__ void tabulate_fusion_se_r_grad_fifth_order_polynomial( const int nnei, const int last_layer_size) { const int_64 block_idx = blockIdx.x; // nloc - const int thread_idx = threadIdx.x; // KTILE * WARP_SIZE, usally 128 here~ + const int thread_idx = threadIdx.x; // KTILE * WARP_SIZE, usually 128 here~ int warp_idx = GpuShuffleSync(0xffffffff, thread_idx / WARP_SIZE, 0); int lane_idx = thread_idx % WARP_SIZE; __syncthreads(); diff --git a/source/lib/tests/test_fmt_nlist.cc b/source/lib/tests/test_fmt_nlist.cc index bc79c92ea6..6cd24b556a 100644 --- a/source/lib/tests/test_fmt_nlist.cc +++ 
b/source/lib/tests/test_fmt_nlist.cc @@ -134,7 +134,7 @@ class TestEncodingDecodingNborInfo : public ::testing::Test { void TearDown() override {} }; -// orginal implementation. copy ghost +// original implementation. copy ghost TEST_F(TestFormatNlist, orig_cpy) { std::vector> nlist_a, nlist_r; std::vector fmt_nlist_a, fmt_nlist_r; @@ -155,7 +155,7 @@ TEST_F(TestFormatNlist, orig_cpy) { } } -// orginal implementation. copy ghost should be equal to pbc +// original implementation. copy ghost should be equal to pbc TEST_F(TestFormatNlist, orig_pbc) { std::vector> nlist_a_1, nlist_r_1; build_nlist(nlist_a_1, nlist_r_1, posi, rc, rc, ncell, region); @@ -174,7 +174,7 @@ TEST_F(TestFormatNlist, orig_pbc) { } } -// orginal implementation. copy ghost should be equal to pbc +// original implementation. copy ghost should be equal to pbc TEST_F(TestFormatNlist, orig_cpy_equal_pbc) { std::vector> nlist_a_0, nlist_r_0; build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell, @@ -251,7 +251,7 @@ TEST_F(TestFormatNlist, cpu) { } } -// orginal implementation. copy ghost +// original implementation. copy ghost TEST_F(TestFormatNlistShortSel, orig_cpy) { std::vector> nlist_a, nlist_r; std::vector fmt_nlist_a, fmt_nlist_r; diff --git a/source/lmp/pppm_dplr.cpp b/source/lmp/pppm_dplr.cpp index 613a9f1c93..e1bdb828af 100644 --- a/source/lmp/pppm_dplr.cpp +++ b/source/lmp/pppm_dplr.cpp @@ -92,7 +92,7 @@ void PPPMDPLR::compute(int eflag, int vflag) { return; } - // convert atoms from box to lamda coords + // convert atoms from box to lambda coords if (triclinic == 0) { boxlo = domain->boxlo; @@ -266,7 +266,7 @@ void PPPMDPLR::compute(int eflag, int vflag) { slabcorr(); } - // convert atoms back from lamda to box coords + // convert atoms back from lambda to box coords if (triclinic) { domain->lamda2x(atom->nlocal); diff --git a/source/op/tf/descrpt.cc b/source/op/tf/descrpt.cc index 6362b8d37a..db3b0ca8e5 100644 --- a/source/op/tf/descrpt.cc +++ b/source/op/tf/descrpt.cc @@ -293,7 +293,7 @@ class DescrptOp : public OpKernel { } else if (nei_mode == -1) { ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL); } else { - throw deepmd::deepmd_exception("unknow neighbor mode"); + throw deepmd::deepmd_exception("unknown neighbor mode"); } // loop over atoms, compute descriptors for each atom diff --git a/source/op/tf/descrpt_se_a_ef.cc b/source/op/tf/descrpt_se_a_ef.cc index 96c953f167..18dda3d8b0 100644 --- a/source/op/tf/descrpt_se_a_ef.cc +++ b/source/op/tf/descrpt_se_a_ef.cc @@ -310,7 +310,7 @@ class DescrptSeAEfOp : public OpKernel { } else if (nei_mode == -1) { ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL); } else { - throw deepmd::deepmd_exception("unknow neighbor mode"); + throw deepmd::deepmd_exception("unknown neighbor mode"); } // loop over atoms, compute descriptors for each atom diff --git a/source/op/tf/descrpt_se_a_ef_para.cc b/source/op/tf/descrpt_se_a_ef_para.cc index 6dc4442ee6..0f34de3f4f 100644 --- a/source/op/tf/descrpt_se_a_ef_para.cc +++ b/source/op/tf/descrpt_se_a_ef_para.cc @@ -310,7 +310,7 @@ class DescrptSeAEfParaOp : public OpKernel { } else if (nei_mode == -1) { ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL); } else { - throw deepmd::deepmd_exception("unknow neighbor mode"); + throw deepmd::deepmd_exception("unknown neighbor mode"); } // loop over atoms, compute descriptors for each atom diff --git a/source/op/tf/descrpt_se_a_ef_vert.cc b/source/op/tf/descrpt_se_a_ef_vert.cc index 9899e29f06..b4eb30d9ee 100644 --- 
a/source/op/tf/descrpt_se_a_ef_vert.cc +++ b/source/op/tf/descrpt_se_a_ef_vert.cc @@ -310,7 +310,7 @@ class DescrptSeAEfVertOp : public OpKernel { } else if (nei_mode == -1) { ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL); } else { - throw deepmd::deepmd_exception("unknow neighbor mode"); + throw deepmd::deepmd_exception("unknown neighbor mode"); } // loop over atoms, compute descriptors for each atom diff --git a/source/op/tf/descrpt_se_a_mask.cc b/source/op/tf/descrpt_se_a_mask.cc index e27ea099ab..28e4a575db 100644 --- a/source/op/tf/descrpt_se_a_mask.cc +++ b/source/op/tf/descrpt_se_a_mask.cc @@ -181,7 +181,7 @@ class DescrptSeAMaskOp : public OpKernel { for (int jj = 0; jj < natoms * 3; ++jj) { rij(kk, ii * natoms * 3 + jj) = 0.; } - // Save the neighbor atoms indicies. + // Save the neighbor atoms indices. for (int jj = 0; jj < natoms; jj++) { nlist(kk, ii * natoms + jj) = -1; } @@ -304,7 +304,7 @@ class DescrptSeAMaskOp : public OpKernel { for (int jj = 0; jj < natoms * 3; ++jj) { rij(kk, ii * natoms * 3 + jj) = rij_atom[jj]; } - // Save the neighbor atoms indicies. + // Save the neighbor atoms indices. for (int jj = 0; jj < natoms; ++jj) { nlist(kk, ii * natoms + jj) = sorted_nlist[jj]; } diff --git a/source/op/tf/neighbor_stat.cc b/source/op/tf/neighbor_stat.cc index d2a6b3ab31..26f13b0c84 100644 --- a/source/op/tf/neighbor_stat.cc +++ b/source/op/tf/neighbor_stat.cc @@ -243,7 +243,7 @@ class NeighborStatOp : public OpKernel { } else if (nei_mode == -1) { ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, -1, rcut, NULL); } else { - throw deepmd::deepmd_exception("unknow neighbor mode"); + throw deepmd::deepmd_exception("unknown neighbor mode"); } int MAX_NNEI = 0; diff --git a/source/op/tf/pairwise.cc b/source/op/tf/pairwise.cc index 8ed140a14a..ba1e5e6475 100644 --- a/source/op/tf/pairwise.cc +++ b/source/op/tf/pairwise.cc @@ -78,7 +78,7 @@ class PairwiseIdxOp : public OpKernel { backward_qm_maps.push_back(backward_qm_map); forward_qmmm_maps.push_back(forward_qmmm_map); backward_qmmm_maps.push_back(backward_qmmm_map); - // get the maximun + // get the maximum int nghost_qm_ii = nall_qm_ii - nloc_qm_ii, nghost_qmmm_ii = nall_qmmm_ii - nloc_qmmm_ii; nloc_qm.push_back(nloc_qm_ii); diff --git a/source/op/tf/prod_env_mat_multi_device.cc b/source/op/tf/prod_env_mat_multi_device.cc index 7037a00a6c..e374102224 100644 --- a/source/op/tf/prod_env_mat_multi_device.cc +++ b/source/op/tf/prod_env_mat_multi_device.cc @@ -485,7 +485,7 @@ class ProdEnvMatAOp : public OpKernel { const FPTYPE* std = std_tensor.flat().data(); const int* p_type = type_tensor.flat().data(); - // must declar out of if, otherwise the memory will be destroyed! + // must declare out of if, otherwise the memory will be destroyed! Tensor int_temp; Tensor uint64_temp; std::vector tensor_list(7); @@ -791,7 +791,7 @@ class ProdEnvMatROp : public OpKernel { const FPTYPE* std = std_tensor.flat().data(); const int* p_type = type_tensor.flat().data(); - // must declar out of if, otherwise the memory will be destroyed! + // must declare out of if, otherwise the memory will be destroyed! Tensor int_temp; Tensor uint64_temp; std::vector tensor_list(7); @@ -1144,7 +1144,7 @@ class ProdEnvMatAMixOp : public OpKernel { } } - // must declar out of if, otherwise the memory will be destroyed! + // must declare out of if, otherwise the memory will be destroyed! 
Tensor int_temp; Tensor uint64_temp; std::vector tensor_list(7); diff --git a/source/op/tf/prod_env_mat_multi_device_nvnmd.cc b/source/op/tf/prod_env_mat_multi_device_nvnmd.cc index d9f9275b86..57390077ef 100644 --- a/source/op/tf/prod_env_mat_multi_device_nvnmd.cc +++ b/source/op/tf/prod_env_mat_multi_device_nvnmd.cc @@ -45,7 +45,7 @@ REGISTER_OP("ProdEnvMatANvnmdQuantize") .Output("descrpt_deriv: T") .Output("rij: T") .Output("nlist: int32"); -// only sel_a and rcut_r uesd. +// only sel_a and rcut_r used. // ProdEnvMatAMixNvnmd REGISTER_OP("ProdEnvMatAMixNvnmdQuantize") @@ -68,7 +68,7 @@ REGISTER_OP("ProdEnvMatAMixNvnmdQuantize") .Output("nlist: int32") .Output("ntype: int32") .Output("nmask: bool"); -// only sel_a and rcut_r uesd. +// only sel_a and rcut_r used. template static int _norm_copy_coord_cpu(std::vector& coord_cpy, @@ -463,7 +463,7 @@ class ProdEnvMatANvnmdQuantizeOp : public OpKernel { const FPTYPE* std = std_tensor.flat().data(); const int* p_type = type_tensor.flat().data(); - // must declar out of if, otherwise the memory will be destroyed! + // must declare out of if, otherwise the memory will be destroyed! Tensor int_temp; Tensor uint64_temp; std::vector tensor_list(7); @@ -734,7 +734,7 @@ class ProdEnvMatAMixNvnmdQuantizeOp : public OpKernel { } } - // must declar out of if, otherwise the memory will be destroyed! + // must declare out of if, otherwise the memory will be destroyed! Tensor int_temp; Tensor uint64_temp; std::vector tensor_list(7); diff --git a/source/tests/common/dpmodel/test_pairtab_preprocess.py b/source/tests/common/dpmodel/test_pairtab_preprocess.py index da3b9251f7..7f4058dedd 100644 --- a/source/tests/common/dpmodel/test_pairtab_preprocess.py +++ b/source/tests/common/dpmodel/test_pairtab_preprocess.py @@ -72,7 +72,7 @@ def test_preprocess(self): ) # for this test case, the table does not decay to zero at rcut = 0.22, - # in the cubic spline code, we use a fixed size grid, if will be a problem if we introduce variable gird size. + # in the cubic spline code, we use a fixed size grid, it will be a problem if we introduce variable grid size. # we will do post process to overwrite spline coefficient `a3`,`a2`,`a1`,`a0`, to ensure energy decays to `0`.
np.testing.assert_allclose( self.tab3.vdata, diff --git a/source/tests/common/test_argument_parser.py b/source/tests/common/test_argument_parser.py index 1404185607..2c67c1f6cb 100644 --- a/source/tests/common/test_argument_parser.py +++ b/source/tests/common/test_argument_parser.py @@ -156,7 +156,7 @@ def run_test(self, *, command: str, mapping: "TEST_DICT"): namespace = parse_args(cmd_args) except SystemExit as e: raise SystemExit( - f"Encountered expection when parsing arguments ->\n\n" + f"Encountered exception when parsing arguments ->\n\n" f"{buffer.getvalue()}\n" f"passed in arguments were: {cmd_args}\n" f"built from dict {mapping}" @@ -188,7 +188,7 @@ def run_test(self, *, command: str, mapping: "TEST_DICT"): namespace = parse_args(cmd_args) except SystemExit as e: raise SystemExit( - f"Encountered expection when parsing DEFAULT arguments ->\n\n" + f"Encountered exception when parsing DEFAULT arguments ->\n\n" f"{buffer.getvalue()}\n" f"passed in arguments were: {cmd_args}\n" f"built from dict {mapping}" diff --git a/source/tests/consistent/common.py b/source/tests/consistent/common.py index e3bf808978..885662c766 100644 --- a/source/tests/consistent/common.py +++ b/source/tests/consistent/common.py @@ -500,7 +500,7 @@ def tearDown(self) -> None: def parameterized(*attrs: tuple, **subblock_attrs: tuple) -> Callable: """Parameterized test. - Orginal class will not be actually generated. Avoid inherbiting from it. + Original class will not be actually generated. Avoid inheriting from it. New classes are generated with the name of the original class and the parameters. diff --git a/source/tests/consistent/test_type_embedding.py b/source/tests/consistent/test_type_embedding.py index e2836c7a6c..a4b516ef16 100644 --- a/source/tests/consistent/test_type_embedding.py +++ b/source/tests/consistent/test_type_embedding.py @@ -90,7 +90,7 @@ def addtional_data(self) -> dict: use_econf_tebd, use_tebd_bias, ) = self.param - # implict argument not input by users + # implicit argument not input by users return { "ntypes": self.ntypes, "padding": padding, diff --git a/source/tests/pt/model/test_descriptor_dpa1.py b/source/tests/pt/model/test_descriptor_dpa1.py index a3d696516a..ddd5dc6c3c 100644 --- a/source/tests/pt/model/test_descriptor_dpa1.py +++ b/source/tests/pt/model/test_descriptor_dpa1.py @@ -249,7 +249,7 @@ def test_descriptor_block(self): coord = self.coord atype = self.atype box = self.cell - # handel type_embedding + # handle type_embedding type_embedding = TypeEmbedNet(ntypes, 8, use_tebd_bias=True).to(env.DEVICE) type_embedding.load_state_dict( torch.load(self.file_type_embed, weights_only=True) diff --git a/source/tests/pt/model/test_embedding_net.py b/source/tests/pt/model/test_embedding_net.py index 1566eb2416..2cfcaa820e 100644 --- a/source/tests/pt/model/test_embedding_net.py +++ b/source/tests/pt/model/test_embedding_net.py @@ -181,7 +181,7 @@ def test_consistency(self): key = gen_key(worb=m[2], depth=int(m[1]) + 1, elemid=int(m[0])) var = dp_vars[key] with torch.no_grad(): - # Keep parameter value consistency between 2 implentations + # Keep parameter value consistency between 2 implementations param.data.copy_(torch.from_numpy(var)) pt_coord = self.torch_batch["coord"].to(env.DEVICE) diff --git a/source/tests/pt/model/test_fitting_net.py b/source/tests/pt/model/test_fitting_net.py index ecff0d47e6..e08eed4f8b 100644 --- a/source/tests/pt/model/test_fitting_net.py +++ b/source/tests/pt/model/test_fitting_net.py @@ -133,7 +133,7 @@ def test_consistency(self): assert key is not
None var = values[key] with torch.no_grad(): - # Keep parameter value consistency between 2 implentations + # Keep parameter value consistency between 2 implementations param.data.copy_(torch.from_numpy(var)) embedding = torch.from_numpy(self.embedding) embedding = embedding.view(4, -1, self.embedding_width) diff --git a/source/tests/pt/model/test_make_hessian_model.py b/source/tests/pt/model/test_make_hessian_model.py index ef615554ef..df58d115a2 100644 --- a/source/tests/pt/model/test_make_hessian_model.py +++ b/source/tests/pt/model/test_make_hessian_model.py @@ -99,7 +99,7 @@ def test( aparam = torch.rand( [nf, natoms * nap], dtype=dtype, device=env.DEVICE, generator=generator ) - # forward hess and valu models + # forward hess and value models ret_dict0 = self.model_hess.forward_common( coord, atype, box=cell, fparam=fparam, aparam=aparam ) diff --git a/source/tests/pt/model/test_model.py b/source/tests/pt/model/test_model.py index 8fdbdaf413..84f5a113a3 100644 --- a/source/tests/pt/model/test_model.py +++ b/source/tests/pt/model/test_model.py @@ -300,7 +300,7 @@ def test_consistency(self): limit_pref_f=self.limit_pref_f, ) - # Keep statistics consistency between 2 implentations + # Keep statistics consistency between 2 implementations my_em = my_model.get_descriptor() mean = stat_dict["descriptor.mean"].reshape([self.ntypes, my_em.get_nsel(), 4]) stddev = stat_dict["descriptor.stddev"].reshape( @@ -314,7 +314,7 @@ def test_consistency(self): stat_dict["fitting_net.bias_atom_e"], device=DEVICE ) - # Keep parameter value consistency between 2 implentations + # Keep parameter value consistency between 2 implementations for name, param in my_model.named_parameters(): name = name.replace("sea.", "") var_name = torch2tf(name, last_layer_id=len(self.n_neuron)) diff --git a/source/tests/pt/model/test_nlist.py b/source/tests/pt/model/test_nlist.py index c4401b2cdd..7558a2a7d9 100644 --- a/source/tests/pt/model/test_nlist.py +++ b/source/tests/pt/model/test_nlist.py @@ -44,7 +44,7 @@ def setUp(self): self.rcut = 1.01 self.prec = 1e-10 self.nsel = [10, 10] - # genrated by preprocess.build_neighbor_list + # generated by preprocess.build_neighbor_list # ref_nlist, _, _ = legacy_build_neighbor_list( # 2, ecoord[0], eatype[0], # self.rcut, diff --git a/source/tests/pt/model/test_unused_params.py b/source/tests/pt/model/test_unused_params.py index 3f068d5e5b..98bbe7040e 100644 --- a/source/tests/pt/model/test_unused_params.py +++ b/source/tests/pt/model/test_unused_params.py @@ -38,10 +38,10 @@ def test_unused(self): [True], ): if (not drrd) and (not grrg) and h2: - # skip the case h2 is not envolved + # skip the case h2 is not involved continue if (not grrg) and (not conv): - # skip the case g2 is not envolved + # skip the case g2 is not involved continue model = copy.deepcopy(model_dpa2) model["descriptor"]["repformer"]["nlayers"] = 2 diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py index fa9e5c138a..a7fcedcede 100644 --- a/source/tests/pt/test_training.py +++ b/source/tests/pt/test_training.py @@ -477,7 +477,7 @@ def test_dp_train(self): trainer.run() state_dict_trained = trainer.wrapper.model.state_dict() - # test fine-tuning using diffferent fitting_net, here using property fitting + # test fine-tuning using different fitting_net, here using property fitting finetune_model = self.config["training"].get("save_ckpt", "model.ckpt") + ".pt" self.config_property["model"], finetune_links = get_finetune_rules( finetune_model, diff --git a/source/tests/tf/common.py 
b/source/tests/tf/common.py index 705e9f7faa..2b912c7a10 100644 --- a/source/tests/tf/common.py +++ b/source/tests/tf/common.py @@ -969,7 +969,7 @@ def __init__(self, systems, set_prefix, batch_size, test_size, rcut, run_opt=Non chk_ret = self.data_systems[ii].check_test_size(test_size) if chk_ret is not None: warnings.warn( - "WARNNING: system %s required test size %d is larger than the size %d of the dataset %s" + "WARNING: system %s required test size %d is larger than the size %d of the dataset %s" % (self.system_dirs[ii], test_size, chk_ret[1], chk_ret[0]) ) diff --git a/source/tests/tf/test_model_pairtab.py b/source/tests/tf/test_model_pairtab.py index 5caeb0a053..0a09e70430 100644 --- a/source/tests/tf/test_model_pairtab.py +++ b/source/tests/tf/test_model_pairtab.py @@ -42,7 +42,7 @@ def test_model(self): rcut = jdata["model"]["rcut"] def pair_pot(r: float): - # LJ, as exmaple + # LJ, as example return 4 * (1 / r**12 - 1 / r**6) dx = 1e-4 diff --git a/source/tests/universal/common/cases/atomic_model/utils.py b/source/tests/universal/common/cases/atomic_model/utils.py index bfd2e2cd5f..97a6cf707b 100644 --- a/source/tests/universal/common/cases/atomic_model/utils.py +++ b/source/tests/universal/common/cases/atomic_model/utils.py @@ -40,7 +40,7 @@ class AtomicModelTestCase: expected_has_message_passing: bool """Expected whether having message passing.""" forward_wrapper: Callable[[Any], Any] - """Calss wrapper for forward method.""" + """Class wrapper for forward method.""" aprec_dict: dict[str, Optional[float]] """Dictionary of absolute precision in each test.""" rprec_dict: dict[str, Optional[float]] From 40b3ea1854438d19037864ea0770dc2241b293b6 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 28 Oct 2024 19:56:39 -0400 Subject: [PATCH 07/14] docs: document the floating-point precision of the model (#4240) ## Summary by CodeRabbit - **New Features** - Added a new section on `precision` in the documentation, enhancing navigation. - Introduced detailed guidelines on floating-point precision settings for the model. - Included structured instructions for creating models with the PyTorch backend. - **Documentation** - Expanded troubleshooting documentation related to model precision issues, including data accuracy and training recommendations. - Enhanced guidelines for integrating new components into user configurations and ensuring model integrity across different backends. --------- Signed-off-by: Jinzhe Zeng --- doc/development/create-a-model-pt.md | 9 +++++++++ doc/model/index.rst | 1 + doc/model/precision.md | 15 +++++++++++++++ doc/troubleshooting/precision.md | 1 + 4 files changed, 26 insertions(+) create mode 100644 doc/model/precision.md diff --git a/doc/development/create-a-model-pt.md b/doc/development/create-a-model-pt.md index 875067e2b8..8d2b1494b5 100644 --- a/doc/development/create-a-model-pt.md +++ b/doc/development/create-a-model-pt.md @@ -137,6 +137,15 @@ class SomeAtomicModel(BaseAtomicModel, torch.nn.Module): pass ``` +### Floating-point precision + +When creating a new component, the floating-point precision should obey the [Floating-point precision of the model](../model/precision.md) section. 
+In implementation, the component should + +- store parameters in the component precision, except those for output normalization; +- store output normalization parameters in {py:data}`deepmd.pt.utils.env.GLOBAL_PT_FLOAT_PRECISION`; +- before input normalization, cast the input tensor to the component precision; before output normalization, cast the output tensor to the {py:data}`deepmd.pt.utils.env.GLOBAL_PT_FLOAT_PRECISION`. + ## Register new arguments To let someone uses your new component in their input file, you need to create a new method that returns some `Argument` of your new component, and then register new arguments. For example, the code below diff --git a/doc/model/index.rst b/doc/model/index.rst index 8409d4ce97..c067ea4207 100644 --- a/doc/model/index.rst +++ b/doc/model/index.rst @@ -24,3 +24,4 @@ Model linear pairtab change-bias + precision diff --git a/doc/model/precision.md b/doc/model/precision.md new file mode 100644 index 0000000000..d8643c9c61 --- /dev/null +++ b/doc/model/precision.md @@ -0,0 +1,15 @@ +# Floating-point precision of the model + +The following options control the precision of the model: + +- The environment variable {envvar}`DP_INTERFACE_PREC` controls the interface precision of the model, the descriptor, and the fitting, the precision of the environmental matrix, and the precision of the normalized parameters for the environmental matrix and the fitting output. +- The training parameters {ref}`precision ` in the descriptor, the fitting, and the type embedding control the precision of neural networks in those components, and the subsequent operations after the output of neural networks. +- The reduced output (e.g. total energy) is always `float64`. + +Usually, the following two combinations of options are recommended: + +- Setting {envvar}`DP_INTERFACE_PREC` to `high` (default) and all {ref}`precision ` options to `float64` (default). +- Setting {envvar}`DP_INTERFACE_PREC` to `high` (default) and all {ref}`precision ` options to `float32`. + +The Python and C++ inference interfaces accept both `float64` and `float32` as the input and output arguments, whatever the floating-point precision of the model interface is. +Usually, the MD programs (such as LAMMPS) only use `float64` in their interfaces. diff --git a/doc/troubleshooting/precision.md b/doc/troubleshooting/precision.md index 5ebef97122..a754dbeb53 100644 --- a/doc/troubleshooting/precision.md +++ b/doc/troubleshooting/precision.md @@ -60,6 +60,7 @@ See [FAQ: How to tune Fitting/embedding-net size](./howtoset_netsize.md) for det In some cases, one may want to use the FP32 precision to make the model faster. For some applications, FP32 is enough and thus is recommended, but one should still be aware that the precision of FP32 is not as high as that of FP64. +See [Floating-point precision of the model](../model/precision.md) section for how to set the precision. ## Training From 95f0ed59aba6fedea1b2ca28c1a17b837f7236ff Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 28 Oct 2024 23:37:54 -0400 Subject: [PATCH 08/14] fix(lmp): apply NEIGHMASK to neighbor list (#4269) Fix #4250. See https://github.com/lammps/lammps/pull/581#issuecomment-316351879 for an explanation of `NEIGHMASK`. ## Summary by CodeRabbit ## Release Notes - **New Features** - Introduced a new function to set a mask for neighbor lists, enhancing configurability. - Added a method to the `InputNlist` structure for setting the mask. 
- Enhanced `ComputeDeeptensorAtom` and `FixDPLR` classes to utilize neighbor list masks in computations. - **Bug Fixes** - Improved validation of bonded pairs in the `FixDPLR` class with enhanced error handling. - **Documentation** - Updated documentation for new methods and functionalities related to neighbor list management. Signed-off-by: Jinzhe Zeng --- source/api_c/include/c_api.h | 12 +++++++++++- source/api_c/include/deepmd.hpp | 4 ++++ source/api_c/src/c_api.cc | 1 + source/api_cc/src/common.cc | 3 +++ source/lib/include/neighbor_list.h | 6 ++++++ source/lmp/compute_deeptensor_atom.cpp | 1 + source/lmp/fix_dplr.cpp | 1 + source/lmp/pair_deepmd.cpp | 2 ++ 8 files changed, 29 insertions(+), 1 deletion(-) diff --git a/source/api_c/include/c_api.h b/source/api_c/include/c_api.h index 2f88f25e43..cd940edc0d 100644 --- a/source/api_c/include/c_api.h +++ b/source/api_c/include/c_api.h @@ -12,7 +12,7 @@ extern "C" { /** C API version. Bumped whenever the API is changed. * @since API version 22 */ -#define DP_C_API_VERSION 22 +#define DP_C_API_VERSION 23 /** * @brief Neighbor list. @@ -68,6 +68,16 @@ extern DP_Nlist* DP_NewNlist_comm(int inum_, int* recvproc, void* world); +/* + * @brief Set mask for a neighbor list. + * + * @param nl Neighbor list. + * @param mask mask. + * @since API version 23 + * + **/ +extern void DP_NlistSetMask(DP_Nlist* nl, int mask); + /** * @brief Delete a neighbor list. * diff --git a/source/api_c/include/deepmd.hpp b/source/api_c/include/deepmd.hpp index 270bc94cc5..f1e04ef3bc 100644 --- a/source/api_c/include/deepmd.hpp +++ b/source/api_c/include/deepmd.hpp @@ -611,6 +611,10 @@ struct InputNlist { int *numneigh; /// @brief Array stores the core region atom's neighbor index int **firstneigh; + /** + * @brief Set mask for this neighbor list. + */ + void set_mask(int mask) { DP_NlistSetMask(nl, mask); }; }; /** diff --git a/source/api_c/src/c_api.cc b/source/api_c/src/c_api.cc index 56c5f9720f..9bb9e8a775 100644 --- a/source/api_c/src/c_api.cc +++ b/source/api_c/src/c_api.cc @@ -42,6 +42,7 @@ DP_Nlist* DP_NewNlist_comm(int inum_, DP_Nlist* new_nl = new DP_Nlist(nl); return new_nl; } +void DP_NlistSetMask(DP_Nlist* nl, int mask) { nl->nl.set_mask(mask); } void DP_DeleteNlist(DP_Nlist* nl) { delete nl; } DP_DeepPot::DP_DeepPot() {} diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index e84517ea7a..bd3f18c579 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -241,6 +241,9 @@ void deepmd::NeighborListData::copy_from_nlist(const InputNlist& inlist) { int jnum = inlist.numneigh[ii]; jlist[ii].resize(jnum); memcpy(&jlist[ii][0], inlist.firstneigh[ii], jnum * sizeof(int)); + for (int jj = 0; jj < jnum; ++jj) { + jlist[ii][jj] &= inlist.mask; + } } } diff --git a/source/lib/include/neighbor_list.h b/source/lib/include/neighbor_list.h index 95f5cb6174..bb4b8cf13c 100644 --- a/source/lib/include/neighbor_list.h +++ b/source/lib/include/neighbor_list.h @@ -42,6 +42,8 @@ struct InputNlist { int* recvproc; /// MPI_comm data in lmp void* world; + /// mask to the neighbor index + int mask = 0xFFFFFFFF; InputNlist() : inum(0), ilist(NULL), @@ -93,6 +95,10 @@ struct InputNlist { recvproc(recvproc), world(world) {}; ~InputNlist() {}; + /** + * @brief Set mask for this neighbor list. 
+ */ + void set_mask(int mask_) { mask = mask_; }; }; /** diff --git a/source/lmp/compute_deeptensor_atom.cpp b/source/lmp/compute_deeptensor_atom.cpp index 6e6e9508b7..68c97a629e 100644 --- a/source/lmp/compute_deeptensor_atom.cpp +++ b/source/lmp/compute_deeptensor_atom.cpp @@ -136,6 +136,7 @@ void ComputeDeeptensorAtom::compute_peratom() { neighbor->build_one(list); deepmd_compat::InputNlist lmp_list(list->inum, list->ilist, list->numneigh, list->firstneigh); + lmp_list.set_mask(NEIGHMASK); // declare outputs std::vector gtensor, force, virial, atensor, avirial; diff --git a/source/lmp/fix_dplr.cpp b/source/lmp/fix_dplr.cpp index 9f2b0eadb1..8a6be7d840 100644 --- a/source/lmp/fix_dplr.cpp +++ b/source/lmp/fix_dplr.cpp @@ -463,6 +463,7 @@ void FixDPLR::pre_force(int vflag) { NeighList *list = pair_deepmd->list; deepmd_compat::InputNlist lmp_list(list->inum, list->ilist, list->numneigh, list->firstneigh); + lmp_list.set_mask(NEIGHMASK); // declear output vector tensor; // compute diff --git a/source/lmp/pair_deepmd.cpp b/source/lmp/pair_deepmd.cpp index 2cb6cfacd4..09d97fe460 100644 --- a/source/lmp/pair_deepmd.cpp +++ b/source/lmp/pair_deepmd.cpp @@ -565,6 +565,7 @@ void PairDeepMD::compute(int eflag, int vflag) { commdata_->nswap, commdata_->sendnum, commdata_->recvnum, commdata_->firstrecv, commdata_->sendlist, commdata_->sendproc, commdata_->recvproc, &world); + lmp_list.set_mask(NEIGHMASK); deepmd_compat::InputNlist extend_lmp_list; if (atom->sp_flag) { extend(extend_inum, extend_ilist, extend_numneigh, extend_neigh, @@ -574,6 +575,7 @@ void PairDeepMD::compute(int eflag, int vflag) { extend_lmp_list = deepmd_compat::InputNlist(extend_inum, &extend_ilist[0], &extend_numneigh[0], &extend_firstneigh[0]); + extend_lmp_list.set_mask(NEIGHMASK); } if (single_model || multi_models_no_mod_devi) { // cvflag_atom is the right flag for the cvatom matrix From abd1c9cd9575942e1030dac8e4ac91166a93af8f Mon Sep 17 00:00:00 2001 From: "A bot of @njzjz" <48687836+njzjz-bot@users.noreply.github.com> Date: Mon, 28 Oct 2024 23:40:59 -0400 Subject: [PATCH 09/14] docs: replace sphinx-rtd-theme with sphinx-book-theme (#4266) Compared to `sphinx-rtd-theme`, `sphinx-book-theme` is a clean and modern Sphinx theme. Generated by the task: https://github.com/njzjz-bot/njzjz-bot/issues/11. ## Summary by CodeRabbit - **New Features** - Updated documentation theme to enhance visual presentation with the new "sphinx_book_theme." - Introduced a new extension to manage table of contents visibility. - Enhanced dark mode styles for improved user experience. - **Bug Fixes** - Improved compatibility with documentation tools by updating dependencies in the project configuration. - Updated version constraints for documentation-related dependencies. 
--------- Signed-off-by: Jinzhe Zeng Co-authored-by: Jinzhe Zeng Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- doc/_static/css/custom.css | 2 +- doc/conf.py | 15 ++++++++++++--- pyproject.toml | 5 +++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/doc/_static/css/custom.css b/doc/_static/css/custom.css index d0b761e71d..8bcfdd3c7e 100644 --- a/doc/_static/css/custom.css +++ b/doc/_static/css/custom.css @@ -11,7 +11,7 @@ pre { img.platform-icon { height: 2ex; } -@media (prefers-color-scheme: dark) { +html[data-theme="dark"] { .wy-side-nav-search .wy-dropdown > a img.logo, .wy-side-nav-search > a img.logo { content: url("../logo-dark.svg"); diff --git a/doc/conf.py b/doc/conf.py index 51d463fd1f..c72e05bf8a 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -44,7 +44,7 @@ # ones. # extensions = [ # 'recommonmark', -# "sphinx_rtd_theme", +# "sphinx_book_theme", # 'myst_parser', # 'sphinx_markdown_tables', # 'sphinx.ext.autosummary' @@ -53,7 +53,7 @@ extensions = [ "deepmodeling_sphinx", "dargs.sphinx", - "sphinx_rtd_theme", + "sphinx_book_theme", "myst_nb", "sphinx.ext.autosummary", "sphinx.ext.mathjax", @@ -70,6 +70,7 @@ "autoapi.extension", "sphinxcontrib.programoutput", "sphinxcontrib.moderncmakedomain", + "sphinx_remove_toctrees", ] # breathe_domain_by_extension = { @@ -175,9 +176,15 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = "sphinx_rtd_theme" +html_theme = "sphinx_book_theme" html_logo = "_static/logo.svg" +html_theme_options = { + "logo": { + "image_light": "_static/logo.svg", + "image_dark": "_static/logo-dark.svg", + } +} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
@@ -215,3 +222,5 @@ napoleon_numpy_docstring = False bibtex_bibfiles = ["../CITATIONS.bib"] + +remove_from_toctrees = ["autoapi/**/*", "API_CC/*", "api_c/*", "api_core/*"] diff --git a/pyproject.toml b/pyproject.toml index f4f399156a..1faacb973c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,7 +89,7 @@ test = [ ] docs = [ "sphinx>=3.1.1", - "sphinx_rtd_theme>=1.0.0rc1", + "sphinx-book-theme", "myst-nb>=1.0.0rc0", "myst-parser>=0.19.2", "sphinx-design", @@ -97,7 +97,7 @@ docs = [ "exhale>=0.3.7", "numpydoc", "ase", - "deepmodeling-sphinx>=0.1.0", + "deepmodeling-sphinx>=0.3.0", "dargs>=0.3.4", "sphinx-argparse<0.5.0", "pygments-lammps", @@ -105,6 +105,7 @@ docs = [ "sphinx-autoapi>=3.0.0", "sphinxcontrib-programoutput", "sphinxcontrib-moderncmakedomain", + "sphinx-remove-toctrees", ] lmp = [ "lammps~=2024.8.29.1.0", From b647547b212b48b84134c5d5afe45eb2d093fac8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Oct 2024 03:55:05 +0000 Subject: [PATCH 10/14] [pre-commit.ci] pre-commit autoupdate (#4268) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.7.0 → v0.7.1](https://github.com/astral-sh/ruff-pre-commit/compare/v0.7.0...v0.7.1) - [github.com/asottile/blacken-docs: 1.19.0 → 1.19.1](https://github.com/asottile/blacken-docs/compare/1.19.0...1.19.1) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 53fdd9b71c..6cb534fd22 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: exclude: ^source/3rdparty - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.7.0 + rev: v0.7.1 hooks: - id: ruff args: ["--fix"] @@ -55,7 +55,7 @@ repos: exclude: ^source/3rdparty # Python inside docs - repo: https://github.com/asottile/blacken-docs - rev: 1.19.0 + rev: 1.19.1 hooks: - id: blacken-docs # C++ From 82aaa0db8b2e484d2179112b509bb8bcadc6ab1f Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 29 Oct 2024 14:51:23 -0400 Subject: [PATCH 11/14] feat(jax): neighbor stat (#4258) ## Summary by CodeRabbit ## Release Notes - **New Features** - Introduced `NeighborStat` and `NeighborStatOP` classes for enhanced neighbor statistics computation. - Added `AutoBatchSize` class to manage automatic batch sizing in deep learning applications. - **Improvements** - Enhanced `JAXBackend` functionality with implemented properties for neighbor statistics and serialization. - Refactored neighbor counting logic for better clarity and modularity. - **Tests** - Updated unit tests for `neighbor_stat` to support multiple backends (TensorFlow, PyTorch, NumPy, JAX). - Removed outdated test files to streamline testing processes. 
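Since this change enables both the `ENTRY_POINT` and `NEIGHBOR_STAT` features for the JAX backend, the neighbor statistics should become reachable from the command line. A hedged sketch of the expected invocation (the `-s`/`-r`/`-t` flags follow the existing `dp neighbor-stat` interface; the data path and type map are placeholders):

```bash
dp --jax neighbor-stat -s ./data -r 6.0 -t O H
```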
--------- Signed-off-by: Jinzhe Zeng Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- deepmd/backend/jax.py | 10 +- deepmd/dpmodel/utils/neighbor_stat.py | 35 +++--- deepmd/jax/utils/auto_batch_size.py | 59 ++++++++++ deepmd/jax/utils/neighbor_stat.py | 104 ++++++++++++++++++ .../common/dpmodel/test_neighbor_stat.py | 69 ------------ .../{pt => consistent}/test_neighbor_stat.py | 24 +++- source/tests/tf/test_neighbor_stat.py | 68 ------------ 7 files changed, 210 insertions(+), 159 deletions(-) create mode 100644 deepmd/jax/utils/auto_batch_size.py create mode 100644 deepmd/jax/utils/neighbor_stat.py delete mode 100644 source/tests/common/dpmodel/test_neighbor_stat.py rename source/tests/{pt => consistent}/test_neighbor_stat.py (77%) delete mode 100644 source/tests/tf/test_neighbor_stat.py diff --git a/deepmd/backend/jax.py b/deepmd/backend/jax.py index bb2fba5a7c..7131f4d534 100644 --- a/deepmd/backend/jax.py +++ b/deepmd/backend/jax.py @@ -33,9 +33,9 @@ class JAXBackend(Backend): """The formal name of the backend.""" features: ClassVar[Backend.Feature] = ( Backend.Feature.IO - # Backend.Feature.ENTRY_POINT + | Backend.Feature.ENTRY_POINT # | Backend.Feature.DEEP_EVAL - # | Backend.Feature.NEIGHBOR_STAT + | Backend.Feature.NEIGHBOR_STAT ) """The features of the backend.""" suffixes: ClassVar[list[str]] = [".jax"] @@ -82,7 +82,11 @@ def neighbor_stat(self) -> type["NeighborStat"]: type[NeighborStat] The neighbor statistics of the backend. """ - raise NotImplementedError + from deepmd.jax.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat @property def serialize_hook(self) -> Callable[[str], dict]: diff --git a/deepmd/dpmodel/utils/neighbor_stat.py b/deepmd/dpmodel/utils/neighbor_stat.py index 43ca2cadd1..3aea8ceeb9 100644 --- a/deepmd/dpmodel/utils/neighbor_stat.py +++ b/deepmd/dpmodel/utils/neighbor_stat.py @@ -6,6 +6,7 @@ Optional, ) +import array_api_compat import numpy as np from deepmd.dpmodel.common import ( @@ -68,42 +69,42 @@ def call( np.ndarray The maximal number of neighbors """ + xp = array_api_compat.array_namespace(coord, atype) nframes = coord.shape[0] - coord = coord.reshape(nframes, -1, 3) + coord = xp.reshape(coord, (nframes, -1, 3)) nloc = coord.shape[1] - coord = coord.reshape(nframes, nloc * 3) + coord = xp.reshape(coord, (nframes, nloc * 3)) extend_coord, extend_atype, _ = extend_coord_with_ghosts( coord, atype, cell, self.rcut ) - coord1 = extend_coord.reshape(nframes, -1) + coord1 = xp.reshape(extend_coord, (nframes, -1)) nall = coord1.shape[1] // 3 coord0 = coord1[:, : nloc * 3] diff = ( - coord1.reshape([nframes, -1, 3])[:, None, :, :] - - coord0.reshape([nframes, -1, 3])[:, :, None, :] + xp.reshape(coord1, [nframes, -1, 3])[:, None, :, :] + - xp.reshape(coord0, [nframes, -1, 3])[:, :, None, :] ) assert list(diff.shape) == [nframes, nloc, nall, 3] # remove the diagonal elements - mask = np.eye(nloc, nall, dtype=bool) - diff[:, mask] = np.inf - rr2 = np.sum(np.square(diff), axis=-1) - min_rr2 = np.min(rr2, axis=-1) + mask = xp.eye(nloc, nall, dtype=xp.bool) + mask = xp.tile(mask[None, :, :, None], (nframes, 1, 1, 3)) + diff = xp.where(mask, xp.full_like(diff, xp.inf), diff) + rr2 = xp.sum(xp.square(diff), axis=-1) + min_rr2 = xp.min(rr2, axis=-1) # count the number of neighbors if not self.mixed_types: mask = rr2 < self.rcut**2 - nnei = np.zeros((nframes, nloc, self.ntypes), dtype=int) + nneis = [] for ii in range(self.ntypes): - nnei[:, :, ii] = np.sum( - mask & (extend_atype == ii)[:, None, :], 
axis=-1 - ) + nneis.append(xp.sum(mask & (extend_atype == ii)[:, None, :], axis=-1)) + nnei = xp.stack(nneis, axis=-1) else: mask = rr2 < self.rcut**2 # virtual type (<0) are not counted - nnei = np.sum(mask & (extend_atype >= 0)[:, None, :], axis=-1).reshape( - nframes, nloc, 1 - ) - max_nnei = np.max(nnei, axis=1) + nnei = xp.sum(mask & (extend_atype >= 0)[:, None, :], axis=-1) + nnei = xp.reshape(nnei, (nframes, nloc, 1)) + max_nnei = xp.max(nnei, axis=1) return min_rr2, max_nnei diff --git a/deepmd/jax/utils/auto_batch_size.py b/deepmd/jax/utils/auto_batch_size.py new file mode 100644 index 0000000000..eec6766ae2 --- /dev/null +++ b/deepmd/jax/utils/auto_batch_size.py @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import jaxlib + +from deepmd.jax.env import ( + jax, +) +from deepmd.utils.batch_size import AutoBatchSize as AutoBatchSizeBase + + +class AutoBatchSize(AutoBatchSizeBase): + """Auto batch size. + + Parameters + ---------- + initial_batch_size : int, default: 1024 + initial batch size (number of total atoms) when DP_INFER_BATCH_SIZE + is not set + factor : float, default: 2. + increased factor + + """ + + def __init__( + self, + initial_batch_size: int = 1024, + factor: float = 2.0, + ): + super().__init__( + initial_batch_size=initial_batch_size, + factor=factor, + ) + + def is_gpu_available(self) -> bool: + """Check if GPU is available. + + Returns + ------- + bool + True if GPU is available + """ + return jax.devices()[0].platform == "gpu" + + def is_oom_error(self, e: Exception) -> bool: + """Check if the exception is an OOM error. + + Parameters + ---------- + e : Exception + Exception + """ + # several sources think CUSOLVER_STATUS_INTERNAL_ERROR is another out-of-memory error, + # such as https://github.com/JuliaGPU/CUDA.jl/issues/1924 + # (the meaningless error message should be considered as a bug in cusolver) + if isinstance(e, (jaxlib.xla_extension.XlaRuntimeError, ValueError)) and ( + "RESOURCE_EXHAUSTED:" in e.args[0] + ): + return True + return False diff --git a/deepmd/jax/utils/neighbor_stat.py b/deepmd/jax/utils/neighbor_stat.py new file mode 100644 index 0000000000..6d9bc872e8 --- /dev/null +++ b/deepmd/jax/utils/neighbor_stat.py @@ -0,0 +1,104 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from collections.abc import ( + Iterator, +) +from typing import ( + Optional, +) + +import numpy as np + +from deepmd.dpmodel.common import ( + to_numpy_array, +) +from deepmd.dpmodel.utils.neighbor_stat import ( + NeighborStatOP, +) +from deepmd.jax.common import ( + to_jax_array, +) +from deepmd.jax.utils.auto_batch_size import ( + AutoBatchSize, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.neighbor_stat import NeighborStat as BaseNeighborStat + + +class NeighborStat(BaseNeighborStat): + """Neighbor statistics using JAX. + + Parameters + ---------- + ntypes : int + The num of atom types + rcut : float + The cut-off radius + mixed_type : bool, optional, default=False + Treat all types as a single type. + """ + + def __init__( + self, + ntypes: int, + rcut: float, + mixed_type: bool = False, + ) -> None: + super().__init__(ntypes, rcut, mixed_type) + self.op = NeighborStatOP(ntypes, rcut, mixed_type) + self.auto_batch_size = AutoBatchSize() + + def iterator( + self, data: DeepmdDataSystem + ) -> Iterator[tuple[np.ndarray, float, str]]: + """Iterator method for producing neighbor statistics data. 
+ + Yields + ------ + np.ndarray + The maximal number of neighbors + float + The squared minimal distance between two atoms + str + The directory of the data system + """ + for ii in range(len(data.system_dirs)): + for jj in data.data_systems[ii].dirs: + data_set = data.data_systems[ii] + data_set_data = data_set._load_set(jj) + minrr2, max_nnei = self.auto_batch_size.execute_all( + self._execute, + data_set_data["coord"].shape[0], + data_set.get_natoms(), + data_set_data["coord"], + data_set_data["type"], + data_set_data["box"] if data_set.pbc else None, + ) + yield np.max(max_nnei, axis=0), np.min(minrr2), jj + + def _execute( + self, + coord: np.ndarray, + atype: np.ndarray, + cell: Optional[np.ndarray], + ): + """Execute the operation. + + Parameters + ---------- + coord + The coordinates of atoms. + atype + The atom types. + cell + The cell. + """ + minrr2, max_nnei = self.op( + to_jax_array(coord), + to_jax_array(atype), + to_jax_array(cell), + ) + minrr2 = to_numpy_array(minrr2) + max_nnei = to_numpy_array(max_nnei) + return minrr2, max_nnei diff --git a/source/tests/common/dpmodel/test_neighbor_stat.py b/source/tests/common/dpmodel/test_neighbor_stat.py deleted file mode 100644 index 8dd700f608..0000000000 --- a/source/tests/common/dpmodel/test_neighbor_stat.py +++ /dev/null @@ -1,69 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import shutil -import unittest - -import dpdata -import numpy as np - -from deepmd.entrypoints.neighbor_stat import ( - neighbor_stat, -) - -from ...seed import ( - GLOBAL_SEED, -) - - -def gen_sys(nframes): - rng = np.random.default_rng(GLOBAL_SEED) - natoms = 1000 - data = {} - X, Y, Z = np.mgrid[0:2:3j, 0:2:3j, 0:2:3j] - positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T # + 0.1 - data["coords"] = np.repeat(positions[np.newaxis, :, :], nframes, axis=0) - data["forces"] = rng.random([nframes, natoms, 3]) - data["cells"] = np.array([3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0]).reshape( - 1, 3, 3 - ) - data["energies"] = rng.random([nframes, 1]) - data["atom_names"] = ["TYPE"] - data["atom_numbs"] = [27] - data["atom_types"] = np.repeat(0, 27) - return data - - -class TestNeighborStat(unittest.TestCase): - def setUp(self): - data0 = gen_sys(1) - sys0 = dpdata.LabeledSystem() - sys0.data = data0 - sys0.to_deepmd_npy("system_0", set_size=1) - - def tearDown(self): - shutil.rmtree("system_0") - - def test_neighbor_stat(self): - for rcut in (0.0, 1.0, 2.0, 4.0): - for mixed_type in (True, False): - with self.subTest(rcut=rcut, mixed_type=mixed_type): - rcut += 1e-3 # prevent numerical errors - min_nbor_dist, max_nbor_size = neighbor_stat( - system="system_0", - rcut=rcut, - type_map=["TYPE", "NO_THIS_TYPE"], - mixed_type=mixed_type, - backend="numpy", - ) - upper = np.ceil(rcut) + 1 - X, Y, Z = np.mgrid[-upper:upper, -upper:upper, -upper:upper] - positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T - # distance to (0,0,0) - distance = np.linalg.norm(positions, axis=1) - expected_neighbors = np.count_nonzero( - np.logical_and(distance > 0, distance <= rcut) - ) - self.assertAlmostEqual(min_nbor_dist, 1.0, 6) - ret = [expected_neighbors] - if not mixed_type: - ret.append(0) - np.testing.assert_array_equal(max_nbor_size, ret) diff --git a/source/tests/pt/test_neighbor_stat.py b/source/tests/consistent/test_neighbor_stat.py similarity index 77% rename from source/tests/pt/test_neighbor_stat.py rename to source/tests/consistent/test_neighbor_stat.py index 08ba453d74..55181a6903 100644 --- a/source/tests/pt/test_neighbor_stat.py +++ 
b/source/tests/consistent/test_neighbor_stat.py @@ -12,6 +12,11 @@ from ..seed import ( GLOBAL_SEED, ) +from .common import ( + INSTALLED_JAX, + INSTALLED_PT, + INSTALLED_TF, +) def gen_sys(nframes): @@ -42,7 +47,7 @@ def setUp(self): def tearDown(self): shutil.rmtree("system_0") - def test_neighbor_stat(self): + def run_neighbor_stat(self, backend): for rcut in (0.0, 1.0, 2.0, 4.0): for mixed_type in (True, False): with self.subTest(rcut=rcut, mixed_type=mixed_type): @@ -52,7 +57,7 @@ def test_neighbor_stat(self): rcut=rcut, type_map=["TYPE", "NO_THIS_TYPE"], mixed_type=mixed_type, - backend="pytorch", + backend=backend, ) upper = np.ceil(rcut) + 1 X, Y, Z = np.mgrid[-upper:upper, -upper:upper, -upper:upper] @@ -67,3 +72,18 @@ def test_neighbor_stat(self): if not mixed_type: ret.append(0) np.testing.assert_array_equal(max_nbor_size, ret) + + @unittest.skipUnless(INSTALLED_TF, "tensorflow is not installed") + def test_neighbor_stat_tf(self): + self.run_neighbor_stat("tensorflow") + + @unittest.skipUnless(INSTALLED_PT, "pytorch is not installed") + def test_neighbor_stat_pt(self): + self.run_neighbor_stat("pytorch") + + def test_neighbor_stat_dp(self): + self.run_neighbor_stat("numpy") + + @unittest.skipUnless(INSTALLED_JAX, "jax is not installed") + def test_neighbor_stat_jax(self): + self.run_neighbor_stat("jax") diff --git a/source/tests/tf/test_neighbor_stat.py b/source/tests/tf/test_neighbor_stat.py deleted file mode 100644 index 22b7790958..0000000000 --- a/source/tests/tf/test_neighbor_stat.py +++ /dev/null @@ -1,68 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import shutil -import unittest - -import dpdata -import numpy as np - -from deepmd.tf.entrypoints.neighbor_stat import ( - neighbor_stat, -) - -from ..seed import ( - GLOBAL_SEED, -) - - -def gen_sys(nframes): - rng = np.random.default_rng(GLOBAL_SEED) - natoms = 1000 - data = {} - X, Y, Z = np.mgrid[0:2:3j, 0:2:3j, 0:2:3j] - positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T # + 0.1 - data["coords"] = np.repeat(positions[np.newaxis, :, :], nframes, axis=0) - data["forces"] = rng.random([nframes, natoms, 3]) - data["cells"] = np.array([3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0]).reshape( - 1, 3, 3 - ) - data["energies"] = rng.random([nframes, 1]) - data["atom_names"] = ["TYPE"] - data["atom_numbs"] = [27] - data["atom_types"] = np.repeat(0, 27) - return data - - -class TestNeighborStat(unittest.TestCase): - def setUp(self): - data0 = gen_sys(1) - sys0 = dpdata.LabeledSystem() - sys0.data = data0 - sys0.to_deepmd_npy("system_0", set_size=1) - - def tearDown(self): - shutil.rmtree("system_0") - - def test_neighbor_stat(self): - for rcut in (0.0, 1.0, 2.0, 4.0): - for mixed_type in (True, False): - with self.subTest(rcut=rcut, mixed_type=mixed_type): - rcut += 1e-3 # prevent numerical errors - min_nbor_dist, max_nbor_size = neighbor_stat( - system="system_0", - rcut=rcut, - type_map=["TYPE", "NO_THIS_TYPE"], - mixed_type=mixed_type, - ) - upper = np.ceil(rcut) + 1 - X, Y, Z = np.mgrid[-upper:upper, -upper:upper, -upper:upper] - positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T - # distance to (0,0,0) - distance = np.linalg.norm(positions, axis=1) - expected_neighbors = np.count_nonzero( - np.logical_and(distance > 0, distance <= rcut) - ) - self.assertAlmostEqual(min_nbor_dist, 1.0, 6) - ret = [expected_neighbors] - if not mixed_type: - ret.append(0) - np.testing.assert_array_equal(max_nbor_size, ret) From dd36e6c59d0983176013191a444ae0b4491f8b10 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 29 
Oct 2024 14:51:35 -0400 Subject: [PATCH 12/14] docs: document JAX backend (#4259) ## Summary by CodeRabbit ## Release Notes - **New Features** - Introduced support for the JAX backend, expanding user options for model training and execution. - Added installation instructions for JAX within the source installation documentation. - Included new environment variables related to JAX to enhance configuration options. - **Documentation Updates** - Updated various documentation files to reflect the addition of JAX, including sections on model commands, supported backends, and environment variables. - Enhanced documentation with a visual representation for JAX through an icon. - Improved clarity and organization of installation instructions for DeePMD-kit. - Updated the README to highlight JAX as a supported backend and reflect changes in version history. --------- Signed-off-by: Jinzhe Zeng --- README.md | 4 +- doc/_static/jax.svg | 1 + doc/backend.md | 9 +++ doc/conf.py | 1 + doc/env.md | 1 + doc/install/easy-install-dev.md | 4 +- doc/install/easy-install.md | 96 ++++++++++++++++++++++++++---- doc/install/install-from-source.md | 15 +++++ doc/model/sel.md | 8 +++ doc/model/train-energy.md | 4 +- doc/model/train-fitting-dos.md | 4 +- doc/model/train-se-atten.md | 4 +- doc/model/train-se-e2-a.md | 4 +- doc/model/train-se-e2-r.md | 4 +- 14 files changed, 131 insertions(+), 28 deletions(-) create mode 100644 doc/_static/jax.svg diff --git a/README.md b/README.md index e821a29768..55f927d62b 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ For more information, check the [documentation](https://deepmd.readthedocs.io/). ### Highlighted features -- **interfaced with multiple backends**, including TensorFlow and PyTorch, the most popular deep learning frameworks, making the training process highly automatic and efficient. +- **interfaced with multiple backends**, including TensorFlow, PyTorch, and JAX, the most popular deep learning frameworks, making the training process highly automatic and efficient. - **interfaced with high-performance classical MD and quantum (path-integral) MD packages**, including LAMMPS, i-PI, AMBER, CP2K, GROMACS, OpenMM, and ABUCUS. - **implements the Deep Potential series models**, which have been successfully applied to finite and extended systems, including organic molecules, metals, semiconductors, insulators, etc. - **implements MPI and GPU supports**, making it highly efficient for high-performance parallel and distributed computing. @@ -72,7 +72,7 @@ See [our latest paper](https://doi.org/10.1063/5.0155600) for details of all fea #### v3 -- Multiple backends supported. Add a PyTorch backend. +- Multiple backends supported. Add PyTorch and JAX backends. - The DPA-2 model. ## Install and use DeePMD-kit diff --git a/doc/_static/jax.svg b/doc/_static/jax.svg new file mode 100644 index 0000000000..360a6624d4 --- /dev/null +++ b/doc/_static/jax.svg @@ -0,0 +1 @@ + diff --git a/doc/backend.md b/doc/backend.md index f6eaf0e45b..cf99eea9cb 100644 --- a/doc/backend.md +++ b/doc/backend.md @@ -23,6 +23,15 @@ DeePMD-kit does not use the TensorFlow v2 API but uses the TensorFlow v1 API (`t [PyTorch](https://pytorch.org/) 2.0 or above is required. While `.pth` and `.pt` are the same in the PyTorch package, they have different meanings in the DeePMD-kit to distinguish the model and the checkpoint. 
+### JAX {{ jax_icon }} + +- Model filename extension: `.xlo` +- Checkpoint filename extension: `.jax` + +[JAX](https://jax.readthedocs.io/) 0.4.33 (which requires Python 3.10 or above) or above is required. +Both `.xlo` and `.jax` are customized format extensions defined in DeePMD-kit, since JAX has no convention for file extensions. +Currently, this backend is under active development and does not yet support training or the C++ interface. + ### DP {{ dpmodel_icon }} :::{note} diff --git a/doc/conf.py b/doc/conf.py index c72e05bf8a..eca7665712 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -168,6 +168,7 @@ myst_substitutions = { "tensorflow_icon": """![TensorFlow](/_static/tensorflow.svg){class=platform-icon}""", "pytorch_icon": """![PyTorch](/_static/pytorch.svg){class=platform-icon}""", + "jax_icon": """![JAX](/_static/jax.svg){class=platform-icon}""", "dpmodel_icon": """![DP](/_static/logo_icon.svg){class=platform-icon}""", } diff --git a/doc/env.md b/doc/env.md index 65a50ff163..3cf42b724a 100644 --- a/doc/env.md +++ b/doc/env.md @@ -31,6 +31,7 @@ See [How to control the parallelism of a job](./troubleshooting/howtoset_num_nod - If ROCm is used, [ROCm environment variables](https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html#environment-variables) can be used to control ROCm devices. - {{ tensorflow_icon }} If TensorFlow is used, TensorFlow environment variables can be used. - {{ pytorch_icon }} If PyTorch is used, [PyTorch environment variables](https://pytorch.org/docs/stable/torch_environment_variables.html) can be used. +- {{ jax_icon }} [`JAX_PLATFORMS`](https://jax.readthedocs.io/en/latest/faq.html#controlling-data-and-computation-placement-on-devices) and [`XLA_FLAGS`](https://jax.readthedocs.io/en/latest/gpu_performance_tips.html#xla-performance-flags) are commonly used. ## Python interface only diff --git a/doc/install/easy-install-dev.md b/doc/install/easy-install-dev.md index bb68272ace..54309a8582 100644 --- a/doc/install/easy-install-dev.md +++ b/doc/install/easy-install-dev.md @@ -16,14 +16,12 @@ For CUDA 11.8 support, use the `devel_cu11` tag. ## Install with pip -Below is an one-line shell command to download the [artifact](https://nightly.link/deepmodeling/deepmd-kit/workflows/build_wheel/devel/artifact.zip) containing wheels and install it with `pip`: +Follow [the documentation for the stable version](easy-install.md#install-python-interface-with-pip), but add `--pre` and `--extra-index-url` options like below: ```sh pip install -U --pre deepmd-kit[gpu,cu12,lmp,torch] --extra-index-url https://deepmodeling.github.io/deepmd-kit/simple ``` -`cu12` and `lmp` are optional, which is the same as the stable version.
- ## Download pre-compiled C Library {{ tensorflow_icon }} :::{note} diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md index 99962d08b8..c2260b58b6 100644 --- a/doc/install/easy-install.md +++ b/doc/install/easy-install.md @@ -104,44 +104,114 @@ docker pull ghcr.io/deepmodeling/deepmd-kit:2.2.8_cuda12.0_gpu ## Install Python interface with pip -If you have no existing TensorFlow installed, you can use `pip` to install the pre-built package of the Python interface with CUDA 12 supported: +[Create a new environment](https://docs.deepmodeling.com/faq/conda.html#how-to-create-a-new-conda-pip-environment), and then execute the following command: + +:::::::{tab-set} + +::::::{tab-item} TensorFlow {{ tensorflow_icon }} + +:::::{tab-set} + +::::{tab-item} CUDA 12 ```bash -pip install deepmd-kit[gpu,cu12,torch] +pip install deepmd-kit[gpu,cu12] ``` `cu12` is required only when CUDA Toolkit and cuDNN were not installed. -To install the package built against CUDA 11.8, use +:::: + +::::{tab-item} CUDA 11 ```bash -pip install torch --index-url https://download.pytorch.org/whl/cu118 pip install deepmd-kit-cu11[gpu,cu11] ``` -Or install the CPU version without CUDA supported: +:::: + +::::{tab-item} CPU ```bash -pip install torch --index-url https://download.pytorch.org/whl/cpu pip install deepmd-kit[cpu] ``` +:::: + +::::: + [The LAMMPS module](../third-party/lammps-command.md) and [the i-PI driver](../third-party/ipi.md) are only provided on Linux and macOS for the TensorFlow backend. To install LAMMPS and/or i-PI, add `lmp` and/or `ipi` to extras: ```bash -pip install deepmd-kit[gpu,cu12,torch,lmp,ipi] +pip install deepmd-kit[gpu,cu12,lmp,ipi] ``` MPICH is required for parallel running. -:::{Warning} -When installing from pip, only the TensorFlow {{ tensorflow_icon }} backend is supported with LAMMPS and i-PI. -::: +:::::: + +::::::{tab-item} PyTorch {{ pytorch_icon }} + +:::::{tab-set} + +::::{tab-item} CUDA 12 + +```bash +pip install deepmd-kit[torch] +``` + +:::: + +::::{tab-item} CUDA 11.8 + +```bash +pip install torch --index-url https://download.pytorch.org/whl/cu118 +pip install deepmd-kit-cu11 +``` + +:::: + +::::{tab-item} CPU + +```bash +pip install torch --index-url https://download.pytorch.org/whl/cpu +pip install deepmd-kit +``` + +:::: + +::::: + +:::::: + +::::::{tab-item} JAX {{ jax_icon }} + +:::::{tab-set} + +::::{tab-item} CUDA 12 + +```bash +pip install deepmd-kit[jax] jax[cuda12] +``` + +:::: + +::::{tab-item} CPU + +```bash +pip install deepmd-kit[jax] +``` + +:::: + +::::: + +:::::: + +::::::: -It is suggested to install the package into an isolated environment. The supported platform includes Linux x86-64 and aarch64 with GNU C Library 2.28 or above, macOS x86-64 and arm64, and Windows x86-64. -A specific version of TensorFlow and PyTorch which is compatible with DeePMD-kit will be also installed. :::{Warning} -If your platform is not supported, or you want to build against the installed TensorFlow, or you want to enable ROCM support, please [build from source](install-from-source.md). +If your platform is not supported, or you want to build against the installed backends, or you want to enable ROCM support, please [build from source](install-from-source.md). 
::: diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md index 07239cd3b7..4a0a104b7e 100644 --- a/doc/install/install-from-source.md +++ b/doc/install/install-from-source.md @@ -78,6 +78,21 @@ One can also [use conda](https://docs.deepmodeling.org/faq/conda.html) to instal ::: +:::{tab-item} JAX {{ jax_icon }} + +To install [JAX AI Stack](https://github.com/jax-ml/jax-ai-stack), run + +```sh +pip install jax-ai-stack +``` + +One can also install packages in JAX AI Stack manually. +Follow [JAX documentation](https://jax.readthedocs.io/en/latest/installation.html) to install JAX built against different CUDA versions or without CUDA. + +One can also [use conda](https://docs.deepmodeling.org/faq/conda.html) to install JAX from [conda-forge](https://conda-forge.org). + +::: + :::: It is important that every time a new shell is started and one wants to use `DeePMD-kit`, the virtual environment should be activated by diff --git a/doc/model/sel.md b/doc/model/sel.md index 4908954618..babea1d463 100644 --- a/doc/model/sel.md +++ b/doc/model/sel.md @@ -24,6 +24,14 @@ dp --pt neighbor-stat -s data -r 6.0 -t O H ::: +:::{tab-item} JAX {{ jax_icon }} + +```sh +dp --jax neighbor-stat -s data -r 6.0 -t O H +``` + +::: + :::: where `data` is the directory of data, `6.0` is the cutoff radius, and `O` and `H` is the type map. The program will give the `max_nbor_size`. For example, `max_nbor_size` of the water example is `[38, 72]`, meaning an atom may have 38 O neighbors and 72 H neighbors in the training data. diff --git a/doc/model/train-energy.md b/doc/model/train-energy.md index 75d31d4670..484564b14f 100644 --- a/doc/model/train-energy.md +++ b/doc/model/train-energy.md @@ -1,7 +1,7 @@ -# Fit energy {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }} +# Fit energy {{ tensorflow_icon }} {{ pytorch_icon }} {{ jax_icon }} {{ dpmodel_icon }} :::{note} -**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, JAX {{ jax_icon }}, DP {{ dpmodel_icon }} ::: In this section, we will take `$deepmd_source_dir/examples/water/se_e2_a/input.json` as an example of the input file. diff --git a/doc/model/train-fitting-dos.md b/doc/model/train-fitting-dos.md index d04dbc669c..fb4a3677e5 100644 --- a/doc/model/train-fitting-dos.md +++ b/doc/model/train-fitting-dos.md @@ -1,7 +1,7 @@ -# Fit electronic density of states (DOS) {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }} +# Fit electronic density of states (DOS) {{ tensorflow_icon }} {{ pytorch_icon }} {{ jax_icon }} {{ dpmodel_icon }} :::{note} -**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, JAX {{ jax_icon }}, DP {{ dpmodel_icon }} ::: Here we present an API to DeepDOS model, which can be used to fit electronic density of state (DOS) (which is a vector). 
diff --git a/doc/model/train-se-atten.md b/doc/model/train-se-atten.md index bebce78365..3e88a4e950 100644 --- a/doc/model/train-se-atten.md +++ b/doc/model/train-se-atten.md @@ -1,7 +1,7 @@ -# Descriptor `"se_atten"` {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }} +# Descriptor `"se_atten"` {{ tensorflow_icon }} {{ pytorch_icon }} {{ jax_icon }} {{ dpmodel_icon }} :::{note} -**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, JAX {{ jax_icon }}, DP {{ dpmodel_icon }} ::: ## DPA-1: Pretraining of Attention-based Deep Potential Model for Molecular Simulation diff --git a/doc/model/train-se-e2-a.md b/doc/model/train-se-e2-a.md index 81b95399e0..d4a4510a31 100644 --- a/doc/model/train-se-e2-a.md +++ b/doc/model/train-se-e2-a.md @@ -1,7 +1,7 @@ -# Descriptor `"se_e2_a"` {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }} +# Descriptor `"se_e2_a"` {{ tensorflow_icon }} {{ pytorch_icon }} {{ jax_icon }} {{ dpmodel_icon }} :::{note} -**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, JAX {{ jax_icon }}, DP {{ dpmodel_icon }} ::: The notation of `se_e2_a` is short for the Deep Potential Smooth Edition (DeepPot-SE) constructed from all information (both angular and radial) of atomic configurations. The `e2` stands for the embedding with two-atoms information. This descriptor was described in detail in [the DeepPot-SE paper](https://arxiv.org/abs/1805.09003). diff --git a/doc/model/train-se-e2-r.md b/doc/model/train-se-e2-r.md index 316bde43b4..baff6d6331 100644 --- a/doc/model/train-se-e2-r.md +++ b/doc/model/train-se-e2-r.md @@ -1,7 +1,7 @@ -# Descriptor `"se_e2_r"` {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }} +# Descriptor `"se_e2_r"` {{ tensorflow_icon }} {{ pytorch_icon }} {{ jax_icon }} {{ dpmodel_icon }} :::{note} -**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, JAX {{ jax_icon }}, DP {{ dpmodel_icon }} ::: The notation of `se_e2_r` is short for the Deep Potential Smooth Edition (DeepPot-SE) constructed from the radial information of atomic configurations. The `e2` stands for the embedding with two-atom information. From 159361dd7b1335315d280786326ff02e9ed58b08 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 29 Oct 2024 14:52:08 -0400 Subject: [PATCH 13/14] feat(jax): force & virial (#4251) ## Summary by CodeRabbit ## Release Notes - **New Features** - Introduced new methods `forward_common_atomic` in multiple classes to enhance atomic model predictions and derivative calculations. - Added a new function `get_leading_dims` for better handling of output dimensions. - Added a new function `scatter_sum` for performing reduction operations on tensors. - Updated test methods to include flexible handling of results with the new `SKIP_FLAG` variable. - **Bug Fixes** - Improved numerical stability in calculations by ensuring small values are handled appropriately. - **Tests** - Expanded test outputs to include additional data like forces and virials for more comprehensive testing. - Enhanced backend handling in tests to accommodate new return values based on backend availability. 
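The pattern behind this change, in miniature (a toy sketch with an assumed 1/r pair energy, not the DeePMD-kit API): forces come from the negative coordinate Jacobian of the reduced energy, and a per-atom virial follows as a force-coordinate outer product, mirroring the `jax.vmap(jax.jacrev(...))` and `jnp.einsum` calls added to `base_model.py`. The diagonal guard echoes the NaN-at-zero workaround added to `env_mat.py`.

```python
# Toy sketch (not the DeePMD-kit API): force = -dE/dx via jax.grad, and a
# per-atom virial as the force-coordinate outer product.
import jax
import jax.numpy as jnp

jax.config.update("jax_enable_x64", True)

def total_energy(coord):  # coord: (natoms, 3), assumed 1/r pair energy
    diff = coord[:, None, :] - coord[None, :, :]
    mask = jnp.eye(coord.shape[0], dtype=bool)
    # guard the diagonal so the square root stays differentiable at zero,
    # echoing the env_mat.py fix in this patch
    r = jnp.sqrt(jnp.sum(diff**2, axis=-1) + mask)
    return 0.5 * jnp.sum(jnp.where(mask, 0.0, 1.0 / r))

coord = jnp.array([[0.0, 0.0, 0.0], [0.0, 0.0, 1.5], [0.0, 1.5, 0.0]])
force = -jax.grad(total_energy)(coord)           # (natoms, 3)
virial = jnp.einsum("ai,aj->aij", force, coord)  # per-atom, (natoms, 3, 3)
print(jnp.sum(force, axis=0))                    # ~0 by translation invariance
```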
--------- Signed-off-by: Jinzhe Zeng --- deepmd/dpmodel/model/make_model.py | 30 +++++- deepmd/dpmodel/model/transform_output.py | 84 +++++++++++++++-- deepmd/dpmodel/utils/env_mat.py | 4 +- deepmd/jax/common.py | 10 ++ deepmd/jax/env.py | 1 + deepmd/jax/model/base_model.py | 101 +++++++++++++++++++++ deepmd/jax/model/ener_model.py | 26 ++++++ source/tests/consistent/common.py | 4 + source/tests/consistent/model/common.py | 2 +- source/tests/consistent/model/test_ener.py | 39 +++++++- 10 files changed, 284 insertions(+), 17 deletions(-) diff --git a/deepmd/dpmodel/model/make_model.py b/deepmd/dpmodel/model/make_model.py index afe2eaffb6..e36182e712 100644 --- a/deepmd/dpmodel/model/make_model.py +++ b/deepmd/dpmodel/model/make_model.py @@ -222,22 +222,42 @@ def call_lower( extended_coord, fparam=fparam, aparam=aparam ) del extended_coord, fparam, aparam - atomic_ret = self.atomic_model.forward_common_atomic( + model_predict = self.forward_common_atomic( cc_ext, extended_atype, nlist, mapping=mapping, fparam=fp, aparam=ap, + do_atomic_virial=do_atomic_virial, + ) + model_predict = self.output_type_cast(model_predict, input_prec) + return model_predict + + def forward_common_atomic( + self, + extended_coord: np.ndarray, + extended_atype: np.ndarray, + nlist: np.ndarray, + mapping: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + do_atomic_virial: bool = False, + ): + atomic_ret = self.atomic_model.forward_common_atomic( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, ) - model_predict = fit_output_to_model_output( + return fit_output_to_model_output( atomic_ret, self.atomic_output_def(), - cc_ext, + extended_coord, do_atomic_virial=do_atomic_virial, ) - model_predict = self.output_type_cast(model_predict, input_prec) - return model_predict forward_lower = call_lower diff --git a/deepmd/dpmodel/model/transform_output.py b/deepmd/dpmodel/model/transform_output.py index 107455a6d5..af1429ce25 100644 --- a/deepmd/dpmodel/model/transform_output.py +++ b/deepmd/dpmodel/model/transform_output.py @@ -9,6 +9,7 @@ from deepmd.dpmodel.output_def import ( FittingOutputDef, ModelOutputDef, + OutputVariableDef, get_deriv_name, get_reduce_name, ) @@ -47,6 +48,28 @@ def fit_output_to_model_output( return model_ret +def get_leading_dims( + vv: np.ndarray, + vdef: OutputVariableDef, +): + """Get the dimensions of nf x nloc. + + Parameters + ---------- + vv : np.ndarray + The input array from which to compute the leading dimensions. + vdef : OutputVariableDef + The output variable definition containing the shape to exclude from `vv`. + + Returns + ------- + list + A list of leading dimensions of `vv`, excluding the last `len(vdef.shape)` dimensions. + """ + vshape = vv.shape + return list(vshape[: (len(vshape) - len(vdef.shape))]) + + def communicate_extended_output( model_ret: dict[str, np.ndarray], model_output_def: ModelOutputDef, @@ -57,6 +80,7 @@ def communicate_extended_output( local and ghost (extended) atoms to local atoms. 
""" + xp = array_api_compat.get_namespace(mapping) new_ret = {} for kk in model_output_def.keys_outp(): vv = model_ret[kk] @@ -65,15 +89,63 @@ def communicate_extended_output( if vdef.reducible: kk_redu = get_reduce_name(kk) new_ret[kk_redu] = model_ret[kk_redu] + kk_derv_r, kk_derv_c = get_deriv_name(kk) + mldims = list(mapping.shape) + vldims = get_leading_dims(vv, vdef) if vdef.r_differentiable: - kk_derv_r, kk_derv_c = get_deriv_name(kk) - # name holders - new_ret[kk_derv_r] = None + if model_ret[kk_derv_r] is not None: + derv_r_ext_dims = list(vdef.shape) + [3] # noqa:RUF005 + mapping = xp.reshape(mapping, (mldims + [1] * len(derv_r_ext_dims))) + mapping = xp.tile(mapping, [1] * len(mldims) + derv_r_ext_dims) + force = xp.zeros(vldims + derv_r_ext_dims, dtype=vv.dtype) + # jax only + if array_api_compat.is_jax_array(force): + from deepmd.jax.common import ( + scatter_sum, + ) + + force = scatter_sum( + force, + 1, + mapping, + model_ret[kk_derv_r], + ) + else: + raise NotImplementedError("Only JAX arrays are supported.") + new_ret[kk_derv_r] = force + else: + # name holders + new_ret[kk_derv_r] = None if vdef.c_differentiable: assert vdef.r_differentiable - kk_derv_r, kk_derv_c = get_deriv_name(kk) - new_ret[kk_derv_c] = None - new_ret[kk_derv_c + "_redu"] = None + if model_ret[kk_derv_c] is not None: + derv_c_ext_dims = list(vdef.shape) + [9] # noqa:RUF005 + mapping = xp.tile( + mapping, [1] * (len(mldims) + len(vdef.shape)) + [3] + ) + virial = xp.zeros( + vldims + derv_c_ext_dims, + dtype=vv.dtype, + ) + # jax only + if array_api_compat.is_jax_array(virial): + from deepmd.jax.common import ( + scatter_sum, + ) + + virial = scatter_sum( + virial, + 1, + mapping, + model_ret[kk_derv_c], + ) + else: + raise NotImplementedError("Only JAX arrays are supported.") + new_ret[kk_derv_c] = virial + new_ret[kk_derv_c + "_redu"] = xp.sum(new_ret[kk_derv_c], axis=1) + else: + new_ret[kk_derv_c] = None + new_ret[kk_derv_c + "_redu"] = None if not do_atomic_virial: # pop atomic virial, because it is not correctly calculated. 
new_ret.pop(kk_derv_c) diff --git a/deepmd/dpmodel/utils/env_mat.py b/deepmd/dpmodel/utils/env_mat.py index f4bc333a03..aa8520202e 100644 --- a/deepmd/dpmodel/utils/env_mat.py +++ b/deepmd/dpmodel/utils/env_mat.py @@ -61,7 +61,9 @@ def _make_env_mat( # nf x nloc x nnei x 3 diff = coord_r - coord_l # nf x nloc x nnei - length = xp.linalg.vector_norm(diff, axis=-1, keepdims=True) + # the grad of JAX vector_norm is NaN at x=0 + diff_ = xp.where(xp.abs(diff) < 1e-30, xp.full_like(diff, 1e-30), diff) + length = xp.linalg.vector_norm(diff_, axis=-1, keepdims=True) # for index 0 nloc atom length = length + xp.astype(~xp.expand_dims(mask, axis=-1), length.dtype) t0 = 1 / (length + protection) diff --git a/deepmd/jax/common.py b/deepmd/jax/common.py index f372e97eb5..59f36d11ad 100644 --- a/deepmd/jax/common.py +++ b/deepmd/jax/common.py @@ -95,3 +95,13 @@ def __dlpack__(self, *args, **kwargs): def __dlpack_device__(self, *args, **kwargs): return self.value.__dlpack_device__(*args, **kwargs) + + +def scatter_sum(input, dim, index: jnp.ndarray, src: jnp.ndarray) -> jnp.ndarray: + """Reduces all values from the src tensor to the indices specified in the index tensor.""" + idx = jnp.arange(input.size, dtype=jnp.int64).reshape(input.shape) + new_idx = jnp.take_along_axis(idx, index, axis=dim).ravel() + shape = input.shape + input = input.ravel() + input = input.at[new_idx].add(src.ravel()) + return input.reshape(shape) diff --git a/deepmd/jax/env.py b/deepmd/jax/env.py index 5a5a7f6bf0..ee11e17125 100644 --- a/deepmd/jax/env.py +++ b/deepmd/jax/env.py @@ -10,6 +10,7 @@ ) jax.config.update("jax_enable_x64", True) +# jax.config.update("jax_debug_nans", True) __all__ = [ "jax", diff --git a/deepmd/jax/model/base_model.py b/deepmd/jax/model/base_model.py index fee4855da3..8631c85d16 100644 --- a/deepmd/jax/model/base_model.py +++ b/deepmd/jax/model/base_model.py @@ -1,6 +1,107 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, +) + from deepmd.dpmodel.model.base_model import ( make_base_model, ) +from deepmd.dpmodel.output_def import ( + get_deriv_name, + get_reduce_name, +) +from deepmd.jax.env import ( + jax, + jnp, +) BaseModel = make_base_model() + + +def forward_common_atomic( + self, + extended_coord: jnp.ndarray, + extended_atype: jnp.ndarray, + nlist: jnp.ndarray, + mapping: Optional[jnp.ndarray] = None, + fparam: Optional[jnp.ndarray] = None, + aparam: Optional[jnp.ndarray] = None, + do_atomic_virial: bool = False, +): + atomic_ret = self.atomic_model.forward_common_atomic( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + ) + atomic_output_def = self.atomic_output_def() + model_predict = {} + for kk, vv in atomic_ret.items(): + model_predict[kk] = vv + vdef = atomic_output_def[kk] + shap = vdef.shape + atom_axis = -(len(shap) + 1) + if vdef.reducible: + kk_redu = get_reduce_name(kk) + model_predict[kk_redu] = jnp.sum(vv, axis=atom_axis) + kk_derv_r, kk_derv_c = get_deriv_name(kk) + if vdef.r_differentiable: + + def eval_output( + cc_ext, + extended_atype, + nlist, + mapping, + fparam, + aparam, + *, + _kk=kk, + _atom_axis=atom_axis, + ): + atomic_ret = self.atomic_model.forward_common_atomic( + cc_ext[None, ...], + extended_atype[None, ...], + nlist[None, ...], + mapping=mapping[None, ...] if mapping is not None else None, + fparam=fparam[None, ...] if fparam is not None else None, + aparam=aparam[None, ...]
if aparam is not None else None, + ) + return jnp.sum(atomic_ret[_kk][0], axis=_atom_axis) + + # extended_coord: [nf, nall, 3] + # ff: [nf, *def, nall, 3] + ff = -jax.vmap(jax.jacrev(eval_output, argnums=0))( + extended_coord, + extended_atype, + nlist, + mapping, + fparam, + aparam, + ) + # extended_force: [nf, nall, *def, 3] + def_ndim = len(vdef.shape) + extended_force = jnp.transpose( + ff, [0, def_ndim + 1, *range(1, def_ndim + 1), def_ndim + 2] + ) + + model_predict[kk_derv_r] = extended_force + if vdef.c_differentiable: + assert vdef.r_differentiable + # avr: [nf, *def, nall, 3, 3] + avr = jnp.einsum("f...ai,faj->f...aij", ff, extended_coord) + # avr: [nf, *def, nall, 9] + avr = jnp.reshape(avr, [*ff.shape[:-1], 9]) + # extended_virial: [nf, nall, *def, 9] + extended_virial = jnp.transpose( + avr, [0, def_ndim + 1, *range(1, def_ndim + 1), def_ndim + 2] + ) + + # the correction sums to zero, which does not contribute to global virial + # cannot jit + # if do_atomic_virial: + # raise NotImplementedError("Atomic virial is not implemented yet.") + # to [...,3,3] -> [...,9] + model_predict[kk_derv_c] = extended_virial + return model_predict diff --git a/deepmd/jax/model/ener_model.py b/deepmd/jax/model/ener_model.py index 79c5a29e88..b1bf568544 100644 --- a/deepmd/jax/model/ener_model.py +++ b/deepmd/jax/model/ener_model.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Any, + Optional, ) from deepmd.dpmodel.model import EnergyModel as EnergyModelDP @@ -10,8 +11,12 @@ from deepmd.jax.common import ( flax_module, ) +from deepmd.jax.env import ( + jnp, +) from deepmd.jax.model.base_model import ( BaseModel, + forward_common_atomic, ) @@ -22,3 +27,24 @@ def __setattr__(self, name: str, value: Any) -> None: if name == "atomic_model": value = DPAtomicModel.deserialize(value.serialize()) return super().__setattr__(name, value) + + def forward_common_atomic( + self, + extended_coord: jnp.ndarray, + extended_atype: jnp.ndarray, + nlist: jnp.ndarray, + mapping: Optional[jnp.ndarray] = None, + fparam: Optional[jnp.ndarray] = None, + aparam: Optional[jnp.ndarray] = None, + do_atomic_virial: bool = False, + ): + return forward_common_atomic( + self, + extended_coord, + extended_atype, + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) diff --git a/source/tests/consistent/common.py b/source/tests/consistent/common.py index 885662c766..bcad7c4502 100644 --- a/source/tests/consistent/common.py +++ b/source/tests/consistent/common.py @@ -69,6 +69,8 @@ "INSTALLED_ARRAY_API_STRICT", ] +SKIP_FLAG = object() + class CommonTest(ABC): data: ClassVar[dict] @@ -362,6 +364,8 @@ def test_dp_consistent_with_ref(self): data2 = dp_obj.serialize() np.testing.assert_equal(data1, data2) for rr1, rr2 in zip(ret1, ret2): + if rr1 is SKIP_FLAG or rr2 is SKIP_FLAG: + continue np.testing.assert_allclose(rr1, rr2, rtol=self.rtol, atol=self.atol) assert rr1.dtype == rr2.dtype, f"{rr1.dtype} != {rr2.dtype}" diff --git a/source/tests/consistent/model/common.py b/source/tests/consistent/model/common.py index 4112e09cff..11940d9bdf 100644 --- a/source/tests/consistent/model/common.py +++ b/source/tests/consistent/model/common.py @@ -51,7 +51,7 @@ def build_tf_model(self, obj, natoms, coords, atype, box, suffix): {}, suffix=suffix, ) - return [ret["energy"], ret["atom_ener"]], { + return [ret["energy"], ret["atom_ener"], ret["force"], ret["virial"]], { t_coord: coords, t_type: atype, t_natoms: natoms, diff --git 
a/source/tests/consistent/model/test_ener.py b/source/tests/consistent/model/test_ener.py index 78a2aac703..2a358ba7e0 100644 --- a/source/tests/consistent/model/test_ener.py +++ b/source/tests/consistent/model/test_ener.py @@ -16,6 +16,7 @@ INSTALLED_JAX, INSTALLED_PT, INSTALLED_TF, + SKIP_FLAG, CommonTest, parameterized, ) @@ -94,6 +95,21 @@ def data(self) -> dict: jax_class = EnergyModelJAX args = model_args() + def get_reference_backend(self): + """Get the reference backend. + + We need a reference backend that can reproduce forces. + """ + if not self.skip_pt: + return self.RefBackend.PT + if not self.skip_tf: + return self.RefBackend.TF + if not self.skip_jax: + return self.RefBackend.JAX + if not self.skip_dp: + return self.RefBackend.DP + raise ValueError("No available reference") + @property def skip_tf(self): return ( @@ -195,11 +211,26 @@ def eval_jax(self, jax_obj: Any) -> Any: def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: # shape not matched. ravel... if backend is self.RefBackend.DP: - return (ret["energy_redu"].ravel(), ret["energy"].ravel()) + return ( + ret["energy_redu"].ravel(), + ret["energy"].ravel(), + SKIP_FLAG, + SKIP_FLAG, + ) elif backend is self.RefBackend.PT: - return (ret["energy"].ravel(), ret["atom_energy"].ravel()) + return ( + ret["energy"].ravel(), + ret["atom_energy"].ravel(), + ret["force"].ravel(), + ret["virial"].ravel(), + ) elif backend is self.RefBackend.TF: - return (ret[0].ravel(), ret[1].ravel()) + return (ret[0].ravel(), ret[1].ravel(), ret[2].ravel(), ret[3].ravel()) elif backend is self.RefBackend.JAX: - return (ret["energy_redu"].ravel(), ret["energy"].ravel()) + return ( + ret["energy_redu"].ravel(), + ret["energy"].ravel(), + ret["energy_derv_r"].ravel(), + ret["energy_derv_c_redu"].ravel(), + ) raise ValueError(f"Unknown backend: {backend}") From d165fee9d93626b314f734097c01f2ba5ee4a099 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 29 Oct 2024 20:50:28 -0400 Subject: [PATCH 14/14] feat(jax): freeze to StableXLO & DeepEval (#4256) ## Summary by CodeRabbit ## Release Notes - **New Features** - Introduced support for `.hlo` file extensions in model loading and saving functionalities. - Added a `DeepEval` class for enhanced deep learning model evaluation in molecular simulations. - Implemented a new `HLO` class for managing model predictions within a deep learning framework. - **Bug Fixes** - Improved handling of suffixes and backend names in test cases for better consistency. - **Documentation** - Added SPDX license identifier to relevant files. - **Chores** - Refactored internal methods to streamline model prediction processes. 
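Intended end-to-end use, as a sketch (the `model.hlo` path is hypothetical): since `.hlo` is now registered as a `JAXBackend` suffix with `DEEP_EVAL` enabled, the generic `DeepPot` wrapper should dispatch to the JAX `DeepEval` added below.

```python
# Inference sketch; "model.hlo" is a hypothetical frozen-model path.
import numpy as np

from deepmd.infer.deep_pot import DeepPot

dp = DeepPot("model.hlo")  # dispatched to deepmd.jax.infer.deep_eval.DeepEval
coord = np.random.default_rng(0).random((1, 6, 3)) * 3.0  # 1 frame, 6 atoms
cell = 3.0 * np.eye(3).reshape(1, 9)
atype = [0, 0, 0, 1, 1, 1]
e, f, v = dp.eval(coord, cell, atype)  # energy, force, virial
```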
--------- Signed-off-by: Jinzhe Zeng Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- deepmd/backend/jax.py | 10 +- deepmd/dpmodel/descriptor/se_e2_a.py | 2 +- deepmd/dpmodel/model/make_model.py | 130 +++++++-- deepmd/dpmodel/utils/serialization.py | 4 +- deepmd/jax/env.py | 2 + deepmd/jax/infer/__init__.py | 1 + deepmd/jax/infer/deep_eval.py | 391 ++++++++++++++++++++++++++ deepmd/jax/model/hlo.py | 311 ++++++++++++++++++++ deepmd/jax/utils/serialization.py | 50 ++++ source/tests/consistent/io/test_io.py | 15 +- 10 files changed, 875 insertions(+), 41 deletions(-) create mode 100644 deepmd/jax/infer/__init__.py create mode 100644 deepmd/jax/infer/deep_eval.py create mode 100644 deepmd/jax/model/hlo.py diff --git a/deepmd/backend/jax.py b/deepmd/backend/jax.py index 7131f4d534..cfb0936bda 100644 --- a/deepmd/backend/jax.py +++ b/deepmd/backend/jax.py @@ -34,11 +34,11 @@ class JAXBackend(Backend): features: ClassVar[Backend.Feature] = ( Backend.Feature.IO | Backend.Feature.ENTRY_POINT - # | Backend.Feature.DEEP_EVAL + | Backend.Feature.DEEP_EVAL | Backend.Feature.NEIGHBOR_STAT ) """The features of the backend.""" - suffixes: ClassVar[list[str]] = [".jax"] + suffixes: ClassVar[list[str]] = [".hlo", ".jax"] """The suffixes of the backend.""" def is_available(self) -> bool: @@ -71,7 +71,11 @@ def deep_eval(self) -> type["DeepEvalBackend"]: type[DeepEvalBackend] The Deep Eval backend of the backend. """ - raise NotImplementedError + from deepmd.jax.infer.deep_eval import ( + DeepEval, + ) + + return DeepEval @property def neighbor_stat(self) -> type["NeighborStat"]: diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py index feebe57af7..6c0efb94d4 100644 --- a/deepmd/dpmodel/descriptor/se_e2_a.py +++ b/deepmd/dpmodel/descriptor/se_e2_a.py @@ -555,7 +555,7 @@ def call( coord_ext, atype_ext, nlist, self.davg, self.dstd ) nf, nloc, nnei, _ = rr.shape - sec = xp.asarray(self.sel_cumsum) + sec = self.sel_cumsum ng = self.neuron[-1] gr = xp.zeros([nf * nloc, ng, 4], dtype=self.dstd.dtype) diff --git a/deepmd/dpmodel/model/make_model.py b/deepmd/dpmodel/model/make_model.py index e36182e712..b6379573e1 100644 --- a/deepmd/dpmodel/model/make_model.py +++ b/deepmd/dpmodel/model/make_model.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + Callable, Optional, ) @@ -39,6 +40,95 @@ ) +def model_call_from_call_lower( + *, # enforce keyword-only arguments + call_lower: Callable[ + [ + np.ndarray, + np.ndarray, + np.ndarray, + Optional[np.ndarray], + Optional[np.ndarray], + bool, + ], + dict[str, np.ndarray], + ], + rcut: float, + sel: list[int], + mixed_types: bool, + model_output_def: ModelOutputDef, + coord: np.ndarray, + atype: np.ndarray, + box: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + do_atomic_virial: bool = False, +): + """Return model prediction from lower interface. + + Parameters + ---------- + coord + The coordinates of the atoms. + shape: nf x (nloc x 3) + atype + The type of atoms. shape: nf x nloc + box + The simulation box. shape: nf x 9 + fparam + frame parameter. nf x ndf + aparam + atomic parameter. nf x nloc x nda + do_atomic_virial + If calculate the atomic virial. + + Returns + ------- + ret_dict + The result dict of type dict[str,np.ndarray]. + The keys are defined by the `ModelOutputDef`. 
+ + """ + nframes, nloc = atype.shape[:2] + cc, bb, fp, ap = coord, box, fparam, aparam + del coord, box, fparam, aparam + if bb is not None: + coord_normalized = normalize_coord( + cc.reshape(nframes, nloc, 3), + bb.reshape(nframes, 3, 3), + ) + else: + coord_normalized = cc.copy() + extended_coord, extended_atype, mapping = extend_coord_with_ghosts( + coord_normalized, atype, bb, rcut + ) + nlist = build_neighbor_list( + extended_coord, + extended_atype, + nloc, + rcut, + sel, + distinguish_types=not mixed_types, + ) + extended_coord = extended_coord.reshape(nframes, -1, 3) + model_predict_lower = call_lower( + extended_coord, + extended_atype, + nlist, + mapping, + fparam=fp, + aparam=ap, + do_atomic_virial=do_atomic_virial, + ) + model_predict = communicate_extended_output( + model_predict_lower, + model_output_def, + mapping, + do_atomic_virial=do_atomic_virial, + ) + return model_predict + + def make_model(T_AtomicModel: type[BaseAtomicModel]): """Make a model as a derived class of an atomic model. @@ -130,45 +220,23 @@ def call( The keys are defined by the `ModelOutputDef`. """ - nframes, nloc = atype.shape[:2] cc, bb, fp, ap, input_prec = self.input_type_cast( coord, box=box, fparam=fparam, aparam=aparam ) del coord, box, fparam, aparam - if bb is not None: - coord_normalized = normalize_coord( - cc.reshape(nframes, nloc, 3), - bb.reshape(nframes, 3, 3), - ) - else: - coord_normalized = cc.copy() - extended_coord, extended_atype, mapping = extend_coord_with_ghosts( - coord_normalized, atype, bb, self.get_rcut() - ) - nlist = build_neighbor_list( - extended_coord, - extended_atype, - nloc, - self.get_rcut(), - self.get_sel(), - distinguish_types=not self.mixed_types(), - ) - extended_coord = extended_coord.reshape(nframes, -1, 3) - model_predict_lower = self.call_lower( - extended_coord, - extended_atype, - nlist, - mapping, + model_predict = model_call_from_call_lower( + call_lower=self.call_lower, + rcut=self.get_rcut(), + sel=self.get_sel(), + mixed_types=self.mixed_types(), + model_output_def=self.model_output_def(), + coord=cc, + atype=atype, + box=bb, fparam=fp, aparam=ap, do_atomic_virial=do_atomic_virial, ) - model_predict = communicate_extended_output( - model_predict_lower, - self.model_output_def(), - mapping, - do_atomic_virial=do_atomic_virial, - ) model_predict = self.output_type_cast(model_predict, input_prec) return model_predict diff --git a/deepmd/dpmodel/utils/serialization.py b/deepmd/dpmodel/utils/serialization.py index 5e70ec6769..37702cc9f0 100644 --- a/deepmd/dpmodel/utils/serialization.py +++ b/deepmd/dpmodel/utils/serialization.py @@ -90,7 +90,7 @@ def save_dp_model(filename: str, model_dict: dict) -> None: # use UTC+0 time "time": str(datetime.datetime.now(tz=datetime.timezone.utc)), } - if filename_extension == ".dp": + if filename_extension in (".dp", ".hlo"): variable_counter = Counter() with h5py.File(filename, "w") as f: model_dict = traverse_model_dict( @@ -141,7 +141,7 @@ def load_dp_model(filename: str) -> dict: The loaded model dict, including meta information. 
""" filename_extension = Path(filename).suffix - if filename_extension == ".dp": + if filename_extension in {".dp", ".hlo"}: with h5py.File(filename, "r") as f: model_dict = json.loads(f.attrs["json"]) model_dict = traverse_model_dict(model_dict, lambda x: f[x][()].copy()) diff --git a/deepmd/jax/env.py b/deepmd/jax/env.py index ee11e17125..1b90433b00 100644 --- a/deepmd/jax/env.py +++ b/deepmd/jax/env.py @@ -8,6 +8,7 @@ from flax import ( nnx, ) +from jax import export as jax_export jax.config.update("jax_enable_x64", True) # jax.config.update("jax_debug_nans", True) @@ -16,4 +17,5 @@ "jax", "jnp", "nnx", + "jax_export", ] diff --git a/deepmd/jax/infer/__init__.py b/deepmd/jax/infer/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/deepmd/jax/infer/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/jax/infer/deep_eval.py b/deepmd/jax/infer/deep_eval.py new file mode 100644 index 0000000000..76f044a327 --- /dev/null +++ b/deepmd/jax/infer/deep_eval.py @@ -0,0 +1,391 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Optional, + Union, +) + +import numpy as np + +from deepmd.dpmodel.common import ( + to_numpy_array, +) +from deepmd.dpmodel.output_def import ( + ModelOutputDef, + OutputVariableCategory, + OutputVariableDef, +) +from deepmd.dpmodel.utils.serialization import ( + load_dp_model, +) +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, +) +from deepmd.infer.deep_dipole import ( + DeepDipole, +) +from deepmd.infer.deep_dos import ( + DeepDOS, +) +from deepmd.infer.deep_eval import DeepEval as DeepEvalWrapper +from deepmd.infer.deep_eval import ( + DeepEvalBackend, +) +from deepmd.infer.deep_polar import ( + DeepPolar, +) +from deepmd.infer.deep_pot import ( + DeepPot, +) +from deepmd.infer.deep_wfc import ( + DeepWFC, +) +from deepmd.jax.common import ( + to_jax_array, +) +from deepmd.jax.model.hlo import ( + HLO, +) +from deepmd.jax.utils.auto_batch_size import ( + AutoBatchSize, +) + +if TYPE_CHECKING: + import ase.neighborlist + + +class DeepEval(DeepEvalBackend): + """NumPy backend implementation of DeepEval. + + Parameters + ---------- + model_file : str + The name of the frozen model file. + output_def : ModelOutputDef + The output definition of the model. + *args : list + Positional arguments. + auto_batch_size : bool or int or AutoBatchSize, default: True + If True, automatic batch size will be used. If int, it will be used + as the initial batch size. + neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional + The ASE neighbor list class to produce the neighbor list. If None, the + neighbor list will be built natively in the model. + **kwargs : dict + Keyword arguments. 
+ """ + + def __init__( + self, + model_file: str, + output_def: ModelOutputDef, + *args: Any, + auto_batch_size: Union[bool, int, AutoBatchSize] = True, + neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None, + **kwargs: Any, + ): + self.output_def = output_def + self.model_path = model_file + + model_data = load_dp_model(model_file) + self.dp = HLO( + stablehlo=model_data["@variables"]["stablehlo"].tobytes(), + model_def_script=model_data["model_def_script"], + **model_data["constants"], + ) + self.rcut = self.dp.get_rcut() + self.type_map = self.dp.get_type_map() + if isinstance(auto_batch_size, bool): + if auto_batch_size: + self.auto_batch_size = AutoBatchSize() + else: + self.auto_batch_size = None + elif isinstance(auto_batch_size, int): + self.auto_batch_size = AutoBatchSize(auto_batch_size) + elif isinstance(auto_batch_size, AutoBatchSize): + self.auto_batch_size = auto_batch_size + else: + raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize") + + def get_rcut(self) -> float: + """Get the cutoff radius of this model.""" + return self.rcut + + def get_ntypes(self) -> int: + """Get the number of atom types of this model.""" + return len(self.type_map) + + def get_type_map(self) -> list[str]: + """Get the type map (element name of the atom types) of this model.""" + return self.type_map + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this DP.""" + return self.dp.get_dim_fparam() + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this DP.""" + return self.dp.get_dim_aparam() + + @property + def model_type(self) -> type["DeepEvalWrapper"]: + """The evaluator of the model type.""" + model_output_type = self.dp.model_output_type() + if "energy" in model_output_type: + return DeepPot + elif "dos" in model_output_type: + return DeepDOS + elif "dipole" in model_output_type: + return DeepDipole + elif "polar" in model_output_type: + return DeepPolar + elif "wfc" in model_output_type: + return DeepWFC + else: + raise RuntimeError("Unknown model type") + + def get_sel_type(self) -> list[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.dp.get_sel_type() + + def get_numb_dos(self) -> int: + """Get the number of DOS.""" + return 0 + + def get_has_efield(self): + """Check if the model has efield.""" + return False + + def get_ntypes_spin(self): + """Get the number of spin atom types of this model.""" + return 0 + + def eval( + self, + coords: np.ndarray, + cells: Optional[np.ndarray], + atom_types: np.ndarray, + atomic: bool = False, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + **kwargs: Any, + ) -> dict[str, np.ndarray]: + """Evaluate the energy, force and virial by using this DP. + + Parameters + ---------- + coords + The coordinates of atoms. + The array should be of size nframes x natoms x 3 + cells + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. + The array should be of size nframes x 3 x 3 + atom_types + The atom types + The list should contain natoms ints + atomic + Calculate the atomic energy and virial + fparam + The frame parameter. + The array can be of size : + - nframes x dim_fparam. + - dim_fparam. Then all frames are assumed to be provided with the same fparam. 
+        aparam
+            The atomic parameter.
+            The array can be of size:
+            - nframes x natoms x dim_aparam.
+            - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
+            - dim_aparam. Then all frames and atoms are provided with the same aparam.
+        **kwargs
+            Other parameters
+
+        Returns
+        -------
+        output_dict : dict
+            The output of the evaluation. The keys are the names of the output
+            variables, and the values are the corresponding output arrays.
+        """
+        if fparam is not None or aparam is not None:
+            raise NotImplementedError
+        # convert all of the input to numpy arrays
+        atom_types = np.array(atom_types, dtype=np.int32)
+        coords = np.array(coords)
+        if cells is not None:
+            cells = np.array(cells)
+        natoms, numb_test = self._get_natoms_and_nframes(
+            coords, atom_types, len(atom_types.shape) > 1
+        )
+        request_defs = self._get_request_defs(atomic)
+        out = self._eval_func(self._eval_model, numb_test, natoms)(
+            coords, cells, atom_types, request_defs
+        )
+        return dict(
+            zip(
+                [x.name for x in request_defs],
+                out,
+            )
+        )
+
+    def _get_request_defs(self, atomic: bool) -> list[OutputVariableDef]:
+        """Get the requested output definitions.
+
+        When atomic is True, all output_def are requested.
+        When atomic is False, only energy (tensor), force, and virial
+        are requested.
+
+        Parameters
+        ----------
+        atomic : bool
+            Whether to request the atomic output.
+
+        Returns
+        -------
+        list[OutputVariableDef]
+            The requested output definitions.
+        """
+        if atomic:
+            return list(self.output_def.var_defs.values())
+        else:
+            return [
+                x
+                for x in self.output_def.var_defs.values()
+                if x.category
+                in (
+                    OutputVariableCategory.REDU,
+                    OutputVariableCategory.DERV_R,
+                    OutputVariableCategory.DERV_C_REDU,
+                )
+            ]
+
+    def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Callable:
+        """Wrapper method with auto batch size.
+
+        Parameters
+        ----------
+        inner_func : Callable
+            the method to be wrapped
+        numb_test : int
+            number of tests
+        natoms : int
+            number of atoms
+
+        Returns
+        -------
+        Callable
+            the wrapper
+        """
+        if self.auto_batch_size is not None:
+
+            def eval_func(*args, **kwargs):
+                return self.auto_batch_size.execute_all(
+                    inner_func, numb_test, natoms, *args, **kwargs
+                )
+
+        else:
+            eval_func = inner_func
+        return eval_func
+
+    def _get_natoms_and_nframes(
+        self,
+        coords: np.ndarray,
+        atom_types: np.ndarray,
+        mixed_type: bool = False,
+    ) -> tuple[int, int]:
+        if mixed_type:
+            natoms = len(atom_types[0])
+        else:
+            natoms = len(atom_types)
+        if natoms == 0:
+            assert coords.size == 0
+        else:
+            coords = np.reshape(np.array(coords), [-1, natoms * 3])
+        nframes = coords.shape[0]
+        return natoms, nframes
+
+    def _eval_model(
+        self,
+        coords: np.ndarray,
+        cells: Optional[np.ndarray],
+        atom_types: np.ndarray,
+        request_defs: list[OutputVariableDef],
+    ):
+        model = self.dp
+
+        nframes = coords.shape[0]
+        if len(atom_types.shape) == 1:
+            natoms = len(atom_types)
+            atom_types = np.tile(atom_types, nframes).reshape(nframes, -1)
+        else:
+            natoms = len(atom_types[0])
+
+        coord_input = coords.reshape([-1, natoms, 3])
+        type_input = atom_types
+        if cells is not None:
+            box_input = cells.reshape([-1, 3, 3])
+        else:
+            box_input = None
+
+        do_atomic_virial = any(
+            x.category == OutputVariableCategory.DERV_C_REDU for x in request_defs
+        )
+        batch_output = model(
+            to_jax_array(coord_input),
+            to_jax_array(type_input),
+            box=to_jax_array(box_input),
+            do_atomic_virial=do_atomic_virial,
+        )
+        if isinstance(batch_output, tuple):
+            batch_output = batch_output[0]
+        for kk, vv in batch_output.items():
+            batch_output[kk] = to_numpy_array(vv)
+
+        results = []
+        for odef in request_defs:
+            # it seems not doing conversion
+            # dp_name = self._OUTDEF_DP2BACKEND[odef.name]
+            dp_name = odef.name
+            if dp_name in batch_output:
+                shape = self._get_output_shape(odef, nframes, natoms)
+                if batch_output[dp_name] is not None:
+                    out = batch_output[dp_name].reshape(shape)
+                else:
+                    out = np.full(shape, np.nan, dtype=GLOBAL_NP_FLOAT_PRECISION)
+                results.append(out)
+            else:
+                shape = self._get_output_shape(odef, nframes, natoms)
+                results.append(
+                    np.full(np.abs(shape), np.nan, dtype=GLOBAL_NP_FLOAT_PRECISION)
+                )  # this is kinda hacky
+        return tuple(results)
+
+    def _get_output_shape(self, odef, nframes, natoms):
+        if odef.category == OutputVariableCategory.DERV_C_REDU:
+            # virial
+            return [nframes, *odef.shape[:-1], 9]
+        elif odef.category == OutputVariableCategory.REDU:
+            # energy
+            return [nframes, *odef.shape, 1]
+        elif odef.category == OutputVariableCategory.DERV_C:
+            # atom_virial
+            return [nframes, *odef.shape[:-1], natoms, 9]
+        elif odef.category == OutputVariableCategory.DERV_R:
+            # force
+            return [nframes, *odef.shape[:-1], natoms, 3]
+        elif odef.category == OutputVariableCategory.OUT:
+            # atom_energy, atom_tensor
+            return [nframes, natoms, *odef.shape, 1]
+        else:
+            raise RuntimeError("unknown category")
+
+    def get_model_def_script(self) -> dict:
+        """Get model definition script."""
+        return json.loads(self.dp.get_model_def_script())
diff --git a/deepmd/jax/model/hlo.py b/deepmd/jax/model/hlo.py
new file mode 100644
index 0000000000..010e3d7a5e
--- /dev/null
+++ b/deepmd/jax/model/hlo.py
@@ -0,0 +1,311 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Any,
+    Optional,
+)
+
+from deepmd.dpmodel.model.make_model import (
+    model_call_from_call_lower,
+)
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    ModelOutputDef,
+    OutputVariableDef,
+)
+from deepmd.jax.env import (
+    jax_export,
+    jnp,
+)
+from deepmd.jax.model.base_model import (
+    BaseModel,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+
+OUTPUT_DEFS = {
+    "energy": OutputVariableDef(
+        "energy",
+        shape=[1],
+        reducible=True,
+        r_differentiable=True,
+        c_differentiable=True,
+    ),
+    "mask": OutputVariableDef(
+        "mask",
+        shape=[1],
+        reducible=False,
+        r_differentiable=False,
+        c_differentiable=False,
+    ),
+}
+
+
+class HLO(BaseModel):
+    def __init__(
+        self,
+        stablehlo,
+        model_def_script,
+        type_map,
+        rcut,
+        dim_fparam,
+        dim_aparam,
+        sel_type,
+        is_aparam_nall,
+        model_output_type,
+        mixed_types,
+        min_nbor_dist,
+        sel,
+    ) -> None:
+        self._call_lower = jax_export.deserialize(stablehlo).call
+        self.stablehlo = stablehlo
+        self.type_map = type_map
+        self.rcut = rcut
+        self.dim_fparam = dim_fparam
+        self.dim_aparam = dim_aparam
+        self.sel_type = sel_type
+        self._is_aparam_nall = is_aparam_nall
+        self._model_output_type = model_output_type
+        self._mixed_types = mixed_types
+        self.min_nbor_dist = min_nbor_dist
+        self.sel = sel
+        self.model_def_script = model_def_script
+
+    def __call__(
+        self,
+        coord: jnp.ndarray,
+        atype: jnp.ndarray,
+        box: Optional[jnp.ndarray] = None,
+        fparam: Optional[jnp.ndarray] = None,
+        aparam: Optional[jnp.ndarray] = None,
+        do_atomic_virial: bool = False,
+    ) -> Any:
+        """Return model prediction.
+
+        Parameters
+        ----------
+        coord
+            The coordinates of the atoms.
+            shape: nf x (nloc x 3)
+        atype
+            The type of atoms. shape: nf x nloc
+        box
+            The simulation box. shape: nf x 9
+        fparam
+            frame parameter. nf x ndf
+        aparam
+            atomic parameter. nf x nloc x nda
+        do_atomic_virial
+            Whether to calculate the atomic virial.
+
+        Returns
+        -------
+        ret_dict
+            The result dict of type dict[str,np.ndarray].
+            The keys are defined by the `ModelOutputDef`.
+
+        """
+        return self.call(coord, atype, box, fparam, aparam, do_atomic_virial)
+
+    def call(
+        self,
+        coord: jnp.ndarray,
+        atype: jnp.ndarray,
+        box: Optional[jnp.ndarray] = None,
+        fparam: Optional[jnp.ndarray] = None,
+        aparam: Optional[jnp.ndarray] = None,
+        do_atomic_virial: bool = False,
+    ):
+        """Return model prediction.
+
+        Parameters
+        ----------
+        coord
+            The coordinates of the atoms.
+            shape: nf x (nloc x 3)
+        atype
+            The type of atoms. shape: nf x nloc
+        box
+            The simulation box. shape: nf x 9
+        fparam
+            frame parameter. nf x ndf
+        aparam
+            atomic parameter. nf x nloc x nda
+        do_atomic_virial
+            Whether to calculate the atomic virial.
+
+        Returns
+        -------
+        ret_dict
+            The result dict of type dict[str,np.ndarray].
+            The keys are defined by the `ModelOutputDef`.
+ + """ + return model_call_from_call_lower( + call_lower=self.call_lower, + rcut=self.get_rcut(), + sel=self.get_sel(), + mixed_types=self.mixed_types(), + model_output_def=self.model_output_def(), + coord=coord, + atype=atype, + box=box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + + def model_output_def(self): + return ModelOutputDef( + FittingOutputDef([OUTPUT_DEFS[tt] for tt in self.model_output_type()]) + ) + + def call_lower( + self, + extended_coord: jnp.ndarray, + extended_atype: jnp.ndarray, + nlist: jnp.ndarray, + mapping: Optional[jnp.ndarray] = None, + fparam: Optional[jnp.ndarray] = None, + aparam: Optional[jnp.ndarray] = None, + do_atomic_virial: bool = False, + ): + return self._call_lower( + extended_coord, + extended_atype, + nlist, + mapping, + fparam, + aparam, + do_atomic_virial, + ) + + def get_type_map(self) -> list[str]: + """Get the type map.""" + return self.type_map + + def get_rcut(self): + """Get the cut-off radius.""" + return self.rcut + + def get_dim_fparam(self): + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.dim_fparam + + def get_dim_aparam(self): + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.dim_aparam + + def get_sel_type(self) -> list[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.sel_type + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return self._is_aparam_nall + + def model_output_type(self) -> list[str]: + """Get the output type for the model.""" + return self._model_output_type + + def serialize(self) -> dict: + """Serialize the model. + + Returns + ------- + dict + The serialized data + """ + raise NotImplementedError("Not implemented") + + @classmethod + def deserialize(cls, data: dict) -> "BaseModel": + """Deserialize the model. + + Parameters + ---------- + data : dict + The serialized data + + Returns + ------- + BaseModel + The deserialized model + """ + raise NotImplementedError("Not implemented") + + def get_model_def_script(self) -> str: + """Get the model definition script.""" + return self.model_def_script + + def get_min_nbor_dist(self) -> Optional[float]: + """Get the minimum distance between two atoms.""" + return self.min_nbor_dist + + def get_nnei(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.nsel + + def get_sel(self) -> list[int]: + return self.sel + + def get_nsel(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return sum(self.sel) + + def mixed_types(self) -> bool: + return self._mixed_types + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. 
+
+        Parameters
+        ----------
+        train_data : DeepmdDataSystem
+            data used to do neighbor statistics
+        type_map : list[str], optional
+            The name of each type of atoms
+        local_jdata : dict
+            The local data referring to the current class
+
+        Returns
+        -------
+        dict
+            The updated local data
+        float
+            The minimum distance between two atoms
+        """
+        raise NotImplementedError("Not implemented")
+
+    @classmethod
+    def get_model(cls, model_params: dict) -> "BaseModel":
+        """Get the model by the parameters.
+
+        By default, all the parameters are directly passed to the constructor.
+        If not, override this method.
+
+        Parameters
+        ----------
+        model_params : dict
+            The model parameters
+
+        Returns
+        -------
+        BaseModel
+            The model
+        """
+        raise NotImplementedError("Not implemented")
diff --git a/deepmd/jax/utils/serialization.py b/deepmd/jax/utils/serialization.py
index 43070f8a07..fcfcc8a610 100644
--- a/deepmd/jax/utils/serialization.py
+++ b/deepmd/jax/utils/serialization.py
@@ -3,10 +3,17 @@
     Path,
 )
 
+import numpy as np
 import orbax.checkpoint as ocp
 
+from deepmd.dpmodel.utils.serialization import (
+    load_dp_model,
+    save_dp_model,
+)
 from deepmd.jax.env import (
     jax,
+    jax_export,
+    jnp,
     nnx,
 )
 from deepmd.jax.model.model import (
@@ -39,6 +46,44 @@ def deserialize_to_file(model_file: str, data: dict) -> None:
                 model_def_script=ocp.args.JsonSave(model_def_script),
             ),
         )
+    elif model_file.endswith(".hlo"):
+        model = BaseModel.deserialize(data["model"])
+        model_def_script = data["model_def_script"]
+        call_lower = model.call_lower
+
+        nf, nloc, nghost, nfp, nap = jax_export.symbolic_shape(
+            "nf, nloc, nghost, nfp, nap"
+        )
+        exported = jax_export.export(jax.jit(call_lower))(
+            jax.ShapeDtypeStruct((nf, nloc + nghost, 3), jnp.float64),  # extended_coord
+            jax.ShapeDtypeStruct((nf, nloc + nghost), jnp.int32),  # extended_atype
+            jax.ShapeDtypeStruct((nf, nloc, model.get_nnei()), jnp.int64),  # nlist
+            jax.ShapeDtypeStruct((nf, nloc + nghost), jnp.int64),  # mapping
+            jax.ShapeDtypeStruct((nf, nfp), jnp.float64)
+            if model.get_dim_fparam()
+            else None,  # fparam
+            jax.ShapeDtypeStruct((nf, nap), jnp.float64)
+            if model.get_dim_aparam()
+            else None,  # aparam
+            False,  # do_atomic_virial
+        )
+        serialized: bytearray = exported.serialize()
+        data = data.copy()
+        data.setdefault("@variables", {})
+        data["@variables"]["stablehlo"] = np.void(serialized)
+        data["constants"] = {
+            "type_map": model.get_type_map(),
+            "rcut": model.get_rcut(),
+            "dim_fparam": model.get_dim_fparam(),
+            "dim_aparam": model.get_dim_aparam(),
+            "sel_type": model.get_sel_type(),
+            "is_aparam_nall": model.is_aparam_nall(),
+            "model_output_type": model.model_output_type(),
+            "mixed_types": model.mixed_types(),
+            "min_nbor_dist": model.get_min_nbor_dist(),
+            "sel": model.get_sel(),
+        }
+        save_dp_model(filename=model_file, model_dict=data)
     else:
         raise ValueError("JAX backend only supports converting .jax directory")
@@ -93,5 +138,10 @@ def convert_str_to_int_key(item: dict):
             "@variables": {},
         }
         return data
+    elif model_file.endswith(".hlo"):
+        data = load_dp_model(model_file)
+        data.pop("constants")
+        data["@variables"].pop("stablehlo")
+        return data
     else:
         raise ValueError("JAX backend only supports converting .jax directory")
diff --git a/source/tests/consistent/io/test_io.py b/source/tests/consistent/io/test_io.py
index df81c24ff5..dc0f280d56 100644
--- a/source/tests/consistent/io/test_io.py
+++ b/source/tests/consistent/io/test_io.py
@@ -74,14 +74,21 @@ def tearDown(self):
     @unittest.skipIf(TEST_DEVICE != "cpu" and CI, "Only test on CPU.")
     def test_data_equal(self):
         prefix = "test_consistent_io_" + self.__class__.__name__.lower()
-        for backend_name in ("tensorflow", "pytorch", "dpmodel", "jax"):
+        for backend_name, suffix_idx in (
+            ("tensorflow", 0),
+            ("pytorch", 0),
+            ("dpmodel", 0),
+            ("jax", 0),
+        ):
             with self.subTest(backend_name=backend_name):
                 backend = Backend.get_backend(backend_name)()
                 if not backend.is_available():
                     continue
                 reference_data = copy.deepcopy(self.data)
-                self.save_data_to_model(prefix + backend.suffixes[0], reference_data)
-                data = self.get_data_from_model(prefix + backend.suffixes[0])
+                self.save_data_to_model(
+                    prefix + backend.suffixes[suffix_idx], reference_data
+                )
+                data = self.get_data_from_model(prefix + backend.suffixes[suffix_idx])
                 data = copy.deepcopy(data)
                 reference_data = copy.deepcopy(self.data)
                 # some keys are not expected to be not the same
@@ -131,7 +138,7 @@ def test_deep_eval(self):
         ).reshape(1, 9)
         prefix = "test_consistent_io_" + self.__class__.__name__.lower()
         rets = []
-        for backend_name in ("tensorflow", "pytorch", "dpmodel"):
+        for backend_name in ("tensorflow", "pytorch", "dpmodel", "jax"):
             backend = Backend.get_backend(backend_name)()
             if not backend.is_available():
                 continue
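
---

The intended round trip enabled by this patch looks roughly like the sketch below. This is illustrative only, not part of the patch: the file names are hypothetical, and it assumes the JAX backend registers the ".hlo" suffix so that the existing `dp convert-backend` CLI and the generic `DeepPot` entry point dispatch to the `deserialize_to_file` and `deepmd.jax.infer.deep_eval.DeepEval` code added here.

# Export a checkpoint to a self-contained StableHLO model (hypothetical names):
#   dp convert-backend model.jax model.hlo
# The resulting .hlo file can then be evaluated through the generic inference API:
import numpy as np

from deepmd.infer.deep_pot import DeepPot

dp = DeepPot("model.hlo")  # should resolve to the JAX-backend DeepEval
coords = np.random.default_rng(0).uniform(0.0, 10.0, size=(1, 6, 3))
cells = 10.0 * np.eye(3).reshape(1, 3, 3)
atom_types = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32)
e, f, v = dp.eval(coords, cells, atom_types, atomic=False)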