From 544ba67cff276a0ebd2cbce431c25224d1cf6c78 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 22 Oct 2024 16:38:38 -0400
Subject: [PATCH 1/4] docs: fix parameter links

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd/utils/argcheck.py          | 20 +++++++++++---------
 doc/freeze/compress.md            |  2 +-
 doc/model/dplr.md                 |  2 +-
 doc/model/dprc.md                 |  6 +++---
 doc/model/index.rst               |  1 +
 doc/model/overall.md              |  2 +-
 doc/model/train-energy-spin.md    |  4 ++--
 doc/model/train-energy.md         |  8 ++++----
 doc/model/train-fitting-dos.md    |  4 ++--
 doc/model/train-fitting-tensor.md |  4 ++--
 doc/model/train-hybrid.md         |  2 +-
 doc/model/train-se-a-mask.md      | 22 +++++++++++-----------
 doc/model/train-se-atten.md       | 24 ++++++++++++------------
 doc/model/train-se-e2-a.md        | 18 +++++++++---------
 doc/model/train-se-e2-r.md        |  4 ++--
 doc/model/train-se-e3-tebd.md     |  4 ++--
 doc/model/train-se-e3.md          |  4 ++--
 doc/train/finetuning.md           |  2 +-
 doc/train/gpu-limitations.md      |  2 +-
 doc/train/training-advanced.md    |  2 +-
 20 files changed, 70 insertions(+), 67 deletions(-)

diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 1a5e1cc3b2..c3fce807d5 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -1387,14 +1387,16 @@ def descrpt_se_a_mask_args():
 
 
 def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant:
-    link_lf = make_link("loc_frame", "model/descriptor[loc_frame]")
-    link_se_e2_a = make_link("se_e2_a", "model/descriptor[se_e2_a]")
-    link_se_e2_r = make_link("se_e2_r", "model/descriptor[se_e2_r]")
-    link_se_e3 = make_link("se_e3", "model/descriptor[se_e3]")
-    link_se_a_tpe = make_link("se_a_tpe", "model/descriptor[se_a_tpe]")
-    link_hybrid = make_link("hybrid", "model/descriptor[hybrid]")
-    link_se_atten = make_link("se_atten", "model/descriptor[se_atten]")
-    link_se_atten_v2 = make_link("se_atten_v2", "model/descriptor[se_atten_v2]")
+    link_lf = make_link("loc_frame", "model[standard]/descriptor[loc_frame]")
+    link_se_e2_a = make_link("se_e2_a", "model[standard]/descriptor[se_e2_a]")
+    link_se_e2_r = make_link("se_e2_r", "model[standard]/descriptor[se_e2_r]")
+    link_se_e3 = make_link("se_e3", "model[standard]/descriptor[se_e3]")
+    link_se_a_tpe = make_link("se_a_tpe", "model[standard]/descriptor[se_a_tpe]")
+    link_hybrid = make_link("hybrid", "model[standard]/descriptor[hybrid]")
+    link_se_atten = make_link("se_atten", "model[standard]/descriptor[se_atten]")
+    link_se_atten_v2 = make_link(
+        "se_atten_v2", "model[standard]/descriptor[se_atten_v2]"
+    )
     doc_descrpt_type = "The type of the descritpor. See explanation below. \n\n\
 - `loc_frame`: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.\n\n\
 - `se_e2_a`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.\n\n\
@@ -1692,7 +1694,7 @@ def fitting_variant_type_args():
 #  --- Modifier configurations: --- #
 def modifier_dipole_charge():
     doc_model_name = "The name of the frozen dipole model file."
-    doc_model_charge_map = f"The charge of the WFCC. The list length should be the same as the {make_link('sel_type', 'model/fitting_net[dipole]/sel_type')}. "
+    doc_model_charge_map = f"The charge of the WFCC. The list length should be the same as the {make_link('sel_type', 'model[standard]/fitting_net[dipole]/sel_type')}. "
     doc_sys_charge_map = f"The charge of real atoms. The list length should be the same as the {make_link('type_map', 'model/type_map')}"
     doc_ewald_h = "The grid spacing of the FFT grid. Unit is A"
     doc_ewald_beta = f"The splitting parameter of Ewald sum. Unit is A^{-1}"
diff --git a/doc/freeze/compress.md b/doc/freeze/compress.md
index 3cce96c993..e26c85e45a 100644
--- a/doc/freeze/compress.md
+++ b/doc/freeze/compress.md
@@ -99,7 +99,7 @@ The model compression interface requires the version of DeePMD-kit used in the o
 
 Descriptors with `se_e2_a`, `se_e3`, `se_e2_r` and `se_atten_v2` types are supported by the model compression feature. `Hybrid` mixed with the above descriptors is also supported.
 
-Notice: Model compression for the `se_atten_v2` descriptor is exclusively designed for models with the training parameter {ref}`attn_layer <model/descriptor[se_atten_v2]/attn_layer>` set to 0.
+Notice: Model compression for the `se_atten_v2` descriptor is exclusively designed for models with the training parameter {ref}`attn_layer <model[standard]/descriptor[se_atten_v2]/attn_layer>` set to 0.
 
 **Available activation functions for descriptor:**
 
diff --git a/doc/model/dplr.md b/doc/model/dplr.md
index ec95f9f424..91c2251346 100644
--- a/doc/model/dplr.md
+++ b/doc/model/dplr.md
@@ -58,7 +58,7 @@ Two settings make the training input script different from an energy training in
 	},
 ```
 
-The type of fitting is set to {ref}`dipole <model/fitting_net[dipole]>`. The dipole is associated with type 0 atoms (oxygens), by the setting `"dipole_type": [0]`. What we trained is the displacement of the WC from the corresponding oxygen atom. It shares the same training input as the atomic dipole because both are 3-dimensional vectors defined on atoms.
+The type of fitting is set to {ref}`dipole <model[standard]/fitting_net[dipole]>`. The dipole is associated with type 0 atoms (oxygens), by the setting `"dipole_type": [0]`. What we trained is the displacement of the WC from the corresponding oxygen atom. It shares the same training input as the atomic dipole because both are 3-dimensional vectors defined on atoms.
 The loss section is provided as follows
 
 ```json
diff --git a/doc/model/dprc.md b/doc/model/dprc.md
index 33dde237d7..d9ce24b600 100644
--- a/doc/model/dprc.md
+++ b/doc/model/dprc.md
@@ -140,7 +140,7 @@ As described in the paper, the DPRc model only corrects $E_\text{QM}$ and $E_\te
 
 ::::
 
-{ref}`exclude_types <model/descriptor[se_a_ebd_v2]/exclude_types>` can be generated by the following Python script:
+{ref}`exclude_types <model[standard]/descriptor[se_a_ebd_v2]/exclude_types>` can be generated by the following Python script:
 
 ```py
 from itertools import combinations_with_replacement, product
@@ -163,7 +163,7 @@ print(
 )
 ```
 
-Also, DPRc assumes MM atom energies ({ref}`atom_ener <model/fitting_net[ener]/atom_ener>`) are zero:
+Also, DPRc assumes MM atom energies ({ref}`atom_ener <model[standard]/fitting_net[ener]/atom_ener>`) are zero:
 
 ```json
 "fitting_net": {
@@ -173,7 +173,7 @@ Also, DPRc assumes MM atom energies ({ref}`atom_ener <model/fitting_net[ener]/at
 }
 ```
 
-Note that {ref}`atom_ener <model/fitting_net[ener]/atom_ener>` only works when {ref}`descriptor/set_davg_zero <model/descriptor[se_a_ebd_v2]/set_davg_zero>` of the QM/MM part is `true`.
+Note that {ref}`atom_ener <model[standard]/fitting_net[ener]/atom_ener>` only works when {ref}`descriptor/set_davg_zero <model[standard]/descriptor[se_a_ebd_v2]/set_davg_zero>` of the QM/MM part is `true`.
 
 ## Run MD simulations
 
diff --git a/doc/model/index.rst b/doc/model/index.rst
index 8409d4ce97..c067ea4207 100644
--- a/doc/model/index.rst
+++ b/doc/model/index.rst
@@ -24,3 +24,4 @@ Model
    linear
    pairtab
    change-bias
+   precision
diff --git a/doc/model/overall.md b/doc/model/overall.md
index 102a8fc671..7f67c6545d 100644
--- a/doc/model/overall.md
+++ b/doc/model/overall.md
@@ -42,7 +42,7 @@ A model has two parts, a descriptor that maps atomic configuration to a set of s
     }
 ```
 
-The two subsections, {ref}`descriptor <model/descriptor>` and {ref}`fitting_net <model/fitting_net>`, define the descriptor and the fitting net, respectively.
+The two subsections, {ref}`descriptor <model[standard]/descriptor>` and {ref}`fitting_net <model[standard]/fitting_net>`, define the descriptor and the fitting net, respectively.
 
 The {ref}`type_map <model/type_map>` is optional, which provides the element names (but not necessarily same as the actual name of the element) of the corresponding atom types. A water model, as in this example, has two kinds of atoms. The atom types are internally recorded as integers, e.g., `0` for oxygen and `1` for hydrogen here. A mapping from the atom type to their names is provided by {ref}`type_map <model/type_map>`.
 
diff --git a/doc/model/train-energy-spin.md b/doc/model/train-energy-spin.md
index 9f4e3cf04b..ec169892f2 100644
--- a/doc/model/train-energy-spin.md
+++ b/doc/model/train-energy-spin.md
@@ -11,9 +11,9 @@ keeping other sections the same as the normal energy model's input script.
 Note that when adding spin into the model, there will be some implicit modifications automatically done by the program:
 
 - In the TensorFlow backend, the `se_e2_a` descriptor will treat those atom types with spin as new (virtual) types,
-  and duplicate their corresponding selected numbers of neighbors ({ref}`sel <model/descriptor[se_e2_a]/sel>`) from their real atom types.
+  and duplicate their corresponding selected numbers of neighbors ({ref}`sel <model[standard]/descriptor[se_e2_a]/sel>`) from their real atom types.
 - In the PyTorch backend, if spin settings are added, all the types (with or without spin) will have their virtual types.
-  The `se_e2_a` descriptor will thus double the {ref}`sel <model/descriptor[se_e2_a]/sel>` list,
+  The `se_e2_a` descriptor will thus double the {ref}`sel <model[standard]/descriptor[se_e2_a]/sel>` list,
   while in other descriptors with mixed types (such as `dpa1` or `dpa2`), the sel number will not be changed for clarity.
   If you are using descriptors with mixed types, to achieve better performance,
   you should manually extend your sel number (maybe double) depending on the balance between performance and efficiency.
diff --git a/doc/model/train-energy.md b/doc/model/train-energy.md
index c1da1f4c1f..75d31d4670 100644
--- a/doc/model/train-energy.md
+++ b/doc/model/train-energy.md
@@ -79,7 +79,7 @@ Benefiting from the relative force loss, small forces can be fitted more accurat
 
 ## The fitting network
 
-The construction of the fitting net is given by section {ref}`fitting_net <model/fitting_net>`
+The construction of the fitting net is given by section {ref}`fitting_net <model[standard]/fitting_net>`
 
 ```json
 	"fitting_net" : {
@@ -89,9 +89,9 @@ The construction of the fitting net is given by section {ref}`fitting_net <model
 	},
 ```
 
-- {ref}`neuron <model/fitting_net[ener]/neuron>` specifies the size of the fitting net. If two neighboring layers are of the same size, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
-- If the option {ref}`resnet_dt <model/fitting_net[ener]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
-- {ref}`seed <model/fitting_net[ener]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
+- {ref}`neuron <model[standard]/fitting_net[ener]/neuron>` specifies the size of the fitting net. If two neighboring layers are of the same size, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
+- If the option {ref}`resnet_dt <model[standard]/fitting_net[ener]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
+- {ref}`seed <model[standard]/fitting_net[ener]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
 
 ## Loss
 
diff --git a/doc/model/train-fitting-dos.md b/doc/model/train-fitting-dos.md
index 4c4366a1e1..d04dbc669c 100644
--- a/doc/model/train-fitting-dos.md
+++ b/doc/model/train-fitting-dos.md
@@ -16,11 +16,11 @@ $deepmd_source_dir/examples/dos/input.json
 
 The training and validation data are also provided our examples. But note that **the data provided along with the examples are of limited amount, and should not be used to train a production model.**
 
-Similar to the `input.json` used in `ener` mode, training JSON is also divided into {ref}`model <model>`, {ref}`learning_rate <learning_rate>`, {ref}`loss <loss>` and {ref}`training <training>`. Most keywords remain the same as `ener` mode, and their meaning can be found [here](train-se-e2-a.md). To fit the `dos`, one needs to modify {ref}`model/fitting_net <model/fitting_net>` and {ref}`loss <loss>`.
+Similar to the `input.json` used in `ener` mode, training JSON is also divided into {ref}`model <model>`, {ref}`learning_rate <learning_rate>`, {ref}`loss <loss>` and {ref}`training <training>`. Most keywords remain the same as `ener` mode, and their meaning can be found [here](train-se-e2-a.md). To fit the `dos`, one needs to modify {ref}`model[standard]/fitting_net <model[standard]/fitting_net>` and {ref}`loss <loss>`.
 
 ## The fitting Network
 
-The {ref}`fitting_net <model/fitting_net>` section tells DP which fitting net to use.
+The {ref}`fitting_net <model[standard]/fitting_net>` section tells DP which fitting net to use.
 
 The JSON of `dos` type should be provided like
 
diff --git a/doc/model/train-fitting-tensor.md b/doc/model/train-fitting-tensor.md
index 4d5cb22707..c6b54c69ef 100644
--- a/doc/model/train-fitting-tensor.md
+++ b/doc/model/train-fitting-tensor.md
@@ -30,7 +30,7 @@ $deepmd_source_dir/examples/water_tensor/polar/polar_input_torch.json
 
 The training and validation data are also provided our examples. But note that **the data provided along with the examples are of limited amount, and should not be used to train a production model.**
 
-Similar to the `input.json` used in `ener` mode, training JSON is also divided into {ref}`model <model>`, {ref}`learning_rate <learning_rate>`, {ref}`loss <loss>` and {ref}`training <training>`. Most keywords remain the same as `ener` mode, and their meaning can be found [here](train-se-e2-a.md). To fit a tensor, one needs to modify {ref}`model/fitting_net <model/fitting_net>` and {ref}`loss <loss>`.
+Similar to the `input.json` used in `ener` mode, training JSON is also divided into {ref}`model <model>`, {ref}`learning_rate <learning_rate>`, {ref}`loss <loss>` and {ref}`training <training>`. Most keywords remain the same as `ener` mode, and their meaning can be found [here](train-se-e2-a.md). To fit a tensor, one needs to modify {ref}`model[standard]/fitting_net <model[standard]/fitting_net>` and {ref}`loss <loss>`.
 
 ## Theory
 
@@ -72,7 +72,7 @@ The tensorial models can be used to calculate IR spectrum and Raman spectrum.[^1
 
 ## The fitting Network
 
-The {ref}`fitting_net <model/fitting_net>` section tells DP which fitting net to use.
+The {ref}`fitting_net <model[standard]/fitting_net>` section tells DP which fitting net to use.
 
 ::::{tab-set}
 
diff --git a/doc/model/train-hybrid.md b/doc/model/train-hybrid.md
index c0a55d9eb5..1219d208a7 100644
--- a/doc/model/train-hybrid.md
+++ b/doc/model/train-hybrid.md
@@ -25,7 +25,7 @@ This way, one can set the different cutoff radii for different descriptors.[^1]
 
 ## Instructions
 
-To use the descriptor in DeePMD-kit, one firstly set the {ref}`type <model/descriptor/type>` to {ref}`hybrid <model/descriptor[hybrid]>`, then provide the definitions of the descriptors by the items in the `list`,
+To use the descriptor in DeePMD-kit, one firstly set the {ref}`type <model[standard]/descriptor/type>` to {ref}`hybrid <model[standard]/descriptor[hybrid]>`, then provide the definitions of the descriptors by the items in the `list`,
 
 ```json
         "descriptor" :{
diff --git a/doc/model/train-se-a-mask.md b/doc/model/train-se-a-mask.md
index 6757fbefbd..69f344b138 100644
--- a/doc/model/train-se-a-mask.md
+++ b/doc/model/train-se-a-mask.md
@@ -29,7 +29,7 @@ A complete training input script of this example can be found in the directory.
 $deepmd_source_dir/examples/zinc_protein/zinc_se_a_mask.json
 ```
 
-The construction of the descriptor is given by section {ref}`descriptor <model/descriptor>`. An example of the descriptor is provided as follows
+The construction of the descriptor is given by section {ref}`descriptor <model[standard]/descriptor>`. An example of the descriptor is provided as follows
 
 ```json
 	"descriptor" :{
@@ -43,13 +43,13 @@ The construction of the descriptor is given by section {ref}`descriptor <model/d
 	}
 ```
 
-- The {ref}`type <model/descriptor/type>` of the descriptor is set to `"se_a_mask"`.
-- {ref}`sel <model/descriptor[se_a_mask]/sel>` gives the maximum number of atoms in input coordinates. It is a list, the length of which is the same as the number of atom types in the system, and `sel[i]` denotes the maximum number of atoms with type `i`.
-- The {ref}`neuron <model/descriptor[se_a_mask]/neuron>` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
-- The {ref}`axis_neuron <model/descriptor[se_a_mask]/axis_neuron>` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003)
-- If the option {ref}`type_one_side <model/descriptor[se_a_mask]/type_one_side>` is set to `true`, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters.
-- If the option {ref}`resnet_dt <model/descriptor[se_a_mask]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
-- {ref}`seed <model/descriptor[se_a_mask]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
+- The {ref}`type <model[standard]/descriptor/type>` of the descriptor is set to `"se_a_mask"`.
+- {ref}`sel <model[standard]/descriptor[se_a_mask]/sel>` gives the maximum number of atoms in input coordinates. It is a list, the length of which is the same as the number of atom types in the system, and `sel[i]` denotes the maximum number of atoms with type `i`.
+- The {ref}`neuron <model[standard]/descriptor[se_a_mask]/neuron>` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
+- The {ref}`axis_neuron <model[standard]/descriptor[se_a_mask]/axis_neuron>` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003)
+- If the option {ref}`type_one_side <model[standard]/descriptor[se_a_mask]/type_one_side>` is set to `true`, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters.
+- If the option {ref}`resnet_dt <model[standard]/descriptor[se_a_mask]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
+- {ref}`seed <model[standard]/descriptor[se_a_mask]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
 
 To make the `aparam.npy` used for descriptor `se_a_mask`, two variables in `fitting_net` section are needed.
 
@@ -63,9 +63,9 @@ To make the `aparam.npy` used for descriptor `se_a_mask`, two variables in `fitt
 	}
 ```
 
-- `neuron`, `resnet_dt` and `seed` are the same as the {ref}`fitting_net <model/fitting_net[ener]>` section for fitting energy.
-- {ref}`numb_aparam <model/fitting_net[ener]/numb_aparam>` gives the dimesion of the `aparam.npy` file. In this example, it is set to 1 and stores the real/virtual sign of the atoms. For real/virtual atoms, the corresponding sign in `aparam.npy` is set to 1/0.
-- {ref}`use_aparam_as_mask <model/fitting_net[ener]/use_aparam_as_mask>` is set to `true` to use the `aparam.npy` as the mask of the atoms in the descriptor `se_a_mask`.
+- `neuron`, `resnet_dt` and `seed` are the same as the {ref}`fitting_net <model[standard]/fitting_net[ener]>` section for fitting energy.
+- {ref}`numb_aparam <model[standard]/fitting_net[ener]/numb_aparam>` gives the dimension of the `aparam.npy` file. In this example, it is set to 1 and stores the real/virtual sign of the atoms. For real/virtual atoms, the corresponding sign in `aparam.npy` is set to 1/0.
+- {ref}`use_aparam_as_mask <model[standard]/fitting_net[ener]/use_aparam_as_mask>` is set to `true` to use the `aparam.npy` as the mask of the atoms in the descriptor `se_a_mask`.
 
 Finally, to make a reasonable fitting task with `se_a_mask` descriptor for DP/MM simulations, the loss function with `se_a_mask` is designed to include the atomic forces difference in specific atoms of the input particles only.
 More details about the selection of the specific atoms can be found in paper [DP/MM](left to be filled).
diff --git a/doc/model/train-se-atten.md b/doc/model/train-se-atten.md
index 24950d9595..bebce78365 100644
--- a/doc/model/train-se-atten.md
+++ b/doc/model/train-se-atten.md
@@ -104,17 +104,17 @@ An example of the DPA-1 descriptor is provided as follows
 	}
 ```
 
-- The {ref}`type <model/descriptor/type>` of the descriptor is set to `"se_atten"`, which will use DPA-1 structures.
-- {ref}`rcut <model/descriptor[se_atten]/rcut>` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth <model/descriptor[se_atten]/rcut_smth>` gives where the smoothing starts.
-- **{ref}`sel <model/descriptor[se_atten]/sel>`** gives the maximum possible number of neighbors in the cut-off radius. It is an int. Note that this number highly affects the efficiency of training, which we usually use less than 200. (We use 120 for training 56 elements in [OC2M dataset](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md))
-- The {ref}`neuron <model/descriptor[se_atten]/neuron>` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
-- The {ref}`axis_neuron <model/descriptor[se_atten]/axis_neuron>` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003)
-- If the option {ref}`resnet_dt <model/descriptor[se_atten]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
-- {ref}`seed <model/descriptor[se_atten]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
-- {ref}`attn <model/descriptor[se_atten]/attn>` sets the length of a hidden vector during scale-dot attention computation.
-- {ref}`attn_layer <model/descriptor[se_atten]/attn_layer>` sets the number of layers in attention mechanism.
-- {ref}`attn_mask <model/descriptor[se_atten]/attn_mask>` determines whether to mask the diagonal in the attention weights and False is recommended.
-- {ref}`attn_dotr <model/descriptor[se_atten]/attn_dotr>` determines whether to dot the relative coordinates on the attention weights as a gated scheme, True is recommended.
+- The {ref}`type <model[standard]/descriptor/type>` of the descriptor is set to `"se_atten"`, which will use DPA-1 structures.
+- {ref}`rcut <model[standard]/descriptor[se_atten]/rcut>` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth <model[standard]/descriptor[se_atten]/rcut_smth>` gives where the smoothing starts.
+- **{ref}`sel <model[standard]/descriptor[se_atten]/sel>`** gives the maximum possible number of neighbors in the cut-off radius. It is an int. Note that this number highly affects the efficiency of training, which we usually use less than 200. (We use 120 for training 56 elements in [OC2M dataset](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md))
+- The {ref}`neuron <model[standard]/descriptor[se_atten]/neuron>` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
+- The {ref}`axis_neuron <model[standard]/descriptor[se_atten]/axis_neuron>` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003)
+- If the option {ref}`resnet_dt <model[standard]/descriptor[se_atten]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
+- {ref}`seed <model[standard]/descriptor[se_atten]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
+- {ref}`attn <model[standard]/descriptor[se_atten]/attn>` sets the length of a hidden vector during scale-dot attention computation.
+- {ref}`attn_layer <model[standard]/descriptor[se_atten]/attn_layer>` sets the number of layers in attention mechanism.
+- {ref}`attn_mask <model[standard]/descriptor[se_atten]/attn_mask>` determines whether to mask the diagonal in the attention weights and False is recommended.
+- {ref}`attn_dotr <model[standard]/descriptor[se_atten]/attn_dotr>` determines whether to dot the relative coordinates on the attention weights as a gated scheme, True is recommended.
 
 ### Descriptor `"se_atten_v2"`
 
@@ -138,7 +138,7 @@ You can use descriptor `"se_atten_v2"` and do not need to set `tebd_input_mode`
 
 Practical evidence demonstrates that `"se_atten_v2"` offers better and more stable performance compared to `"se_atten"`.
 
-Notice: Model compression for the `se_atten_v2` descriptor is exclusively designed for models with the training parameter {ref}`attn_layer <model/descriptor[se_atten_v2]/attn_layer>` set to 0.
+Notice: Model compression for the `se_atten_v2` descriptor is exclusively designed for models with the training parameter {ref}`attn_layer <model[standard]/descriptor[se_atten_v2]/attn_layer>` set to 0.
 
 ### Fitting `"ener"`
 
diff --git a/doc/model/train-se-e2-a.md b/doc/model/train-se-e2-a.md
index 2412bbc64e..81b95399e0 100644
--- a/doc/model/train-se-e2-a.md
+++ b/doc/model/train-se-e2-a.md
@@ -70,7 +70,7 @@ $deepmd_source_dir/examples/water/se_e2_a/input.json
 
 With the training input script, data are also provided in the example directory. One may train the model with the DeePMD-kit from the directory.
 
-The construction of the descriptor is given by section {ref}`descriptor <model/descriptor>`. An example of the descriptor is provided as follows
+The construction of the descriptor is given by section {ref}`descriptor <model[standard]/descriptor>`. An example of the descriptor is provided as follows
 
 ```json
 	"descriptor" :{
@@ -86,11 +86,11 @@ The construction of the descriptor is given by section {ref}`descriptor <model/d
 	}
 ```
 
-- The {ref}`type <model/descriptor/type>` of the descriptor is set to `"se_e2_a"`.
-- {ref}`rcut <model/descriptor[se_e2_a]/rcut>` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth <model/descriptor[se_e2_a]/rcut_smth>` gives where the smoothing starts.
-- {ref}`sel <model/descriptor[se_e2_a]/sel>` gives the maximum possible number of neighbors in the cut-off radius. It is a list, the length of which is the same as the number of atom types in the system, and `sel[i]` denotes the maximum possible number of neighbors with type `i`.
-- The {ref}`neuron <model/descriptor[se_e2_a]/neuron>` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
-- If the option {ref}`type_one_side <model/descriptor[se_e2_a]/type_one_side>` is set to `true`, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters.
-- The {ref}`axis_neuron <model/descriptor[se_e2_a]/axis_neuron>` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003)
-- If the option {ref}`resnet_dt <model/descriptor[se_e2_a]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
-- {ref}`seed <model/descriptor[se_e2_a]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
+- The {ref}`type <model[standard]/descriptor/type>` of the descriptor is set to `"se_e2_a"`.
+- {ref}`rcut <model[standard]/descriptor[se_e2_a]/rcut>` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth <model[standard]/descriptor[se_e2_a]/rcut_smth>` gives where the smoothing starts.
+- {ref}`sel <model[standard]/descriptor[se_e2_a]/sel>` gives the maximum possible number of neighbors in the cut-off radius. It is a list, the length of which is the same as the number of atom types in the system, and `sel[i]` denotes the maximum possible number of neighbors with type `i`.
+- The {ref}`neuron <model[standard]/descriptor[se_e2_a]/neuron>` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
+- If the option {ref}`type_one_side <model[standard]/descriptor[se_e2_a]/type_one_side>` is set to `true`, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters.
+- The {ref}`axis_neuron <model[standard]/descriptor[se_e2_a]/axis_neuron>` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003)
+- If the option {ref}`resnet_dt <model[standard]/descriptor[se_e2_a]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
+- {ref}`seed <model[standard]/descriptor[se_e2_a]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
diff --git a/doc/model/train-se-e2-r.md b/doc/model/train-se-e2-r.md
index f427310196..316bde43b4 100644
--- a/doc/model/train-se-e2-r.md
+++ b/doc/model/train-se-e2-r.md
@@ -52,7 +52,7 @@ A complete training input script of this example can be found in the directory
 $deepmd_source_dir/examples/water/se_e2_r/input.json
 ```
 
-The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.md). The only difference lies in the {ref}`descriptor <model/descriptor>` section
+The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.md). The only difference lies in the {ref}`descriptor <model[standard]/descriptor>` section
 
 ```json
 	"descriptor": {
@@ -68,4 +68,4 @@ The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.m
 	},
 ```
 
-The type of the descriptor is set by the key {ref}`type <model/descriptor/type>`.
+The type of the descriptor is set by the key {ref}`type <model[standard]/descriptor/type>`.
diff --git a/doc/model/train-se-e3-tebd.md b/doc/model/train-se-e3-tebd.md
index 8b49b0c220..5935a8920a 100644
--- a/doc/model/train-se-e3-tebd.md
+++ b/doc/model/train-se-e3-tebd.md
@@ -56,7 +56,7 @@ A complete training input script of this example can be found in the directory
 $deepmd_source_dir/examples/water/se_e3_tebd/input.json
 ```
 
-The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.md). The only difference lies in the {ref}`descriptor <model/descriptor>` section
+The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.md). The only difference lies in the {ref}`descriptor <model[standard]/descriptor>` section
 
 ```json
 	"descriptor": {
@@ -75,4 +75,4 @@ The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.m
 	},
 ```
 
-The type of the descriptor is set by the key {ref}`type <model/descriptor/type>`.
+The type of the descriptor is set by the key {ref}`type <model[standard]/descriptor/type>`.
diff --git a/doc/model/train-se-e3.md b/doc/model/train-se-e3.md
index d650d72493..3d82c42c9e 100644
--- a/doc/model/train-se-e3.md
+++ b/doc/model/train-se-e3.md
@@ -48,7 +48,7 @@ A complete training input script of this example can be found in the directory
 $deepmd_source_dir/examples/water/se_e3/input.json
 ```
 
-The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.md). The only difference lies in the `descriptor <model/descriptor>` section
+The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.md). The only difference lies in the {ref}`descriptor <model[standard]/descriptor>` section
 
 ```json
 	"descriptor": {
@@ -63,4 +63,4 @@ The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.m
 	},
 ```
 
-The type of the descriptor is set by the key {ref}`type <model/descriptor/type>`.
+The type of the descriptor is set by the key {ref}`type <model[standard]/descriptor/type>`.
diff --git a/doc/train/finetuning.md b/doc/train/finetuning.md
index 4fbe95b2fd..669d1319bd 100644
--- a/doc/train/finetuning.md
+++ b/doc/train/finetuning.md
@@ -36,7 +36,7 @@ The elements in the training dataset must be contained in the pre-trained datase
 
 The finetune procedure will inherit the model structures in `pretrained.pb`,
 and thus it will ignore the model parameters in `input.json`,
-such as {ref}`descriptor <model/descriptor>`, {ref}`fitting_net <model/fitting_net>`,
+such as {ref}`descriptor <model[standard]/descriptor>`, {ref}`fitting_net <model[standard]/fitting_net>`,
 {ref}`type_embedding <model/type_embedding>` and {ref}`type_map <model/type_map>`.
 However, you can still set the `trainable` parameters in each part of `input.json` to control the training procedure.
 
diff --git a/doc/train/gpu-limitations.md b/doc/train/gpu-limitations.md
index 92577fd65c..44c9697dd4 100644
--- a/doc/train/gpu-limitations.md
+++ b/doc/train/gpu-limitations.md
@@ -5,5 +5,5 @@ If you use DeePMD-kit in a GPU environment, the acceptable value range of some v
 1. The number of atom types of a given system must be less than 128.
 2. The maximum distance between an atom and its neighbors must be less than 128. It can be controlled by setting the rcut value of training parameters.
 3. Theoretically, the maximum number of atoms that a single GPU can accept is about 10,000,000. However, this value is limited by the GPU memory size currently, usually within 1000,000 atoms even in the model compression mode.
-4. The total sel value of training parameters(in `model/descriptor` section) must be less than 4096.
+4. The total sel value of training parameters (in `model[standard]/descriptor` section) must be less than 4096.
 5. The size of the last layer of the embedding net must be less than 1024 during the model compression process.
diff --git a/doc/train/training-advanced.md b/doc/train/training-advanced.md
index 9be12e9fb8..d21feb2126 100644
--- a/doc/train/training-advanced.md
+++ b/doc/train/training-advanced.md
@@ -114,7 +114,7 @@ The section {ref}`mixed_precision <training/mixed_precision>` specifies the mixe
 - {ref}`output_prec <training/mixed_precision/output_prec>` precision used in the output tensors, only `float32` is supported currently.
 - {ref}`compute_prec <training/mixed_precision/compute_prec>` precision used in the computing tensors, only `float16` is supported currently.
   Note there are several limitations about mixed precision training:
-- Only {ref}`se_e2_a <model/descriptor[se_e2_a]>` type descriptor is supported by the mixed precision training workflow.
+- Only {ref}`se_e2_a <model[standard]/descriptor[se_e2_a]>` type descriptor is supported by the mixed precision training workflow.
 - The precision of the embedding net and the fitting net are forced to be set to `float32`.
 
 Other keys in the {ref}`training <training>` section are explained below:

From 3e7e40cc7323967976fdbadf95ba6760a90fd9ec Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 22 Oct 2024 17:00:01 -0400
Subject: [PATCH 2/4] Update doc/model/index.rst

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 doc/model/index.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/model/index.rst b/doc/model/index.rst
index c067ea4207..8409d4ce97 100644
--- a/doc/model/index.rst
+++ b/doc/model/index.rst
@@ -24,4 +24,3 @@ Model
    linear
    pairtab
    change-bias
-   precision

From 178cfc6fa9029eb22ee78deb0a18729d8c62a2e3 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 22 Oct 2024 23:39:01 -0400
Subject: [PATCH 3/4] insert links to doc

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd/utils/argcheck.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index c3fce807d5..239a6a5755 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -1397,16 +1397,19 @@ def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant:
     link_se_atten_v2 = make_link(
         "se_atten_v2", "model[standard]/descriptor[se_atten_v2]"
     )
-    doc_descrpt_type = "The type of the descritpor. See explanation below. \n\n\
-- `loc_frame`: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.\n\n\
-- `se_e2_a`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.\n\n\
-- `se_e2_r`: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.\n\n\
-- `se_e3`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.\n\n\
-- `se_a_tpe`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.\n\n\
-- `se_atten`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.\n\n\
-- `se_atten_v2`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.\n\n\
-- `se_a_mask`: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). *aparam* are required as an indicator matrix for the real/virtual sign of input atoms. \n\n\
-- `hybrid`: Concatenate of a list of descriptors as a new descriptor."
+    link_se_a_mask = make_link(
+        "se_a_mask", "model[standard]/descriptor[se_a_mask]"
+    )
+    doc_descrpt_type = f"The type of the descritpor. See explanation below. \n\n\
+- {link_lf}: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.\n\n\
+- {link_se_e2_a}: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.\n\n\
+- {link_se_e2_r}: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.\n\n\
+- {link_se_e3}: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.\n\n\
+- {link_se_a_tpe}: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.\n\n\
+- {link_se_atten}: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.\n\n\
+- {link_se_atten_v2}: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.\n\n\
+- {link_se_a_mask}: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). *aparam* are required as an indicator matrix for the real/virtual sign of input atoms. \n\n\
+- {link_hybrid}: Concatenate of a list of descriptors as a new descriptor."
 
     return Variant(
         "type",

From 3de01405cf27442540f3b8f98c678ffe4107cbe5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 23 Oct 2024 03:40:08 +0000
Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 deepmd/utils/argcheck.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 239a6a5755..b3f3b26fd0 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -1397,9 +1397,7 @@ def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant:
     link_se_atten_v2 = make_link(
         "se_atten_v2", "model[standard]/descriptor[se_atten_v2]"
     )
-    link_se_a_mask = make_link(
-        "se_a_mask", "model[standard]/descriptor[se_a_mask]"
-    )
+    link_se_a_mask = make_link("se_a_mask", "model[standard]/descriptor[se_a_mask]")
     doc_descrpt_type = f"The type of the descritpor. See explanation below. \n\n\
 - {link_lf}: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.\n\n\
 - {link_se_e2_a}: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.\n\n\