# water-deepmd-e0-f1.yaml
root: water-deepmd-e0-f1-root
run_name: water-deepmd-e0-f1-run_name
workdir: water-deepmd-e0-f1-workdir
requeue: true
seed: 0 # random number seed for numpy and torch
append: true # set True if a restarted run should append to the previous log file
default_dtype: float32 # type of float to use, e.g. float32 and float64
allow_tf32: false # whether to use TensorFloat32 if it is available
# network
r_max: 6.0 # cutoff radius in length units
num_layers: 6 # number of interaction blocks, we found 5-6 to work best
chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species
feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
irreps_edge_sh: 0e + 1o + 2e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
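# (per the note above, the integer shorthand `irreps_edge_sh: 2` would request the same full set of spherical harmonics up to L_max=2)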
conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block
nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended
nonlinearity_scalars:
  e: silu
  o: tanh
nonlinearity_gates:
  e: silu
  o: tanh
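# (the e/o keys above select the nonlinearities applied to even- and odd-parity features, matching the e/o parity labels used in the irreps above)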
resnet: false # set true to make interaction block a resnet-style update
num_basis: 8 # number of basis functions used in the radial basis
BesselBasis_trainable: true # set true to train the Bessel basis weights
PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function
# radial network
invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2
invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster
avg_num_neighbors: 90 # number of neighbors to divide by, None => no normalization.
use_sc: true # use self-connection or not, usually gives big improvement
compile_model: false # whether to compile the constructed model to TorchScript
# data set
# the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
# key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
# all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
dataset: ase # type of data set, can be npz or ase
dataset_file_name: path-to-nequip-data/water-deepmd/water-pbe0ts/deepmd-water-183-preselected.xyz
ase_args:
  format: extxyz
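# for reference, a sketch of what a key_mapping block could look like for an npz dataset; the left-hand names are
# hypothetical keys inside the npz file and the right-hand names are the NequIP defaults referenced above (see data/_key.py).
# this config reads an extxyz file through ase, so no key_mapping is needed here.
# key_mapping:
#   R: pos # raw positions
#   z: atomic_numbers # atomic numbers
#   E: total_energy # total potential energy
#   F: forces # atomic forces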
# logging
wandb: true # we recommend using wandb for logging; it is optional and can be turned off by setting this to false
wandb_project: nequip-paper-results-water-deepmd # project name used in wandb
wandb_resume: true # if true and the run is restarted, the previous wandb run will be resumed and updated;
# if false, a new wandb run will be created
verbose: info # logging verbosity; uses the Python logging levels, e.g. warning, info, debug, error; case insensitive
log_batch_freq: 1000000 # batch frequency, how often to print training errors within the same epoch
log_epoch_freq: 1 # epoch frequency, how often to print and save the model
save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive.
save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
# training
n_train: 133 # number of training data
n_val: 50 # number of validation data
learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best
batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5)
max_epochs: 1000000 # stop training after _ number of epochs
train_val_split: random # can be random or sequential
shuffle: true # If true, the data loader will shuffle the data, usually a good idea
metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
ema_decay: 0.99 # ema weight, commonly set to 0.999
ema_use_num_updates: true # whether to use number of updates when computing averages
# early stopping based on metrics values.
# LR, wall and any keys printed in the log file can be used.
# The key can start with Training or Validation. If not defined, the validation value will be used.
early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs
  Validation_loss: 1000
early_stopping_lower_bounds: # stop early if a metric value is lower than the bound
  LR: 1.0e-6
loss_coeffs:
  forces:
    - 1.
  total_energy:
    - 0.
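# the loss is a weighted sum of the per-key terms above, so with forces at 1. and total_energy at 0. this run
# effectively trains on forces only (presumably what the e0-f1 in the run name refers to: energy coeff 0, force coeff 1)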
# output metrics
metrics_components:
  - - forces # key
    - rmse # "rmse" or "mse"
    - PerSpecies: True # if true, per species contribution is counted separately
      report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
  - - forces
    - mae
    - PerSpecies: True
      report_per_component: False
  - - total_energy
    - mae
  - - total_energy
    - rmse
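# these components are what produce the per-key error columns (e.g. f_mae, f_rmse, e_mae, e_rmse) referred to by metrics_key above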
# optimizer, may be any optimizer defined in torch.optim
# the value of `optimizer_name` is case sensitive and must match the torch.optim class name
optimizer_name: Adam # default optimizer is Adam in the amsgrad mode
optimizer_amsgrad: true
optimizer_betas: !!python/tuple
  - 0.9
  - 0.999
optimizer_eps: 1.0e-08
optimizer_weight_decay: 0
# lr scheduler; only a small set of schedulers is currently supported, if you need more please file an issue
# on-plateau: reduce lr by a factor of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epochs
lr_scheduler_name: ReduceLROnPlateau
lr_scheduler_patience: 50
lr_scheduler_factor: 0.8
# the default is to scale the energies and forces by the force standard deviation and to shift the energy by its mean
# in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
# whether to apply a shift and scale, defined per-species, to the atomic energies
PerSpeciesScaleShift_enable: true
# if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
PerSpeciesScaleShift_trainable: true
# optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
PerSpeciesScaleShift_shifts: [-155.91459812556295, -155.91459812556295]
# optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
PerSpeciesScaleShift_scales: [1.966191139611105, 1.966191139611105]
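# (here both species use the same shift and scale, so the species ordering, presumably H and O for this water dataset, does not matter)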
# global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
global_rescale_shift: null
# global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
# If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
global_rescale_scale: null
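# with both set to null here, the global shift and scale are disabled, so the energy normalization comes from the per-species shift/scale enabled above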
# whether the shift of the final global energy rescaling should be trainable
trainable_global_rescale_shift: false
# whether the scale of the final global energy rescaling should be trainable
trainable_global_rescale_scale: false