-
Notifications
You must be signed in to change notification settings - Fork 1
/
model_config.json5
126 lines (126 loc) · 4.14 KB
/
model_config.json5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
{
// Algorithm selection plus one settings section per algorithm.
model: {
// Algorithms that have a section in this file: 'bc', 'td3_bc', 'doge', 'h2o', 'dmil', 'iql'.
// NOTE(review): this comment previously listed 'mopp', 'more', 'saber', none of which has a
// section in this file -- confirm whether those are still valid values.
model_name: 'td3_bc',
// Parameters for each model
// Imitation algorithms
bc: {
// Set the model training schedule ('b': behavior, 'd': dynamics, 'q': Q-value function, 'vae_s': vae_s, 'agent': agent).
train_schedule: ['agent'],
hyper_params: {
// Network definitions, keyed by network name.
// NOTE(review): the inner list presumably holds hidden-layer sizes -- confirm against the loader.
model_params: {p: [[256, 256],]},
// Optimizer settings, keyed by the same network names as model_params.
// NOTE(review): each entry looks like [optimizer name, learning rate] -- confirm.
optimizers: {p: ['adam', 3e-4],},
},
},
td3_bc: {
train_schedule: ['agent'],
hyper_params: {
// NOTE(review): the trailing 2 after q's layer list is likely the number of ensemble networks,
// by analogy with the note on env.learned.prob.model_params -- confirm.
model_params: {q: [[256, 256], 2], p: [[256, 256],]},
optimizers: {q: ['adam', 3e-4], p: ['adam', 3e-4]}
}
},
doge: {
train_schedule: ['agent'],
hyper_params: {
model_params: {q: [[256, 256], 2], p: [[256, 256],], distance: [[256, 256, 256],]},
optimizers: {q: ['adam', 3e-4], p: ['adam', 3e-4], distance: ['adam', 1e-3]},
alpha: 17.5,
lambda_lr: 0.0003,
initial_lambda: 5,
// NOTE(review): larger than train.total_train_steps (10000) in this file -- confirm this is intended.
train_d_steps: 100000,
N: 20,
}
},
h2o: {
train_schedule: ['agent'],
hyper_params: {
model_params: {q: [[256, 256], 2], p: [[256, 256],], dsa: [[256, 256],], dsas: [[256, 256],]},
// 'alpha' and 'alpha_prime' have optimizers here but no entry in model_params above.
optimizers: {q: ['adam', 3e-4], p: ['adam', 3e-4], dsa: ['adam', 3e-4], dsas: ['adam', 3e-4], alpha: ['adam', 3e-4], alpha_prime: ['adam', 3e-4]},
}
},
dmil: {
train_schedule: ['agent'],
hyper_params: {
model_params: {f: [[256, 256],], p: [[256, 256],], d:[[512, 512],]},
optimizers: {f: ['adam', 1e-4], p: ['adam', 3e-4], d: ['adam', 1e-4]},
// Unset in this file.
rollout_size: null,
}
},
iql: {
train_schedule: ['agent'],
hyper_params: {
model_params: {v: [[256, 256],], q: [[256, 256], 2], p: [[256, 256],]},
optimizers: {v: ['adam', 3e-4], q: ['adam', 3e-4], p: ['adam', 3e-4]}
}
},
},
// Environment settings: basic dimensions/bounds, the external dataset source, and learned dynamics models.
env: {
// All entries are null in this file.
// NOTE(review): presumably filled in at runtime from the selected environment -- confirm.
basic_info: {state_dim: null, state_min: null, state_max: null,
action_dim: null, action_min: null, action_max: null},
// Parameters for an environment that is provided externally.
// NOTE(review): num_transitions: -1 presumably means "use all transitions" -- confirm.
external: {benchmark_name: 'd4rl', data_source: 'mujoco', env_name: 'Hopper-v2', data_name: 'hopper_random-v2',
data_file_path: null, state_normalize: false, reward_normalize: false, num_transitions: -1,
score_normalize: false, score_norm_min: null, score_norm_max: null},
learned: {
// Dynamics model type: 'mlp', 'prob', 'rnn', 'adm'.
// NOTE(review): only 'prob' and 'mlp' have a parameter section in this file -- confirm 'rnn' and 'adm' are configured elsewhere.
dynamic_module_type: 'prob',
// Whether the dynamics model also predicts the reward.
with_reward: true,
// Parameters for each type of dynamics model.
prob: {
model_params: [[200, 200], 3], // network structure & ensemble net number.
optimizers: ['adam', 1e-3],
local_mode: true,
},
mlp: {
model_params: [[200, 200], 3],
optimizers: ['adam', 1e-3],
},
}
},
// Training settings: device, data handling, step counts and logging/saving frequencies, checkpoint names, and Wandb logging.
train: {
device: 'cuda',
data_loader_name: null, // 'app' for real-world application data.
action_noise: null, // The std value of the Gaussian noise added to the action.
data_split_ratio: null, // The ratio for splitting the dataset. It should be in (0, 1].
test_data_ratio: 0.1,
batch_size: 64,
model_buffer_size: null, // The capacity of the empty buffer.
weight_decays: 0.0,
update_freq: 1,
// NOTE(review): presumably the soft target-update coefficient -- confirm against the trainer.
update_rate: 0.005,
discount: 0.99,
total_train_steps: 10000,
// NOTE(review): the *_freq values below are presumably measured in training steps -- confirm.
summary_freq: 100,
print_freq: 1000,
save_freq: 10000,
eval_freq: 10000,
// Parameters for model files: the output directory and one checkpoint name per model kind.
model_dir: 'models',
behavior_ckpt_name: 'b',
dynamics_ckpt_name: 'd',
q_ckpt_name: 'q',
vae_s_ckpt_name: 'vae_s',
agent_ckpt_name: 'agent',
seed: 1,
// Parameters for the Wandb logger; entity, project and name are unset (null) in this file.
wandb: {entity: null, project: null, name: null, reinit: false, mode: 'online'}
},
// Evaluation settings, including off-policy evaluation (the 'ope' section).
eval: {
n_eval_episodes: 10,
// NOTE(review): this key is singular, while ope.mb_ope uses 'episode_steps' -- confirm the consumer expects both spellings.
episode_step: 10,
log_dir: 'eval',
start: 0.,
steps: 100,
// One entry per estimator.
// NOTE(review): 'fqe' presumably means fitted Q evaluation and 'mb_ope' model-based OPE -- confirm.
ope: {
fqe: {train_steps: 250000, model_params: [[1024, 1024, 1024, 1024], 1], optimizers: ['adam', 1e-4],
update_freq: 100, update_rate: 1, start: 0, eval_steps: 100},
mb_ope: {discount: 0.99, episode_steps: 30, start: 0, eval_steps: 100}
}
},
// Interface settings; both entries are unset (null) in this file.
interface: {
policy_file: null,
log_path: null,
}
}