example/benchmark/config/model_config.json5

{
  model: {
    // model types configured below: 'bc', 'td3_bc', 'doge', 'h2o', 'dmil', 'iql'
    model_name: 'td3_bc',
    // Parameters for each model
    // Imitation algorithms
    bc: {
      // set the model training schedule ('b': behavior, 'd': dynamics, 'q': Q-value function, 'vae_s': vae_s, 'agent': agent)
      train_schedule: ['agent'],
      hyper_params: {
        model_params: {p: [[256, 256],]},
        // optimizer for each network: [optimizer name, learning rate]
        optimizers: {p: ['adam', 3e-4],},
      },
    },
    td3_bc: {
      train_schedule: ['agent'],
      hyper_params: {
        model_params: {q: [[256, 256], 2], p: [[256, 256],]},
        optimizers: {q: ['adam', 3e-4], p: ['adam', 3e-4]}
      }
    },
    doge: {
      train_schedule: ['agent'],
      hyper_params: {
        model_params: {q: [[256, 256], 2], p: [[256, 256],], distance: [[256, 256, 256],]},
        optimizers: {q: ['adam', 3e-4], p: ['adam', 3e-4], distance:  ['adam', 1e-3]},
        alpha: 17.5,
        lambda_lr: 0.0003,
        initial_lambda: 5,
        train_d_steps: 100000,
        N: 20,
      }
    },
    h2o: {
      train_schedule: ['agent'],
      hyper_params: {
        model_params: {q: [[256, 256], 2], p: [[256, 256],], dsa: [[256, 256],], dsas: [[256, 256],]},
        optimizers: {q: ['adam', 3e-4], p: ['adam', 3e-4], dsa: ['adam', 3e-4], dsas: ['adam', 3e-4], alpha: ['adam', 3e-4], alpha_prime: ['adam', 3e-4]},
      }
    },
    dmil: {
      train_schedule: ['agent'],
      hyper_params: {
        model_params: {f: [[256, 256],], p: [[256, 256],], d:[[512, 512],]},
        optimizers: {f: ['adam', 1e-4], p: ['adam', 3e-4], d: ['adam', 1e-4]},
        rollout_size: null,
      }
    },
    iql: {
      train_schedule: ['agent'],
      hyper_params: {
        model_params: {v: [[256, 256],], q: [[256, 256], 2], p: [[256, 256],]},
        optimizers: {v: ['adam', 3e-4], q: ['adam', 3e-4], p: ['adam', 3e-4]}
      }
    },
  },
  env: {
    basic_info: {state_dim: null, state_min: null, state_max: null,
      action_dim: null, action_min: null, action_max: null},
    // Parameters for an externally provided environment
    external: {benchmark_name: 'd4rl', data_source: 'mujoco', env_name: 'Hopper-v2', data_name: 'hopper_random-v2',
    data_file_path: null, state_normalize: false, reward_normalize: false, num_transitions: -1,
    score_normalize: false, score_norm_min: null, score_norm_max: null},
    learned: {
      // 'mlp', 'prob', 'rnn', 'adm'
      dynamic_module_type: 'prob',
      // Whether the dynamics model also predicts the reward.
      with_reward: true,
      // parameters for each type of dynamics model
      prob: {
        model_params: [[200, 200], 3],  // network structure & ensemble net number.
        optimizers: ['adam', 1e-3],
        local_mode: true,
      },
      mlp: {
        model_params: [[200, 200], 3],
        optimizers: ['adam', 1e-3],
      },
    }
  },
  train: {
    device: 'cuda',
    data_loader_name: null,  // 'app' for real-world application data.
    action_noise: null,  // The std value of the gaussian noise added to the action.
    data_split_ratio: null, // The ratio for splitting the dataset. It should be in (0, 1].
    test_data_ratio: 0.1,
    batch_size: 64,
    model_buffer_size: null,  // The capacity of the empty buffer.
    weight_decays: 0.0,
    update_freq: 1,
    update_rate: 0.005,
    discount: 0.99,
    total_train_steps: 10000,
    summary_freq: 100,
    print_freq: 1000,
    save_freq: 10000,
    eval_freq: 10000,
    // parameters for model files
    model_dir: 'models',
    behavior_ckpt_name: 'b',
    dynamics_ckpt_name: 'd',
    q_ckpt_name: 'q',
    vae_s_ckpt_name: 'vae_s',
    agent_ckpt_name: 'agent',
    seed: 1,
    // parameters for Wandb logger
    wandb: {entity: null, project: null, name: null, reinit: false, mode: 'online'}
  },
  eval: {
    n_eval_episodes: 10,
    episode_step: 10,
    log_dir: 'eval',
    start: 0.,
    steps: 100,
    ope: {
      fqe: {train_steps: 250000, model_params: [[1024, 1024, 1024, 1024], 1], optimizers: ['adam', 1e-4],
      update_freq: 100, update_rate: 1, start: 0, eval_steps: 100},
      mb_ope: {discount: 0.99, episode_steps: 30, start: 0, eval_steps: 100}
    }
  },
  interface: {
    policy_file: null,
    log_path: null,
  }
}