diff --git a/alfredo/agents/aant/aant.py b/alfredo/agents/aant/aant.py
index 9003125..f65b796 100644
--- a/alfredo/agents/aant/aant.py
+++ b/alfredo/agents/aant/aant.py
@@ -157,12 +157,12 @@ def step(self, state: State, action: jax.Array) -> State:
         ctrl_cost = rControl_act_ss(self.sys,
                                     pipeline_state,
                                     action,
-                                    weight=0.0)
+                                    weight=-self._ctrl_cost_weight)
 
         torque_cost = rTorques(self.sys,
                                pipeline_state,
                                action,
-                               weight=0.0)
+                               weight=-0.003)
 
         upright_reward = rUpright(self.sys,
                                   pipeline_state,
diff --git a/experiments/AAnt-locomotion/training.py b/experiments/AAnt-locomotion/training.py
index 182b0cc..fe40455 100644
--- a/experiments/AAnt-locomotion/training.py
+++ b/experiments/AAnt-locomotion/training.py
@@ -28,7 +28,7 @@
     "backend": "positional",
     "seed": 1,
     "len_training": 1_500_000,
-    "num_evals": 200,
+    "num_evals": 500,
     "num_envs": 2048,
     "batch_size": 2048,
     "num_minibatches": 8,
@@ -81,7 +81,10 @@ def progress(num_steps, metrics):
 
 scenes_fp = os.path.dirname(scenes.__file__)
 
-env_xml_paths = [f"{scenes_fp}/flatworld/flatworld_A1_env.xml"]
+env_xml_paths = [f"{scenes_fp}/flatworld/flatworld_A1_env.xml",
+                 f"{scenes_fp}/flatworld/flatworld_A1_env.xml",
+                 f"{scenes_fp}/flatworld/flatworld_A1_env.xml",
+                 f"{scenes_fp}/flatworld/flatworld_A1_env.xml"]
 
 # make and save initial ppo_network
 key = jax.random.PRNGKey(wandb.config.seed)
@@ -115,7 +118,7 @@ def progress(num_steps, metrics):
 
 # ============================
 # Training & Saving Params
 # ============================
-i = 0
+i = 8
 
 for p in env_xml_paths:
diff --git a/experiments/AAnt-locomotion/vis_traj.py b/experiments/AAnt-locomotion/vis_traj.py
index 603c178..b1df220 100644
--- a/experiments/AAnt-locomotion/vis_traj.py
+++ b/experiments/AAnt-locomotion/vis_traj.py
@@ -79,7 +79,7 @@
 #yaw_vel = 0.0 # rad/s
 #jcmd = jp.array([x_vel, y_vel, yaw_vel])
 
-wcmd = jp.array([-10.0, 10.0])
+wcmd = jp.array([10.0, 10.0])
 
 # generate policy rollout
 for _ in range(episode_length):