
Commit

fixed some typos and implemented a torque cost reward scheme
mginoya committed Nov 19, 2023
1 parent f1896f7 commit 95e6467
Showing 8 changed files with 45 additions and 7 deletions.
12 changes: 10 additions & 2 deletions alfredo/agents/A1/alfredo_1.py
@@ -14,6 +14,7 @@
 from alfredo.rewards import rHealthy_simple_z
 from alfredo.rewards import rSpeed_X
 from alfredo.rewards import rControl_act_ss
+from alfredo.rewards import rTorques

 class Alfredo(PipelineEnv):
     # pyformat: disable
@@ -24,7 +25,7 @@ def __init__(
         self,
         forward_reward_weight=1.25,
         ctrl_cost_weight=0.1,
-        healthy_reward=5.0,
+        healthy_reward=1.0,
         terminate_when_unhealthy=True,
         healthy_z_range=(1.0, 2.0),
         reset_noise_scale=1e-2,
@@ -120,6 +121,7 @@ def reset(self, rng: jp.ndarray) -> State:
             "reward_ctrl": zero,
             "reward_alive": zero,
             "reward_velocity": zero,
+            "reward_torque":zero,
             "agent_x_position": zero,
             "agent_y_position": zero,
             "agent_x_velocity": zero,
@@ -149,21 +151,27 @@ def step(self, state: State, action: jp.ndarray) -> State:
                                     action,
                                     weight=-self._ctrl_cost_weight)

+        torque_cost = rTorques(self.sys,
+                               state.pipeline_state,
+                               action,
+                               weight=-0.0003)
+
         healthy_reward = rHealthy_simple_z(self.sys,
                                            state.pipeline_state,
                                            self._healthy_z_range,
                                            early_terminate=self._terminate_when_unhealthy,
                                            weight=self._healthy_reward,
                                            focus_idx_range=(0, 2))

-        reward = healthy_reward[0] + ctrl_cost + x_speed_reward[0]
+        reward = healthy_reward[0] + ctrl_cost + x_speed_reward[0] + torque_cost

         done = 1.0 - healthy_reward[1] if self._terminate_when_unhealthy else 0.0

         state.metrics.update(
             reward_ctrl=ctrl_cost,
             reward_alive=healthy_reward[0],
             reward_velocity=x_speed_reward[0],
+            reward_torque=torque_cost,
             agent_x_position=com_after[0],
             agent_y_position=com_after[1],
             agent_x_velocity=x_speed_reward[1],
1 change: 1 addition & 0 deletions alfredo/rewards/__init__.py
@@ -2,3 +2,4 @@
 from .rSpeed import *
 from .rHealthy import *
 from .rControl import *
+from .rEnergy import *
2 changes: 1 addition & 1 deletion alfredo/rewards/rConstant.py
@@ -10,6 +10,6 @@
 def rConstant(sys: base.System,
               pipeline_state: base.State,
               weight=1.0,
-              focus_idx_range=(1, -1)) -> jp.ndarray:
+              focus_idx_range=(0, -1)) -> jp.ndarray:

     return jp.array([weight])
2 changes: 1 addition & 1 deletion alfredo/rewards/rControl.py
@@ -11,7 +11,7 @@ def rControl_act_ss(sys: base.System,
                     pipeline_state: base.State,
                     action: jp.ndarray,
                     weight=1.0,
-                    focus_idx_range=(1, -1)) -> jp.ndarray:
+                    focus_idx_range=(0, -1)) -> jp.ndarray:

     ctrl_cost = weight * jp.sum(jp.square(action))

28 changes: 28 additions & 0 deletions alfredo/rewards/rEnergy.py
@@ -0,0 +1,28 @@
+from typing import Tuple
+
+import jax
+from brax import actuator, base, math
+from brax.envs import PipelineEnv, State
+from brax.io import mjcf
+from etils import epath
+from jax import numpy as jp
+
+
+def rTorques(sys: base.System,
+             pipeline_state: base.State,
+             action: jp.ndarray,
+             weight=1.0,
+             focus_idx_range=(0, -1)) -> jp.ndarray:
+
+    s_idx = focus_idx_range[0]
+    e_idx = focus_idx_range[1]
+
+    torque = actuator.to_tau(sys,
+                             action,
+                             pipeline_state.q[s_idx:e_idx],
+                             pipeline_state.qd[s_idx:e_idx])
+
+
+    tr = jp.sqrt(jp.sum(jp.square(torque))) + jp.sum(jp.abs(torque))
+
+    return weight*tr
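For reference, here is a minimal standalone sketch of what the new rTorques term evaluates to. The torque vector below is a hypothetical stand-in for the output of brax's actuator.to_tau and is not part of the commit; in Alfredo.step above the function is called with weight=-0.0003, so the term enters the reward as a penalty. rTorques returns weight times the sum of the L2 and L1 norms of the actuator torques:

from jax import numpy as jp

# Hypothetical actuator torques, standing in for actuator.to_tau(sys, action, q, qd).
torque = jp.array([0.5, -1.0, 0.25])
weight = -0.0003  # same sign convention as in Alfredo.step: negative weight => cost

# L2 norm plus L1 norm of the torque vector, matching rEnergy.rTorques.
tr = jp.sqrt(jp.sum(jp.square(torque))) + jp.sum(jp.abs(torque))
print(weight * tr)  # small negative contribution added to the step reward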
2 changes: 1 addition & 1 deletion alfredo/rewards/rHealthy.py
@@ -12,7 +12,7 @@ def rHealthy_simple_z(sys: base.System,
                      z_range: Tuple,
                      early_terminate: True,
                      weight=1.0,
-                     focus_idx_range=(1, -1)) -> jp.ndarray:
+                     focus_idx_range=(0, -1)) -> jp.ndarray:

     min_z, max_z = z_range
     focus_s = focus_idx_range[0]
4 changes: 2 additions & 2 deletions alfredo/rewards/rSpeed.py
@@ -13,7 +13,7 @@ def rSpeed_X(sys: base.System,
              CoM_now: jp.ndarray,
              dt,
              weight=1.0,
-             focus_idx_range=(1, -1)) -> jp.ndarray:
+             focus_idx_range=(0, -1)) -> jp.ndarray:


     velocity = (CoM_now - CoM_prev) / dt
@@ -31,7 +31,7 @@ def rSpeed_Y(sys: base.System,
              CoM_now: jp.ndarray,
              dt,
              weight=1.0,
-             focus_idx_range=(1, -1)) -> jp.ndarray:
+             focus_idx_range=(0, -1)) -> jp.ndarray:


     velocity = (CoM_now - CoM_prev) / dt
1 change: 1 addition & 0 deletions experiments/Alfredo-simple-walk/seq_training.py
@@ -52,6 +52,7 @@ def progress(num_steps, metrics):
         "Vel Reward": metrics["eval/episode_reward_velocity"],
         "Alive Reward": metrics["eval/episode_reward_alive"],
         "Ctrl Reward": metrics["eval/episode_reward_ctrl"],
+        "Torque Reward": metrics["eval/episode_reward_torque"],
         "a_vel_x": metrics["eval/episode_agent_x_velocity"],
         "a_vel_y": metrics["eval/episode_agent_y_velocity"],
     }
