Skip to content

Commit

Permalink
Merge pull request #33 from tartavull/env-reward-composit
Browse files Browse the repository at this point in the history
Splitting Env, Rewards & Agents
  • Loading branch information
mginoya authored Nov 8, 2023
2 parents 63f4eaf + cbcdbc5 commit ae5c7f1
Show file tree
Hide file tree
Showing 14 changed files with 482 additions and 44 deletions.
102 changes: 102 additions & 0 deletions alfredo/agents/A1/a1.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
<agent>
<!--
Agent description for "alfredo" (A1): one free-floating body tree (torso, head,
two arms, pelvis, two legs) followed by one motor per hinge joint.
NOTE(review): the <agent> root is not a complete MuJoCo model on its own;
presumably alfredo.tools.compose_scene merges it into a scene file. Confirm.
NOTE(review): joint "range" values look like degrees, which depends on the
compiler settings of the scene this is merged into. Confirm.
-->

<!-- Root body: torso and head, attached to the world through the free joint "root". -->
<body name="alfredo" pos="0 0 1.4">
<camera name="followme" mode="trackcom" pos="0 -4 0" xyaxes="1 0 0 0 0 1" />

<joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free" />
<geom fromto="-.07 0 -.035 .07 0 -.035" name="torso1" size="0.13" type="box" />
<geom name="head" pos="0 0 .19" size=".09" type="sphere" user="258" />

<!-- Right arm: two shoulder hinges, one elbow hinge, and a sphere "gripper" with contype 1. -->
<body name="upper_arm_r" pos="0 -0.17 0.06">
<joint armature="0.0068" axis="1 0 0" name="shoulder_1_r" pos="0 0 0" range="-75 85" stiffness="1" type="hinge" />
<joint armature="0.0051" axis="0 0 1" name="shoulder_2_r" pos="0 0 0" range="-60 85" stiffness="1" type="hinge" />
<geom fromto="0 0 0 0 -.277 0" name="triceps_r" size="0.04 0.16" type="capsule" />

<body name="lower_arm_r" pos="0 -.29 0">
<joint armature="0.0028" axis="0 0 -1" name="elbow_r" pos="0 0 0" range="-90 10" stiffness="0" type="hinge" />
<geom fromto="0.01 0.01 0.01 0 -.295 0" name="forearm_r" size="0.031" type="capsule" />
<geom contype="1" name="gripper_r" pos="0.0 -.3 0.0" size="0.04" type="sphere" />
<!-- NOTE(review): unnamed camera exists on the right lower arm only; the left arm has none. Confirm this is intended. -->
<camera pos="0 0 0" />
</body>
</body>

<!-- Left arm: same layout as the right arm with y offsets and joint axes negated. -->
<body name="upper_arm_l" pos="0 0.17 0.06">
<joint armature="0.0068" axis="-1 0 0" name="shoulder_1_l" pos="0 0 0" range="-75 85" stiffness="1" type="hinge" />
<joint armature="0.0051" axis="0 0 -1" name="shoulder_2_l" pos="0 0 0" range="-60 85" stiffness="1" type="hinge" />
<geom fromto="0 0 0 0 .277 0" name="triceps_l" size="0.04 0.16" type="capsule" />

<body name="lower_arm_l" pos="0 .29 0">
<joint armature="0.0028" axis="0 0 1" name="elbow_l" pos="0 0 0" range="-90 10" stiffness="0" type="hinge" />
<geom fromto="0.01 -0.01 0.01 0 .295 0" name="forearm_l" size="0.031" type="capsule" />
<geom contype="1" name="gripper_l" pos="0.0 0.3 0.0" size="0.04" type="sphere" />
</body>
</body>

<!-- Pelvis: three abdominal hinges (z, y, x) sharing one pivot; both legs hang off this body. -->
<body name="pelv" pos="0 0 -0.270" quat="1.000 0 -0.002 0">
<joint armature="0.02" axis="0 0 1" damping="5" name="ab_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge" />
<joint armature="0.02" axis="0 1 0" damping="5" name="ab_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge" />
<joint armature="0.02" axis="1 0 0" damping="5" name="ab_x" pos="0 0 0.065" range="-35 35" stiffness="10" type="hinge" />
<geom fromto="-.02 -.07 0 -.02 .07 0" name="booty" size="0.09" type="capsule" />

<!-- Right leg: hip (x, z, y hinges), knee, ankle, and a box foot with contype 1. -->
<body name="thigh_r" pos="0 -0.1 -0.04">
<joint armature="0.01" axis="1 0 0" damping="5" name="hip_r_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge" />
<joint armature="0.01" axis="0 0 1" damping="5" name="hip_r_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge" />
<!-- NOTE(review): hip_r_y armature is 0.0080 while hip_l_y uses 0.01. Confirm the asymmetry is intended. -->
<joint armature="0.0080" axis="0 1 0" damping="5" name="hip_r_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge" />
<geom fromto="0 0 0 0 0.01 -.34" name="right_thigh1" size="0.06" type="capsule" />

<body name="shin_r" pos="0 0.01 -0.403">
<!-- NOTE(review): knee_r sets no stiffness while knee_l sets stiffness="1". Confirm the asymmetry is intended. -->
<joint armature="0.0060" axis="0 -1 0" name="knee_r" pos="0 0 .02" range="-160 -2" type="hinge" />
<geom fromto="0 0 0 0 0 -.3" name="calves_r" size="0.049" type="capsule" />

<body name="foot_r" pos="0 0 -0.37">
<joint armature="0.006" axis="0 -1 0" name="ankle_r" pos="0 0 0" range="-45 20" type="hinge" />
<geom contype="1" fromto="-0.07 0 0 0.150 0 0" name="footm_r" size="0.042983" type="box" user="0" density="200" />
</body>
</body>
</body>

<!-- Left leg: same chain as the right leg; the hip x and z axes and the y offsets are negated. -->
<body name="thigh_l" pos="0 0.1 -0.04">
<joint armature="0.01" axis="-1 0 0" damping="5" name="hip_l_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge" />
<joint armature="0.01" axis="0 0 -1" damping="5" name="hip_l_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge" />
<joint armature="0.01" axis="0 1 0" damping="5" name="hip_l_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge" />
<geom fromto="0 0 0 0 -0.01 -.34" name="quad_l" size="0.06" type="capsule" />

<body name="shin_l" pos="0 -0.01 -0.403">
<joint armature="0.0060" axis="0 -1 0" name="knee_l" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge" />
<geom fromto="0 0 0 0 0 -.3" name="calves_l" size="0.049" type="capsule" />

<body name="foot_l" pos="0 0 -0.37">
<joint armature="0.006" axis="0 -1 0" name="ankle_l" pos="0 0 0" range="-45 20" type="hinge" />
<geom contype="1" fromto="-0.07 0 0 0.150 0 0" name="footm_l" size="0.042983" type="box" user="0" density="200"/>
</body>
</body>
</body>
</body>
<!-- NOTE(review): a commented-out "/body" closing tag was left at this spot; it had no effect and can probably be dropped. -->
</body>

<!--
Actuators: 19 motors, one per hinge joint above, each named after the joint it
drives. Gear is 100 for the abdomen and hip x/z, 300 for hip y, 200 for knees
and ankles, and 25 for every arm joint.
-->
<actuator>

<motor gear="100" joint="ab_y" name="ab_y" />
<motor gear="100" joint="ab_z" name="ab_z" />
<motor gear="100" joint="ab_x" name="ab_x" />
<motor gear="100" joint="hip_r_x" name="hip_r_x" />
<motor gear="100" joint="hip_r_z" name="hip_r_z" />
<motor gear="300" joint="hip_r_y" name="hip_r_y" />
<motor gear="200" joint="knee_r" name="knee_r" />
<motor gear="200" joint="ankle_r" name="ankle_r" />
<motor gear="100" joint="hip_l_x" name="hip_l_x" />
<motor gear="100" joint="hip_l_z" name="hip_l_z" />
<motor gear="300" joint="hip_l_y" name="hip_l_y" />
<motor gear="200" joint="knee_l" name="knee_l" />
<motor gear="200" joint="ankle_l" name="ankle_l" />
<motor gear="25" joint="shoulder_1_r" name="shoulder_1_r" />
<motor gear="25" joint="shoulder_2_r" name="shoulder_2_r" />
<motor gear="25" joint="elbow_r" name="elbow_r" />
<motor gear="25" joint="shoulder_1_l" name="shoulder_1_l" />
<motor gear="25" joint="shoulder_2_l" name="shoulder_2_l" />
<motor gear="25" joint="elbow_l" name="elbow_l" />

</actuator>

</agent>
85 changes: 54 additions & 31 deletions alfredo/agents/A1/alfredo_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
from etils import epath
from jax import numpy as jp

from alfredo.tools import compose_scene
from alfredo.rewards import rConstant
from alfredo.rewards import rHealthy_simple_z
from alfredo.rewards import rSpeed_X
from alfredo.rewards import rControl_act_ss

class Alfredo(PipelineEnv):
# pyformat: disable
Expand All @@ -28,15 +33,26 @@ def __init__(
**kwargs,
):

# forcing this model to need an input paramFile_path
# will throw error if this is not included in kwargs
# forcing this model to need an input scene_xml_path or
# the combination of env_xml_path and agent_xml_path
# if none of these options are present, an error will be thrown
path=""

if "paramFile_path" in kwargs:
path = kwargs["paramFile_path"]
del kwargs["paramFile_path"]

sys = mjcf.load(path)
# Compose the scene only when BOTH paths were supplied. The previous test,
# `"env_xml_path" and "agent_xml_path" in kwargs`, only checked the second
# key: a non-empty string literal is always truthy, so a missing
# env_xml_path surfaced as a KeyError on the lookup instead of being skipped.
if "env_xml_path" in kwargs and "agent_xml_path" in kwargs:
    # pop() reads each path and strips it from kwargs in one step, so the
    # keys are no longer present in kwargs after this branch.
    env_xp = kwargs.pop("env_xml_path")
    agent_xp = kwargs.pop("agent_xml_path")
    xml_scene = compose_scene(env_xp, agent_xp)

    sys = mjcf.loads(xml_scene)

# this is vestigial - get rid of this someday soon
if "scene_xml_path" in kwargs:
path = kwargs["scene_xml_path"]
del kwargs["scene_xml_path"]

sys = mjcf.load(path)

n_frames = 5

Expand Down Expand Up @@ -120,44 +136,51 @@ def step(self, state: State, action: jp.ndarray) -> State:

com_before, *_ = self._com(prev_pipeline_state)
com_after, *_ = self._com(pipeline_state)
a_velocity = (com_after - com_before) / self.dt

reward_vel = math.safe_norm(a_velocity)
forward_reward = self._forward_reward_weight * a_velocity[0] # * reward_vel
ctrl_cost = self._ctrl_cost_weight * jp.sum(jp.square(action))

min_z, max_z = self._healthy_z_range
is_healthy = jp.where(pipeline_state.x.pos[0, 2] < min_z, x=0.0, y=1.0)
is_healthy = jp.where(pipeline_state.x.pos[0, 2] > max_z, x=0.0, y=is_healthy)

if self._terminate_when_unhealthy:
healthy_reward = self._healthy_reward
else:
healthy_reward = self._healthy_reward * is_healthy

reward = healthy_reward - ctrl_cost + forward_reward

done = 1.0 - is_healthy if self._terminate_when_unhealthy else 0.0
# Evaluate every reward term against the post-step `pipeline_state`.
# `state.pipeline_state` still holds the state from before this step (it is
# only replaced in the return value), so passing it made the height-based
# health check, and the `done` flag derived from it, lag one step behind the
# simulation. The speed and control terms do not read the state argument,
# but are passed the same object for consistency.
x_speed_reward = rSpeed_X(self.sys,
                          pipeline_state,
                          CoM_prev=com_before,
                          CoM_now=com_after,
                          dt=self.dt,
                          weight=self._forward_reward_weight)

# Negative weight: the control term is returned already signed as a penalty.
ctrl_cost = rControl_act_ss(self.sys,
                            pipeline_state,
                            action,
                            weight=-self._ctrl_cost_weight)

# focus_idx_range=(0, 2) selects x.pos[0, 2], the z coordinate of body 0.
healthy_reward = rHealthy_simple_z(self.sys,
                                   pipeline_state,
                                   self._healthy_z_range,
                                   early_terminate=self._terminate_when_unhealthy,
                                   weight=self._healthy_reward,
                                   focus_idx_range=(0, 2))

reward = healthy_reward[0] + ctrl_cost + x_speed_reward[0]

done = 1.0 - healthy_reward[1] if self._terminate_when_unhealthy else 0.0

state.metrics.update(
reward_ctrl=-ctrl_cost,
reward_alive=healthy_reward,
reward_velocity=forward_reward,
reward_ctrl=ctrl_cost,
reward_alive=healthy_reward[0],
reward_velocity=x_speed_reward[0],
agent_x_position=com_after[0],
agent_y_position=com_after[1],
agent_x_velocity=a_velocity[0],
agent_y_velocity=a_velocity[1],
agent_x_velocity=x_speed_reward[1],
agent_y_velocity=x_speed_reward[2],
)

return state.replace(
pipeline_state=pipeline_state, obs=obs, reward=reward, done=done
)

def _get_obs(self, pipeline_state: base.State, action: jp.ndarray) -> jp.ndarray:
"""Observes humanoid body position, velocities, and angles."""
"""Observes Alfredo's body position, velocities, and angles."""

a_positions = pipeline_state.q
a_velocities = pipeline_state.qd
#print(f"a_positions = {a_positions}")
#print(f"a_velocities = {a_velocities}")

if self._exclude_current_positions_from_observation:
a_positions = a_positions[2:]
Expand Down Expand Up @@ -194,7 +217,7 @@ def _get_obs(self, pipeline_state: base.State, action: jp.ndarray) -> jp.ndarray
)

def _com(self, pipeline_state: base.State) -> jp.ndarray:
"""Computes Center of Mass of the Humanoid"""
"""Computes Center of Mass of Alfredo"""

inertia = self.sys.link.inertia

Expand Down
4 changes: 4 additions & 0 deletions alfredo/rewards/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .rConstant import *
from .rSpeed import *
from .rHealthy import *
from .rControl import *
15 changes: 15 additions & 0 deletions alfredo/rewards/rConstant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from typing import Tuple

import jax
from brax import actuator, base, math
from brax.envs import PipelineEnv, State
from brax.io import mjcf
from etils import epath
from jax import numpy as jp

def rConstant(sys: base.System,
              pipeline_state: base.State,
              weight=1.0,
              focus_idx_range=(1, -1)) -> jp.ndarray:
    """Constant reward term that ignores the simulation state.

    Args:
      sys: unused; accepted so all reward functions share a call shape.
      pipeline_state: unused.
      weight: value to emit as the reward.
      focus_idx_range: unused.

    Returns:
      A one-element array holding `weight`.
    """
    constant_reward = jp.array([weight])
    return constant_reward
18 changes: 18 additions & 0 deletions alfredo/rewards/rControl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from typing import Tuple

import jax
from brax import actuator, base, math
from brax.envs import PipelineEnv, State
from brax.io import mjcf
from etils import epath
from jax import numpy as jp

def rControl_act_ss(sys: base.System,
                    pipeline_state: base.State,
                    action: jp.ndarray,
                    weight=1.0,
                    focus_idx_range=(1, -1)) -> jp.ndarray:
    """Control term: weighted sum of squared action components.

    Args:
      sys: unused.
      pipeline_state: unused.
      action: action array whose entries are squared and summed.
      weight: multiplier applied to the sum; pass a negative value to obtain
        a penalty.
      focus_idx_range: unused.

    Returns:
      `weight * sum(action ** 2)` as a scalar array.
    """
    squared_effort = jp.square(action)
    return weight * jp.sum(squared_effort)
31 changes: 31 additions & 0 deletions alfredo/rewards/rHealthy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import Tuple

import jax
from brax import actuator, base, math
from brax.envs import PipelineEnv, State
from brax.io import mjcf
from etils import epath
from jax import numpy as jp

def rHealthy_simple_z(sys: base.System,
                      pipeline_state: base.State,
                      z_range: Tuple,
                      early_terminate: bool = True,
                      weight=1.0,
                      focus_idx_range=(1, -1)) -> jp.ndarray:
    """Health reward based on one position entry staying inside a range.

    The entry inspected is `pipeline_state.x.pos[focus_idx_range[0],
    focus_idx_range[-1]]`; the caller chooses which body row and coordinate
    column that is.

    Args:
      sys: unused.
      pipeline_state: state exposing `x.pos`.
      z_range: `(min_z, max_z)` bounds. Values exactly on a bound count as
        healthy, because the comparisons are strict.
      early_terminate: when True the full `weight` is always returned as the
        reward (the caller is expected to end the episode using the health
        flag instead); when False the reward is zeroed while unhealthy.
        Previously annotated as `True` with no default, which read like a
        mistyped default value.
      weight: reward paid while healthy.
      focus_idx_range: the first and last entries index `x.pos`.

    Returns:
      `jp.array([reward, is_healthy])`, with `is_healthy` equal to 1.0 or 0.0.
    """
    min_z, max_z = z_range
    row_idx = focus_idx_range[0]
    col_idx = focus_idx_range[-1]

    height = pipeline_state.x.pos[row_idx, col_idx]

    # Strict comparisons, exactly as before: only values strictly below min_z
    # or strictly above max_z are unhealthy (a NaN therefore stays "healthy").
    out_of_range = jp.logical_or(height < min_z, height > max_z)
    is_healthy = jp.where(out_of_range, 0.0, 1.0)

    hr = weight if early_terminate else weight * is_healthy

    return jp.array([hr, is_healthy])
44 changes: 44 additions & 0 deletions alfredo/rewards/rSpeed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from typing import Tuple

import jax
from brax import actuator, base, math
from brax.envs import PipelineEnv, State
from brax.io import mjcf
from etils import epath
from jax import numpy as jp

def rSpeed_X(sys: base.System,
             pipeline_state: base.State,
             CoM_prev: jp.ndarray,
             CoM_now: jp.ndarray,
             dt,
             weight=1.0,
             focus_idx_range=(1, -1)) -> jp.ndarray:
    """Reward for center-of-mass velocity along x.

    Velocity is the finite difference `(CoM_now - CoM_prev) / dt`.

    Args:
      sys: unused.
      pipeline_state: unused.
      CoM_prev: center of mass before the step; entries 0 and 1 are read as
        x and y.
      CoM_now: center of mass after the step.
      dt: time elapsed between the two samples.
      weight: multiplier applied to the x velocity.
      focus_idx_range: unused; kept for signature parity with the other
        reward functions. The dead locals that indexed it were removed.

    Returns:
      `jp.array([weight * vx, vx, vy])`.
    """
    velocity = (CoM_now - CoM_prev) / dt
    x_speed, y_speed = velocity[0], velocity[1]

    return jp.array([weight * x_speed, x_speed, y_speed])

def rSpeed_Y(sys: base.System,
             pipeline_state: base.State,
             CoM_prev: jp.ndarray,
             CoM_now: jp.ndarray,
             dt,
             weight=1.0,
             focus_idx_range=(1, -1)) -> jp.ndarray:
    """Reward for center-of-mass velocity along y.

    Velocity is the finite difference `(CoM_now - CoM_prev) / dt`.

    Args:
      sys: unused.
      pipeline_state: unused.
      CoM_prev: center of mass before the step; entries 0 and 1 are read as
        x and y.
      CoM_now: center of mass after the step.
      dt: time elapsed between the two samples.
      weight: multiplier applied to the y velocity.
      focus_idx_range: unused; kept for signature parity with the other
        reward functions. The dead locals that indexed it were removed.

    Returns:
      `jp.array([weight * vy, vx, vy])`. Only the first entry differs from
      the x-axis variant; the velocity entries stay in x, y order.
    """
    velocity = (CoM_now - CoM_prev) / dt
    x_speed, y_speed = velocity[0], velocity[1]

    return jp.array([weight * y_speed, x_speed, y_speed])
48 changes: 48 additions & 0 deletions alfredo/scenes/flatworld/flatworld_A1_env.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<mujoco model="alfredo">
<!--
Environment half of the "alfredo" model: compiler and solver options, element
defaults, custom numerics, assets, one light, and a floor plane.
No agent body is defined in this file.
NOTE(review): presumably alfredo.tools.compose_scene inserts the agent's
body tree and actuators into this document. Confirm.
-->

<!-- Angles in this model are given in degrees; inertias are derived from geoms. -->
<compiler angle="degree" inertiafromgeom="true" />

<!--
Defaults applied unless an element overrides them: joints are limited with
armature 1 and damping 1; geoms get contype 0 and conaffinity 0 and use the
"geom" material; motor controls are limited to the range -0.4 to 0.4.
-->
<default>
<joint armature="1" damping="1" limited="true" />
<geom conaffinity="0" condim="1" contype="0" material="geom" />
<motor ctrllimited="true" ctrlrange="-.4 .4" />
</default>

<option iterations="8" timestep="0.003" />

<!-- NOTE(review): these named numerics look like simulator tuning parameters looked up by name; confirm which of them the loader still reads. -->
<custom>
<numeric data="2500" name="constraint_limit_stiffness" />
<numeric data="27000" name="constraint_stiffness" />
<numeric data="30" name="constraint_ang_damping" />
<numeric data="80" name="constraint_vel_damping" />
<numeric data="-0.05" name="ang_damping" />
<numeric data="0.5" name="joint_scale_pos" />
<numeric data="0.1" name="joint_scale_ang" />
<numeric data="0" name="spring_mass_scale" />
<numeric data="1" name="spring_inertia_scale" />
<numeric data="20" name="matrix_inv_iterations" />
<numeric data="15" name="solver_maxls" />
</custom>

<size nkey="5" nuser_geom="1" />

<visual>
<map fogend="5" fogstart="3" />
</visual>

<!-- Rendering assets: gradient skybox, the texture and material used by default geoms, and a checker material for the floor. -->
<asset>
<texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100" />

<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127" />
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100" />
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane" />
<material name="geom" texture="texgeom" texuniform="true" />
</asset>

<!-- World: a directional light and the floor plane. The floor overrides the geom defaults with conaffinity 1 and condim 3. -->
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1" />
<geom conaffinity="1" condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" size="20 20 0.125" type="plane" />

</worldbody>

</mujoco>
Loading

0 comments on commit ae5c7f1

Please sign in to comment.