Merge pull request #33 from tartavull/env-reward-composit

Splitting Env, Rewards & Agents
tartavull · Nov 8, 2023 · ae5c7f1 · ae5c7f1
2 parents 63f4eaf + cbcdbc5
commit ae5c7f1
Show file tree

Hide file tree

Showing 14 changed files with 482 additions and 44 deletions.
diff --git a/alfredo/agents/A1/a1.xml b/alfredo/agents/A1/a1.xml
@@ -0,0 +1,102 @@
+<agent>
+
+    <body name="alfredo" pos="0 0 1.4">
+        <camera name="followme" mode="trackcom" pos="0 -4 0" xyaxes="1 0 0 0 0 1" />
+
+        <joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free" />
+        <geom fromto="-.07 0 -.035 .07 0 -.035" name="torso1" size="0.13" type="box" />
+        <geom name="head" pos="0 0 .19" size=".09" type="sphere" user="258" />
+
+        <body name="upper_arm_r" pos="0 -0.17 0.06">
+            <joint armature="0.0068" axis="1 0 0" name="shoulder_1_r" pos="0 0 0" range="-75 85" stiffness="1" type="hinge" />
+            <joint armature="0.0051" axis="0 0 1" name="shoulder_2_r" pos="0 0 0" range="-60 85" stiffness="1" type="hinge" />
+            <geom fromto="0 0 0 0 -.277 0" name="triceps_r" size="0.04 0.16" type="capsule" />
+
+            <body name="lower_arm_r" pos="0 -.29 0">
+                <joint armature="0.0028" axis="0 0 -1" name="elbow_r" pos="0 0 0" range="-90 10" stiffness="0" type="hinge" />
+                <geom fromto="0.01 0.01 0.01 0 -.295 0" name="forearm_r" size="0.031" type="capsule" />
+                <geom contype="1" name="gripper_r" pos="0.0 -.3 0.0" size="0.04" type="sphere" />
+                <camera pos="0 0 0" />
+            </body>
+        </body>
+
+        <body name="upper_arm_l" pos="0 0.17 0.06">
+            <joint armature="0.0068" axis="-1 0 0" name="shoulder_1_l" pos="0 0 0" range="-75 85" stiffness="1" type="hinge" />
+            <joint armature="0.0051" axis="0 0 -1" name="shoulder_2_l" pos="0 0 0" range="-60 85" stiffness="1" type="hinge" />
+            <geom fromto="0 0 0 0 .277 0" name="triceps_l" size="0.04 0.16" type="capsule" />
+
+            <body name="lower_arm_l" pos="0 .29 0">
+                <joint armature="0.0028" axis="0 0 1" name="elbow_l" pos="0 0 0" range="-90 10" stiffness="0" type="hinge" />
+                <geom fromto="0.01 -0.01 0.01 0 .295 0" name="forearm_l" size="0.031" type="capsule" />
+                <geom contype="1" name="gripper_l" pos="0.0 0.3 0.0" size="0.04" type="sphere" />
+            </body>
+        </body>
+
+        <body name="pelv" pos="0 0 -0.270" quat="1.000 0 -0.002 0">
+            <joint armature="0.02" axis="0 0 1" damping="5" name="ab_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge" />
+            <joint armature="0.02" axis="0 1 0" damping="5" name="ab_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge" />
+            <joint armature="0.02" axis="1 0 0" damping="5" name="ab_x" pos="0 0 0.065" range="-35 35" stiffness="10" type="hinge" />
+            <geom fromto="-.02 -.07 0 -.02 .07 0" name="booty" size="0.09" type="capsule" />
+
+            <body name="thigh_r" pos="0 -0.1 -0.04">
+                <joint armature="0.01" axis="1 0 0" damping="5" name="hip_r_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge" />
+                <joint armature="0.01" axis="0 0 1" damping="5" name="hip_r_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge" />
+                <joint armature="0.0080" axis="0 1 0" damping="5" name="hip_r_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge" />
+                <geom fromto="0 0 0 0 0.01 -.34" name="right_thigh1" size="0.06" type="capsule" />
+
+                <body name="shin_r" pos="0 0.01 -0.403">
+                    <joint armature="0.0060" axis="0 -1 0" name="knee_r" pos="0 0 .02" range="-160 -2" type="hinge" />
+                    <geom fromto="0 0 0 0 0 -.3" name="calves_r" size="0.049" type="capsule" />
+
+                    <body name="foot_r" pos="0 0 -0.37">
+                        <joint armature="0.006" axis="0 -1 0" name="ankle_r" pos="0 0 0" range="-45 20" type="hinge" />
+                        <geom contype="1" fromto="-0.07 0 0 0.150 0 0" name="footm_r" size="0.042983" type="box" user="0" density="200" />
+                    </body>
+                </body>
+            </body>
+
+            <body name="thigh_l" pos="0 0.1 -0.04">
+                <joint armature="0.01" axis="-1 0 0" damping="5" name="hip_l_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge" />
+                <joint armature="0.01" axis="0 0 -1" damping="5" name="hip_l_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge" />
+                <joint armature="0.01" axis="0 1 0" damping="5" name="hip_l_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge" />
+                <geom fromto="0 0 0 0 -0.01 -.34" name="quad_l" size="0.06" type="capsule" />
+
+                <body name="shin_l" pos="0 -0.01 -0.403">
+                    <joint armature="0.0060" axis="0 -1 0" name="knee_l" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge" />
+                    <geom fromto="0 0 0 0 0 -.3" name="calves_l" size="0.049" type="capsule" />
+
+                    <body name="foot_l" pos="0 0 -0.37">
+                        <joint armature="0.006" axis="0 -1 0" name="ankle_l" pos="0 0 0" range="-45 20" type="hinge" />
+                        <geom contype="1" fromto="-0.07 0 0 0.150 0 0" name="footm_l" size="0.042983" type="box" user="0" density="200"/>
+                    </body>
+                </body>
+            </body>
+        </body>
+        <!--/body-->
+    </body>
+
+    <actuator>
+
+        <motor gear="100" joint="ab_y" name="ab_y" />
+        <motor gear="100" joint="ab_z" name="ab_z" />
+        <motor gear="100" joint="ab_x" name="ab_x" />
+        <motor gear="100" joint="hip_r_x" name="hip_r_x" />
+        <motor gear="100" joint="hip_r_z" name="hip_r_z" />
+        <motor gear="300" joint="hip_r_y" name="hip_r_y" />
+        <motor gear="200" joint="knee_r" name="knee_r" />
+        <motor gear="200" joint="ankle_r" name="ankle_r" />
+        <motor gear="100" joint="hip_l_x" name="hip_l_x" />
+        <motor gear="100" joint="hip_l_z" name="hip_l_z" />
+        <motor gear="300" joint="hip_l_y" name="hip_l_y" />
+        <motor gear="200" joint="knee_l" name="knee_l" />
+        <motor gear="200" joint="ankle_l" name="ankle_l" />
+        <motor gear="25" joint="shoulder_1_r" name="shoulder_1_r" />
+        <motor gear="25" joint="shoulder_2_r" name="shoulder_2_r" />
+        <motor gear="25" joint="elbow_r" name="elbow_r" />
+        <motor gear="25" joint="shoulder_1_l" name="shoulder_1_l" />
+        <motor gear="25" joint="shoulder_2_l" name="shoulder_2_l" />
+        <motor gear="25" joint="elbow_l" name="elbow_l" />
+
+    </actuator>
+
+</agent>
diff --git a/alfredo/agents/A1/alfredo_1.py b/alfredo/agents/A1/alfredo_1.py
@@ -9,6 +9,11 @@
 from etils import epath
 from jax import numpy as jp
 
+from alfredo.tools import compose_scene
+from alfredo.rewards import rConstant
+from alfredo.rewards import rHealthy_simple_z
+from alfredo.rewards import rSpeed_X
+from alfredo.rewards import rControl_act_ss
 
 class Alfredo(PipelineEnv):
     # pyformat: disable
@@ -28,15 +33,26 @@ def __init__(
         **kwargs,
     ):
 
-        # forcing this model to need an input paramFile_path
-        # will throw error if this is not included in kwargs
+        # forcing this model to need an input scene_xml_path or 
+        # the combination of env_xml_path and agent_xml_path
+        # if none of these options are present, an error will be thrown
         path=""
 
-        if "paramFile_path" in kwargs:
-            path = kwargs["paramFile_path"]
-            del kwargs["paramFile_path"]
-
-        sys = mjcf.load(path)
+        if "env_xml_path" and "agent_xml_path" in kwargs: 
+            env_xp = kwargs["env_xml_path"]
+            agent_xp = kwargs["agent_xml_path"]
+            xml_scene = compose_scene(env_xp, agent_xp)
+            del kwargs["env_xml_path"]
+            del kwargs["agent_xml_path"]
+
+            sys = mjcf.loads(xml_scene)
+
+        # this is vestigial - get rid of this someday soon
+        if "scene_xml_path" in kwargs:
+            path = kwargs["scene_xml_path"]
+            del kwargs["scene_xml_path"]
+
+            sys = mjcf.load(path)
 
         n_frames = 5
 
@@ -120,44 +136,51 @@ def step(self, state: State, action: jp.ndarray) -> State:
 
         com_before, *_ = self._com(prev_pipeline_state)
         com_after, *_ = self._com(pipeline_state)
-        a_velocity = (com_after - com_before) / self.dt
-
-        reward_vel = math.safe_norm(a_velocity)
-        forward_reward = self._forward_reward_weight * a_velocity[0]  # * reward_vel
-        ctrl_cost = self._ctrl_cost_weight * jp.sum(jp.square(action))
-
-        min_z, max_z = self._healthy_z_range
-        is_healthy = jp.where(pipeline_state.x.pos[0, 2] < min_z, x=0.0, y=1.0)
-        is_healthy = jp.where(pipeline_state.x.pos[0, 2] > max_z, x=0.0, y=is_healthy)
-
-        if self._terminate_when_unhealthy:
-            healthy_reward = self._healthy_reward
-        else:
-            healthy_reward = self._healthy_reward * is_healthy
-
-        reward = healthy_reward - ctrl_cost + forward_reward
 
-        done = 1.0 - is_healthy if self._terminate_when_unhealthy else 0.0
+        x_speed_reward = rSpeed_X(self.sys,
+                                  state.pipeline_state,
+                                  CoM_prev=com_before,
+                                  CoM_now=com_after,
+                                  dt=self.dt,
+                                  weight=self._forward_reward_weight)
+
+        ctrl_cost = rControl_act_ss(self.sys,
+                                    state.pipeline_state,
+                                    action,
+                                    weight=-self._ctrl_cost_weight)
+
+        healthy_reward = rHealthy_simple_z(self.sys,
+                                           state.pipeline_state,
+                                           self._healthy_z_range,
+                                           early_terminate=self._terminate_when_unhealthy,
+                                           weight=self._healthy_reward,
+                                           focus_idx_range=(0, 2))
+
+        reward = healthy_reward[0] + ctrl_cost + x_speed_reward[0]
+
+        done = 1.0 - healthy_reward[1] if self._terminate_when_unhealthy else 0.0
 
         state.metrics.update(
-            reward_ctrl=-ctrl_cost,
-            reward_alive=healthy_reward,
-            reward_velocity=forward_reward,
+            reward_ctrl=ctrl_cost,
+            reward_alive=healthy_reward[0],
+            reward_velocity=x_speed_reward[0],
             agent_x_position=com_after[0],
             agent_y_position=com_after[1],
-            agent_x_velocity=a_velocity[0],
-            agent_y_velocity=a_velocity[1],
+            agent_x_velocity=x_speed_reward[1],
+            agent_y_velocity=x_speed_reward[2],
         )
 
         return state.replace(
             pipeline_state=pipeline_state, obs=obs, reward=reward, done=done
         )
 
     def _get_obs(self, pipeline_state: base.State, action: jp.ndarray) -> jp.ndarray:
-        """Observes humanoid body position, velocities, and angles."""
+        """Observes Alfredo's body position, velocities, and angles."""
 
         a_positions = pipeline_state.q
         a_velocities = pipeline_state.qd
+        #print(f"a_positions = {a_positions}")
+        #print(f"a_velocities = {a_velocities}")
 
         if self._exclude_current_positions_from_observation:
             a_positions = a_positions[2:]
@@ -194,7 +217,7 @@ def _get_obs(self, pipeline_state: base.State, action: jp.ndarray) -> jp.ndarray
         )
 
     def _com(self, pipeline_state: base.State) -> jp.ndarray:
-        """Computes Center of Mass of the Humanoid"""
+        """Computes Center of Mass of Alfredo"""
 
         inertia = self.sys.link.inertia
 

diff --git a/alfredo/rewards/__init__.py b/alfredo/rewards/__init__.py
@@ -0,0 +1,4 @@
+from .rConstant import *
+from .rSpeed import *
+from .rHealthy import *
+from .rControl import *
diff --git a/alfredo/rewards/rConstant.py b/alfredo/rewards/rConstant.py
@@ -0,0 +1,15 @@
+from typing import Tuple
+
+import jax
+from brax import actuator, base, math
+from brax.envs import PipelineEnv, State
+from brax.io import mjcf
+from etils import epath
+from jax import numpy as jp
+
+def rConstant(sys: base.System, 
+              pipeline_state: base.State, 
+              weight=1.0,
+              focus_idx_range=(1, -1)) -> jp.ndarray:
+
+    return jp.array([weight])
diff --git a/alfredo/rewards/rControl.py b/alfredo/rewards/rControl.py
@@ -0,0 +1,18 @@
+from typing import Tuple
+
+import jax
+from brax import actuator, base, math
+from brax.envs import PipelineEnv, State
+from brax.io import mjcf
+from etils import epath
+from jax import numpy as jp
+
+def rControl_act_ss(sys: base.System, 
+                    pipeline_state: base.State,
+                    action: jp.ndarray,
+                    weight=1.0,
+                    focus_idx_range=(1, -1)) -> jp.ndarray: 
+
+    ctrl_cost = weight * jp.sum(jp.square(action))
+
+    return ctrl_cost 
diff --git a/alfredo/rewards/rHealthy.py b/alfredo/rewards/rHealthy.py
@@ -0,0 +1,31 @@
+from typing import Tuple
+
+import jax
+from brax import actuator, base, math
+from brax.envs import PipelineEnv, State
+from brax.io import mjcf
+from etils import epath
+from jax import numpy as jp
+
+def rHealthy_simple_z(sys: base.System, 
+                      pipeline_state: base.State, 
+                      z_range: Tuple,
+                      early_terminate: True,
+                      weight=1.0,
+                      focus_idx_range=(1, -1)) -> jp.ndarray:
+
+    min_z, max_z = z_range
+    focus_s = focus_idx_range[0]
+    focus_e = focus_idx_range[-1]
+
+    focus_x_pos = pipeline_state.x.pos[focus_s, focus_e]
+
+    is_healthy = jp.where(focus_x_pos < min_z, x=0.0, y=1.0)
+    is_healthy = jp.where(focus_x_pos > max_z, x=0.0, y=is_healthy)
+
+    if early_terminate:
+        hr = weight
+    else:
+        hr = weight * is_healthy
+
+    return jp.array([hr, is_healthy])
diff --git a/alfredo/rewards/rSpeed.py b/alfredo/rewards/rSpeed.py
@@ -0,0 +1,44 @@
+from typing import Tuple
+
+import jax
+from brax import actuator, base, math
+from brax.envs import PipelineEnv, State
+from brax.io import mjcf
+from etils import epath
+from jax import numpy as jp
+
+def rSpeed_X(sys: base.System,
+             pipeline_state: base.State,
+             CoM_prev: jp.ndarray,
+             CoM_now: jp.ndarray,
+             dt,
+             weight=1.0,
+             focus_idx_range=(1, -1)) -> jp.ndarray:
+
+
+    velocity = (CoM_now - CoM_prev) / dt
+
+    focus_s = focus_idx_range[0]
+    focus_e = focus_idx_range[-1]
+
+    sxr = weight * velocity[0]
+
+    return jp.array([sxr, velocity[0], velocity[1]])
+
+def rSpeed_Y(sys: base.System,
+             pipeline_state: base.State,
+             CoM_prev: jp.ndarray,
+             CoM_now: jp.ndarray,
+             dt,
+             weight=1.0,
+             focus_idx_range=(1, -1)) -> jp.ndarray:
+
+
+    velocity = (CoM_now - CoM_prev) / dt
+
+    focus_s = focus_idx_range[0]
+    focus_e = focus_idx_range[-1]
+
+    syr = weight * velocity[1]
+
+    return jp.array([syr, velocity[0], velocity[1]])
diff --git a/alfredo/scenes/flatworld/flatworld_A1_env.xml b/alfredo/scenes/flatworld/flatworld_A1_env.xml
@@ -0,0 +1,48 @@
+<mujoco model="alfredo">
+
+    <compiler angle="degree" inertiafromgeom="true" />
+
+    <default>
+        <joint armature="1" damping="1" limited="true" />
+        <geom conaffinity="0" condim="1" contype="0" material="geom" />
+        <motor ctrllimited="true" ctrlrange="-.4 .4" />
+    </default>
+
+    <option iterations="8" timestep="0.003" />
+
+    <custom>
+        <numeric data="2500" name="constraint_limit_stiffness" />
+        <numeric data="27000" name="constraint_stiffness" />
+        <numeric data="30" name="constraint_ang_damping" />
+        <numeric data="80" name="constraint_vel_damping" />
+        <numeric data="-0.05" name="ang_damping" />
+        <numeric data="0.5" name="joint_scale_pos" />
+        <numeric data="0.1" name="joint_scale_ang" />
+        <numeric data="0" name="spring_mass_scale" />
+        <numeric data="1" name="spring_inertia_scale" />
+        <numeric data="20" name="matrix_inv_iterations" />
+        <numeric data="15" name="solver_maxls" />
+    </custom>
+
+    <size nkey="5" nuser_geom="1" />
+
+    <visual>
+        <map fogend="5" fogstart="3" />
+    </visual>
+
+    <asset>
+        <texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100" />
+
+        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127" />
+        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100" />
+        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane" />
+        <material name="geom" texture="texgeom" texuniform="true" />
+    </asset>
+
+    <worldbody>
+        <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1" />
+        <geom conaffinity="1" condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" size="20 20 0.125" type="plane" />
+
+    </worldbody>
+
+</mujoco>