From 80aa8b6342d7e2418fe4c4e46e18bb5d660c5334 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?=
 <quentin.gallouedec@huggingface.co>
Date: Tue, 21 May 2024 09:30:00 +0000
Subject: [PATCH] clean reach and remove old namespace

---
 examples/gym_check_determinism.py       | 10 ++--
 gym_lowcostrobot/envs/reach_cube_env.py | 69 ++++++++++++++++++++++---
 tests/test_env.py                       | 12 ++---
 3 files changed, 72 insertions(+), 19 deletions(-)

diff --git a/examples/gym_check_determinism.py b/examples/gym_check_determinism.py
index 22e6fc4..5a53a41 100644
--- a/examples/gym_check_determinism.py
+++ b/examples/gym_check_determinism.py
@@ -2,11 +2,11 @@
 
 if __name__ == "__main__":
     for env_name in [
-        "lowcostrobot-LiftCube-v0",
-        "lowcostrobot-PickPlaceCube-v0",
-        "lowcostrobot-PushCube-v0",
-        "lowcostrobot-ReachCube-v0",
-        "lowcostrobot-Stack-v0",
+        "LiftCube-v0",
+        "PickPlaceCube-v0",
+        "PushCube-v0",
+        "ReachCube-v0",
+        "Stack-v0",
     ]:
         env = gym.make(env_name)
         observation1, info = env.reset(seed=123)
diff --git a/gym_lowcostrobot/envs/reach_cube_env.py b/gym_lowcostrobot/envs/reach_cube_env.py
index 724f5f0..dad196d 100644
--- a/gym_lowcostrobot/envs/reach_cube_env.py
+++ b/gym_lowcostrobot/envs/reach_cube_env.py
@@ -9,6 +9,57 @@
 
 
 class ReachCubeEnv(BaseRobotEnv):
+    """
+    ## Description
+
+    The robot has to reach a cube with its end-effector. The cube is placed at a random position within a predefined
+    range. The robot has to reach the cube with its end-effector. The episode is terminated when the end-effector is
+    within a threshold distance from the cube.
+
+    ## Action space
+
+    Two action modes are available: "joint" and "ee". In the "joint" mode, the action space is a 5-dimensional box
+    representing the target joint angles.
+
+    | Index | Action                      | Type (unit) | Min  | Max |
+    | ----- | --------------------------- | ----------- | ---- | --- |
+    | 0     | Joint 1 (base to shoulder)  | Float (rad) | -1.0 | 1.0 |
+    | 1     | Joint 2 (shoulder to elbow) | Float (rad) | -1.0 | 1.0 |
+    | 2     | Joint 3 (elbow to wrist)    | Float (rad) | -1.0 | 1.0 |
+    | 3     | Joint 4 (wrist to gripper)  | Float (rad) | -1.0 | 1.0 |
+    | 4     | Joint 5 (gripper)           | Float (rad) | -1.0 | 1.0 |
+
+    In the "ee" mode, the action space is a 3-dimensional box representing the target end-effector position.
+
+    | Index | Action | Type (unit) | Min  | Max |
+    | ----- | ------ | ----------- | ---- | --- |
+    | 0     | X      | Float (m)   | -1.0 | 1.0 |
+    | 1     | Y      | Float (m)   | -1.0 | 1.0 |
+    | 2     | Z      | Float (m)   | -1.0 | 1.0 |
+
+    ## Observation space
+
+    | Index | Observation                              | Type (unit) | Min   | Max  |
+    | ----- | ---------------------------------------- | ----------- | ----- | ---- |
+    | 0     | Angle of 1st joint 1 (base to shoulder)  | Float (rad) | -3.14 | 3.14 |
+    | 1     | Angle of 2nd joint 2 (shoulder to elbow) | Float (rad) | -3.14 | 3.14 |
+    | 2     | Angle of 3rd joint 3 (elbow to wrist)    | Float (rad) | -3.14 | 3.14 |
+    | 3     | Angle of 4th joint 4 (wrist to gripper)  | Float (rad) | -3.14 | 3.14 |
+    | 4     | Angle of 5th joint 5 (gripper)           | Float (rad) | -3.14 | 3.14 |
+    | 5     | X position of the cube                   | Float (m)   | -10.0 | 10.0 |
+    | 6     | Y position of the cube                   | Float (m)   | -10.0 | 10.0 |
+    | 7     | Z position of the cube                   | Float (m)   | -10.0 | 10.0 |
+    | 8     | Quaternion \( w \) of the cube           | Float       | -1.0  | 1.0  |
+    | 9     | Quaternion \( x \) of the cube           | Float       | -1.0  | 1.0  |
+    | 10    | Quaternion \( y \) of the cube           | Float       | -1.0  | 1.0  |
+    | 11    | Quaternion \( z \) of the cube           | Float       | -1.0  | 1.0  |
+
+    ## Reward
+
+    The reward is the negative distance between the end-effector and the cube. The episode is terminated when the
+    distance is less than a threshold.
+    """
+
     def __init__(self, image_state=None, action_mode="joint", render_mode=None, obj_xy_range=0.15):
         super().__init__(
             xml_path="assets/scene_one_cube.xml",
@@ -23,9 +74,9 @@ def __init__(self, image_state=None, action_mode="joint", render_mode=None, obj_
         else:
             self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(5,), dtype=np.float32)
         self.observation_space = spaces.Box(
-            low=-np.inf,
-            high=np.inf,
-            shape=(self.data.xpos.flatten().shape[0] + 3,),
+            low=np.array([-np.pi, -np.pi, -np.pi, -np.pi, -np.pi, -10.0, -10.0, -10.0, -1.0, -1.0, -1.0, -1.0]),
+            high=np.array([np.pi, np.pi, np.pi, np.pi, np.pi, 10.0, 10.0, 10.0, 1.0, 1.0, 1.0, 1.0]),
+            shape=(12,),
             dtype=np.float32,
         )
 
@@ -37,13 +88,16 @@ def reset(self, seed=None, options=None):
         # We need the following line to seed self.np_random
         super().reset(seed=seed, options=options)
 
+        # Reset the robot to the initial position
+        self.data.qpos[:5] = np.array([0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float32)
+
         # Sample and set the object position
-        self.data.joint("red_box_joint").qpos[:3] = self.np_random.uniform(self.object_low, self.object_high)
+        object_pos = self.np_random.uniform(self.object_low, self.object_high).astype(np.float32)
+        object_rot = np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32)
+        self.data.qpos[5:12] = np.concatenate([object_pos, object_rot])
 
         # Step the simulation
-        # mujoco.mj_step(self.model, self.data)
         mujoco.mj_forward(self.model, self.data)
-        self.step_start = time.time()
 
         # Get the additional info
         info = self.get_info()
@@ -51,8 +105,7 @@ def reset(self, seed=None, options=None):
         return self.get_observation(), info
 
     def get_observation(self):
-        box_id = self.model.body("box").id
-        return np.concatenate([self.data.xpos.flatten(), self.data.xpos[box_id]], dtype=np.float32)
+        return self.data.qpos.astype(dtype=np.float32)
 
     def step(self, action):
         # Perform the action and step the simulation
diff --git a/tests/test_env.py b/tests/test_env.py
index 04710ae..f96e03c 100644
--- a/tests/test_env.py
+++ b/tests/test_env.py
@@ -8,14 +8,14 @@
 @pytest.mark.parametrize(
     "env_id",
     [
-        "lowcostrobot-LiftCube-v0",
-        "lowcostrobot-PickPlaceCube-v0",
-        "lowcostrobot-PushCube-v0",
-        "lowcostrobot-ReachCube-v0",
-        "lowcostrobot-Stack-v0",
+        "LiftCube-v0",
+        "PickPlaceCube-v0",
+        "PushCube-v0",
+        "ReachCube-v0",
+        "Stack-v0",
     ],
 )
 def test_env_check(env_id):
     env = gym.make(env_id)
-    check_env(env)
+    check_env(env, skip_render_check=True)
     env.close()