Merge pull request #8 from perezjln/move-assets-and-cie

Move assets and Cie
perezjln · May 21, 2024 · 5598155 · 5598155
2 parents c62654a + a67737a
commit 5598155
Show file tree

Hide file tree

Showing 23 changed files with 301 additions and 92 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -3,5 +3,6 @@ repos:
     rev: v0.4.3
     hooks:
       - id: ruff
-        args: [--fix]
+        args: [--fix, --line-length, '127']
       - id: ruff-format
+        args: [--line-length, '127']
diff --git a/examples/mujoco_ctrl_range.py b/examples/mujoco_ctrl_range.py
@@ -2,7 +2,7 @@
 
 if __name__ == "__main__":
     # Load the model from the XML file
-    model = mujoco.MjModel.from_xml_path("assets/scene_one_cube.xml")
+    model = mujoco.MjModel.from_xml_path("gym_lowcostrobot/assets/scene_one_cube.xml")
 
     # Extract control range for each actuator
     control_ranges = model.actuator_ctrlrange

diff --git a/examples/mujoco_interactive_simulation.py.py b/examples/mujoco_interactive_simulation.py.py
@@ -5,7 +5,7 @@
 
 
 def do_interactive_sim():
-    m = mujoco.MjModel.from_xml_path("assets/scene_one_cube.xml")
+    m = mujoco.MjModel.from_xml_path("gym_lowcostrobot/assets/scene_one_cube.xml")
     data = mujoco.MjData(m)
 
     with mujoco.viewer.launch_passive(m, data) as viewer:

diff --git a/examples/mujoco_simple_invk.py b/examples/mujoco_simple_invk.py
@@ -8,7 +8,7 @@
 
 
 def do_simple_invk():
-    m = mujoco.MjModel.from_xml_path("assets/scene_one_cube.xml")
+    m = mujoco.MjModel.from_xml_path("gym_lowcostrobot/assets/scene_one_cube.xml")
     data = mujoco.MjData(m)
     robot = SimulatedRobot(m, data)
 

diff --git a/examples/trace_hdf5_mujoco.py b/examples/trace_hdf5_mujoco.py
@@ -11,7 +11,7 @@
 def do_replay_hdf5(args):
     # Specify the path to your HDF5 file
     with h5py.File(args.file_path, "r") as file:
-        m = mujoco.MjModel.from_xml_path("assets/scene_one_cube.xml")
+        m = mujoco.MjModel.from_xml_path("gym_lowcostrobot/assets/scene_one_cube.xml")
         data = mujoco.MjData(m)
         robot = SimulatedRobot(m, data)
 

diff --git a/gym_lowcostrobot/__init__.py b/gym_lowcostrobot/__init__.py
@@ -1,7 +1,11 @@
+import os
+
 from gymnasium.envs.registration import register
 
 __version__ = "0.0.1"
 
+ASSETS_PATH = os.path.join(os.path.dirname(__file__), "assets")
+
 register(
     id="LiftCube-v0",
     entry_point="gym_lowcostrobot.envs:LiftCubeEnv",

diff --git a/assets/low-cost-arm.urdf → gym_lowcostrobot/assets/low-cost-arm.urdf b/assets/low-cost-arm.urdf → gym_lowcostrobot/assets/low-cost-arm.urdf
diff --git a/assets/low_cost_robot.xml → gym_lowcostrobot/assets/low_cost_robot.xml b/assets/low_cost_robot.xml → gym_lowcostrobot/assets/low_cost_robot.xml
@@ -1,5 +1,5 @@
 <mujoco model="low_cost_robot">
-    <compiler angle="radian" meshdir="assets" texturedir="assets" autolimits="true"/>
+    <compiler angle="radian" meshdir="meshes" texturedir="meshes" autolimits="true"/>
     <!-- The timestep has a big influence on the contacts stability -->
     <option cone="elliptic" impratio="10" timestep="0.001"/>
 

diff --git a/assets/assets/base_link.stl → gym_lowcostrobot/assets/meshes/base_link.stl b/assets/assets/base_link.stl → gym_lowcostrobot/assets/meshes/base_link.stl
diff --git a/...s/elbow-to-wrist-motor-reference_v1_1.stl → ...s/elbow-to-wrist-motor-reference_v1_1.stl b/...s/elbow-to-wrist-motor-reference_v1_1.stl → ...s/elbow-to-wrist-motor-reference_v1_1.stl
diff --git a/assets/assets/first-bracket-motor_1.stl → ...t/assets/meshes/first-bracket-motor_1.stl b/assets/assets/first-bracket-motor_1.stl → ...t/assets/meshes/first-bracket-motor_1.stl
diff --git a/.../assets/gripper-moving-part-dumb_v2_1.stl → .../meshes/gripper-moving-part-dumb_v2_1.stl b/.../assets/gripper-moving-part-dumb_v2_1.stl → .../meshes/gripper-moving-part-dumb_v2_1.stl
diff --git a/assets/assets/gripper-static-motor-pt1.stl → ...ssets/meshes/gripper-static-motor-pt1.stl b/assets/assets/gripper-static-motor-pt1.stl → ...ssets/meshes/gripper-static-motor-pt1.stl
diff --git a/assets/assets/gripper-static-motor-pt2.stl → ...ssets/meshes/gripper-static-motor-pt2.stl b/assets/assets/gripper-static-motor-pt2.stl → ...ssets/meshes/gripper-static-motor-pt2.stl
diff --git a/assets/assets/gripper-static-motor_v2_1.stl → ...sets/meshes/gripper-static-motor_v2_1.stl b/assets/assets/gripper-static-motor_v2_1.stl → ...sets/meshes/gripper-static-motor_v2_1.stl
diff --git a/...s/assets/shoulder-to-elbow-motor_v1_1.stl → ...s/meshes/shoulder-to-elbow-motor_v1_1.stl b/...s/assets/shoulder-to-elbow-motor_v1_1.stl → ...s/meshes/shoulder-to-elbow-motor_v1_1.stl
diff --git a/assets/scene_one_cube.xml → gym_lowcostrobot/assets/scene_one_cube.xml b/assets/scene_one_cube.xml → gym_lowcostrobot/assets/scene_one_cube.xml
diff --git a/assets/scene_two_cubes.xml → gym_lowcostrobot/assets/scene_two_cubes.xml b/assets/scene_two_cubes.xml → gym_lowcostrobot/assets/scene_two_cubes.xml
diff --git a/gym_lowcostrobot/envs/lift_cube_env.py b/gym_lowcostrobot/envs/lift_cube_env.py
@@ -1,56 +1,113 @@
-import time
+import os
 
 import mujoco
 import mujoco.viewer
 import numpy as np
 from gymnasium import spaces
 
+from gym_lowcostrobot import ASSETS_PATH
 from gym_lowcostrobot.envs.base_env import BaseRobotEnv
 
 
 class LiftCubeEnv(BaseRobotEnv):
+    """
+    ## Description
+
+    The robot has to lift a cube with its end-effector. The episode is terminated when the cube is lifted above a
+    threshold distance.
+
+    ## Action space
+
+    Two action modes are available: "joint" and "ee". In the "joint" mode, the action space is a 5-dimensional box
+    representing the target joint angles.
+
+    | Index | Action                      | Type (unit) | Min  | Max |
+    | ----- | --------------------------- | ----------- | ---- | --- |
+    | 0     | Joint 1 (base to shoulder)  | Float (rad) | -1.0 | 1.0 |
+    | 1     | Joint 2 (shoulder to elbow) | Float (rad) | -1.0 | 1.0 |
+    | 2     | Joint 3 (elbow to wrist)    | Float (rad) | -1.0 | 1.0 |
+    | 3     | Joint 4 (wrist to gripper)  | Float (rad) | -1.0 | 1.0 |
+    | 4     | Joint 5 (gripper)           | Float (rad) | -1.0 | 1.0 |
+
+    In the "ee" mode, the action space is a 3-dimensional box representing the target end-effector position.
+
+    | Index | Action  | Type (unit) | Min  | Max |
+    | ----- | ------- | ----------- | ---- | --- |
+    | 0     | X       | Float (m)   | -1.0 | 1.0 |
+    | 1     | Y       | Float (m)   | -1.0 | 1.0 |
+    | 2     | Z       | Float (m)   | -1.0 | 1.0 |
+    | 3     | Gripper | Float (m)   | -1.0 | 1.0 |
+
+    ## Observation space
+
+    | Index | Observation                              | Type (unit) | Min   | Max  |
+    | ----- | ---------------------------------------- | ----------- | ----- | ---- |
+    | 0     | Angle of 1st joint 1 (base to shoulder)  | Float (rad) | -3.14 | 3.14 |
+    | 1     | Angle of 2nd joint 2 (shoulder to elbow) | Float (rad) | -3.14 | 3.14 |
+    | 2     | Angle of 3rd joint 3 (elbow to wrist)    | Float (rad) | -3.14 | 3.14 |
+    | 3     | Angle of 4th joint 4 (wrist to gripper)  | Float (rad) | -3.14 | 3.14 |
+    | 4     | Angle of 5th joint 5 (gripper)           | Float (rad) | -3.14 | 3.14 |
+    | 5     | X position of the cube                   | Float (m)   | -10.0 | 10.0 |
+    | 6     | Y position of the cube                   | Float (m)   | -10.0 | 10.0 |
+    | 7     | Z position of the cube                   | Float (m)   | -10.0 | 10.0 |
+    | 8     | Quaternion \( w \) of the cube           | Float       | -1.0  | 1.0  |
+    | 9     | Quaternion \( x \) of the cube           | Float       | -1.0  | 1.0  |
+    | 10    | Quaternion \( y \) of the cube           | Float       | -1.0  | 1.0  |
+    | 11    | Quaternion \( z \) of the cube           | Float       | -1.0  | 1.0  |
+
+    ## Reward
+
+    The reward is the z position of the cube. The episode is terminated when the cube is lifted above a threshold
+    distance.
+
+    ## Reward
+
+    The reward is the z position of the cube. The episode is terminated when the cube is lifted above a threshold
+    distance.
+    """
+
     def __init__(self, image_state=None, action_mode="joint", render_mode=None, obj_xy_range=0.15):
         super().__init__(
-            xml_path="assets/scene_one_cube.xml",
+            xml_path=os.path.join(ASSETS_PATH, "scene_one_cube.xml"),
             image_state=image_state,
             action_mode=action_mode,
             render_mode=render_mode,
         )
 
         # Define the action space and observation space
         if self.action_mode == "ee":
-            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(3 + 1,), dtype=np.float32)
+            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float32)
         else:
             self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(5,), dtype=np.float32)
-        self.observation_space = spaces.Box(
-            low=-np.inf,
-            high=np.inf,
-            shape=(self.data.xpos.flatten().shape[0],),
-            dtype=np.float32,
-        )
+        low = [-np.pi, -np.pi, -np.pi, -np.pi, -np.pi, -10.0, -10.0, -10.0, -1.0, -1.0, -1.0, -1.0]
+        high = [np.pi, np.pi, np.pi, np.pi, np.pi, 10.0, 10.0, 10.0, 1.0, 1.0, 1.0, 1.0]
+        self.observation_space = spaces.Box(low=np.array(low), high=np.array(high), dtype=np.float32)
 
-        self.threshold_distance = 0.5
+        self.threshold_height = 0.5
         self.set_object_range(obj_xy_range)
 
     def reset(self, seed=None, options=None):
         # We need the following line to seed self.np_random
         super().reset(seed=seed, options=options)
 
+        # Reset the robot to the initial position
+        self.data.qpos[:5] = np.array([0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float32)
+
         # Sample and set the object position
-        self.data.joint("red_box_joint").qpos[:3] = self.np_random.uniform(self.object_low, self.object_high)
+        object_pos = self.np_random.uniform(self.object_low, self.object_high).astype(np.float32)
+        object_rot = np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32)
+        self.data.qpos[5:12] = np.concatenate([object_pos, object_rot])
 
         # Step the simulation
-        # mujoco.mj_step(self.model, self.data)
         mujoco.mj_forward(self.model, self.data)
-        self.step_start = time.time()
 
         # Get the additional info
         info = self.get_info()
 
         return self.get_observation(), info
 
     def get_observation(self):
-        return np.concatenate([self.data.xpos.flatten()], dtype=np.float32)
+        return self.data.qpos.astype(dtype=np.float32)
 
     def step(self, action):
         # Perform the action and step the simulation
@@ -59,16 +116,14 @@ def step(self, action):
         # Get the new observation
         observation = self.get_observation()
 
-        # Compute the distance
-        object_id = self.model.body("box").id
-        object_pos = self.data.geom_xpos[object_id]
-        object_z = object_pos[-1]
+        # Get the height of the object
+        object_z = self.data.qpos[7]
 
         # Compute the reward
         reward = object_z
 
         # Check if the target position is reached
-        terminated = object_z > self.threshold_distance
+        terminated = object_z > self.threshold_height
 
         # Get the additional info
         info = self.get_info()

diff --git a/gym_lowcostrobot/envs/pick_place_cube_env.py b/gym_lowcostrobot/envs/pick_place_cube_env.py
@@ -1,33 +1,85 @@
-import time
+import os
 
 import mujoco
 import mujoco.viewer
 import numpy as np
 from gymnasium import spaces
 
+from gym_lowcostrobot import ASSETS_PATH
 from gym_lowcostrobot.envs.base_env import BaseRobotEnv
 
 
 class PickPlaceCubeEnv(BaseRobotEnv):
+    """
+    ## Description
+
+    The robot has to pick and place a cube. The episode is terminated when the cube is placed within a threshold
+    distance.
+
+    ## Action space
+
+    Two action modes are available: "joint" and "ee". In the "joint" mode, the action space is a 5-dimensional box
+    representing the target joint angles.
+
+    | Index | Action                      | Type (unit) | Min  | Max |
+    | ----- | --------------------------- | ----------- | ---- | --- |
+    | 0     | Joint 1 (base to shoulder)  | Float (rad) | -1.0 | 1.0 |
+    | 1     | Joint 2 (shoulder to elbow) | Float (rad) | -1.0 | 1.0 |
+    | 2     | Joint 3 (elbow to wrist)    | Float (rad) | -1.0 | 1.0 |
+    | 3     | Joint 4 (wrist to gripper)  | Float (rad) | -1.0 | 1.0 |
+    | 4     | Joint 5 (gripper)           | Float (rad) | -1.0 | 1.0 |
+
+    In the "ee" mode, the action space is a 3-dimensional box representing the target end-effector position.
+
+    | Index | Action  | Type (unit) | Min  | Max |
+    | ----- | ------- | ----------- | ---- | --- |
+    | 0     | X       | Float (m)   | -1.0 | 1.0 |
+    | 1     | Y       | Float (m)   | -1.0 | 1.0 |
+    | 2     | Z       | Float (m)   | -1.0 | 1.0 |
+    | 3     | Gripper | Float (m)   | -1.0 | 1.0 |
+
+    ## Observation space
+
+    | Index | Observation                              | Type (unit) | Min   | Max  |
+    | ----- | ---------------------------------------- | ----------- | ----- | ---- |
+    | 0     | Angle of 1st joint 1 (base to shoulder)  | Float (rad) | -3.14 | 3.14 |
+    | 1     | Angle of 2nd joint 2 (shoulder to elbow) | Float (rad) | -3.14 | 3.14 |
+    | 2     | Angle of 3rd joint 3 (elbow to wrist)    | Float (rad) | -3.14 | 3.14 |
+    | 3     | Angle of 4th joint 4 (wrist to gripper)  | Float (rad) | -3.14 | 3.14 |
+    | 4     | Angle of 5th joint 5 (gripper)           | Float (rad) | -3.14 | 3.14 |
+    | 5     | X position of the cube                   | Float (m)   | -10.0 | 10.0 |
+    | 6     | Y position of the cube                   | Float (m)   | -10.0 | 10.0 |
+    | 7     | Z position of the cube                   | Float (m)   | -10.0 | 10.0 |
+    | 8     | Quaternion \( w \) of the cube           | Float       | -1.0  | 1.0  |
+    | 9     | Quaternion \( x \) of the cube           | Float       | -1.0  | 1.0  |
+    | 10    | Quaternion \( y \) of the cube           | Float       | -1.0  | 1.0  |
+    | 11    | Quaternion \( z \) of the cube           | Float       | -1.0  | 1.0  |
+    | 12    | X position of the target                 | Float (m)   | -10.0 | 10.0 |
+    | 13    | Y position of the target                 | Float (m)   | -10.0 | 10.0 |
+    | 14    | Z position of the target                 | Float (m)   | -10.0 | 10.0 |
+
+    ## Reward
+
+    The reward is the negative distance between the cube and the target position. The episode is terminated when the
+    distance is less than a threshold.
+    """
+
     def __init__(self, image_state=None, action_mode="joint", render_mode=None, target_xy_range=0.2, obj_xy_range=0.2):
         super().__init__(
-            xml_path="assets/scene_one_cube.xml",
+            xml_path=os.path.join(ASSETS_PATH, "scene_one_cube.xml"),
             image_state=image_state,
             action_mode=action_mode,
             render_mode=render_mode,
         )
 
         # Define the action space and observation space
         if self.action_mode == "ee":
-            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(3 + 1,), dtype=np.float32)
+            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float32)
         else:
             self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(5,), dtype=np.float32)
-        self.observation_space = spaces.Box(
-            low=-np.inf,
-            high=np.inf,
-            shape=(self.data.xpos.flatten().shape[0] + 3,),
-            dtype=np.float32,
-        )
+        low = [-np.pi, -np.pi, -np.pi, -np.pi, -np.pi, -10.0, -10.0, -10.0, -1.0, -1.0, -1.0, -1.0, -10.0, -10.0, -10.0]
+        high = [np.pi, np.pi, np.pi, np.pi, np.pi, 10.0, 10.0, 10.0, 1.0, 1.0, 1.0, 1.0, 10.0, 10.0, 10.0]
+        self.observation_space = spaces.Box(low=np.array(low), high=np.array(high), dtype=np.float32)
 
         self.threshold_distance = 0.26
         self.set_object_range(obj_xy_range)
@@ -37,24 +89,27 @@ def reset(self, seed=None, options=None):
         # We need the following line to seed self.np_random
         super().reset(seed=seed, options=options)
 
-        # Sample the target position
-        self.target_pos = self.np_random.uniform(self.target_low, self.target_high)
+        # Reset the robot to the initial position
+        self.data.qpos[:5] = np.array([0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float32)
 
         # Sample and set the object position
-        self.data.joint("red_box_joint").qpos[:3] = self.np_random.uniform(self.object_low, self.object_high)
+        object_pos = self.np_random.uniform(self.object_low, self.object_high).astype(np.float32)
+        object_rot = np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32)
+        self.data.qpos[5:12] = np.concatenate([object_pos, object_rot])
+
+        # Sample the target position
+        self.target_pos = self.np_random.uniform(self.target_low, self.target_high)
 
         # Step the simulation
-        # mujoco.mj_step(self.model, self.data)
         mujoco.mj_forward(self.model, self.data)
-        self.step_start = time.time()
 
         # Get the additional info
         info = self.get_info()
 
         return self.get_observation(), info
 
     def get_observation(self):
-        return np.concatenate([self.data.xpos.flatten(), self.target_pos], dtype=np.float32)
+        return np.concatenate([self.data.qpos, self.target_pos], dtype=np.float32)
 
     def step(self, action):
         # Perform the action and step the simulation
@@ -63,7 +118,7 @@ def step(self, action):
         # Get the new observation
         observation = self.get_observation()
 
-        # Compute the distance
+        # Compute the distance between the cube and the target position
         cube_id = self.model.body("box").id
         cube_pos = self.data.geom_xpos[cube_id]
         distance = np.linalg.norm(cube_pos - self.target_pos)