From 80aa8b6342d7e2418fe4c4e46e18bb5d660c5334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 21 May 2024 09:30:00 +0000 Subject: [PATCH] clean reach and remove old namespace --- examples/gym_check_determinism.py | 10 ++-- gym_lowcostrobot/envs/reach_cube_env.py | 69 ++++++++++++++++++++++--- tests/test_env.py | 12 ++--- 3 files changed, 72 insertions(+), 19 deletions(-) diff --git a/examples/gym_check_determinism.py b/examples/gym_check_determinism.py index 22e6fc4..5a53a41 100644 --- a/examples/gym_check_determinism.py +++ b/examples/gym_check_determinism.py @@ -2,11 +2,11 @@ if __name__ == "__main__": for env_name in [ - "lowcostrobot-LiftCube-v0", - "lowcostrobot-PickPlaceCube-v0", - "lowcostrobot-PushCube-v0", - "lowcostrobot-ReachCube-v0", - "lowcostrobot-Stack-v0", + "LiftCube-v0", + "PickPlaceCube-v0", + "PushCube-v0", + "ReachCube-v0", + "Stack-v0", ]: env = gym.make(env_name) observation1, info = env.reset(seed=123) diff --git a/gym_lowcostrobot/envs/reach_cube_env.py b/gym_lowcostrobot/envs/reach_cube_env.py index 724f5f0..dad196d 100644 --- a/gym_lowcostrobot/envs/reach_cube_env.py +++ b/gym_lowcostrobot/envs/reach_cube_env.py @@ -9,6 +9,57 @@ class ReachCubeEnv(BaseRobotEnv): + """ + ## Description + + The robot has to reach a cube with its end-effector. The cube is placed at a random position within a predefined + range. The robot has to reach the cube with its end-effector. The episode is terminated when the end-effector is + within a threshold distance from the cube. + + ## Action space + + Two action modes are available: "joint" and "ee". In the "joint" mode, the action space is a 5-dimensional box + representing the target joint angles. + + | Index | Action | Type (unit) | Min | Max | + | ----- | --------------------------- | ----------- | ---- | --- | + | 0 | Joint 1 (base to shoulder) | Float (rad) | -1.0 | 1.0 | + | 1 | Joint 2 (shoulder to elbow) | Float (rad) | -1.0 | 1.0 | + | 2 | Joint 3 (elbow to wrist) | Float (rad) | -1.0 | 1.0 | + | 3 | Joint 4 (wrist to gripper) | Float (rad) | -1.0 | 1.0 | + | 4 | Joint 5 (gripper) | Float (rad) | -1.0 | 1.0 | + + In the "ee" mode, the action space is a 3-dimensional box representing the target end-effector position. + + | Index | Action | Type (unit) | Min | Max | + | ----- | ------ | ----------- | ---- | --- | + | 0 | X | Float (m) | -1.0 | 1.0 | + | 1 | Y | Float (m) | -1.0 | 1.0 | + | 2 | Z | Float (m) | -1.0 | 1.0 | + + ## Observation space + + | Index | Observation | Type (unit) | Min | Max | + | ----- | ---------------------------------------- | ----------- | ----- | ---- | + | 0 | Angle of 1st joint 1 (base to shoulder) | Float (rad) | -3.14 | 3.14 | + | 1 | Angle of 2nd joint 2 (shoulder to elbow) | Float (rad) | -3.14 | 3.14 | + | 2 | Angle of 3rd joint 3 (elbow to wrist) | Float (rad) | -3.14 | 3.14 | + | 3 | Angle of 4th joint 4 (wrist to gripper) | Float (rad) | -3.14 | 3.14 | + | 4 | Angle of 5th joint 5 (gripper) | Float (rad) | -3.14 | 3.14 | + | 5 | X position of the cube | Float (m) | -10.0 | 10.0 | + | 6 | Y position of the cube | Float (m) | -10.0 | 10.0 | + | 7 | Z position of the cube | Float (m) | -10.0 | 10.0 | + | 8 | Quaternion \( w \) of the cube | Float | -1.0 | 1.0 | + | 9 | Quaternion \( x \) of the cube | Float | -1.0 | 1.0 | + | 10 | Quaternion \( y \) of the cube | Float | -1.0 | 1.0 | + | 11 | Quaternion \( z \) of the cube | Float | -1.0 | 1.0 | + + ## Reward + + The reward is the negative distance between the end-effector and the cube. The episode is terminated when the + distance is less than a threshold. + """ + def __init__(self, image_state=None, action_mode="joint", render_mode=None, obj_xy_range=0.15): super().__init__( xml_path="assets/scene_one_cube.xml", @@ -23,9 +74,9 @@ def __init__(self, image_state=None, action_mode="joint", render_mode=None, obj_ else: self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(5,), dtype=np.float32) self.observation_space = spaces.Box( - low=-np.inf, - high=np.inf, - shape=(self.data.xpos.flatten().shape[0] + 3,), + low=np.array([-np.pi, -np.pi, -np.pi, -np.pi, -np.pi, -10.0, -10.0, -10.0, -1.0, -1.0, -1.0, -1.0]), + high=np.array([np.pi, np.pi, np.pi, np.pi, np.pi, 10.0, 10.0, 10.0, 1.0, 1.0, 1.0, 1.0]), + shape=(12,), dtype=np.float32, ) @@ -37,13 +88,16 @@ def reset(self, seed=None, options=None): # We need the following line to seed self.np_random super().reset(seed=seed, options=options) + # Reset the robot to the initial position + self.data.qpos[:5] = np.array([0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float32) + # Sample and set the object position - self.data.joint("red_box_joint").qpos[:3] = self.np_random.uniform(self.object_low, self.object_high) + object_pos = self.np_random.uniform(self.object_low, self.object_high).astype(np.float32) + object_rot = np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32) + self.data.qpos[5:12] = np.concatenate([object_pos, object_rot]) # Step the simulation - # mujoco.mj_step(self.model, self.data) mujoco.mj_forward(self.model, self.data) - self.step_start = time.time() # Get the additional info info = self.get_info() @@ -51,8 +105,7 @@ def reset(self, seed=None, options=None): return self.get_observation(), info def get_observation(self): - box_id = self.model.body("box").id - return np.concatenate([self.data.xpos.flatten(), self.data.xpos[box_id]], dtype=np.float32) + return self.data.qpos.astype(dtype=np.float32) def step(self, action): # Perform the action and step the simulation diff --git a/tests/test_env.py b/tests/test_env.py index 04710ae..f96e03c 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -8,14 +8,14 @@ @pytest.mark.parametrize( "env_id", [ - "lowcostrobot-LiftCube-v0", - "lowcostrobot-PickPlaceCube-v0", - "lowcostrobot-PushCube-v0", - "lowcostrobot-ReachCube-v0", - "lowcostrobot-Stack-v0", + "LiftCube-v0", + "PickPlaceCube-v0", + "PushCube-v0", + "ReachCube-v0", + "Stack-v0", ], ) def test_env_check(env_id): env = gym.make(env_id) - check_env(env) + check_env(env, skip_render_check=True) env.close()