Skip to content

Commit

Permalink
clean reach and remove old namespace
Browse files Browse the repository at this point in the history
  • Loading branch information
qgallouedec committed May 21, 2024
1 parent e8000fd commit 80aa8b6
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 19 deletions.
10 changes: 5 additions & 5 deletions examples/gym_check_determinism.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

if __name__ == "__main__":
for env_name in [
"lowcostrobot-LiftCube-v0",
"lowcostrobot-PickPlaceCube-v0",
"lowcostrobot-PushCube-v0",
"lowcostrobot-ReachCube-v0",
"lowcostrobot-Stack-v0",
"LiftCube-v0",
"PickPlaceCube-v0",
"PushCube-v0",
"ReachCube-v0",
"Stack-v0",
]:
env = gym.make(env_name)
observation1, info = env.reset(seed=123)
Expand Down
69 changes: 61 additions & 8 deletions gym_lowcostrobot/envs/reach_cube_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,57 @@


class ReachCubeEnv(BaseRobotEnv):
"""
## Description
The robot has to reach a cube with its end-effector. The cube is placed at a random position within a predefined
range. The robot has to reach the cube with its end-effector. The episode is terminated when the end-effector is
within a threshold distance from the cube.
## Action space
Two action modes are available: "joint" and "ee". In the "joint" mode, the action space is a 5-dimensional box
representing the target joint angles.
| Index | Action | Type (unit) | Min | Max |
| ----- | --------------------------- | ----------- | ---- | --- |
| 0 | Joint 1 (base to shoulder) | Float (rad) | -1.0 | 1.0 |
| 1 | Joint 2 (shoulder to elbow) | Float (rad) | -1.0 | 1.0 |
| 2 | Joint 3 (elbow to wrist) | Float (rad) | -1.0 | 1.0 |
| 3 | Joint 4 (wrist to gripper) | Float (rad) | -1.0 | 1.0 |
| 4 | Joint 5 (gripper) | Float (rad) | -1.0 | 1.0 |
In the "ee" mode, the action space is a 3-dimensional box representing the target end-effector position.
| Index | Action | Type (unit) | Min | Max |
| ----- | ------ | ----------- | ---- | --- |
| 0 | X | Float (m) | -1.0 | 1.0 |
| 1 | Y | Float (m) | -1.0 | 1.0 |
| 2 | Z | Float (m) | -1.0 | 1.0 |
## Observation space
| Index | Observation | Type (unit) | Min | Max |
| ----- | ---------------------------------------- | ----------- | ----- | ---- |
| 0 | Angle of 1st joint 1 (base to shoulder) | Float (rad) | -3.14 | 3.14 |
| 1 | Angle of 2nd joint 2 (shoulder to elbow) | Float (rad) | -3.14 | 3.14 |
| 2 | Angle of 3rd joint 3 (elbow to wrist) | Float (rad) | -3.14 | 3.14 |
| 3 | Angle of 4th joint 4 (wrist to gripper) | Float (rad) | -3.14 | 3.14 |
| 4 | Angle of 5th joint 5 (gripper) | Float (rad) | -3.14 | 3.14 |
| 5 | X position of the cube | Float (m) | -10.0 | 10.0 |
| 6 | Y position of the cube | Float (m) | -10.0 | 10.0 |
| 7 | Z position of the cube | Float (m) | -10.0 | 10.0 |
| 8 | Quaternion \( w \) of the cube | Float | -1.0 | 1.0 |
| 9 | Quaternion \( x \) of the cube | Float | -1.0 | 1.0 |
| 10 | Quaternion \( y \) of the cube | Float | -1.0 | 1.0 |
| 11 | Quaternion \( z \) of the cube | Float | -1.0 | 1.0 |
## Reward
The reward is the negative distance between the end-effector and the cube. The episode is terminated when the
distance is less than a threshold.
"""

def __init__(self, image_state=None, action_mode="joint", render_mode=None, obj_xy_range=0.15):
super().__init__(
xml_path="assets/scene_one_cube.xml",
Expand All @@ -23,9 +74,9 @@ def __init__(self, image_state=None, action_mode="joint", render_mode=None, obj_
else:
self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(5,), dtype=np.float32)
self.observation_space = spaces.Box(
low=-np.inf,
high=np.inf,
shape=(self.data.xpos.flatten().shape[0] + 3,),
low=np.array([-np.pi, -np.pi, -np.pi, -np.pi, -np.pi, -10.0, -10.0, -10.0, -1.0, -1.0, -1.0, -1.0]),
high=np.array([np.pi, np.pi, np.pi, np.pi, np.pi, 10.0, 10.0, 10.0, 1.0, 1.0, 1.0, 1.0]),
shape=(12,),
dtype=np.float32,
)

Expand All @@ -37,22 +88,24 @@ def reset(self, seed=None, options=None):
# We need the following line to seed self.np_random
super().reset(seed=seed, options=options)

# Reset the robot to the initial position
self.data.qpos[:5] = np.array([0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float32)

# Sample and set the object position
self.data.joint("red_box_joint").qpos[:3] = self.np_random.uniform(self.object_low, self.object_high)
object_pos = self.np_random.uniform(self.object_low, self.object_high).astype(np.float32)
object_rot = np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32)
self.data.qpos[5:12] = np.concatenate([object_pos, object_rot])

# Step the simulation
# mujoco.mj_step(self.model, self.data)
mujoco.mj_forward(self.model, self.data)
self.step_start = time.time()

# Get the additional info
info = self.get_info()

return self.get_observation(), info

def get_observation(self):
box_id = self.model.body("box").id
return np.concatenate([self.data.xpos.flatten(), self.data.xpos[box_id]], dtype=np.float32)
return self.data.qpos.astype(dtype=np.float32)

def step(self, action):
# Perform the action and step the simulation
Expand Down
12 changes: 6 additions & 6 deletions tests/test_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
@pytest.mark.parametrize(
"env_id",
[
"lowcostrobot-LiftCube-v0",
"lowcostrobot-PickPlaceCube-v0",
"lowcostrobot-PushCube-v0",
"lowcostrobot-ReachCube-v0",
"lowcostrobot-Stack-v0",
"LiftCube-v0",
"PickPlaceCube-v0",
"PushCube-v0",
"ReachCube-v0",
"Stack-v0",
],
)
def test_env_check(env_id):
env = gym.make(env_id)
check_env(env)
check_env(env, skip_render_check=True)
env.close()

0 comments on commit 80aa8b6

Please sign in to comment.