Skip to content

Commit

Permalink
Merge pull request #3 from perezjln/qgallouedec/refactor_2
Browse files Browse the repository at this point in the history
Refactor 2
  • Loading branch information
perezjln authored May 19, 2024
2 parents fbcafd9 + 270c928 commit 36b3105
Show file tree
Hide file tree
Showing 20 changed files with 225 additions and 253 deletions.
2 changes: 1 addition & 1 deletion .github/workflow/build.yml → .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
python-version: ['3.8', '3.9', '3.10', '3.11']
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
Expand Down
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,22 @@ https://github.com/perezjln/envs-lowcostrobot/assets/5373778/de8c6448-1ece-4823-

## Installation

```
```sh
cd gym-lowcostrobot
pip install .
```

## Test

```
```sh
pip install pytest
pytest
```

## Format

```
```sh
pip install ruff
ruff format gym_lowcostrobot examples tests setup.py --line-length 119
isort -l 119 gym_lowcostrobot examples tests setup.py
```
1 change: 1 addition & 0 deletions examples/gym_manipulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@ def do_env_sim():
)
env.reset()


if __name__ == "__main__":
do_env_sim()
1 change: 1 addition & 0 deletions examples/gym_manipulation_img.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import matplotlib.pyplot as plt

from gym_lowcostrobot.envs.reach_cube_env import ReachCubeEnv


Expand Down
2 changes: 2 additions & 0 deletions examples/gym_manipulation_img_multi.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import matplotlib.pyplot as plt

from gym_lowcostrobot.envs.lift_cube_env import LiftCubeEnv


Expand All @@ -21,5 +22,6 @@ def do_env_sim_image():

env.render()


if __name__ == "__main__":
do_env_sim_image()
8 changes: 4 additions & 4 deletions examples/gym_manipulation_sb3.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from stable_baselines3 import TD3, PPO
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3 import PPO, TD3
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import SubprocVecEnv

from gym_lowcostrobot.envs.reach_cube_env import ReachCubeEnv
from gym_lowcostrobot.envs.lift_cube_env import LiftCubeEnv
from gym_lowcostrobot.envs.reach_cube_env import ReachCubeEnv


def do_td3_reach():
Expand Down
4 changes: 1 addition & 3 deletions examples/mujoco_ctrl_range.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import mujoco
import numpy as np


if __name__ == "__main__":

# Load the model from the XML file
model = mujoco.MjModel.from_xml_path("assets/scene_one_cube.xml")

Expand All @@ -13,4 +11,4 @@

# Print the control ranges
for i, (lower, upper) in enumerate(control_ranges):
print(f"Actuator {i} control range: lower = {lower}, upper = {upper}")
print(f"Actuator {i} control range: lower = {lower}, upper = {upper}")
1 change: 1 addition & 0 deletions examples/mujoco_interactive_simulation.py.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import time

import mujoco
import mujoco.viewer

Expand Down
2 changes: 1 addition & 1 deletion examples/mujoco_simple_invk.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import time
import numpy as np

import mujoco
import mujoco.viewer
import numpy as np

from gym_lowcostrobot.simulated_robot import SimulatedRobot

Expand Down
3 changes: 2 additions & 1 deletion examples/simple.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import gym_lowcostrobot
import gymnasium as gym

import gym_lowcostrobot

env = gym.make("ReachCube-v0")

# env.reset()
Expand Down
1 change: 0 additions & 1 deletion gym_lowcostrobot/envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,4 @@
from .reach_cube_env import ReachCubeEnv
from .stack_env import StackEnv


__all__ = ["LiftCubeEnv", "PickPlaceCubeEnv", "PushCubeEnv", "ReachCubeEnv", "StackEnv"]
70 changes: 40 additions & 30 deletions gym_lowcostrobot/envs/base_env.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,22 @@
import time
import numpy as np

import gymnasium as gym
import mujoco
import mujoco.viewer

import gymnasium as gym
import numpy as np

from gym_lowcostrobot.simulated_robot import SimulatedRobot


class BaseEnv(gym.Env):
metadata = {"render_modes": ["human", "rgb_array"],
"render_fps": 4,
"image_state": ["single", "multi"]}
class BaseRobotEnv(gym.Env):
metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4, "image_state": ["single", "multi"]}

def __init__(
self,
xml_path,
image_state=None,
action_mode="joint",
render_mode=None,
):
def __init__(self, xml_path, image_state=None, action_mode="joint", render_mode=None):
super().__init__()

# Load the MuJoCo model and data
self.model = mujoco.MjModel.from_xml_path(xml_path)
self.data = mujoco.MjData(self.model)
self.robot = SimulatedRobot(self.model, self.data)

self.current_step = 0

assert render_mode is None or render_mode in self.metadata["render_modes"]
self.render_mode = render_mode
Expand All @@ -45,6 +33,31 @@ def __init__(

self.action_mode = action_mode

def inverse_kinematics(self, ee_target_pos, step=0.2, joint_name="end_effector"):
"""
:param ee_target_pos: numpy array of target end effector position
:param joint_name: name of the end effector joint
"""
joint_id = self.model.body(joint_name).id

# get the current end effector position
ee_pos = self.data.geom_xpos[joint_id]

# compute the jacobian
jac = np.zeros((3, self.model.nv))
mujoco.mj_jacBodyCom(self.model, self.data, jac, None, joint_id)

# compute target joint velocities
qpos = self.data.qpos[:5]
qdot = np.dot(np.linalg.pinv(jac[:, :5]), ee_target_pos - ee_pos)

# apply the joint velocities
q_target_pos = qpos + qdot * step
return q_target_pos

def set_target_pos(self, target_pos):
self.data.ctrl = target_pos

def get_actuator_ranges(self):
return self.model.actuator_ctrlrange

Expand All @@ -57,9 +70,9 @@ def base_step_action_nograsp(self, action):
# Use inverse kinematics to get the joint action wrt the end effector current position and displacement
q_target_pos = self.robot.inverse_kinematics(ee_target_pos=ee_target_pos, joint_name="joint5-pad")
q_target_pos[-1:] = 0.0 # Close the gripper
self.robot.set_target_pos(q_target_pos)
self.set_target_pos(q_target_pos)
else:
self.robot.set_target_pos(action)
self.set_target_pos(action)

# Step the simulation forward
mujoco.mj_step(self.model, self.data)
Expand All @@ -74,16 +87,15 @@ def base_step_action_withgrasp(self, action):
# Use inverse kinematics to get the joint action wrt the end effector current position and displacement
q_target_pos = self.robot.inverse_kinematics(ee_target_pos=ee_target_pos, joint_name="joint5-pad")
q_target_pos[-1:] = np.sign(action[-1]) # Open or close the gripper
self.robot.set_target_pos(q_target_pos)
self.set_target_pos(q_target_pos)
else:
self.robot.set_target_pos(action)
self.set_target_pos(action)

# Step the simulation forward
mujoco.mj_step(self.model, self.data)
self.current_step += 1

def get_info(self):

if self.image_state == "single":
self.renderer.update_scene(self.data)
img = self.renderer.render()
Expand All @@ -102,14 +114,12 @@ def set_fps(self, fps):
self.model.opt.timestep = 1 / fps

def render(self):
if not self.render_mode:
return
self.viewer.sync()
time_until_next_step = self.model.opt.timestep - (time.time() - self.step_start)
if time_until_next_step > 0:
# time.sleep(time_until_next_step)
...
self.step_start = time.time()
if self.render_mode is not None:
self.viewer.sync()
time_until_next_step = self.model.opt.timestep - (time.time() - self.step_start)
if time_until_next_step > 0:
time.sleep(time_until_next_step)
self.step_start = time.time()

def get_camera_images(self):
dict_cams = {}
Expand Down
69 changes: 31 additions & 38 deletions gym_lowcostrobot/envs/lift_cube_env.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,20 @@
import time
import numpy as np

import mujoco
import mujoco.viewer

import numpy as np
from gymnasium import spaces

from gym_lowcostrobot.envs.base_env import BaseEnv
from gym_lowcostrobot.envs.base_env import BaseRobotEnv


class LiftCubeEnv(BaseEnv):
def __init__(
self,
xml_path="assets/scene_one_cube.xml",
image_state=None,
action_mode="joint",
render_mode=None
):
class LiftCubeEnv(BaseRobotEnv):
def __init__(self, image_state=None, action_mode="joint", render_mode=None):
super().__init__(
xml_path=xml_path,
xml_path="assets/scene_one_cube.xml",
image_state=image_state,
action_mode=action_mode,
render_mode=render_mode
render_mode=render_mode,
)

# Define the action space and observation space
Expand All @@ -38,47 +31,47 @@ def __init__(

self.threshold_distance = 0.5

self.object_low = np.array([0.0, 0.0, 0.01])
self.object_high = np.array([0.2, 0.2, 0.01])

def reset(self, seed=None, options=None):
# We need the following line to seed self.np_random
super().reset(seed=seed)

# Sample the target position and set the robot position
self.target_pos = np.array([self.np_random.random(), self.np_random.random(), 0.1])
self.data.joint("red_box_joint").qpos[:3] = [
self.np_random.random() * 0.2,
self.np_random.random() * 0.2,
0.01,
]
super().reset(seed=seed, options=options)

# Sample and set the object position
self.data.joint("red_box_joint").qpos[:3] = self.np_random.uniform(self.object_low, self.object_high)

# Step the simulation
mujoco.mj_step(self.model, self.data)
self.step_start = time.time()
self.current_step = 0

# Get the additional info
info = self.get_info()

return self.current_state(), info

def reward(self):
cube_id = self.model.body("box").id
cube_pos = self.data.geom_xpos[cube_id]
return cube_pos[-1]
return self.get_observation(), info

def current_state(self):
def get_observation(self):
return np.concatenate([self.data.xpos.flatten()], dtype=np.float32)

def step(self, action):
# Perform the action and step the simulation
self.base_step_action_withgrasp(action)

# Compute the reward based on the distance
high = self.reward()
# Get the new observation
observation = self.get_observation()

# Check if the target position is reached
terminated = high > self.threshold_distance
# Compute the distance
object_id = self.model.body("box").id
object_pos = self.data.geom_xpos[object_id]
object_z = object_pos[-1]

# Return the next observation, reward, terminated flag, and additional info
next_observation = self.current_state()
# Compute the reward
reward = object_z

# Check if the target position is reached
terminated = object_z > self.threshold_distance

# Check if the episode is timed out, fill info dictionary
# Get the additional info
info = self.get_info()

return next_observation, high, terminated, False, info
return observation, reward, terminated, False, info
Loading

0 comments on commit 36b3105

Please sign in to comment.