Merge pull request #3 from perezjln/qgallouedec/refactor_2

Refactor 2
perezjln · May 19, 2024 · 36b3105 · 36b3105
2 parents fbcafd9 + 270c928
commit 36b3105
Show file tree

Hide file tree

Showing 20 changed files with 225 additions and 253 deletions.
diff --git a/.github/workflow/build.yml → .github/workflows/build.yml b/.github/workflow/build.yml → .github/workflows/build.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
+        python-version: ['3.8', '3.9', '3.10', '3.11']
     steps:
     - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}

diff --git a/README.md b/README.md
@@ -11,21 +11,22 @@ https://github.com/perezjln/envs-lowcostrobot/assets/5373778/de8c6448-1ece-4823-
 
 ## Installation
 
-```
+```sh
 cd gym-lowcostrobot
 pip install .
 ```
 
 ## Test
 
-```
+```sh
 pip install pytest
 pytest
 ```
 
 ## Format
 
-```
+```sh
 pip install ruff
 ruff format gym_lowcostrobot examples tests setup.py --line-length 119
+isort -l 119 gym_lowcostrobot examples tests setup.py
 ```
diff --git a/examples/gym_manipulation.py b/examples/gym_manipulation.py
@@ -23,5 +23,6 @@ def do_env_sim():
                 )
             env.reset()
 
+
 if __name__ == "__main__":
     do_env_sim()
diff --git a/examples/gym_manipulation_img.py b/examples/gym_manipulation_img.py
@@ -1,4 +1,5 @@
 import matplotlib.pyplot as plt
+
 from gym_lowcostrobot.envs.reach_cube_env import ReachCubeEnv
 
 

diff --git a/examples/gym_manipulation_img_multi.py b/examples/gym_manipulation_img_multi.py
@@ -1,4 +1,5 @@
 import matplotlib.pyplot as plt
+
 from gym_lowcostrobot.envs.lift_cube_env import LiftCubeEnv
 
 
@@ -21,5 +22,6 @@ def do_env_sim_image():
 
         env.render()
 
+
 if __name__ == "__main__":
     do_env_sim_image()
diff --git a/examples/gym_manipulation_sb3.py b/examples/gym_manipulation_sb3.py
@@ -1,10 +1,10 @@
-from stable_baselines3 import TD3, PPO
-from stable_baselines3.common.vec_env import SubprocVecEnv
-from stable_baselines3.common.evaluation import evaluate_policy
+from stable_baselines3 import PPO, TD3
 from stable_baselines3.common.callbacks import EvalCallback
+from stable_baselines3.common.evaluation import evaluate_policy
+from stable_baselines3.common.vec_env import SubprocVecEnv
 
-from gym_lowcostrobot.envs.reach_cube_env import ReachCubeEnv
 from gym_lowcostrobot.envs.lift_cube_env import LiftCubeEnv
+from gym_lowcostrobot.envs.reach_cube_env import ReachCubeEnv
 
 
 def do_td3_reach():

diff --git a/examples/mujoco_ctrl_range.py b/examples/mujoco_ctrl_range.py
@@ -1,9 +1,7 @@
 import mujoco
 import numpy as np
 
-
 if __name__ == "__main__":
-
     # Load the model from the XML file
     model = mujoco.MjModel.from_xml_path("assets/scene_one_cube.xml")
 
@@ -13,4 +11,4 @@
 
     # Print the control ranges
     for i, (lower, upper) in enumerate(control_ranges):
-        print(f"Actuator {i} control range: lower = {lower}, upper = {upper}")
+        print(f"Actuator {i} control range: lower = {lower}, upper = {upper}")
diff --git a/examples/mujoco_interactive_simulation.py.py b/examples/mujoco_interactive_simulation.py.py
@@ -1,4 +1,5 @@
 import time
+
 import mujoco
 import mujoco.viewer
 

diff --git a/examples/mujoco_simple_invk.py b/examples/mujoco_simple_invk.py
@@ -1,8 +1,8 @@
 import time
-import numpy as np
 
 import mujoco
 import mujoco.viewer
+import numpy as np
 
 from gym_lowcostrobot.simulated_robot import SimulatedRobot
 

diff --git a/examples/simple.py b/examples/simple.py
@@ -1,6 +1,7 @@
-import gym_lowcostrobot
 import gymnasium as gym
 
+import gym_lowcostrobot
+
 env = gym.make("ReachCube-v0")
 
 # env.reset()

diff --git a/gym_lowcostrobot/envs/__init__.py b/gym_lowcostrobot/envs/__init__.py
@@ -4,5 +4,4 @@
 from .reach_cube_env import ReachCubeEnv
 from .stack_env import StackEnv
 
-
 __all__ = ["LiftCubeEnv", "PickPlaceCubeEnv", "PushCubeEnv", "ReachCubeEnv", "StackEnv"]
diff --git a/gym_lowcostrobot/envs/base_env.py b/gym_lowcostrobot/envs/base_env.py
@@ -1,34 +1,22 @@
 import time
-import numpy as np
 
+import gymnasium as gym
 import mujoco
 import mujoco.viewer
-
-import gymnasium as gym
+import numpy as np
 
 from gym_lowcostrobot.simulated_robot import SimulatedRobot
 
 
-class BaseEnv(gym.Env):
-    metadata = {"render_modes": ["human", "rgb_array"], 
-                "render_fps": 4,
-                "image_state": ["single", "multi"]}
+class BaseRobotEnv(gym.Env):
+    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4, "image_state": ["single", "multi"]}
 
-    def __init__(
-        self,
-        xml_path,
-        image_state=None,
-        action_mode="joint",
-        render_mode=None,
-    ):
+    def __init__(self, xml_path, image_state=None, action_mode="joint", render_mode=None):
         super().__init__()
 
         # Load the MuJoCo model and data
         self.model = mujoco.MjModel.from_xml_path(xml_path)
         self.data = mujoco.MjData(self.model)
-        self.robot = SimulatedRobot(self.model, self.data)
-
-        self.current_step = 0
 
         assert render_mode is None or render_mode in self.metadata["render_modes"]
         self.render_mode = render_mode
@@ -45,6 +33,31 @@ def __init__(
 
         self.action_mode = action_mode
 
+    def inverse_kinematics(self, ee_target_pos, step=0.2, joint_name="end_effector"):
+        """
+        :param ee_target_pos: numpy array of target end effector position
+        :param joint_name: name of the end effector joint
+        """
+        joint_id = self.model.body(joint_name).id
+
+        # get the current end effector position
+        ee_pos = self.data.geom_xpos[joint_id]
+
+        # compute the jacobian
+        jac = np.zeros((3, self.model.nv))
+        mujoco.mj_jacBodyCom(self.model, self.data, jac, None, joint_id)
+
+        # compute target joint velocities
+        qpos = self.data.qpos[:5]
+        qdot = np.dot(np.linalg.pinv(jac[:, :5]), ee_target_pos - ee_pos)
+
+        # apply the joint velocities
+        q_target_pos = qpos + qdot * step
+        return q_target_pos
+
+    def set_target_pos(self, target_pos):
+        self.data.ctrl = target_pos
+
     def get_actuator_ranges(self):
         return self.model.actuator_ctrlrange
 
@@ -57,9 +70,9 @@ def base_step_action_nograsp(self, action):
             # Use inverse kinematics to get the joint action wrt the end effector current position and displacement
             q_target_pos = self.robot.inverse_kinematics(ee_target_pos=ee_target_pos, joint_name="joint5-pad")
             q_target_pos[-1:] = 0.0  # Close the gripper
-            self.robot.set_target_pos(q_target_pos)
+            self.set_target_pos(q_target_pos)
         else:
-            self.robot.set_target_pos(action)
+            self.set_target_pos(action)
 
         # Step the simulation forward
         mujoco.mj_step(self.model, self.data)
@@ -74,16 +87,15 @@ def base_step_action_withgrasp(self, action):
             # Use inverse kinematics to get the joint action wrt the end effector current position and displacement
             q_target_pos = self.robot.inverse_kinematics(ee_target_pos=ee_target_pos, joint_name="joint5-pad")
             q_target_pos[-1:] = np.sign(action[-1])  # Open or close the gripper
-            self.robot.set_target_pos(q_target_pos)
+            self.set_target_pos(q_target_pos)
         else:
-            self.robot.set_target_pos(action)
+            self.set_target_pos(action)
 
         # Step the simulation forward
         mujoco.mj_step(self.model, self.data)
         self.current_step += 1
 
     def get_info(self):
-
         if self.image_state == "single":
             self.renderer.update_scene(self.data)
             img = self.renderer.render()
@@ -102,14 +114,12 @@ def set_fps(self, fps):
             self.model.opt.timestep = 1 / fps
 
     def render(self):
-        if not self.render_mode:
-            return
-        self.viewer.sync()
-        time_until_next_step = self.model.opt.timestep - (time.time() - self.step_start)
-        if time_until_next_step > 0:
-            # time.sleep(time_until_next_step)
-            ...
-        self.step_start = time.time()
+        if self.render_mode is not None:
+            self.viewer.sync()
+            time_until_next_step = self.model.opt.timestep - (time.time() - self.step_start)
+            if time_until_next_step > 0:
+                time.sleep(time_until_next_step)
+            self.step_start = time.time()
 
     def get_camera_images(self):
         dict_cams = {}

diff --git a/gym_lowcostrobot/envs/lift_cube_env.py b/gym_lowcostrobot/envs/lift_cube_env.py
@@ -1,27 +1,20 @@
 import time
-import numpy as np
 
 import mujoco
 import mujoco.viewer
-
+import numpy as np
 from gymnasium import spaces
 
-from gym_lowcostrobot.envs.base_env import BaseEnv
+from gym_lowcostrobot.envs.base_env import BaseRobotEnv
 
 
-class LiftCubeEnv(BaseEnv):
-    def __init__(
-        self,
-        xml_path="assets/scene_one_cube.xml",
-        image_state=None,
-        action_mode="joint",
-        render_mode=None
-    ):
+class LiftCubeEnv(BaseRobotEnv):
+    def __init__(self, image_state=None, action_mode="joint", render_mode=None):
         super().__init__(
-            xml_path=xml_path,
+            xml_path="assets/scene_one_cube.xml",
             image_state=image_state,
             action_mode=action_mode,
-            render_mode=render_mode
+            render_mode=render_mode,
         )
 
         # Define the action space and observation space
@@ -38,47 +31,47 @@ def __init__(
 
         self.threshold_distance = 0.5
 
+        self.object_low = np.array([0.0, 0.0, 0.01])
+        self.object_high = np.array([0.2, 0.2, 0.01])
+
     def reset(self, seed=None, options=None):
         # We need the following line to seed self.np_random
-        super().reset(seed=seed)
-
-        # Sample the target position and set the robot position
-        self.target_pos = np.array([self.np_random.random(), self.np_random.random(), 0.1])
-        self.data.joint("red_box_joint").qpos[:3] = [
-            self.np_random.random() * 0.2,
-            self.np_random.random() * 0.2,
-            0.01,
-        ]
+        super().reset(seed=seed, options=options)
+
+        # Sample and set the object position
+        self.data.joint("red_box_joint").qpos[:3] = self.np_random.uniform(self.object_low, self.object_high)
+
+        # Step the simulation
         mujoco.mj_step(self.model, self.data)
         self.step_start = time.time()
-        self.current_step = 0
 
+        # Get the additional info
         info = self.get_info()
 
-        return self.current_state(), info
-
-    def reward(self):
-        cube_id = self.model.body("box").id
-        cube_pos = self.data.geom_xpos[cube_id]
-        return cube_pos[-1]
+        return self.get_observation(), info
 
-    def current_state(self):
+    def get_observation(self):
         return np.concatenate([self.data.xpos.flatten()], dtype=np.float32)
 
     def step(self, action):
         # Perform the action and step the simulation
         self.base_step_action_withgrasp(action)
 
-        # Compute the reward based on the distance
-        high = self.reward()
+        # Get the new observation
+        observation = self.get_observation()
 
-        # Check if the target position is reached
-        terminated = high > self.threshold_distance
+        # Compute the distance
+        object_id = self.model.body("box").id
+        object_pos = self.data.geom_xpos[object_id]
+        object_z = object_pos[-1]
 
-        # Return the next observation, reward, terminated flag, and additional info
-        next_observation = self.current_state()
+        # Compute the reward
+        reward = object_z
+
+        # Check if the target position is reached
+        terminated = object_z > self.threshold_distance
 
-        # Check if the episode is timed out, fill info dictionary
+        # Get the additional info
         info = self.get_info()
 
-        return next_observation, high, terminated, False, info
+        return observation, reward, terminated, False, info