Refactor rllab from rllab.mujoco_py to mujoco_py (v1.5.0) (#91)
In this change, rllab is updated to the latest version of MuJoCo
(v1.5.0) by replacing all uses of rllab.mujoco_py in the code with
mujoco_py, then removing the rllab.mujoco_py folder. The
setup_mujoco.sh script is also updated to install MuJoCo v1.5.0 in
lieu of v1.3.0. Most changes are minor, such as replacing uses of
self.model with self.sim, because the new version of mujoco_py moves
simulation state and functionality from the model to a separate sim
object. The sim is instantiated by passing a reference to the model:
self.sim = MjSim(self.model).
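
For illustration, a minimal sketch of that migration pattern; the
model file name and observation fields are assumptions for the
example, not taken from this diff:

```python
import numpy as np
from mujoco_py import MjSim, load_model_from_path

# Load the MuJoCo model as before ("ant.xml" is a hypothetical path).
model = load_model_from_path("ant.xml")

# New in mujoco_py v1.5.0: runtime state lives on an MjSim built from
# the model, not on the model itself.
sim = MjSim(model)

# Old rllab.mujoco_py style:  model.data.qpos, model.data.qvel
# New mujoco_py style:        sim.data.qpos,   sim.data.qvel
obs = np.concatenate([sim.data.qpos.flat, sim.data.qvel.flat])
sim.step()  # advance the simulation one timestep
```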

However, some changes are major, most notably in embedded_viewer.py
and gather_env.py. Many functions in these files relied on deprecated
APIs that have no direct, easy counterpart in the new MuJoCo
documentation. Some of those functions were renamed, others were
moved under new or pre-existing classes in a different form, and
others appear to have been removed outright. These files have not yet
been rigorously tested, but no major functionality has broken so far,
so they can be used tentatively.
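
As a rough guide to the viewer changes, the sketch below mirrors the
new setup path visible in the embedded_viewer.py diff: free functions
move from rllab.mujoco_py's mjlib to mujoco_py.functions, and the
hand-built mjcore structs are replaced by an MjRenderContext whose
scene and options are reused. A sketch under those assumptions, not a
tested snippet (model path is hypothetical):

```python
from mujoco_py import MjRenderContext, MjSim, functions, load_model_from_path

model = load_model_from_path("ant.xml")  # hypothetical model file
sim = MjSim(model)                       # simulation now owns the state
con = MjRenderContext(sim)               # replaces mjcore.MJRCONTEXT
scn, vopt = con.scn, con.vopt            # scene/options hang off the context

# Renamed C-API wrappers, as used in the diff below:
#   mjlib.mjr_makeContext(model.ptr, byref(con), 150)
#       -> functions.mjr_makeContext(model, con, 150)
#   mjlib.mjr_render(0, rect, objects, ...)
#       -> functions.mjr_render(rect, scn, con)
#   mjlib.mjv_freeObjects(byref(objects))
#       -> functions.mjv_freeScene(scn)
```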
jonashen authored May 30, 2018
1 parent 49e7505 commit 2e55295
Showing 36 changed files with 509 additions and 9,769 deletions.
1 change: 1 addition & 0 deletions environment.yml
@@ -24,6 +24,7 @@ dependencies:
- PyOpenGL
- nose2
- pyzmq
- mujoco-py<1.50.2,>=1.50.1
- cached_property
- cloudpickle
- git+https://github.com/Lasagne/Lasagne.git@7992faa80fa5233a786e2582a605e854cea7d1cf#egg=Lasagne
4 changes: 2 additions & 2 deletions examples/trpo_cartpole.py
@@ -9,8 +9,7 @@
policy = GaussianMLPPolicy(
env_spec=env.spec,
# The neural network policy should have two hidden layers, each with 32 hidden units.
hidden_sizes=(32, 32)
)
hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)

@@ -23,5 +22,6 @@
n_itr=40,
discount=0.99,
step_size=0.01,
# plot=True
)
algo.train()
4 changes: 2 additions & 2 deletions examples/trpo_swimmer.py
@@ -9,8 +9,7 @@
policy = GaussianMLPPolicy(
env_spec=env.spec,
# The neural network policy should have two hidden layers, each with 32 hidden units.
hidden_sizes=(32, 32)
)
hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)

@@ -23,5 +22,6 @@
n_itr=40,
discount=0.99,
step_size=0.01,
# plot=True
)
algo.train()
51 changes: 26 additions & 25 deletions rllab/algos/batch_polopt.py
@@ -14,7 +14,8 @@ def __init__(self, algo):
self.algo = algo

def start_worker(self):
parallel_sampler.populate_task(self.algo.env, self.algo.policy, scope=self.algo.scope)
parallel_sampler.populate_task(
self.algo.env, self.algo.policy, scope=self.algo.scope)

def shutdown_worker(self):
parallel_sampler.terminate_task(scope=self.algo.scope)
@@ -30,7 +31,8 @@ def obtain_samples(self, itr):
if self.algo.whole_paths:
return paths
else:
paths_truncated = parallel_sampler.truncate_paths(paths, self.algo.batch_size)
paths_truncated = parallel_sampler.truncate_paths(
paths, self.algo.batch_size)
return paths_truncated


@@ -40,28 +42,26 @@ class BatchPolopt(RLAlgorithm):
This includes various policy gradient methods like vpg, npg, ppo, trpo, etc.
"""

def __init__(
self,
env,
policy,
baseline,
scope=None,
n_itr=500,
start_itr=0,
batch_size=5000,
max_path_length=500,
discount=0.99,
gae_lambda=1,
plot=False,
pause_for_plot=False,
center_adv=True,
positive_adv=False,
store_paths=False,
whole_paths=True,
sampler_cls=None,
sampler_args=None,
**kwargs
):
def __init__(self,
env,
policy,
baseline,
scope=None,
n_itr=500,
start_itr=0,
batch_size=5000,
max_path_length=500,
discount=0.99,
gae_lambda=1,
plot=False,
pause_for_plot=False,
center_adv=True,
positive_adv=False,
store_paths=False,
whole_paths=True,
sampler_cls=None,
sampler_args=None,
**kwargs):
"""
:param env: Environment
:param policy: Policy
@@ -107,6 +107,7 @@ def __init__(
def start_worker(self):
self.sampler.start_worker()
if self.plot:
plotter.init_worker()
plotter.init_plot(self.env, self.policy)

def shutdown_worker(self):
@@ -134,7 +135,7 @@ def train(self):
self.update_plot()
if self.pause_for_plot:
input("Plotting evaluation run: Press Enter to "
"continue...")
"continue...")

self.shutdown_worker()

15 changes: 8 additions & 7 deletions rllab/envs/mujoco/ant_env.py
@@ -20,9 +20,9 @@ def __init__(self, *args, **kwargs):

def get_current_obs(self):
return np.concatenate([
self.model.data.qpos.flat,
self.model.data.qvel.flat,
np.clip(self.model.data.cfrc_ext, -1, 1).flat,
self.sim.data.qpos.flat,
self.sim.data.qvel.flat,
np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
self.get_body_xmat("torso").flat,
self.get_body_com("torso"),
]).reshape(-1)
@@ -35,7 +35,7 @@ def step(self, action):
scaling = (ub - lb) * 0.5
ctrl_cost = 0.5 * 1e-2 * np.sum(np.square(action / scaling))
contact_cost = 0.5 * 1e-3 * np.sum(
np.square(np.clip(self.model.data.cfrc_ext, -1, 1))),
np.square(np.clip(self.sim.data.cfrc_ext, -1, 1))),
survive_reward = 0.05
reward = forward_reward - ctrl_cost - contact_cost + survive_reward
state = self._state
@@ -48,8 +48,10 @@
@overrides
def get_ori(self):
ori = [0, 1, 0, 0]
rot = self.model.data.qpos[self.__class__.ORI_IND:self.__class__.ORI_IND + 4] # take the quaternion
ori = q_mult(q_mult(rot, ori), q_inv(rot))[1:3] # project onto x-y plane
rot = self.sim.data.qpos[self.__class__.ORI_IND:self.__class__.ORI_IND
+ 4] # take the quaternion
ori = q_mult(q_mult(rot, ori),
q_inv(rot))[1:3] # project onto x-y plane
ori = math.atan2(ori[1], ori[0])
return ori

@@ -63,4 +65,3 @@ def log_diagnostics(self, paths):
logger.record_tabular('MaxForwardProgress', np.max(progs))
logger.record_tabular('MinForwardProgress', np.min(progs))
logger.record_tabular('StdForwardProgress', np.std(progs))

93 changes: 48 additions & 45 deletions rllab/envs/mujoco/gather/embedded_viewer.py
@@ -1,22 +1,26 @@
from rllab.mujoco_py import glfw, mjcore
import rllab.mujoco_py.mjconstants as C
from rllab.mujoco_py.mjlib import mjlib
from ctypes import byref
import ctypes
from ctypes import byref
from threading import Lock

mjCAT_ALL = 7
import mujoco_py
from mujoco_py import const as C
from mujoco_py import functions
from mujoco_py import MjRenderContext
from mujoco_py import MjSim
from mujoco_py.builder import cymj
from mujoco_py.generated.const import CAT_ALL
import glfw
# https://github.com/openai/mujoco-py/blob/6ac6ac203a875ef35b1505827264cadccbfd9f05/mujoco_py/builder.py#L61

PyMjrRect = cymj.PyMjrRect
PyMjvCamera = cymj.PyMjvCamera

class EmbeddedViewer(object):

class EmbeddedViewer(object):
def __init__(self):
self.last_render_time = 0
self.objects = mjcore.MJVOBJECTS()
self.cam = mjcore.MJVCAMERA()
self.vopt = mjcore.MJVOPTION()
self.ropt = mjcore.MJROPTION()
self.con = mjcore.MJRCONTEXT()
self.cam = PyMjvCamera()

self.running = False
self.speedtype = 1
self.window = None
@@ -35,52 +39,55 @@ def __init__(self):
def set_model(self, model):
self.model = model
if model:
self.data = model.data
self.sim = MjSim(self.model)
self.data = self.sim.data
self.con = MjRenderContext(self.sim)
self.scn = self.con.scn
self.vopt = self.con.vopt
else:
self.data = None
if self.running:
if model:
mjlib.mjr_makeContext(model.ptr, byref(self.con), 150)
functions.mjr_makeContext(self.model, self.con, 150)
else:
mjlib.mjr_makeContext(None, byref(self.con), 150)
functions.mjr_makeContext(None, self.con, 150)
self.render()
if model:
self.autoscale()

def autoscale(self):
self.cam.lookat[0] = self.model.stat.center[0]
self.cam.lookat[1] = self.model.stat.center[1]
self.cam.lookat[2] = self.model.stat.center[2]
self.cam.distance = 1.0 * self.model.stat.extent
self.cam.lookat[0] = self.sim.stat.center[0]
self.cam.lookat[1] = self.sim.stat.center[1]
self.cam.lookat[2] = self.sim.stat.center[2]
self.cam.distance = 1.0 * self.sim.stat.extent
self.cam.camid = -1
self.cam.trackbodyid = -1
if self.window:
width, height = glfw.get_framebuffer_size(self.window)
mjlib.mjv_updateCameraPose(byref(self.cam), width * 1.0 / height)
functions.mjv_moveCamera(self.model, None, width, height, self.scn,
self.cam)

def get_rect(self):
rect = mjcore.MJRRECT(0, 0, 0, 0)
rect = PyMjrRect()
rect.width, rect.height = glfw.get_framebuffer_size(self.window)
return rect

def record_frame(self, **kwargs):
self.frames.append({'pos': self.model.data.qpos, 'extra': kwargs})
self.frames.append({'pos': self.sim.data.qpos, 'extra': kwargs})

def clear_frames(self):
self.frames = []

def render(self):
rect = self.get_rect()
arr = (ctypes.c_double * 3)(0, 0, 0)
mjlib.mjv_makeGeoms(
self.model.ptr, self.data.ptr, byref(self.objects),
byref(self.vopt), mjCAT_ALL, 0, None, None,
ctypes.cast(arr, ctypes.POINTER(ctypes.c_double)))
mjlib.mjv_setCamera(self.model.ptr, self.data.ptr, byref(self.cam))
mjlib.mjv_updateCameraPose(
byref(self.cam), rect.width * 1.0 / rect.height)
mjlib.mjr_render(0, rect, byref(self.objects), byref(
self.ropt), byref(self.cam.pose), byref(self.con))
width, height = rect.width, rect.height

functions.mjv_addGeoms(self.model, self.data, self.vopt, None, CAT_ALL,
self.scn)
functions.mjv_updateCamera(self.model, self.data, self.scn)
functions.mjv_moveCamera(self.model, None, width / height,
height / height, self.scn, self.cam)
functions.mjr_render(rect, self.scn, self.con)

def render_internal(self):
if not self.data:
@@ -99,19 +106,13 @@ def start(self, window):

self.window = window

mjlib.mjv_makeObjects(byref(self.objects), 1000)

mjlib.mjv_defaultCamera(byref(self.cam))
mjlib.mjv_defaultOption(byref(self.vopt))
mjlib.mjr_defaultOption(byref(self.ropt))

mjlib.mjr_defaultContext(byref(self.con))
functions.mjv_makeScene(self.scn, 1000)

if self.model:
mjlib.mjr_makeContext(self.model.ptr, byref(self.con), 150)
functions.mjr_makeContext(self.model, self.con, 150)
self.autoscale()
else:
mjlib.mjr_makeContext(None, byref(self.con), 150)
functions.mjr_makeContext(None, self.con, 150)

def handle_mouse_move(self, window, xpos, ypos):

@@ -149,7 +150,8 @@ def handle_mouse_move(self, window, xpos, ypos):

self.gui_lock.acquire()

mjlib.mjv_moveCamera(action, dx, dy, byref(self.cam), width, height)
functions.mjv_moveCamera(self.model, action, dx / height, dy / height,
self.scn, self.cam)

self.gui_lock.release()

@@ -191,8 +193,9 @@ def handle_scroll(self, window, x_offset, y_offset):

# scroll
self.gui_lock.acquire()
mjlib.mjv_moveCamera(C.MOUSE_ZOOM, 0, (-20 * y_offset),
byref(self.cam), width, height)
functions.mjv_moveCamera(self.model, C.MOUSE_ZOOM, 0,
(-20 * y_offset) / height, self.scn, self.cam)

self.gui_lock.release()

def should_stop(self):
@@ -207,6 +210,6 @@ def loop_once(self):

def finish(self):
glfw.terminate()
mjlib.mjr_freeContext(byref(self.con))
mjlib.mjv_freeObjects(byref(self.objects))
functions.mjr_freeContext(self.con)
functions.mjv_freeScene(self.scn)
self.running = False