Refactor rllab from rllab.mujoco_py to mujoco_py (v1.5.0) (#91)
In this change, rllab is updated to the latest version of MuJoCo
(v1.5.0) by replacing all uses of rllab.mujoco_py in the code with
mujoco_py, then removing the rllab.mujoco_py folder. The
setup_mujoco.sh script is also updated to install MuJoCo v1.5.0 in
lieu of v1.3.0. Most changes are minor, such as replacing uses of
self.model with self.sim, because the new version of mujoco_py moves
simulation state and functionality from the model to a separate sim
object. The sim is instantiated by passing a reference to the model:
self.sim = MjSim(self.model).
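
For illustration, a minimal sketch of that migration pattern; the
model file name and observation fields are assumptions for the
example, not taken from this diff:

```python
import numpy as np
from mujoco_py import MjSim, load_model_from_path

# Load the MuJoCo model as before ("ant.xml" is a hypothetical path).
model = load_model_from_path("ant.xml")

# New in mujoco_py v1.5.0: runtime state lives on an MjSim built from
# the model, not on the model itself.
sim = MjSim(model)

# Old rllab.mujoco_py style:  model.data.qpos, model.data.qvel
# New mujoco_py style:        sim.data.qpos,   sim.data.qvel
obs = np.concatenate([sim.data.qpos.flat, sim.data.qvel.flat])
sim.step()  # advance the simulation one timestep
```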

However, some changes are major, most notably in embedded_viewer.py
and gather_env.py. Many functions in these files relied on deprecated
APIs that have no direct, easy counterpart in the new MuJoCo
documentation. Some of those functions were renamed, others were
moved under new or pre-existing classes in a different form, and
others appear to have been removed outright. These files have not yet
been rigorously tested, but no major functionality has broken so far,
so they can be used tentatively.
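
As a rough guide to the viewer changes, the sketch below mirrors the
new setup path visible in the embedded_viewer.py diff: free functions
move from rllab.mujoco_py's mjlib to mujoco_py.functions, and the
hand-built mjcore structs are replaced by an MjRenderContext whose
scene and options are reused. A sketch under those assumptions, not a
tested snippet (model path is hypothetical):

```python
from mujoco_py import MjRenderContext, MjSim, functions, load_model_from_path

model = load_model_from_path("ant.xml")  # hypothetical model file
sim = MjSim(model)                       # simulation now owns the state
con = MjRenderContext(sim)               # replaces mjcore.MJRCONTEXT
scn, vopt = con.scn, con.vopt            # scene/options hang off the context

# Renamed C-API wrappers, as used in the diff below:
#   mjlib.mjr_makeContext(model.ptr, byref(con), 150)
#       -> functions.mjr_makeContext(model, con, 150)
#   mjlib.mjr_render(0, rect, objects, ...)
#       -> functions.mjr_render(rect, scn, con)
#   mjlib.mjv_freeObjects(byref(objects))
#       -> functions.mjv_freeScene(scn)
```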
jonashen authored May 30, 2018
1 parent 49e7505 commit 2e55295
Showing 36 changed files with 509 additions and 9,769 deletions.
1 change: 1 addition & 0 deletions environment.yml
@@ -24,6 +24,7 @@ dependencies:
- PyOpenGL
- nose2
- pyzmq
- mujoco-py<1.50.2,>=1.50.1
- cached_property
- cloudpickle
- git+https://github.com/Lasagne/Lasagne.git@7992faa80fa5233a786e2582a605e854cea7d1cf#egg=Lasagne
4 changes: 2 additions & 2 deletions examples/trpo_cartpole.py
@@ -9,8 +9,7 @@
policy = GaussianMLPPolicy(
env_spec=env.spec,
# The neural network policy should have two hidden layers, each with 32 hidden units.
hidden_sizes=(32, 32)
)
hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)

@@ -23,5 +22,6 @@
n_itr=40,
discount=0.99,
step_size=0.01,
# plot=True
)
algo.train()
4 changes: 2 additions & 2 deletions examples/trpo_swimmer.py
@@ -9,8 +9,7 @@
policy = GaussianMLPPolicy(
env_spec=env.spec,
# The neural network policy should have two hidden layers, each with 32 hidden units.
hidden_sizes=(32, 32)
)
hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)

@@ -23,5 +22,6 @@
n_itr=40,
discount=0.99,
step_size=0.01,
# plot=True
)
algo.train()
51 changes: 26 additions & 25 deletions rllab/algos/batch_polopt.py
@@ -14,7 +14,8 @@ def __init__(self, algo):
self.algo = algo

def start_worker(self):
parallel_sampler.populate_task(self.algo.env, self.algo.policy, scope=self.algo.scope)
parallel_sampler.populate_task(
self.algo.env, self.algo.policy, scope=self.algo.scope)

def shutdown_worker(self):
parallel_sampler.terminate_task(scope=self.algo.scope)
@@ -30,7 +31,8 @@ def obtain_samples(self, itr):
if self.algo.whole_paths:
return paths
else:
paths_truncated = parallel_sampler.truncate_paths(paths, self.algo.batch_size)
paths_truncated = parallel_sampler.truncate_paths(
paths, self.algo.batch_size)
return paths_truncated


@@ -40,28 +42,26 @@ class BatchPolopt(RLAlgorithm):
This includes various policy gradient methods like vpg, npg, ppo, trpo, etc.
"""

def __init__(
self,
env,
policy,
baseline,
scope=None,
n_itr=500,
start_itr=0,
batch_size=5000,
max_path_length=500,
discount=0.99,
gae_lambda=1,
plot=False,
pause_for_plot=False,
center_adv=True,
positive_adv=False,
store_paths=False,
whole_paths=True,
sampler_cls=None,
sampler_args=None,
**kwargs
):
def __init__(self,
env,
policy,
baseline,
scope=None,
n_itr=500,
start_itr=0,
batch_size=5000,
max_path_length=500,
discount=0.99,
gae_lambda=1,
plot=False,
pause_for_plot=False,
center_adv=True,
positive_adv=False,
store_paths=False,
whole_paths=True,
sampler_cls=None,
sampler_args=None,
**kwargs):
"""
:param env: Environment
:param policy: Policy
@@ -107,6 +107,7 @@ def __init__(
def start_worker(self):
self.sampler.start_worker()
if self.plot:
plotter.init_worker()
plotter.init_plot(self.env, self.policy)

def shutdown_worker(self):
@@ -134,7 +135,7 @@ def train(self):
self.update_plot()
if self.pause_for_plot:
input("Plotting evaluation run: Press Enter to "
"continue...")
"continue...")

self.shutdown_worker()

15 changes: 8 additions & 7 deletions rllab/envs/mujoco/ant_env.py
@@ -20,9 +20,9 @@ def __init__(self, *args, **kwargs):

def get_current_obs(self):
return np.concatenate([
self.model.data.qpos.flat,
self.model.data.qvel.flat,
np.clip(self.model.data.cfrc_ext, -1, 1).flat,
self.sim.data.qpos.flat,
self.sim.data.qvel.flat,
np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
self.get_body_xmat("torso").flat,
self.get_body_com("torso"),
]).reshape(-1)
@@ -35,7 +35,7 @@ def step(self, action):
scaling = (ub - lb) * 0.5
ctrl_cost = 0.5 * 1e-2 * np.sum(np.square(action / scaling))
contact_cost = 0.5 * 1e-3 * np.sum(
np.square(np.clip(self.model.data.cfrc_ext, -1, 1))),
np.square(np.clip(self.sim.data.cfrc_ext, -1, 1))),
survive_reward = 0.05
reward = forward_reward - ctrl_cost - contact_cost + survive_reward
state = self._state
@@ -48,8 +48,10 @@
@overrides
def get_ori(self):
ori = [0, 1, 0, 0]
rot = self.model.data.qpos[self.__class__.ORI_IND:self.__class__.ORI_IND + 4] # take the quaternion
ori = q_mult(q_mult(rot, ori), q_inv(rot))[1:3] # project onto x-y plane
rot = self.sim.data.qpos[self.__class__.ORI_IND:self.__class__.ORI_IND
+ 4] # take the quaternion
ori = q_mult(q_mult(rot, ori),
q_inv(rot))[1:3] # project onto x-y plane
ori = math.atan2(ori[1], ori[0])
return ori

@@ -63,4 +65,3 @@ def log_diagnostics(self, paths):
logger.record_tabular('MaxForwardProgress', np.max(progs))
logger.record_tabular('MinForwardProgress', np.min(progs))
logger.record_tabular('StdForwardProgress', np.std(progs))

93 changes: 48 additions & 45 deletions rllab/envs/mujoco/gather/embedded_viewer.py
@@ -1,22 +1,26 @@
from rllab.mujoco_py import glfw, mjcore
import rllab.mujoco_py.mjconstants as C
from rllab.mujoco_py.mjlib import mjlib
from ctypes import byref
import ctypes
from ctypes import byref
from threading import Lock

mjCAT_ALL = 7
import mujoco_py
from mujoco_py import const as C
from mujoco_py import functions
from mujoco_py import MjRenderContext
from mujoco_py import MjSim
from mujoco_py.builder import cymj
from mujoco_py.generated.const import CAT_ALL
import glfw
# https://github.com/openai/mujoco-py/blob/6ac6ac203a875ef35b1505827264cadccbfd9f05/mujoco_py/builder.py#L61

PyMjrRect = cymj.PyMjrRect
PyMjvCamera = cymj.PyMjvCamera

class EmbeddedViewer(object):

class EmbeddedViewer(object):
def __init__(self):
self.last_render_time = 0
self.objects = mjcore.MJVOBJECTS()
self.cam = mjcore.MJVCAMERA()
self.vopt = mjcore.MJVOPTION()
self.ropt = mjcore.MJROPTION()
self.con = mjcore.MJRCONTEXT()
self.cam = PyMjvCamera()

self.running = False
self.speedtype = 1
self.window = None
@@ -35,52 +39,55 @@ def __init__(self):
def set_model(self, model):
self.model = model
if model:
self.data = model.data
self.sim = MjSim(self.model)
self.data = self.sim.data
self.con = MjRenderContext(self.sim)
self.scn = self.con.scn
self.vopt = self.con.vopt
else:
self.data = None
if self.running:
if model:
mjlib.mjr_makeContext(model.ptr, byref(self.con), 150)
functions.mjr_makeContext(self.model, self.con, 150)
else:
mjlib.mjr_makeContext(None, byref(self.con), 150)
functions.mjr_makeContext(None, self.con, 150)
self.render()
if model:
self.autoscale()

def autoscale(self):
self.cam.lookat[0] = self.model.stat.center[0]
self.cam.lookat[1] = self.model.stat.center[1]
self.cam.lookat[2] = self.model.stat.center[2]
self.cam.distance = 1.0 * self.model.stat.extent
self.cam.lookat[0] = self.sim.stat.center[0]
self.cam.lookat[1] = self.sim.stat.center[1]
self.cam.lookat[2] = self.sim.stat.center[2]
self.cam.distance = 1.0 * self.sim.stat.extent
self.cam.camid = -1
self.cam.trackbodyid = -1
if self.window:
width, height = glfw.get_framebuffer_size(self.window)
mjlib.mjv_updateCameraPose(byref(self.cam), width * 1.0 / height)
functions.mjv_moveCamera(self.model, None, width, height, self.scn,
self.cam)

def get_rect(self):
rect = mjcore.MJRRECT(0, 0, 0, 0)
rect = PyMjrRect()
rect.width, rect.height = glfw.get_framebuffer_size(self.window)
return rect

def record_frame(self, **kwargs):
self.frames.append({'pos': self.model.data.qpos, 'extra': kwargs})
self.frames.append({'pos': self.sim.data.qpos, 'extra': kwargs})

def clear_frames(self):
self.frames = []

def render(self):
rect = self.get_rect()
arr = (ctypes.c_double * 3)(0, 0, 0)
mjlib.mjv_makeGeoms(
self.model.ptr, self.data.ptr, byref(self.objects),
byref(self.vopt), mjCAT_ALL, 0, None, None,
ctypes.cast(arr, ctypes.POINTER(ctypes.c_double)))
mjlib.mjv_setCamera(self.model.ptr, self.data.ptr, byref(self.cam))
mjlib.mjv_updateCameraPose(
byref(self.cam), rect.width * 1.0 / rect.height)
mjlib.mjr_render(0, rect, byref(self.objects), byref(
self.ropt), byref(self.cam.pose), byref(self.con))
width, height = rect.width, rect.height

functions.mjv_addGeoms(self.model, self.data, self.vopt, None, CAT_ALL,
self.scn)
functions.mjv_updateCamera(self.model, self.data, self.scn)
functions.mjv_moveCamera(self.model, None, width / height,
height / height, self.scn, self.cam)
functions.mjr_render(rect, self.scn, self.con)

def render_internal(self):
if not self.data:
@@ -99,19 +106,13 @@ def start(self, window):

self.window = window

mjlib.mjv_makeObjects(byref(self.objects), 1000)

mjlib.mjv_defaultCamera(byref(self.cam))
mjlib.mjv_defaultOption(byref(self.vopt))
mjlib.mjr_defaultOption(byref(self.ropt))

mjlib.mjr_defaultContext(byref(self.con))
functions.mjv_makeScene(self.scn, 1000)

if self.model:
mjlib.mjr_makeContext(self.model.ptr, byref(self.con), 150)
functions.mjr_makeContext(self.model, self.con, 150)
self.autoscale()
else:
mjlib.mjr_makeContext(None, byref(self.con), 150)
functions.mjr_makeContext(None, self.con, 150)

def handle_mouse_move(self, window, xpos, ypos):

@@ -149,7 +150,8 @@ def handle_mouse_move(self, window, xpos, ypos):

self.gui_lock.acquire()

mjlib.mjv_moveCamera(action, dx, dy, byref(self.cam), width, height)
functions.mjv_moveCamera(self.model, action, dx / height, dy / height,
self.scn, self.cam)

self.gui_lock.release()

@@ -191,8 +193,9 @@ def handle_scroll(self, window, x_offset, y_offset):

# scroll
self.gui_lock.acquire()
mjlib.mjv_moveCamera(C.MOUSE_ZOOM, 0, (-20 * y_offset),
byref(self.cam), width, height)
functions.mjv_moveCamera(self.model, C.MOUSE_ZOOM, 0,
(-20 * y_offset) / height, self.scn, self.cam)

self.gui_lock.release()

def should_stop(self):
@@ -207,6 +210,6 @@ def loop_once(self):

def finish(self):
glfw.terminate()
mjlib.mjr_freeContext(byref(self.con))
mjlib.mjv_freeObjects(byref(self.objects))
functions.mjr_freeContext(self.con)
functions.mjv_freeScene(self.scn)
self.running = False