Generalize env wrapper #302

Merged · 10 commits · Apr 19, 2019

1 change: 0 additions & 1 deletion environment.yml
@@ -28,7 +28,6 @@ dependencies:
- pydash=4.2.1=py_0
- pytest-cov=2.5.1=py36_0
- pytest-timeout=1.2.1=py_0
- pytest-xdist=1.26.1=py36_0
- pytest=3.6.0=py36_0
- python=3.6.4=0
- pyyaml=3.12=py36_1
1 change: 0 additions & 1 deletion setup.py
@@ -4,7 +4,6 @@
from setuptools.command.test import test as TestCommand

test_args = [
'-n 2',
'--verbose',
'--capture=sys',
'--log-level=INFO',
2 changes: 1 addition & 1 deletion slm_lab/agent/algorithm/base.py
@@ -49,8 +49,8 @@ def post_init_nets(self):
'''
assert hasattr(self, 'net_names')
if util.in_eval_lab_modes():
logger.info(f'Loaded algorithm models for lab_mode: {util.get_lab_mode()}')
self.load()
logger.info(f'Loaded algorithm models for lab_mode: {util.get_lab_mode()}')
else:
logger.info(f'Initialized algorithm models for lab_mode: {util.get_lab_mode()}')

17 changes: 5 additions & 12 deletions slm_lab/env/openai.py
@@ -1,5 +1,5 @@
from slm_lab.env.base import BaseEnv, ENV_DATA_NAMES
from slm_lab.env.wrapper import wrap_atari, wrap_deepmind
from slm_lab.env.wrapper import make_gym_env
from slm_lab.env.registration import register_env
from slm_lab.lib import logger, util
from slm_lab.lib.decorator import lab_api
@@ -27,7 +27,7 @@ class OpenAIEnv(BaseEnv):
"env": [{
"name": "CartPole-v0",
"max_t": null,
"max_tick": 150,
"max_tick": 10000,
}],
'''

@@ -38,16 +38,9 @@ def __init__(self, spec, e=None, env_space=None):
register_env(spec)
except Exception as e:
pass
env = gym.make(self.name)
if 'NoFrameskip' in env.spec.id: # for Atari
stack_len = ps.get(spec, 'agent.0.memory.stack_len')
env = wrap_atari(env)
if util.get_lab_mode() == 'eval':
env = wrap_deepmind(env, stack_len=stack_len, clip_rewards=False, episode_life=False)
else:
# no reward clipping in training since Atari Memory classes handle it
env = wrap_deepmind(env, stack_len=stack_len, clip_rewards=False)
self.u_env = env
seed = ps.get(spec, 'meta.random_seed')
stack_len = ps.get(spec, 'agent.0.memory.stack_len')
self.u_env = make_gym_env(self.name, seed, stack_len)
self._set_attr_from_u_env(self.u_env)
self.max_t = self.max_t or self.u_env.spec.max_episode_steps
assert self.max_t is not None
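
The rewritten __init__ above now reads the seed and frame-stack length straight out of the spec with pydash path lookups before handing env construction to make_gym_env. A minimal sketch of those lookups; the spec fragment below is hypothetical and only shows the paths the diff actually reads:

import pydash as ps

# hypothetical, reduced spec; only the paths read by OpenAIEnv.__init__ are shown
spec = {
    'meta': {'random_seed': 1552968},
    'agent': [{'memory': {'stack_len': 4}}],
    'env': [{'name': 'PongNoFrameskip-v4', 'max_t': None, 'max_tick': 10000}],
}
seed = ps.get(spec, 'meta.random_seed')                # 1552968
stack_len = ps.get(spec, 'agent.0.memory.stack_len')   # 4; the '0' indexes into the agent list
# self.u_env = make_gym_env(self.name, seed, stack_len) then applies the wrappers; see wrapper.py below
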
2 changes: 2 additions & 0 deletions slm_lab/env/unity.py
@@ -62,6 +62,8 @@ def __init__(self, spec, e=None, env_space=None):
super(UnityEnv, self).__init__(spec, e, env_space)
util.set_attr(self, self.env_spec, ['unity'])
worker_id = int(f'{os.getpid()}{self.e+int(ps.unique_id())}'[-4:])
seed = ps.get(spec, 'meta.random_seed')
# TODO update Unity ml-agents to use seed=seed below
self.u_env = UnityEnvironment(file_name=get_env_path(self.name), worker_id=worker_id)
self.patch_gym_spaces(self.u_env)
self._set_attr_from_u_env(self.u_env)
42 changes: 30 additions & 12 deletions slm_lab/env/wrapper.py
@@ -1,6 +1,6 @@
# Module of custom Atari wrappers modified from OpenAI baselines (MIT)
# these don't come with Gym but are crucial for Atari to work
# https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py
# Generic env wrappers, including for Atari/images
# They don't come with Gym but are crucial for Atari to work
# Many were adapted from OpenAI Baselines (MIT) https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py
from collections import deque
from gym import spaces
from slm_lab.lib import util
@@ -10,7 +10,8 @@

class NoopResetEnv(gym.Wrapper):
def __init__(self, env, noop_max=30):
'''Sample initial states by taking random number of no-ops on reset.
'''
Sample initial states by taking random number of no-ops on reset.
No-op is assumed to be action 0.
'''
gym.Wrapper.__init__(self, env)
@@ -25,7 +26,7 @@ def reset(self, **kwargs):
if self.override_num_noops is not None:
noops = self.override_num_noops
else:
noops = self.unwrapped.np_random.randint(1, self.noop_max + 1) # pylint: disable=E1101
noops = self.unwrapped.np_random.randint(1, self.noop_max + 1)
assert noops > 0
obs = None
for _ in range(noops):
@@ -61,7 +62,8 @@ def step(self, ac):

class EpisodicLifeEnv(gym.Wrapper):
def __init__(self, env):
'''Make end-of-life == end-of-episode, but only reset on true game over.
'''
Make end-of-life == end-of-episode, but only reset on true game over.
Done by DeepMind for the DQN and co. since it helps value estimation.
'''
gym.Wrapper.__init__(self, env)
@@ -83,7 +85,8 @@ def step(self, action):
return obs, reward, done, info

def reset(self, **kwargs):
'''Reset only when lives are exhausted.
'''
Reset only when lives are exhausted.
This way all states are still reachable even though lives are episodic,
and the learner need not know about any of this behind-the-scenes.
'''
@@ -97,9 +100,7 @@ def reset(self, **kwargs):


class MaxAndSkipEnv(gym.Wrapper):
'''
OpenAI max-skipframe wrapper from baselines (not available from gym itself)
'''
'''OpenAI max-skipframe wrapper used for a NoFrameskip env'''

def __init__(self, env, skip=4):
'''Return only every `skip`-th frame'''
@@ -141,7 +142,8 @@ def __init__(self, env):
Apply image preprocessing:
- grayscale
- downsize to 84x84
- transform shape from w,h,c to PyTorch format c,h,w '''
- transpose shape from w,h,c to PyTorch format c,h,w
'''
gym.ObservationWrapper.__init__(self, env)
self.width = 84
self.height = 84
@@ -157,7 +159,8 @@ def observation(self, frame):

class LazyFrames(object):
def __init__(self, frames):
'''This object ensures that common frames between the observations are only stored once.
'''
This object ensures that common frames between the observations are only stored once.
It exists purely to optimize memory usage which can be huge for DQN's 1M frames replay buffers.
This object should only be converted to numpy array before being passed to the model.
'''
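
The deduplication the docstring describes is worth spelling out: consecutive stacked observations share stack_len - 1 frames, so storing frame references instead of copies keeps each raw frame in memory once, and the conversion to a real array is deferred to model-input time. A self-contained sketch of that idea (not the repo's class, just the principle):

import numpy as np

# five raw 84x84 grayscale frames
frames = [np.random.randint(0, 255, (84, 84, 1), dtype=np.uint8) for _ in range(5)]
obs_t = frames[0:4]    # references to frames 0..3
obs_t1 = frames[1:5]   # references to frames 1..4; frames 1..3 are shared, not copied
model_input = np.concatenate(obs_t1, axis=-1)   # materialize to an ndarray only when feeding the model
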
@@ -229,3 +232,18 @@ def wrap_deepmind(env, episode_life=True, clip_rewards=True, stack_len=None):
if stack_len is not None:
env = FrameStack(env, stack_len)
return env


def make_gym_env(name, seed=None, stack_len=None):
'''General method to create any Gym env; auto wraps Atari'''
env = gym.make(name)
if seed is not None:
env.seed(seed)
if 'NoFrameskip' in env.spec.id: # for Atari
env = wrap_atari(env)
# no reward clipping to allow monitoring; Atari memory clips it
if util.get_lab_mode() == 'eval':
env = wrap_deepmind(env, stack_len=stack_len, clip_rewards=False, episode_life=False)
else:
env = wrap_deepmind(env, stack_len=stack_len, clip_rewards=False, episode_life=True)
return env
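
A usage sketch of the new factory. The env ids and seed are illustrative; the Atari case assumes gym[atari] is installed and that a lab_mode is set, since make_gym_env branches on util.get_lab_mode() before choosing the DeepMind wrapper options:

from slm_lab.env.wrapper import make_gym_env

# classic control: only seeding is applied, no Atari wrappers
env = make_gym_env('CartPole-v0', seed=42)
obs = env.reset()

# Atari NoFrameskip: wrap_atari + wrap_deepmind are applied automatically,
# with 4-frame stacking and no reward clipping (the Atari memory clips rewards instead)
atari_env = make_gym_env('PongNoFrameskip-v4', seed=42, stack_len=4)
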
15 changes: 8 additions & 7 deletions slm_lab/experiment/control.py
@@ -27,16 +27,16 @@ def __init__(self, spec, info_space, global_nets=None):
self.spec = spec
self.info_space = info_space
self.index = self.info_space.get('session')
util.set_random_seed(self.info_space.get('trial'), self.index, self.spec)
util.set_cuda_id(self.spec, self.info_space)
util.set_logger(self.spec, self.info_space, logger, 'session')
analysis.save_spec(spec, info_space, unit='session')
self.data = None

# init singleton agent and env
self.env = make_env(self.spec)
util.set_rand_seed(self.info_space.get_random_seed(), self.env)
with util.ctx_lab_mode('eval'): # env for eval
self.eval_env = make_env(self.spec)
util.set_rand_seed(self.info_space.get_random_seed(), self.eval_env)
util.try_set_cuda_id(self.spec, self.info_space)
body = Body(self.env, self.spec['agent'])
self.agent = Agent(self.spec, self.info_space, body=body, global_nets=global_nets)
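
The ordering in the new __init__ matters: set_random_seed runs before make_env because it writes the generated seed into spec['meta']['random_seed'], which OpenAIEnv (and in turn make_gym_env) read back when constructing the env. A minimal sketch of that hand-off, using a reduced, hypothetical spec:

from slm_lab.lib import util

spec = {'meta': {}}   # hypothetical, stripped-down spec
seed = util.set_random_seed(trial=0, session=0, spec=spec)
assert spec['meta']['random_seed'] == seed
# make_env(spec) would now build envs seeded with spec['meta']['random_seed']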

@@ -121,14 +121,15 @@ def __init__(self, spec, info_space, global_nets=None):
self.spec = spec
self.info_space = info_space
self.index = self.info_space.get('session')
util.set_random_seed(self.info_space.get('trial'), self.index, self.spec)
util.set_cuda_id(self.spec, self.info_space)
util.set_logger(self.spec, self.info_space, logger, 'session')
analysis.save_spec(spec, info_space, unit='session')
self.data = None

self.aeb_space = AEBSpace(self.spec, self.info_space)
self.env_space = EnvSpace(self.spec, self.aeb_space)
self.aeb_space.init_body_space()
util.set_rand_seed(self.info_space.get_random_seed(), self.env_space)
util.try_set_cuda_id(self.spec, self.info_space)
self.agent_space = AgentSpace(self.spec, self.aeb_space, global_nets)

logger.info(util.self_desc(self))
@@ -203,10 +204,10 @@ def __init__(self, spec, info_space):
self.index = self.info_space.get('trial')
info_space.set('session', None) # Session starts anew for new trial
util.set_logger(self.spec, self.info_space, logger, 'trial')
analysis.save_spec(spec, info_space, unit='trial')
self.session_data_dict = {}
self.data = None

analysis.save_spec(spec, info_space, unit='trial')
self.is_singleton = spec_util.is_singleton(spec) # singleton mode as opposed to multi-agent-env space
self.SessionClass = Session if self.is_singleton else SpaceSession
self.mp_runner = init_run_session if self.is_singleton else init_run_space_session
@@ -298,9 +299,9 @@ def __init__(self, spec, info_space):
self.info_space = info_space
self.index = self.info_space.get('experiment')
util.set_logger(self.spec, self.info_space, logger, 'trial')
analysis.save_spec(spec, info_space, unit='experiment')
self.trial_data_dict = {}
self.data = None
analysis.save_spec(spec, info_space, unit='experiment')
SearchClass = getattr(search, spec['meta'].get('search'))
self.search = SearchClass(self)
logger.info(f'Initialized experiment {self.index}')
4 changes: 0 additions & 4 deletions slm_lab/experiment/monitor.py
@@ -488,7 +488,3 @@ def get(self, axis):
def set(self, axis, val):
self.coor[axis] = val
return self.coor[axis]

def get_random_seed(self):
'''Standard method to get random seed for a session'''
return int(1e5 * (self.get('trial') or 0) + 1e3 * (self.get('session') or 0) + time.time())
26 changes: 12 additions & 14 deletions slm_lab/lib/util.py
@@ -12,6 +12,7 @@
import regex as re
import subprocess
import sys
import time
import torch
import torch.multiprocessing as mp
import ujson
@@ -564,25 +565,22 @@ def set_attr(obj, attr_dict, keys=None):
return obj


def set_rand_seed(random_seed, env_space):
'''Set all the module random seeds'''
torch.cuda.manual_seed_all(random_seed)
torch.manual_seed(random_seed)
np.random.seed(random_seed)
envs = env_space.envs if hasattr(env_space, 'envs') else [env_space]
for env in envs:
try:
env.u_env.seed(random_seed)
except Exception as e:
pass


def set_logger(spec, info_space, logger, unit=None):
'''Set the logger for a lab unit give its spec and info_space'''
os.environ['PREPATH'] = get_prepath(spec, info_space, unit=unit)
reload(logger) # to set session-specific logger


def set_random_seed(trial, session, spec):
'''Generate and set random seed for relevant modules, and record it in spec.meta.random_seed'''
random_seed = int(1e5 * (trial or 0) + 1e3 * (session or 0) + time.time())
torch.cuda.manual_seed_all(random_seed)
torch.manual_seed(random_seed)
np.random.seed(random_seed)
spec['meta']['random_seed'] = random_seed
return random_seed
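
The seed layout packs the trial id into the 1e5 place and the session id into the 1e3 place on top of the current epoch seconds, so sessions of the same trial started at the same moment still draw distinct seeds. A quick illustration of the arithmetic (values are made up):

import time

trial, session = 5, 2
seed = int(1e5 * trial + 1e3 * session + time.time())
# e.g. with time.time() == 1555555555 this yields 1556057555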


def _sizeof(obj, seen=None):
'''Recursively finds size of objects'''
size = sys.getsizeof(obj)
@@ -654,7 +652,7 @@ def to_torch_batch(batch, device, is_episodic):
return batch


def try_set_cuda_id(spec, info_space):
def set_cuda_id(spec, info_space):
'''Use trial and session id to hash and modulo cuda device count for a cuda_id to maximize device usage. Sets the net_spec for the base Net class to pick up.'''
# Don't trigger any cuda call if not using GPU. Otherwise will break multiprocessing on machines with CUDA.
# see issues https://github.com/pytorch/pytorch/issues/334 https://github.com/pytorch/pytorch/issues/3491 https://github.com/pytorch/pytorch/issues/9996
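
The hunk ends before the body of set_cuda_id, so the exact computation is not shown here; the docstring only promises a hash of trial and session taken modulo the CUDA device count. A hedged illustration of that general idea (hypothetical, not the repo's implementation):

import torch

def illustrative_cuda_id(trial, session):
    '''Round-robin GPU assignment: hash the (trial, session) pair and wrap by device count.'''
    if not torch.cuda.is_available():
        return None   # no device to assign on CPU-only machines
    return hash((trial, session)) % torch.cuda.device_count()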