Update due to the latest introduction of Gym 0.25.0 #369

Merged
12 commits · Jul 21, 2022
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -35,8 +35,8 @@ jobs:
pip install matplotlib
pip install dill
pip install stable-baselines3
pip install gym
pip install PettingZoo
pip install gym==0.25.0
pip install pyglet
pip install pymunk
pip install pygame
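The CI now pins gym==0.25.0 rather than pulling the latest release, since the 0.25 series changes the step and reset signatures that the wrappers depend on. As an illustration only (not part of this PR), a module could guard against an incompatible installation at import time; the snippet below is a minimal sketch that assumes gym is importable and exposes a plain version string:

import gym

# Minimal sketch: refuse to run against a Gym release older than 0.25.0
major, minor = (int(part) for part in gym.__version__.split('.')[:2])
if (major, minor) < (0, 25):
    raise RuntimeError(f'gym {gym.__version__} found, but gym>=0.25.0 is required here')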
@@ -15,10 +15,11 @@
## -- 2021-11-15 1.2.0 DA Refactoring
## -- 2021-11-16 1.2.1 DA Added explicit scenario reset with constant seeding
## -- 2021-12-03 1.2.2 DA Refactoring
## -- 2022-07-20 1.2.3 SY Update due to the latest introduction of Gym 0.25
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.2.2 (2021-12-03)
Ver. 1.2.3 (2022-07-20)

This module shows how to run an own policy inside the standard agent model with an OpenAI Gym environment using
the fhswf_at_ml framework.
@@ -72,7 +73,7 @@ class MyScenario (RLScenario):

def _setup(self, p_mode, p_ada, p_logging):
# 1 Setup environment
gym_env = gym.make('CartPole-v1')
gym_env = gym.make('CartPole-v1', new_step_api=True, render_mode=None)
self._env = WrEnvGYM2MLPro(gym_env, p_logging=p_logging)

# 2 Setup standard single-agent with own policy
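All of the howto scripts now call gym.make('CartPole-v1', new_step_api=True, render_mode=None). In Gym 0.25 the new_step_api flag opts into the five-element step result (observation, reward, terminated, truncated, info) in place of the legacy four-element (observation, reward, done, info). The standalone sketch below contrasts the two purely as an illustration; it assumes gym==0.25.0 with the classic-control environments installed and is not code from this PR:

import gym

# Legacy API (default in 0.25): step() returns four values
env_old = gym.make('CartPole-v1', new_step_api=False, render_mode=None)
obs = env_old.reset()
obs, reward, done, info = env_old.step(env_old.action_space.sample())
env_old.close()

# New API (opt-in via new_step_api=True): step() returns five values,
# splitting 'done' into 'terminated' (task end) and 'truncated' (time limit)
env_new = gym.make('CartPole-v1', new_step_api=True, render_mode=None)
obs = env_new.reset()
obs, reward, terminated, truncated, info = env_new.step(env_new.action_space.sample())
env_new.close()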
@@ -18,10 +18,11 @@
## -- 2021-11-15 1.3.0 DA Refactoring
## -- 2021-12-03 1.3.1 DA Refactoring
## -- 2021-12-07 1.3.2 DA Refactoring
## -- 2022-07-20 1.3.3 SY Update due to the latest introduction of Gym 0.25
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.3.2 (2021-12-07)
Ver. 1.3.3 (2022-07-20)

This module shows how to train an agent with a custom policy inside on an OpenAI Gym environment using the fhswf_at_ml framework.
"""
@@ -76,7 +77,7 @@ class MyScenario (RLScenario):

def _setup(self, p_mode, p_ada, p_logging):
# 1 Setup environment
gym_env = gym.make('CartPole-v1')
gym_env = gym.make('CartPole-v1', new_step_api=True, render_mode=None)
self._env = WrEnvGYM2MLPro(gym_env, p_logging=p_logging)

# 2 Setup and return standard single-agent with own policy
@@ -14,10 +14,11 @@
## -- 2021-12-03 1.0.5 DA Refactoring
## -- 2021-12-07 1.0.6 DA Refactoring
## -- 2022-02-25 1.0.7 SY Refactoring due to auto generated ID in class Dimension
## -- 2022-07-20 1.0.8 SY Update due to the latest introduction of Gym 0.25
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.0.7 (2022-02-25)
Ver. 1.0.8 (2022-07-20)

This module shows how to train with SB3 Wrapper for On-Policy Algorithm
"""
@@ -38,7 +39,7 @@ class MyScenario(RLScenario):
def _setup(self, p_mode, p_ada, p_logging):
# 1 Setup environment
# self._env = RobotHTM(p_logging=False)
gym_env = gym.make('CartPole-v1')
gym_env = gym.make('CartPole-v1', new_step_api=True, render_mode=None)
self._env = WrEnvGYM2MLPro(gym_env, p_logging=p_logging)

# 2 Instantiate Policy From SB3
@@ -8,10 +8,11 @@
## -- 2022-01-20 0.0.0 MRD Creation
## -- 2022-01-20 1.0.0 MRD Released first version
## -- 2022-05-17 1.0.1 DA Just a little comment maintenance
## -- 2022-07-20 1.0.2 SY Update due to the latest introduction of Gym 0.25
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.0.1 (2022-05-17)
Ver. 1.0.2 (2022-07-20)

This module shows how to train with SB3 Wrapper and stagnation detection
"""
@@ -31,7 +32,7 @@ class MyScenario(RLScenario):

def _setup(self, p_mode, p_ada, p_logging):
# 1 Setup environment
gym_env = gym.make('CartPole-v1')
gym_env = gym.make('CartPole-v1', new_step_api=True, render_mode=None)
self._env = WrEnvGYM2MLPro(gym_env, p_logging=p_logging)

# 2 Instantiate PPO Policy from SB3
@@ -14,10 +14,11 @@
## -- 2021-12-24 1.0.5 DA Replaced separator in log line by Training.C_LOG_SEPARATOR
## -- 2022-02-27 1.0.6 SY Refactoring due to auto generated ID in class Dimension
## -- 2022-03-21 1.0.7 WB Rewrite module description
## -- 2022-07-20 1.0.8 SY Update due to the latest introduction of Gym 0.25
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.0.7 (2022-03-21)
Ver. 1.0.8 (2022-07-20)

This module compares the native and wrapped implementation of the SB3 Policy on an
environment.
@@ -77,7 +78,7 @@ def _reset(self, p_seed=None):
self._set_state(state)

# 1 Setup environment
gym_env = gym.make('CartPole-v1')
gym_env = gym.make('CartPole-v1', new_step_api=True, render_mode=None)
gym_env.seed(1)
# self._env = mlpro_env
self._env = CustomWrapperFixedSeed(gym_env, p_logging=p_logging)
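This comparison script still seeds the raw environment through gym_env.seed(1), which Gym keeps for backward compatibility but has deprecated in favour of passing the seed to reset(). The snippet below sketches the reset-based seeding style as an aside; it assumes gym==0.25.0 and is not a change made by this PR:

import gym

env = gym.make('CartPole-v1', new_step_api=True, render_mode=None)

# Gym 0.25 style: seed the RNG through reset() instead of the deprecated env.seed()
obs = env.reset(seed=1)
env.close()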
@@ -8,10 +8,11 @@
## -- 2022-01-11 0.0.0 MRD Creation
## -- 2022-01-18 1.0.0 MRD Released first version
## -- 2022-02-27 1.0.1 SY Refactoring due to auto generated ID in class Dimension
## -- 2022-07-20 1.0.2 SY Update due to the latest introduction of Gym 0.25
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.0.1 (2022-02-27)
Ver. 1.0.2 (2022-07-20)

This module shows comparison between native and wrapped SB3 policy (Off-policy).
"""
@@ -69,7 +70,7 @@ def _reset(self, p_seed=None):
self._set_state(state)

# 1 Setup environment
gym_env = gym.make('CartPole-v1')
gym_env = gym.make('CartPole-v1', new_step_api=True, render_mode=None)
gym_env.seed(2)
self._env = CustomWrapperFixedSeed(gym_env, p_logging=p_logging)

@@ -10,10 +10,11 @@
## -- 2022-05-19 1.0.1 MRD Re-use the agent not for the re-training process
## -- Remove commenting and numbering
## -- 2022-05-19 1.0.2 MRD Re-add the commenting and reformat the numbering in comment
## -- 2022-07-20 1.0.3 SY Update due to the latest introduction of Gym 0.25
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.0.2 (2022-05-19)
Ver. 1.0.3 (2022-07-20)

This module shows how to train a single agent and load it again to do some extra cycles
"""
@@ -33,7 +34,7 @@ class MyScenario(RLScenario):

def _setup(self, p_mode, p_ada, p_logging):
# 1.1 Setup environment
gym_env = gym.make('CartPole-v1')
gym_env = gym.make('CartPole-v1', new_step_api=True, render_mode=None)
self._env = WrEnvGYM2MLPro(gym_env, p_logging=p_logging)

# 1.2 Setup Policy From SB3
6 changes: 4 additions & 2 deletions src/mlpro/rl/pool/envs/multicartpole.py
@@ -18,10 +18,11 @@
## -- 2021-12-21 1.2.4 DA Class MultiCartPole: renamed method reset() to _reset()
## -- 2022-02-25 1.2.5 SY Refactoring due to auto generated ID in class Dimension
## -- 2022-04-06 1.2.6 LSB Freezing single environment after done returns true
## -- 2022-07-20 1.2.7 SY Update due to the latest introduction of Gym 0.25
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.2.6 (2022-04-06)
Ver. 1.2.7 (2022-07-20)

This module provides an environment with multivariate state and action spaces based on the
OpenAI Gym environment 'CartPole-v1'.
@@ -67,7 +68,7 @@ def __init__(self,
action_space_id = self._action_space.get_dim_ids()
state_space_env = self._state_space.spawn([state_space_id[i*4], state_space_id[i*4+1], state_space_id[i*4+2], state_space_id[i*4+3]])
action_space_env = self._action_space.spawn([action_space_id[i]])
env = WrEnvGYM2MLPro(gym.make('CartPole-v1'), state_space_env, action_space_env, p_logging=p_logging)
env_make = gym.make('CartPole-v1', new_step_api=True, render_mode=None)
env = WrEnvGYM2MLPro(env_make, state_space_env, action_space_env, p_logging=p_logging)
env.C_NAME = env.C_NAME + ' (' + str(i) + ')'
self._envs.append(env)

68 changes: 57 additions & 11 deletions src/mlpro/wrappers/openai_gym.py
@@ -37,12 +37,15 @@
## -- 2022-02-27 1.3.4 SY Refactoring due to auto generated ID in class Dimension
## -- 2022-03-21 1.3.5 MRD Added new parameter to the WrEnvMLPro2GYM.reset()
## -- 2022-05-19 1.3.6 SY Gym 0.23: Replace function env.seed(seed) to env.reset(seed=seed)
## -- 2022-07-20 1.4.0 SY Update due to the latest introduction of Gym 0.25
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.3.6 (2022-05-19)
Ver. 1.4.0 (2022-07-20)

This module provides wrapper classes for reinforcement learning tasks.
These wrappers have been updated to follow Gym version 0.25.0.
Previous Gym versions remain compatible for now, but this support will be removed in the future.
"""

import gym
@@ -147,14 +150,37 @@ def simulate_reaction(self, p_state: State, p_action: Action) -> State:

# 2 Process step of Gym environment
try:
observation, reward_gym, done, info = self._gym_env.step(action_gym)
# For gym version 0.25 or above
if self._gym_env.new_step_api:
try:
observation, reward_gym, termination, truncation, info = self._gym_env.step(action_gym)
except:
observation, reward_gym, termination, truncation, info = self._gym_env.step(np.atleast_1d(action_gym))
else:
try:
observation, reward_gym, done, info = self._gym_env.step(action_gym)
except:
observation, reward_gym, done, info = self._gym_env.step(np.atleast_1d(action_gym))
except:
observation, reward_gym, done, info = self._gym_env.step(np.atleast_1d(action_gym))

# For gym versions below 0.25 (this will be removed soon)
self.log(self.C_LOG_TYPE_W, 'Please upgrade your gym version to 0.25.0 or above. This behaviour will be removed in the near future.')
try:
observation, reward_gym, done, info = self._gym_env.step(action_gym)
except:
observation, reward_gym, done, info = self._gym_env.step(np.atleast_1d(action_gym))

obs = DataObject(observation)

# 3 Create state object from Gym observation
state = State(self._state_space, p_terminal=done)
try:
# For gym version 0.25 or above
if self._gym_env.new_step_api:
state = State(self._state_space, p_terminal=termination, p_timeout=truncation)
else:
state = State(self._state_space, p_terminal=done)
except:
# For gym versions below 0.25 (this will be removed soon)
state = State(self._state_space, p_terminal=done)
state.set_values(obs.get_data())

# 4 Create reward object
@@ -205,7 +231,8 @@ class WrEnvMLPro2GYM(gym.Env):
metadata = {'render.modes': ['human']}

## -------------------------------------------------------------------------------------------------
def __init__(self, p_mlpro_env, p_state_space: MSpace = None, p_action_space: MSpace = None):
def __init__(self, p_mlpro_env, p_state_space: MSpace = None, p_action_space: MSpace = None, p_new_step_api: bool = False,
p_render_mode: str = None):
"""
Parameters:
p_mlpro_env MLPro's Environment object
@@ -227,6 +254,13 @@ def __init__(self, p_mlpro_env, p_state_space: MSpace = None, p_action_space: MS
else:
self.action_space = self.recognize_space(self._mlpro_env.get_action_space())

if p_render_mode is not None:
self.render_mode = p_render_mode
else:
self.render_mode = 'human'

self.new_step_api = p_new_step_api

self.first_refresh = True

## -------------------------------------------------------------------------------------------------
@@ -281,22 +315,34 @@ def step(self, action):
obs = np.array(self._mlpro_env.get_state().get_values())

state = self._mlpro_env.get_state()
done = state.get_terminal()
terminated = state.get_terminal()
truncated = state.get_timeout()

info = {}
info["TimeLimit.truncated"] = state.get_timeout()

return obs, reward.get_overall_reward(), done, info
if self.new_step_api:
return obs, reward.get_overall_reward(), terminated, truncated, info
else:
info["TimeLimit.truncated"] = state.get_timeout()
return obs, reward.get_overall_reward(), terminated, info

## -------------------------------------------------------------------------------------------------
def reset(self, seed=None, options=None):
def reset(self, seed=None, return_info=False, options=None):
# We need the following line to seed self.np_random
super().reset(seed=seed)

self._mlpro_env.reset(seed)
obs = None
if isinstance(self.observation_space, gym.spaces.Box):
obs = np.array(self._mlpro_env.get_state().get_values(), dtype=np.float32)
else:
obs = np.array(self._mlpro_env.get_state().get_values())
return obs

if return_info:
info = {}
return obs, info
else:
return obs

## -------------------------------------------------------------------------------------------------
def render(self, mode='human'):
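The wrapper now branches on new_step_api in both directions: WrEnvGYM2MLPro maps the Gym 0.25 pair (termination, truncation) onto MLPro's p_terminal and p_timeout state flags, while WrEnvMLPro2GYM.step() returns either the five-element or the legacy four-element tuple. For readers porting their own code, the helper below sketches one common way to collapse the new tuple into the old one; the combined-done rule and the TimeLimit.truncated info key follow widespread Gym conventions and are an assumption here, with the wrapper's own step() above remaining the authoritative behaviour:

from typing import Any, Dict, Tuple

def to_old_step_api(obs: Any, reward: float, terminated: bool, truncated: bool,
                    info: Dict[str, Any]) -> Tuple[Any, float, bool, Dict[str, Any]]:
    """Collapse a Gym 0.25 five-element step result into the legacy four-element form."""
    done = terminated or truncated
    info = dict(info)
    # The legacy API reports time-limit truncation through the info dict
    info['TimeLimit.truncated'] = truncated and not terminated
    return obs, reward, done, info

# A truncated-but-not-terminated step becomes done=True with the info flag set
print(to_old_step_api([0.0, 0.0, 0.0, 0.0], 1.0, False, True, {}))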
7 changes: 4 additions & 3 deletions src/mlpro/wrappers/pettingzoo.py
@@ -34,10 +34,11 @@
## -- 2022-03-21 1.3.5 SY Refactoring due to PettingZoo version 1.17.0
## -- 2022-05-20 1.3.6 SY Refactoring: Action space boundaries in WrEnvPZOO2MLPro
## -- 2022-05-30 1.3.7 SY Replace function env.seed(seed) to env.reset(seed=seed)
## -- 2022-07-20 1.3.8 SY Update due to the latest introduction of Gym 0.25
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.3.7 (2022-05-30)
Ver. 1.3.8 (2022-07-20)
This module provides wrapper classes for reinforcement learning tasks.
"""

@@ -357,7 +358,7 @@ def observe(self, agent_id):


## -------------------------------------------------------------------------------------------------
def reset(self):
def reset(self, seed, options):
self.agents = self.possible_agents[:]
self.rewards = {agent: 0 for agent in self.agents}
self._cumulative_rewards = {agent: 0 for agent in self.agents}
@@ -366,7 +367,7 @@ def reset(self):
self.state = {agent: None for agent in self.agents}
self.observations = {agent: None for agent in self.agents}

self._mlpro_env.reset()
self._mlpro_env.reset(seed)

self._agent_selector = agent_selector(self.agents)
self.agent_selection = self._agent_selector.next()
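The internal raw_env.reset() of the PettingZoo wrapper now takes seed and options and forwards the seed to the wrapped MLPro environment, matching the reset signature rolled out across Gym and PettingZoo. A small, self-contained sketch of that calling convention is shown below; the class and the optional defaults are illustrative assumptions, not code from this PR:

import random
from typing import Any, Dict, Optional

class ExampleRawEnv:
    """Illustrative stand-in showing the seed/options style of reset()."""

    def reset(self, seed: Optional[int] = None,
              options: Optional[Dict[str, Any]] = None) -> None:
        # Seed the environment's RNG, then rebuild its internal state
        self._rng = random.Random(seed)
        self._state = [self._rng.random() for _ in range(4)]

env = ExampleRawEnv()
env.reset(seed=42)   # reproducible
env.reset()          # unseeded call still works because of the defaults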
7 changes: 4 additions & 3 deletions test/test_sb3_policy_wrapper.py
@@ -12,10 +12,11 @@
## -- 2021-12-20 1.0.3 DA Refactoring
## -- 2022-01-18 2.0.0 MRD Add Off Policy Algorithm into the test
## -- 2022-01-21 2.0.1 MRD Include RobotHTM as the continuous action environment
## -- 2022-07-21 2.0.2 SY Update due to the latest introduction of Gym 0.25
## -------------------------------------------------------------------------------------------------

"""
Ver. 2.0.1 (2022-01-21)
Ver. 2.0.2 (2022-07-21)

Unit test classes for environment.
"""
@@ -76,7 +77,7 @@ def _reset(self, p_seed=None):
else:
if issubclass(env_cls, DQN):
# 1 Setup environment
gym_env = gym.make('CartPole-v1')
gym_env = gym.make('CartPole-v1', render_mode=None)
gym_env.seed(2)
self._env = CustomWrapperFixedSeed(gym_env, p_logging=False)
else:
@@ -199,7 +200,7 @@ def _on_rollout_end(self) -> None:
else:
if issubclass(env_cls, DQN):
# 1 Setup environment
gym_env = gym.make('CartPole-v1')
gym_env = gym.make('CartPole-v1', render_mode=None)
gym_env.seed(2)
else:
env = RobotHTM(p_reset_seed=False, p_target_mode="fix", p_logging=False)