WR: Gym removal, SB3 update to 2.0 alpha #741

Merged 10 commits on Apr 23, 2023
4 changes: 0 additions & 4 deletions .github/workflows/ci.yml
@@ -62,7 +62,6 @@ jobs:

- name: Install dependencies
run: |
pip install setuptools==65.5.0
pip install flake8 pytest
pip install -r requirements.txt

@@ -98,7 +97,6 @@ jobs:

- name: Install dependencies
run: |
pip install setuptools==65.5.0
pip install flake8 pytest
pip install -r requirements.txt
pip install sphinx
@@ -131,7 +129,6 @@ jobs:

- name: Install dependencies
run: |
pip install setuptools==65.5.0
pip install flake8 pytest
pip install -r requirements.txt

@@ -160,7 +157,6 @@ jobs:

- name: Install dependencies
run: |
pip install setuptools==65.5.0
pip install flake8 pytest
pip install -r requirements.txt

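The setuptools==65.5.0 pin is dropped from all four CI jobs. As far as can be told, that pin was only needed as a workaround for installing the legacy gym 0.21 package, which fails to build under newer setuptools releases; with gym removed from the requirements, the extra install step serves no purpose.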
3 changes: 1 addition & 2 deletions doc/rtd/content/01_welcome/sub/deps.txt
@@ -3,8 +3,7 @@ numpy,1.24.2
torch,2.0.0
matplotlib,3.7.1
transformations,2022.9.26
stable-baselines3,1.8.0
gym,0.21.0
stable_baselines3,2.0.0a1
scipy,1.10.1
pettingzoo,1.22.3
pygame,2.1.3
3 changes: 1 addition & 2 deletions doc/rtd/requirements.txt
@@ -3,8 +3,7 @@ numpy>=1.24.2
torch>=2.0.0
matplotlib>=3.7.1
transformations>=2022.9.26
stable-baselines3>=1.8.0
gym>=0.21.0
stable_baselines3>=2.0.0a1
scipy>=1.10.1
pettingzoo>=1.22.3
pygame>=2.1.3
3 changes: 1 addition & 2 deletions requirements.txt
@@ -3,8 +3,7 @@ numpy>=1.24.2
torch>=2.0.0
matplotlib>=3.7.1
transformations>=2022.9.26
stable-baselines3>=1.8.0
gym>=0.21.0
stable_baselines3>=2.0.0a1
scipy>=1.10.1
pettingzoo>=1.22.3
pygame>=2.1.3
3 changes: 1 addition & 2 deletions setup.cfg
@@ -31,8 +31,7 @@ full =
torch>=2.0.0
matplotlib>=3.7.1
transformations>=2022.9.26
stable-baselines3>=1.8.0
gym>=0.21.0
stable_baselines3>=2.0.0a1
scipy>=1.10.1
pettingzoo>=1.22.3
pygame>=2.1.3
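In every dependency list, stable-baselines3 moves from 1.8.0 to the 2.0.0a1 pre-release and the separate gym pin disappears, since SB3 2.0 targets Gymnasium rather than the legacy Gym package. A minimal sketch of the resulting stack (environment id and timestep count are illustrative, not taken from this PR):

import gymnasium as gym
from stable_baselines3 import PPO

# SB3 >= 2.0 consumes Gymnasium environments directly,
# so no shim around the legacy gym package is required.
env = gym.make("CartPole-v1")
model = PPO("MlpPolicy", env, verbose=0)
model.learn(total_timesteps=1_000)

obs, info = env.reset()
action, _ = model.predict(obs, deterministic=True)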
@@ -21,10 +21,11 @@
## -- 2022-11-02 1.2.6 DA Refactoring
## -- 2022-11-07 1.3.0 DA Refactoring
## -- 2023-01-14 1.3.1 MRD Removing default parameter new_step_api and render_mode for gym
## -- 2023-04-19 1.3.2 MRD Refactor module import gym to gymnasium
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.3.1 (2023-01-14)
Ver. 1.3.2 (2023-04-19)

This module shows how to run a custom policy inside the standard agent model with an OpenAI Gym environment using the
MLPro framework.
@@ -46,8 +47,8 @@

from mlpro.bf.math import *
from mlpro.rl import *
from mlpro.wrappers.openai_gym import WrEnvGYM2MLPro
import gym
from mlpro.wrappers.gymnasium import WrEnvGYM2MLPro
import gymnasium as gym
import random


@@ -91,7 +92,10 @@ class MyScenario (RLScenario):

def _setup(self, p_mode, p_ada: bool, p_visualize:bool, p_logging) -> Model:
# 2.1 Setup environment
gym_env = gym.make('CartPole-v1')
if p_visualize:
gym_env = gym.make('CartPole-v1', render_mode="human")
else:
gym_env = gym.make('CartPole-v1')
self._env = WrEnvGYM2MLPro( p_gym_env=gym_env, p_visualize=p_visualize, p_logging=p_logging)

# 2.2 Setup standard single-agent with own policy
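The setup code now chooses the render mode when the environment is created, because Gymnasium fixes the render mode at construction time instead of per render() call. For orientation, the plain Gymnasium side of this API looks roughly like the following (standalone sketch, independent of the MLPro wrapper):

import gymnasium as gym

# Render mode must be declared up front in Gymnasium.
env = gym.make("CartPole-v1", render_mode="human")

# reset() returns (observation, info); step() returns a 5-tuple
# with separate terminated and truncated flags.
obs, info = env.reset()
terminated = truncated = False
while not (terminated or truncated):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
env.close()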
@@ -24,10 +24,11 @@
## -- 2022-11-02 1.3.6 DA Refactoring
## -- 2022-11-07 1.4.0 DA Refactoring
## -- 2023-01-14 1.4.1 MRD Removing default parameter new_step_api and render_mode for gym
## -- 2023-04-19 1.4.2 MRD Refactor module import gym to gymnasium
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.4.1 (2023-01-14)
Ver. 1.4.2 (2023-04-19)

This module shows how to train an agent with a custom policy on an OpenAI Gym environment using the
MLPro framework.
@@ -49,8 +50,8 @@

from mlpro.bf.math import *
from mlpro.rl import *
from mlpro.wrappers.openai_gym import WrEnvGYM2MLPro
import gym
from mlpro.wrappers.gymnasium import WrEnvGYM2MLPro
import gymnasium as gym
import random
from pathlib import Path

@@ -95,7 +96,10 @@ class MyScenario (RLScenario):

def _setup(self, p_mode, p_ada: bool, p_visualize: bool, p_logging) -> Model:
# 2.1 Setup environment
gym_env = gym.make('CartPole-v1')
if p_visualize:
gym_env = gym.make('CartPole-v1', render_mode="human")
else:
gym_env = gym.make('CartPole-v1')
self._env = WrEnvGYM2MLPro(gym_env, p_visualize=p_visualize, p_logging=p_logging)

# 2.2 Setup and return standard single-agent with own policy
@@ -22,10 +22,11 @@
## -- 2023-03-02 1.2.1 LSB Refactoring
## -- 2023-03-04 1.3.0 DA Renamed
## -- 2023-03-27 1.3.1 DA Refactoring
## -- 2023-04-19 1.3.2 MRD Refactor module import gym to gymnasium
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.3.1 (2023-03-27)
Ver. 1.3.2 (2023-04-19)

This module shows how to train a single agent and load it again to do some extra cycles.

@@ -40,10 +41,10 @@
"""


import gym
import gymnasium as gym
from stable_baselines3 import PPO
from mlpro.rl import *
from mlpro.wrappers.openai_gym import WrEnvGYM2MLPro
from mlpro.wrappers.gymnasium import WrEnvGYM2MLPro
from mlpro.wrappers.sb3 import WrPolicySB32MLPro
from pathlib import Path

@@ -55,7 +56,10 @@ class MyScenario (RLScenario):

def _setup(self, p_mode, p_ada: bool, p_visualize: bool, p_logging) -> Model:
# 1.1 Setup environment
gym_env = gym.make('CartPole-v1')
if p_visualize:
gym_env = gym.make('CartPole-v1', render_mode="human")
else:
gym_env = gym.make('CartPole-v1')
self._env = WrEnvGYM2MLPro(gym_env, p_visualize=p_visualize, p_logging=p_logging)

# 1.2 Setup Policy From SB3
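This howto trains a PPO agent through WrPolicySB32MLPro and reloads it for extra cycles; persistence is presumably handled by the MLPro scenario machinery rather than by SB3 itself. Purely for reference, the underlying SB3 calls for saving and reloading a policy outside MLPro look like this (file name and timestep counts are illustrative):

from pathlib import Path

import gymnasium as gym
from stable_baselines3 import PPO

env = gym.make("CartPole-v1")
model = PPO("MlpPolicy", env, verbose=0)
model.learn(total_timesteps=500)

# Persist the trained policy and reload it for a few extra cycles.
path = Path("ppo_cartpole.zip")
model.save(path)
reloaded = PPO.load(path, env=env)
reloaded.learn(total_timesteps=500)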
@@ -7,10 +7,11 @@
## -- yyyy-mm-dd Ver. Auth. Description
## -- 2023-03-04 1.0.0 DA Creation as derivate of howto_rl_agent_011
## -- 2023-03-27 1.0.1 DA Refactoring
## -- 2023-04-19 1.0.2 MRD Refactor module import gym to gymnasium
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.0.1 (2023-03-27)
Ver. 1.0.2 (2023-04-19)

As in Howto RL AGENT 011, this module shows how to train a single agent and load it again to do some
extra cycles. In contrast to howto 011, stagnation detection is used to automatically end the
@@ -29,10 +30,10 @@
"""


import gym
import gymnasium as gym
from stable_baselines3 import PPO
from mlpro.rl import *
from mlpro.wrappers.openai_gym import WrEnvGYM2MLPro
from mlpro.wrappers.gymnasium import WrEnvGYM2MLPro
from mlpro.wrappers.sb3 import WrPolicySB32MLPro
from pathlib import Path

@@ -44,7 +45,10 @@ class MyScenario (RLScenario):

def _setup(self, p_mode, p_ada: bool, p_visualize: bool, p_logging) -> Model:
# 1.1 Setup environment
gym_env = gym.make('CartPole-v1')
if p_visualize:
gym_env = gym.make('CartPole-v1', render_mode="human")
else:
gym_env = gym.make('CartPole-v1')
self._env = WrEnvGYM2MLPro(gym_env, p_visualize=p_visualize, p_logging=p_logging)

# 1.2 Setup Policy From SB3
@@ -10,10 +10,11 @@
## -- 2023-03-10 1.0.2 SY Renumbering module
## -- 2023-03-10 1.0.3 SY Refactoring
## -- 2023-03-27 1.0.4 DA Refactoring
## -- 2023-04-19 1.0.5 MRD Refactor module import gym to gymnasium
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.0.4 (2023-03-27)
Ver. 1.0.5 (2023-04-19)

This module shows how to train a single agent in MBRL and load it again to do some extra cycles.

@@ -32,11 +33,11 @@
"""


import gym
import gymnasium as gym
import torch
from stable_baselines3 import PPO
from mlpro.rl import *
from mlpro.wrappers.openai_gym import WrEnvGYM2MLPro
from mlpro.wrappers.gymnasium import WrEnvGYM2MLPro
from mlpro.wrappers.sb3 import WrPolicySB32MLPro
from mlpro.sl.pool.afct.fnn.pytorch.mlp import PyTorchMLP
from pathlib import Path
@@ -57,7 +58,10 @@ class MyScenario (RLScenario):
def _setup(self, p_mode, p_ada: bool, p_visualize: bool, p_logging) -> Model:

# 1.1 Setup environment
gym_env = gym.make('CartPole-v1')
if p_visualize:
gym_env = gym.make('CartPole-v1', render_mode="human")
else:
gym_env = gym.make('CartPole-v1')
self._env = WrEnvGYM2MLPro(gym_env, p_visualize=p_visualize, p_logging=p_logging)
self._env.reset()

@@ -15,10 +15,11 @@
## -- 2022-10-14 1.0.6 SY Refactoring
## -- 2022-11-02 1.0.7 SY Unable logging in unit test model
## -- 2023-03-02 1.0.8 LSB Refactoring
## -- 2023-04-19 1.0.9 MRD Refactor module import gym to gymnasium
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.0.8 (2023-03-02)
Ver. 1.0.9 (2023-04-19)

This module shows how to wrap a native MLPro environment class as an OpenAI Gym environment.

@@ -31,9 +32,9 @@


from mlpro.bf.various import Log
from mlpro.wrappers.openai_gym import WrEnvMLPro2GYM
from mlpro.wrappers.gymnasium import WrEnvMLPro2GYM
from mlpro.rl.pool.envs.gridworld import GridWorld
from gym.utils.env_checker import check_env
from gymnasium.utils.env_checker import check_env


if __name__ == "__main__":
@@ -48,7 +49,6 @@
env = WrEnvMLPro2GYM(mlpro_env,
p_state_space=None,
p_action_space=None,
p_new_step_api=False,
p_logging=logging)

# 3. Check whether the environment is valid
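With the wrapper now targeting Gymnasium, the p_new_step_api argument is gone: there is only one step API, the five-tuple with separate terminated and truncated flags. For orientation, Gymnasium's environment checker can be exercised on any plain Gymnasium environment like this (standalone sketch, not using WrEnvMLPro2GYM):

import gymnasium as gym
from gymnasium.utils.env_checker import check_env

# check_env validates the spaces and the
# (obs, reward, terminated, truncated, info) step signature.
env = gym.make("CartPole-v1")
check_env(env.unwrapped)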
@@ -19,10 +19,11 @@
## -- 2022-11-07 1.1.0 DA Refactoring
## -- 2023-01-14 1.1.1 MRD Removing default parameter new_step_api and render_mode for gym
## -- 2023-02-13 1.1.2 DA Optimization of dark mode
## -- 2023-04-19 1.1.3 MRD Refactor module import gym to gymnasium
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.1.2 (2023-02-13)
Ver. 1.1.3 (2023-04-19)

This module shows how to train an agent with the SB3 wrapper for on- and off-policy algorithms

@@ -35,10 +36,10 @@
"""


import gym
import gymnasium as gym
from stable_baselines3 import A2C, PPO, DQN, DDPG, SAC
from mlpro.rl.models import *
from mlpro.wrappers.openai_gym import WrEnvGYM2MLPro
from mlpro.wrappers.gymnasium import WrEnvGYM2MLPro
from mlpro.wrappers.sb3 import WrPolicySB32MLPro
from collections import deque
from pathlib import Path
@@ -51,7 +52,10 @@ class MyScenario(RLScenario):
def _setup(self, p_mode, p_ada: bool, p_visualize: bool, p_logging) -> Model:
# 1 Setup environment
# self._env = RobotHTM(p_logging=False)
gym_env = gym.make('CartPole-v1')
if p_visualize:
gym_env = gym.make('CartPole-v1', render_mode="human")
else:
gym_env = gym.make('CartPole-v1')
self._env = WrEnvGYM2MLPro(gym_env, p_visualize=p_visualize, p_logging=p_logging)

# 2 Instantiate Policy From SB3
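This howto imports A2C, PPO, DQN, DDPG and SAC from SB3; with the Gymnasium-based CartPole environment only the discrete-action algorithms can actually be exercised, since DDPG and SAC require a continuous action space. A rough standalone sketch of swapping algorithms against the same environment (timestep count is illustrative):

import gymnasium as gym
from stable_baselines3 import A2C, DQN, PPO

# CartPole-v1 has a discrete action space, so only the
# discrete-capable algorithms are looped over here.
env = gym.make("CartPole-v1")
for algo in (A2C, PPO, DQN):
    model = algo("MlpPolicy", env, verbose=0)
    model.learn(total_timesteps=500)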