From fbdca00944c713849613170ccf50b859331d8022 Mon Sep 17 00:00:00 2001 From: Arman717 <125295315+Arman717@users.noreply.github.com> Date: Wed, 19 Apr 2023 10:50:29 +0200 Subject: [PATCH] Update to gymnasium (#82) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update install_requires * § * fix imports: gym → gymnasium * fix step function * fix step * gym->gymnasium * § * fix * fix * fix * fix * fix * fix render * . * fix all step returns * Delete build/lib/carl directory * Delete myenv directory * Update README.md Fix documentation link. * make pre-commit and format * rename every occurrence of trunched to truncated * @Arman717 * change trunched to truncated * Fix gymnasium version with box2d --------- Co-authored-by: C. Benjamins <75323339+benjamc@users.noreply.github.com> --- README.md | 2 +- carl/envs/box2d/carl_bipedal_walker.py | 10 +++---- carl/envs/box2d/carl_lunarlander.py | 17 ++++++------ carl/envs/box2d/carl_vehicle_racing.py | 4 +-- carl/envs/box2d/parking_garage/bus.py | 2 +- carl/envs/box2d/parking_garage/race_car.py | 2 +- carl/envs/box2d/parking_garage/street_car.py | 2 +- carl/envs/box2d/parking_garage/trike.py | 2 +- carl/envs/carl_env.py | 26 +++++++++++++------ carl/envs/classic_control/carl_acrobot.py | 2 +- carl/envs/classic_control/carl_cartpole.py | 2 +- carl/envs/classic_control/carl_mountaincar.py | 2 +- .../carl_mountaincarcontinuous.py | 2 +- carl/envs/classic_control/carl_pendulum.py | 2 +- carl/envs/dmc/wrappers.py | 4 +-- carl/envs/mario/carl_mario.py | 2 +- carl/envs/mario/mario_env.py | 8 +++--- carl/envs/rna/carl_rna.py | 8 +++--- carl/envs/rna/carl_rna_definitions.py | 2 +- carl/envs/rna/rna_environment.py | 2 +- examples/demo_carracing.py | 4 +-- examples/demo_heuristic_lunarlander.py | 26 +++++++++++-------- setup.py | 4 +-- test/test_CARLEnv.py | 23 +++++++++------- 24 files changed, 89 insertions(+), 71 deletions(-) diff --git a/README.md b/README.md index 2773f81c..10df90d5 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Benchmarks include: ![Screenshot of each environment included in CARL.](./docs/source/figures/envs_overview.png) -For more information, check out our [documentation](https://carl.readthedocs.io/en/latest/)! +For more information, check out our [documentation](https://automl.github.io/CARL/)! 
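The recurring change throughout this patch is the move from OpenAI Gym's four-tuple step API to Gymnasium's five-tuple API. A minimal sketch of the difference, assuming gymnasium>=0.27.1 (the pin this patch sets in setup.py); "CartPole-v1" is used purely for illustration:

```python
import gymnasium as gym

env = gym.make("CartPole-v1")
obs, info = env.reset(seed=42)  # reset now returns (obs, info) and takes the seed
done = False
while not done:
    action = env.action_space.sample()
    # terminated: a terminal MDP state was reached
    # truncated: the episode was cut off, e.g. by a time limit
    obs, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
env.close()
```

Every `done` check in the hunks below becomes `terminated or truncated` accordingly.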
## Installation diff --git a/carl/envs/box2d/carl_bipedal_walker.py b/carl/envs/box2d/carl_bipedal_walker.py index 33e66453..b0c959e4 100644 --- a/carl/envs/box2d/carl_bipedal_walker.py +++ b/carl/envs/box2d/carl_bipedal_walker.py @@ -2,8 +2,8 @@ import numpy as np from Box2D.b2 import edgeShape, fixtureDef, polygonShape -from gym.envs.box2d import bipedal_walker -from gym.envs.box2d import bipedal_walker as bpw +from gymnasium.envs.box2d import bipedal_walker +from gymnasium.envs.box2d import bipedal_walker as bpw from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv @@ -212,9 +212,9 @@ def demo_heuristic( SUPPORT_KNEE_ANGLE = +0.1 supporting_knee_angle = SUPPORT_KNEE_ANGLE while True: - s, r, done, info = env.step(a) + s, r, terminated, truncated, info = env.step(a) total_reward += r - if steps % 20 == 0 or done: + if steps % 20 == 0 or terminated or truncated: print("\naction " + str(["{:+0.2f}".format(x) for x in a])) print("step {} total_reward {:+0.2f}".format(steps, total_reward)) print("hull " + str(["{:+0.2f}".format(x) for x in s[0:4]])) @@ -278,7 +278,7 @@ def demo_heuristic( a = np.clip(0.5 * a, -1.0, 1.0) env.render() - if done: + if terminated or truncated: break diff --git a/carl/envs/box2d/carl_lunarlander.py b/carl/envs/box2d/carl_lunarlander.py index 8b8964b8..fee390c0 100644 --- a/carl/envs/box2d/carl_lunarlander.py +++ b/carl/envs/box2d/carl_lunarlander.py @@ -1,7 +1,7 @@ from typing import Dict, List, Optional, Tuple, TypeVar, Union -from gym import Wrapper -from gym.envs.box2d import lunar_lander +from gymnasium import Wrapper +from gymnasium.envs.box2d import lunar_lander from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv @@ -82,19 +82,20 @@ def __init__( super().__init__(env=env) self.high_gameover_penalty = high_gameover_penalty - self.active_seed = None + # self.active_seed = None - def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]: + def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]: self.env: lunar_lander.LunarLander - state, reward, done, info = self.env.step(action) + state, reward, terminated, truncated, info = self.env.step(action) + if self.env.game_over and self.high_gameover_penalty: reward = -10000 - return state, reward, done, info + return state, reward, terminated, truncated, info - def seed(self, seed: Optional[int] = None) -> Optional[int]: + """def seed(self, seed: Optional[int] = None) -> Optional[int]: seed_ = self.env.seed(seed) self.active_seed = seed_[0] - return seed_ + return seed_""" class CARLLunarLanderEnv(CARLEnv): diff --git a/carl/envs/box2d/carl_vehicle_racing.py b/carl/envs/box2d/carl_vehicle_racing.py index eb2ca06a..fcb4e92a 100644 --- a/carl/envs/box2d/carl_vehicle_racing.py +++ b/carl/envs/box2d/carl_vehicle_racing.py @@ -2,8 +2,8 @@ import numpy as np import pyglet -from gym.envs.box2d import CarRacing -from gym.envs.box2d.car_dynamics import Car +from gymnasium.envs.box2d import CarRacing +from gymnasium.envs.box2d.car_dynamics import Car from pyglet import gl from carl.context.selection import AbstractSelector diff --git a/carl/envs/box2d/parking_garage/bus.py b/carl/envs/box2d/parking_garage/bus.py index 7d6e810e..68640e3e 100644 --- a/carl/envs/box2d/parking_garage/bus.py +++ b/carl/envs/box2d/parking_garage/bus.py @@ -12,7 +12,7 @@ from Box2D.b2 import revoluteJointDef # noqa: F401 from Box2D.b2 import ropeJointDef # noqa: F401 from Box2D.b2 import shape # noqa: F401; noqa: F401 -from 
gym.envs.box2d.car_dynamics import Car +from gymnasium.envs.box2d.car_dynamics import Car from carl.envs.box2d.parking_garage.utils import Particle diff --git a/carl/envs/box2d/parking_garage/race_car.py b/carl/envs/box2d/parking_garage/race_car.py index c82c3e58..2a08b76b 100644 --- a/carl/envs/box2d/parking_garage/race_car.py +++ b/carl/envs/box2d/parking_garage/race_car.py @@ -12,7 +12,7 @@ from Box2D.b2 import revoluteJointDef # noqa: F401 from Box2D.b2 import ropeJointDef # noqa: F401 from Box2D.b2 import shape # noqa: F401; noqa: F401 -from gym.envs.box2d.car_dynamics import Car +from gymnasium.envs.box2d.car_dynamics import Car from carl.envs.box2d.parking_garage.utils import Particle diff --git a/carl/envs/box2d/parking_garage/street_car.py b/carl/envs/box2d/parking_garage/street_car.py index e609627f..4e6abaa4 100644 --- a/carl/envs/box2d/parking_garage/street_car.py +++ b/carl/envs/box2d/parking_garage/street_car.py @@ -12,7 +12,7 @@ from Box2D.b2 import revoluteJointDef # noqa: F401 from Box2D.b2 import ropeJointDef # noqa: F401 from Box2D.b2 import shape # noqa: F401; noqa: F401 -from gym.envs.box2d.car_dynamics import Car +from gymnasium.envs.box2d.car_dynamics import Car __author__ = "André Biedenkapp" diff --git a/carl/envs/box2d/parking_garage/trike.py b/carl/envs/box2d/parking_garage/trike.py index d2bcf95a..d9ee001e 100644 --- a/carl/envs/box2d/parking_garage/trike.py +++ b/carl/envs/box2d/parking_garage/trike.py @@ -12,7 +12,7 @@ from Box2D.b2 import revoluteJointDef # noqa: F401 from Box2D.b2 import ropeJointDef # noqa: F401 from Box2D.b2 import shape # noqa: F401; noqa: F401 -from gym.envs.box2d.car_dynamics import Car +from gymnasium.envs.box2d.car_dynamics import Car from carl.envs.box2d.parking_garage.utils import Particle diff --git a/carl/envs/carl_env.py b/carl/envs/carl_env.py index e0f094c1..ba742e91 100644 --- a/carl/envs/carl_env.py +++ b/carl/envs/carl_env.py @@ -8,9 +8,9 @@ import os from types import ModuleType -import gym +import gymnasium as gym import numpy as np -from gym import Wrapper, spaces +from gymnasium import Wrapper, spaces from carl.context.augmentation import add_gaussian_noise from carl.context.selection import AbstractSelector, RoundRobinSelector @@ -86,6 +86,7 @@ class CARLEnv(Wrapper): available_scale_methods = ["by_default", "by_mean", "no"] available_instance_modes = ["random", "rr", "roundrobin"] + metadata = {"render_modes": ["human", "rgb_array"]} def __init__( self, @@ -111,6 +112,7 @@ def __init__( # Gather args self._context: Context # init for property self._contexts: Contexts # init for property + self.default_context = default_context self.contexts = contexts self.context_mask = context_mask @@ -254,8 +256,14 @@ def contexts(self, contexts: Contexts) -> None: self._contexts = { k: self.fill_context_with_default(context=v) for k, v in contexts.items() } + return - def reset(self, **kwargs: Dict) -> Union[ObsType, tuple[ObsType, dict]]: # type: ignore [override] + def reset( + self, + seed: int | None = None, + options: dict[str, Any] | None = None, + **kwargs: Dict, + ) -> Union[ObsType, tuple[ObsType, dict]]: # type: ignore [override] """ Reset environment. 
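The new reset signature above mirrors Gymnasium's: seeding moved from a standalone `env.seed()` call into `reset(seed=...)`, which is why the LunarLander wrapper's `seed` method is commented out earlier in the patch. A sketch of the forwarding pattern, assuming a plain `gym.Wrapper`; the `PassThroughWrapper` below is illustrative, not CARL code:

```python
import gymnasium as gym

class PassThroughWrapper(gym.Wrapper):  # hypothetical, for illustration only
    def reset(self, *, seed=None, options=None):
        # Gymnasium resets return (obs, info); the seed is consumed here
        # rather than by a separate env.seed() call.
        obs, info = self.env.reset(seed=seed, options=options)
        return obs, info
```

Note that CARLEnv's reset keeps its legacy `return_info` handling on top of this forwarding.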
@@ -278,7 +286,7 @@ def reset(self, **kwargs: Dict) -> Union[ObsType, tuple[ObsType, dict]]: # type self._update_context() self._log_context() return_info = kwargs.get("return_info", False) - _ret = self.env.reset(**kwargs) # type: ignore [arg-type] + _ret = self.env.reset(seed=seed, options=options, **kwargs) # type: ignore [arg-type] info_dict = dict() if return_info: state, info_dict = _ret @@ -288,6 +296,7 @@ def reset(self, **kwargs: Dict) -> Union[ObsType, tuple[ObsType, dict]]: # type ret = state if return_info: ret = state, info_dict + return ret def build_context_adaptive_state( @@ -324,7 +333,7 @@ def build_context_adaptive_state( state = tnp.concatenate((state, context_values)) return state - def step(self, action: Any) -> Tuple[Any, Any, bool, Dict]: + def step(self, action: Any) -> Tuple[Any, Any, bool, bool, Dict]: """ Step the environment. @@ -345,7 +354,7 @@ def step(self, action: Any) -> Tuple[Any, Any, bool, Dict]: """ # Step the environment - state, reward, done, info = self.env.step(action) + state, reward, terminated, truncated, info = self.env.step(action) if not self.hide_context: # Scale context features @@ -369,8 +378,9 @@ def step(self, action: Any) -> Tuple[Any, Any, bool, Dict]: self.total_timestep_counter += 1 self.step_counter += 1 if self.step_counter >= self.cutoff: - done = True - return state, reward, done, info + truncated = True + + return state, reward, terminated, truncated, info def __getattr__(self, name: str) -> Any: # TODO: does this work with activated noise? I think we need to update it diff --git a/carl/envs/classic_control/carl_acrobot.py b/carl/envs/classic_control/carl_acrobot.py index a3c9aea0..4da7e499 100644 --- a/carl/envs/classic_control/carl_acrobot.py +++ b/carl/envs/classic_control/carl_acrobot.py @@ -1,7 +1,7 @@ from typing import Dict, List, Optional, Union import numpy as np -from gym.envs.classic_control import AcrobotEnv +from gymnasium.envs.classic_control import AcrobotEnv from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv diff --git a/carl/envs/classic_control/carl_cartpole.py b/carl/envs/classic_control/carl_cartpole.py index ba1f1507..e9029605 100644 --- a/carl/envs/classic_control/carl_cartpole.py +++ b/carl/envs/classic_control/carl_cartpole.py @@ -1,7 +1,7 @@ from typing import Dict, List, Optional, Union import numpy as np -from gym.envs.classic_control import CartPoleEnv +from gymnasium.envs.classic_control import CartPoleEnv from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv diff --git a/carl/envs/classic_control/carl_mountaincar.py b/carl/envs/classic_control/carl_mountaincar.py index 0407ea67..5dc2ceaa 100644 --- a/carl/envs/classic_control/carl_mountaincar.py +++ b/carl/envs/classic_control/carl_mountaincar.py @@ -1,6 +1,6 @@ from typing import Dict, List, Optional, Tuple, Union -import gym.envs.classic_control as gccenvs +import gymnasium.envs.classic_control as gccenvs import numpy as np from carl.context.selection import AbstractSelector diff --git a/carl/envs/classic_control/carl_mountaincarcontinuous.py b/carl/envs/classic_control/carl_mountaincarcontinuous.py index 9d833236..9abf3195 100644 --- a/carl/envs/classic_control/carl_mountaincarcontinuous.py +++ b/carl/envs/classic_control/carl_mountaincarcontinuous.py @@ -1,6 +1,6 @@ from typing import Dict, List, Optional, Union -import gym.envs.classic_control as gccenvs +import gymnasium.envs.classic_control as gccenvs import numpy as np from carl.context.selection import 
AbstractSelector diff --git a/carl/envs/classic_control/carl_pendulum.py b/carl/envs/classic_control/carl_pendulum.py index 6a293020..1e766800 100644 --- a/carl/envs/classic_control/carl_pendulum.py +++ b/carl/envs/classic_control/carl_pendulum.py @@ -1,6 +1,6 @@ from typing import Dict, List, Optional, Union -import gym.envs.classic_control as gccenvs +import gymnasium.envs.classic_control as gccenvs import numpy as np from carl.context.selection import AbstractSelector diff --git a/carl/envs/dmc/wrappers.py b/carl/envs/dmc/wrappers.py index 1a9b203f..e905c0f8 100644 --- a/carl/envs/dmc/wrappers.py +++ b/carl/envs/dmc/wrappers.py @@ -1,10 +1,10 @@ from typing import Any, Optional, Tuple, TypeVar, Union import dm_env # type: ignore -import gym +import gymnasium as gym import numpy as np from dm_env import StepType -from gym import spaces +from gymnasium import spaces ObsType = TypeVar("ObsType") ActType = TypeVar("ActType") diff --git a/carl/envs/mario/carl_mario.py b/carl/envs/mario/carl_mario.py index 1e8e9edf..6f94e754 100644 --- a/carl/envs/mario/carl_mario.py +++ b/carl/envs/mario/carl_mario.py @@ -1,6 +1,6 @@ from typing import Dict, List, Optional, Union -import gym +import gymnasium as gym from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv diff --git a/carl/envs/mario/mario_env.py b/carl/envs/mario/mario_env.py index 127a75db..f16876e9 100644 --- a/carl/envs/mario/mario_env.py +++ b/carl/envs/mario/mario_env.py @@ -6,11 +6,11 @@ from collections import deque import cv2 -import gym +import gymnasium as gym import numpy as np -from gym import spaces -from gym.core import ObsType -from gym.utils import seeding +from gymnasium import spaces +from gymnasium.core import ObsType +from gymnasium.utils import seeding from PIL import Image from py4j.java_gateway import GatewayParameters, JavaGateway diff --git a/carl/envs/rna/carl_rna.py b/carl/envs/rna/carl_rna.py index a04d4f04..2c6fc674 100644 --- a/carl/envs/rna/carl_rna.py +++ b/carl/envs/rna/carl_rna.py @@ -1,7 +1,7 @@ # pylint: disable=missing-module-docstring # isort: skip_file from typing import Optional, Dict, Union, List, Tuple, Any import numpy as np -import gym +import gymnasium as gym from carl.envs.carl_env import CARLEnv from carl.envs.rna.parse_dot_brackets import parse_dot_brackets @@ -94,11 +94,11 @@ def __init__( self.obs_low = obs_low self.obs_high = obs_high - def step(self, action: np.ndarray) -> Tuple[List[int], float, Any, Any]: + def step(self, action: np.ndarray) -> Tuple[List[int], float, Any, Any, Any]: # Step function has a different name in this env - state, reward, done = self.env.execute(action) # type: ignore[has-type] + state, reward, terminated, truncated = self.env.execute(action) # type: ignore[has-type] self.step_counter += 1 - return state, reward, done, {} + return state, reward, terminated, truncated, {} def _update_context(self) -> None: dot_brackets = parse_dot_brackets( diff --git a/carl/envs/rna/carl_rna_definitions.py b/carl/envs/rna/carl_rna_definitions.py index 34051af2..8b5b012f 100644 --- a/carl/envs/rna/carl_rna_definitions.py +++ b/carl/envs/rna/carl_rna_definitions.py @@ -1,5 +1,5 @@ import numpy as np -from gym import spaces +from gymnasium import spaces DEFAULT_CONTEXT = { "mutation_threshold": 5, diff --git a/carl/envs/rna/rna_environment.py b/carl/envs/rna/rna_environment.py index 073b9dad..312ca650 100644 --- a/carl/envs/rna/rna_environment.py +++ b/carl/envs/rna/rna_environment.py @@ -12,7 +12,7 @@ import numpy as np from RNA import fold 
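Most of the remaining hunks are the mechanical rename. Importing Gymnasium under the old `gym` alias keeps each module body untouched; a sketch:

```python
import gymnasium as gym  # existing references to gym.* keep working
from gymnasium import spaces

# e.g. existing space definitions need no changes:
space = spaces.Box(low=-1.0, high=1.0, shape=(3,))
assert isinstance(space, gym.Space)
```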
-import gym +import gymnasium as gym from typing import Any, List diff --git a/examples/demo_carracing.py b/examples/demo_carracing.py index 9f76272b..d25b8d13 100644 --- a/examples/demo_carracing.py +++ b/examples/demo_carracing.py @@ -4,7 +4,7 @@ from typing import Any import numpy as np -import gym +import gymnasium as gym import time import pygame from carl.envs.box2d.carl_vehicle_racing import CARLVehicleRacingEnv, VEHICLE_NAMES @@ -44,7 +44,7 @@ def register_input(): env.render() record_video = False if record_video: - from gym.wrappers.record_video import RecordVideo + from gymnasium.wrappers.record_video import RecordVideo env = RecordVideo( env=env, video_folder="/tmp/video-test", name_prefix="CARLVehicleRacing" diff --git a/examples/demo_heuristic_lunarlander.py b/examples/demo_heuristic_lunarlander.py index d05e0ed4..688d9bc0 100644 --- a/examples/demo_heuristic_lunarlander.py +++ b/examples/demo_heuristic_lunarlander.py @@ -1,8 +1,8 @@ from typing import Union, Optional -from gym.envs.box2d.lunar_lander import heuristic -import gym.envs.box2d.lunar_lander as lunar_lander - +from gymnasium.envs.box2d.lunar_lander import heuristic +import gymnasium.envs.box2d.lunar_lander as lunar_lander +from gymnasium.utils.step_api_compatibility import step_api_compatibility from carl.envs import CARLLunarLanderEnv @@ -16,22 +16,26 @@ def demo_heuristic_lander( """ Copied from LunarLander """ - env.seed(seed) + total_reward = 0 steps = 0 - env.render() - s = env.reset() + if render: + env.render() + s = env.reset( + seed=seed, + ) + while True: a = heuristic(env, s) - s, r, done, info = env.step(a) + + s, r, done, truncated, info = env.step(a) + total_reward += r - if render: + if render and steps % 20 == 0: still_open = env.render() - if not still_open: - break - if done: # or steps % 20 == 0: + if done or truncated: # or steps % 20 == 0: # print("observations:", " ".join(["{:+0.2f}".format(x) for x in s])) print("step {} total_reward {:+0.2f}".format(steps, total_reward)) steps += 1 diff --git a/setup.py b/setup.py index c526b670..ded774a4 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ def read_file(filepath: str) -> str: extras_require = { "box2d": [ - "gym[box2d]==0.24.1", + "gymnasium[box2d]>=0.27.1", ], "brax": [ "brax>=0.0.10,<=0.0.16", @@ -76,7 +76,7 @@ def read_file(filepath: str) -> str: include_package_data=True, python_requires=">=3.9", install_requires=[ - "gym==0.24.1", + "gymnasium>=0.27.1", "scipy>=1.7.0", "ConfigArgParse>=1.5.1", "numpy>=1.19.5", diff --git a/test/test_CARLEnv.py b/test/test_CARLEnv.py index b53ebee0..7a054e3d 100644 --- a/test/test_CARLEnv.py +++ b/test/test_CARLEnv.py @@ -22,7 +22,7 @@ def test_hiddenstate(self): ) env.reset() action = [0.01] # torque - state, reward, done, info = env.step(action=action) + state, reward, terminated, truncated, info = env.step(action=action) env.close() self.assertEqual(3, len(state)) @@ -40,7 +40,7 @@ def test_visiblestate(self): ) env.reset() action = [0.01] # torque - state, reward, done, info = env.step(action=action) + state, reward, terminated, truncated, info = env.step(action=action) env.close() self.assertEqual(10, len(state)) @@ -58,7 +58,7 @@ def test_visiblestate_customnone(self): ) env.reset() action = [0.01] # torque - state, reward, done, info = env.step(action=action) + state, reward, terminated, truncated, info = env.step(action=action) env.close() # Because we don't change any context features the state length should be 3 self.assertEqual(3, len(state)) @@ -77,7 +77,7 @@ def 
test_visiblestate_custom(self): ) env.reset() action = [0.01] # torque - state, reward, done, info = env.step(action=action) + state, reward, terminated, truncated, info = env.step(action=action) env.close() # state should be of length 5 because we add two context features self.assertEqual(5, len(state)) @@ -103,7 +103,7 @@ def test_visiblestate_changingcontextfeatures_nochange(self): ) env.reset() action = [0.01] # torque - state, reward, done, info = env.step(action=action) + state, reward, terminated, truncated, info = env.step(action=action) env.close() # state should be of length 3 because all contexts are the same self.assertEqual(3, len(state)) @@ -129,7 +129,7 @@ def test_visiblestate_changingcontextfeatures_change(self): ) env.reset() action = [0.01] # torque - state, reward, done, info = env.step(action=action) + state, reward, terminated, truncated, info = env.step(action=action) env.close() # state should be of length 5 because two features are changing (dt and l) self.assertEqual(5, len(state)) @@ -149,7 +149,7 @@ def test_dict_observation_space(self): self.assertTrue("state" in obs) self.assertTrue("context" in obs) action = [0.01] # torque - next_obs, reward, done, info = env.step(action=action) + next_obs, reward, terminated, truncated, info = env.step(action=action) env.close() def test_state_context_feature_population(self): @@ -185,9 +185,12 @@ def test_episode_termination(self): done = False counter = 0 while not done: - state, reward, done, info = env.step(action=action) + state, reward, terminated, truncated, info = env.step(action=action) counter += 1 self.assertTrue(counter <= ep_length) + if terminated or truncated: + done = True + if counter > ep_length: break env.close() @@ -223,7 +226,7 @@ def test_context_feature_scaling_by_mean(self): ) env.reset() action = [0.0] - state, reward, done, info = env.step(action=action) + state, reward, terminated, truncated, info = env.step(action=action) n_c = len(env.default_context) scaled_contexts = state[-n_c:] target = np.array( @@ -258,7 +261,7 @@ def test_context_feature_scaling_by_default(self): ) env.reset() action = [0.0] - state, reward, done, info = env.step(action=action) + state, reward, terminated, truncated, info = env.step(action=action) n_c = len(default_context) scaled_contexts = state[-n_c:] self.assertTrue(
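The test updates follow from the CARLEnv change above: reaching `cutoff` now sets `truncated` instead of folding everything into `done`, so episode loops bound on either flag. A sketch of the resulting pattern, using Gymnasium's `TimeLimit` wrapper as a stand-in for CARLEnv's cutoff counter; "Pendulum-v1" and the step limit of 10 are illustrative:

```python
import gymnasium as gym
from gymnasium.wrappers import TimeLimit

env = TimeLimit(gym.make("Pendulum-v1"), max_episode_steps=10)
obs, info = env.reset(seed=0)
steps = 0
while True:
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    steps += 1
    if terminated or truncated:
        break
# Pendulum never terminates on its own, so the limit reports via `truncated`.
assert steps == 10 and truncated and not terminated
env.close()
```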