diff --git a/.gitignore b/.gitignore index 6f716243..ce1a3128 100644 --- a/.gitignore +++ b/.gitignore @@ -20,19 +20,9 @@ carl.egg-info exp_sweep multirun outputs -testvenv -*.egg-info +experiments runs -*.png -*.pdf -*.csv -*.pickle -*.ipynb_checkpoints -*optgap* -*smac3* -*.json generated +*egg* core -*.tex -build -target \ No newline at end of file +*.png \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 7227e87f..091048df 100644 --- a/.gitmodules +++ b/.gitmodules @@ -6,4 +6,4 @@ url = https://github.com/Mawiszus/TOAD-GUI [submodule "src/envs/mario/Mario-AI-Framework"] path = src/envs/mario/Mario-AI-Framework - url = https://github.com/frederikschubert/Mario-AI-Framework + url = https://github.com/frederikschubert/Mario-AI-Framework \ No newline at end of file diff --git a/CITATION.bib b/CITATION.bib index 0b17f84c..f6a2b599 100644 --- a/CITATION.bib +++ b/CITATION.bib @@ -11,4 +11,4 @@ @inproceedings { BenEim2023a title = {Contextualize Me - The Case for Context in Reinforcement Learning}, journal = {Transactions on Machine Learning Research}, year = {2023}, -} \ No newline at end of file +} diff --git a/README.md b/README.md index e89b0fed..10df90d5 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ pip install . This will only install the basic classic control environments, which should run on most operating systems. For the full set of environments, use the install options: ```bash -pip install -e .[box2d,brax,mario,dm_control] +pip install -e .[box2d, brax, mario, dm_control] ``` These may not be compatible with Windows systems. 
Box2D environment may need to be installed via conda on MacOS systems: @@ -68,12 +68,12 @@ Different instiations can be achieved by setting the context features to differe ## Cite Us If you use CARL in your research, please cite our paper on the benchmark: ```bibtex -@inproceedings{Benjamins2023, - title = {Contextualize Me -- The Case for Context in Reinforcement Learning}, - author = {Carolin Benjamins and Theresa Eimer and Frederik Schubert and Aditya Mohan and Sebastian Döhler and André Biedenkapp and Bodo Rosenhan and Frank Hutter and Marius Lindauer}, - booktitle = {Transactions on Machine Learning Research}, - year = {2023}, - month = Apr +@inproceedings{BenEim2021a, + title = {CARL: A Benchmark for Contextual and Adaptive Reinforcement Learning}, + author = {Carolin Benjamins and Theresa Eimer and Frederik Schubert and André Biedenkapp and Bodo Rosenhahn and Frank Hutter and Marius Lindauer}, + booktitle = {NeurIPS 2021 Workshop on Ecological Theory of Reinforcement Learning}, + year = {2021}, + month = dec } ``` diff --git a/carl/context/sampling.py b/carl/context/sampling.py index 9cf7a349..31fec750 100644 --- a/carl/context/sampling.py +++ b/carl/context/sampling.py @@ -1,27 +1,16 @@ # flake8: noqa: W605 -from __future__ import annotations -from typing import Any, Dict, List, Optional, Tuple, Union - -import importlib +from typing import Any, Dict, List, Tuple import numpy as np -from scipy.stats import norm, rv_continuous, uniform +from scipy.stats import norm -import carl.envs +from carl import envs from carl.utils.types import Context, Contexts def get_default_context_and_bounds( env_name: str, -) -> Tuple[ - Context, - Dict[ - str, - Union[ - Tuple[Any, Any, Union[type, Tuple[type, type]]], Tuple[Any, Any, str, list] - ], - ], -]: +) -> Tuple[Dict[Any, Any], Dict[Any, Any]]: """ Get context feature defaults and bounds for environment. 
@@ -46,11 +35,11 @@ def get_default_context_and_bounds( categorical context features: ``"VEHICLE": (None, None, "categorical", np.arange(0, len(PARKING_GARAGE)))`` """ - env_cls = getattr(carl.envs, env_name) - env_module = importlib.import_module(env_cls.__module__) - context_def = getattr(env_module, "DEFAULT_CONTEXT") - context_bounds = getattr(env_module, "CONTEXT_BOUNDS") - return context_def, context_bounds + # TODO make less hacky / make explicit + env_defaults = getattr(envs, f"{env_name}_defaults") + env_bounds = getattr(envs, f"{env_name}_bounds") + + return env_defaults, env_bounds def sample_contexts( @@ -59,9 +48,6 @@ def sample_contexts( num_contexts: int, default_sample_std_percentage: float = 0.05, fallback_sample_std: float = 0.1, - seed: Optional[int] = None, - uniform_distribution: bool = False, - uniform_bounds_rel: tuple(float, float) | None = None ) -> Dict[int, Dict[str, Any]]: """ Sample contexts. @@ -116,8 +102,6 @@ def sample_contexts( 0.05. fallback_sample_std: float, optional The fallback relative standard deviation. Defaults to 0.1. - seed: int, optional - The seed for the sampling of the random variables. Returns ------- @@ -126,15 +110,11 @@ def sample_contexts( names as keys and context feature values as values, e.g., """ - rng = np.random.default_rng(seed=seed) - # Get default context features and bounds env_defaults, env_bounds = get_default_context_and_bounds(env_name=env_name) # Create sample distributions/rules - sample_dists: Dict[ - str, Tuple[rv_continuous, Union[str, type, Tuple[type, type]]] - ] = {} + sample_dists = {} for context_feature_name in env_defaults.keys(): if context_feature_name in context_feature_args: if f"{context_feature_name}_mean" in context_feature_args: @@ -161,21 +141,7 @@ def sample_contexts( # the sample mean. Therefore we use a fallback sample standard deviation. 
sample_std = fallback_sample_std # TODO change this back to sample_std - if not uniform_distribution: - random_variable = norm(loc=sample_mean, scale=sample_std) - else: - # bounds defined as [loc, loc+scale] - if sample_mean == 0: - # relative bounds are centered around 1 so subtract here for the percentages - loc = uniform_bounds_rel[0] - 1 - scale = uniform_bounds_rel[1] - uniform_bounds_rel[0] - elif sample_mean < 0: - loc = uniform_bounds_rel[1] * sample_mean - scale = uniform_bounds_rel[0] * sample_mean - loc - else: - loc = uniform_bounds_rel[0] * sample_mean - scale = uniform_bounds_rel[1] * sample_mean - loc - random_variable = uniform(loc=loc, scale=scale) + random_variable = norm(loc=sample_mean, scale=sample_std) context_feature_type = env_bounds[context_feature_name][2] sample_dists[context_feature_name] = (random_variable, context_feature_type) @@ -190,30 +156,27 @@ def sample_contexts( random_variable = sample_dists[k][0] context_feature_type = sample_dists[k][1] lower_bound, upper_bound = env_bounds[k][0], env_bounds[k][1] - assert lower_bound <= upper_bound, f"context variable {k}: lower bound [{lower_bound}] is higher than upper bound [{upper_bound}]!" if context_feature_type == list: length = np.random.randint( 500000 ) # TODO should we allow lists to be this long? or should we parametrize this? 
- arg_class = sample_dists[k][1][1] # type: ignore [index] - context_list = random_variable.rvs(size=length, random_state=rng) + arg_class = sample_dists[k][1][1] + context_list = random_variable.rvs(size=length) context_list = np.clip(context_list, lower_bound, upper_bound) - c[k] = [arg_class(c) for c in context_list] # type: ignore [operator] + c[k] = [arg_class(c) for c in context_list] elif context_feature_type == "categorical": - choices = env_bounds[k][3] # type: ignore [misc] - choice = rng.choice(choices) + choices = env_bounds[k][3] + choice = np.random.choice(choices) c[k] = choice elif context_feature_type == "conditional": - condition = env_bounds[k][4] # type: ignore [misc] - choices = env_bounds[k][3][condition] # type: ignore [misc] - choice = rng.choice(choices) + condition = env_bounds[k][4] + choices = env_bounds[k][3][condition] + choice = np.random.choice(choices) c[k] = choice else: - c[k] = random_variable.rvs(size=1, random_state=rng)[ - 0 - ] # sample variable + c[k] = random_variable.rvs(size=1)[0] # sample variable c[k] = np.clip(c[k], lower_bound, upper_bound) # check bounds - c[k] = context_feature_type(c[k]) # type: ignore [operator] # cast to given type + c[k] = context_feature_type(c[k]) # cast to given type else: # No special sampling rule for context feature k, use the default context feature value c[k] = env_defaults[k] diff --git a/carl/context/selection.py b/carl/context/selection.py index d66857bd..7be40f35 100644 --- a/carl/context/selection.py +++ b/carl/context/selection.py @@ -88,7 +88,7 @@ def context_key(self) -> Any | None: Any | None The key of the current context or None """ - if self.context_id is not None: + if self.context_id: key = self.contexts_keys[self.context_id] else: key = None diff --git a/carl/envs/box2d/__init__.py b/carl/envs/box2d/__init__.py index 6f727873..ad6a3424 100644 --- a/carl/envs/box2d/__init__.py +++ b/carl/envs/box2d/__init__.py @@ -1,42 +1,22 @@ # flake8: noqa: F401 +from 
carl.envs.box2d.carl_bipedal_walker import ( + CONTEXT_BOUNDS as CARLBipedalWalkerEnv_bounds, +) +from carl.envs.box2d.carl_bipedal_walker import ( + DEFAULT_CONTEXT as CARLBipedalWalkerEnv_defaults, +) +from carl.envs.box2d.carl_bipedal_walker import CARLBipedalWalkerEnv # Contextenvs.s and bounds by name -from functools import partial -import warnings - -import gym -from carl.envs.box2d.carl_lunarlander import CARLLunarLanderEnv +from carl.envs.box2d.carl_lunarlander import CONTEXT_BOUNDS as CARLLunarLanderEnv_bounds from carl.envs.box2d.carl_lunarlander import ( DEFAULT_CONTEXT as CARLLunarLanderEnv_defaults, ) +from carl.envs.box2d.carl_lunarlander import CARLLunarLanderEnv from carl.envs.box2d.carl_vehicle_racing import ( CONTEXT_BOUNDS as CARLVehicleRacingEnv_bounds, ) - -from carl.envs.box2d.carl_vehicle_racing import CARLVehicleRacingEnv from carl.envs.box2d.carl_vehicle_racing import ( DEFAULT_CONTEXT as CARLVehicleRacingEnv_defaults, ) -from carl.envs.box2d.carl_vehicle_racing import ( - CONTEXT_BOUNDS as CARLVehicleRacingEnv_bounds, -) - -from carl.envs.box2d.carl_bipedal_walker import CARLBipedalWalkerEnv -from carl.envs.box2d.carl_bipedal_walker import ( - DEFAULT_CONTEXT as CARLBipedalWalkerEnv_defaults, -) -from carl.envs.box2d.carl_bipedal_walker import ( - CONTEXT_BOUNDS as CARLBipedalWalkerEnv_bounds, -) - -try: - from carl.envs.box2d.carl_bipedal_walker import CARLBipedalWalkerEnv - from gym.envs.registration import register - - def make_env(**kwargs): - return CARLBipedalWalkerEnv(**kwargs) - register("CARLBipedalWalkerEnv-v0", entry_point=make_env) - register("CARLBipedalWalkerHardcoreEnv-v0", entry_point=partial(make_env, env=gym.make("BipedalWalkerHardcore-v3"))) -except Exception as e: - warnings.warn( - f"Could not load CARLMarioEnv which is probably not installed ({e}).") +from carl.envs.box2d.carl_vehicle_racing import CARLVehicleRacingEnv diff --git a/carl/envs/box2d/carl_bipedal_walker.py b/carl/envs/box2d/carl_bipedal_walker.py index 
bf54b776..33e66453 100644 --- a/carl/envs/box2d/carl_bipedal_walker.py +++ b/carl/envs/box2d/carl_bipedal_walker.py @@ -2,7 +2,6 @@ import numpy as np from Box2D.b2 import edgeShape, fixtureDef, polygonShape -import gym from gym.envs.box2d import bipedal_walker from gym.envs.box2d import bipedal_walker as bpw @@ -106,8 +105,7 @@ def __init__( instance_mode: str, optional """ if env is None: - # env = bipedal_walker.BipedalWalker() - env = gym.make(id="BipedalWalker-v3") + env = bipedal_walker.BipedalWalker() if not contexts: contexts = {0: DEFAULT_CONTEXT} super().__init__( diff --git a/carl/envs/brax/__init__.py b/carl/envs/brax/__init__.py index e381dadd..eee221fb 100644 --- a/carl/envs/brax/__init__.py +++ b/carl/envs/brax/__init__.py @@ -1,23 +1,20 @@ # flake8: noqa: F401 # Contexts and bounds by name -from carl.envs.braxenvs.carl_ant import CONTEXT_BOUNDS as CARLAnt_bounds -from carl.envs.braxenvs.carl_ant import DEFAULT_CONTEXT as CARLAnt_defaults -from carl.envs.braxenvs.carl_ant import CARLAnt -from carl.envs.braxenvs.carl_halfcheetah import CONTEXT_BOUNDS as CARLHalfcheetah_bounds -from carl.envs.braxenvs.carl_halfcheetah import DEFAULT_CONTEXT as CARLHalfcheetah_defaults -from carl.envs.braxenvs.carl_halfcheetah import CARLHalfcheetah -from carl.envs.braxenvs.carl_humanoid import CONTEXT_BOUNDS as CARLHumanoid_bounds -from carl.envs.braxenvs.carl_humanoid import DEFAULT_CONTEXT as CARLHumanoid_defaults -from carl.envs.braxenvs.carl_humanoid import CARLHumanoid -from carl.envs.braxenvs.carl_hopper import CONTEXT_BOUNDS as CARLHopper_bounds -from carl.envs.braxenvs.carl_hopper import DEFAULT_CONTEXT as CARLHopper_defaults -from carl.envs.braxenvs.carl_hopper import CARLHopper -from carl.envs.braxenvs.carl_reacher import CONTEXT_BOUNDS as CARLReacher_bounds -from carl.envs.braxenvs.carl_reacher import DEFAULT_CONTEXT as CARLReacher_defaults -from carl.envs.braxenvs.carl_reacher import CARLReacher -from carl.envs.braxenvs.carl_pusher import CONTEXT_BOUNDS as 
CARLPusher_bounds -from carl.envs.braxenvs.carl_pusher import DEFAULT_CONTEXT as CARLPusher_defaults -from carl.envs.braxenvs.carl_pusher import CARLPusher -from carl.envs.braxenvs.carl_double_pendulum import CONTEXT_BOUNDS as CARLInvertedDoublePendulum_bounds -from carl.envs.braxenvs.carl_double_pendulum import DEFAULT_CONTEXT as CARLInvertedDoublePendulum_defaults -from carl.envs.braxenvs.carl_double_pendulum import CARLInvertedDoublePendulum +from carl.envs.brax.carl_ant import CONTEXT_BOUNDS as CARLAnt_bounds +from carl.envs.brax.carl_ant import DEFAULT_CONTEXT as CARLAnt_defaults +from carl.envs.brax.carl_ant import CARLAnt +from carl.envs.brax.carl_fetch import CONTEXT_BOUNDS as CARLFetch_bounds +from carl.envs.brax.carl_fetch import DEFAULT_CONTEXT as CARLFetch_defaults +from carl.envs.brax.carl_fetch import CARLFetch +from carl.envs.brax.carl_grasp import CONTEXT_BOUNDS as CARLGrasp_bounds +from carl.envs.brax.carl_grasp import DEFAULT_CONTEXT as CARLGrasp_defaults +from carl.envs.brax.carl_grasp import CARLGrasp +from carl.envs.brax.carl_halfcheetah import CONTEXT_BOUNDS as CARLHalfcheetah_bounds +from carl.envs.brax.carl_halfcheetah import DEFAULT_CONTEXT as CARLHalfcheetah_defaults +from carl.envs.brax.carl_halfcheetah import CARLHalfcheetah +from carl.envs.brax.carl_humanoid import CONTEXT_BOUNDS as CARLHumanoid_bounds +from carl.envs.brax.carl_humanoid import DEFAULT_CONTEXT as CARLHumanoid_defaults +from carl.envs.brax.carl_humanoid import CARLHumanoid +from carl.envs.brax.carl_ur5e import CONTEXT_BOUNDS as CARLUr5e_bounds +from carl.envs.brax.carl_ur5e import DEFAULT_CONTEXT as CARLUr5e_defaults +from carl.envs.brax.carl_ur5e import CARLUr5e diff --git a/carl/envs/brax/brax_wrappers.py b/carl/envs/brax/brax_wrappers.py deleted file mode 100644 index 1013caf7..00000000 --- a/carl/envs/brax/brax_wrappers.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright 2023 The Brax Authors. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Wrappers to convert brax envs to gym envs.""" -from typing import ClassVar, Optional - -from brax.envs import Env -import gym -from gym import spaces -from gym.vector import utils -import jax -import numpy as np -from functools import partial - - -class GymWrapper(gym.Env): - """A wrapper that converts Brax Env to one that follows Gym API.""" - - # Flag that prevents `gym.register` from misinterpreting the `_step` and - # `_reset` as signs of a deprecated gym Env API. 
- _gym_disable_underscore_compat: ClassVar[bool] = True - - def __init__(self, - env: Env, - seed: int = 0, - backend: Optional[str] = None): - self._env = env - self.metadata = { - 'render.modes': ['human', 'rgb_array'], - 'video.frames_per_second': 1 / self._env.dt - } - self.seed(seed) - self.backend = backend - self._state = None - - obs = np.inf * np.ones(self._env.observation_size, dtype='float32') - self.observation_space = spaces.Box(-obs, obs, dtype='float32') - - action = np.ones(self._env.action_size, dtype='float32') - self.action_space = spaces.Box(-action, action, dtype='float32') - - def reset(key): - key1, key2 = jax.random.split(key) - state = self._env.reset(key2) - return state, state.obs, key1 - - self._reset = partial(reset) - - def step(state, action): - state = self._env.step(state, action) - info = {**state.metrics, **state.info} - return state, state.obs, state.reward, state.done, info - - self._step = partial(step) - - def reset(self): - self._state, obs, self._key = self._reset(self._key) - # We return device arrays for pytorch users. - return obs - - def step(self, action): - self._state, obs, reward, done, info = self._step(self._state, action) - # We return device arrays for pytorch users. - return obs, reward, done, info - - def seed(self, seed: int = 0): - self._key = jax.random.PRNGKey(seed) - - def render(self, mode='human'): - return super().render(mode=mode) # just raise an exception - - -class VectorGymWrapper(gym.vector.VectorEnv): - """A wrapper that converts batched Brax Env to one that follows Gym VectorEnv API.""" - - # Flag that prevents `gym.register` from misinterpreting the `_step` and - # `_reset` as signs of a deprecated gym Env API. 
- _gym_disable_underscore_compat: ClassVar[bool] = True - - def __init__(self, - env: Env, - seed: int = 0, - backend: Optional[str] = None): - self._env = env - self.metadata = { - 'render.modes': ['human', 'rgb_array'], - 'video.frames_per_second': 1 / self._env.dt - } - if not hasattr(self._env, 'batch_size'): - raise ValueError('underlying env must be batched') - - self.num_envs = self._env.batch_size - self.seed(seed) - self.backend = backend - self._state = None - - obs = np.inf * np.ones(self._env.observation_size, dtype='float32') - obs_space = spaces.Box(-obs, obs, dtype='float32') - self.observation_space = utils.batch_space(obs_space, self.num_envs) - - action = np.ones(self._env.action_size, dtype='float32') - action_space = spaces.Box(-action, action, dtype='float32') - self.action_space = utils.batch_space(action_space, self.num_envs) - - def reset(key): - key1, key2 = jax.random.split(key) - state = self._env.reset(key2) - return state, state.obs, key1 - - self._reset = partial(reset) - - def step(state, action): - state = self._env.step(state, action) - info = {**state.metrics, **state.info} - return state, state.obs, state.reward, state.done, info - - self._step = partial(step) - - def reset(self): - self._state, obs, self._key = self._reset(self._key) - return obs - - def step(self, action): - self._state, obs, reward, done, info = self._step(self._state, action) - return obs, reward, done, info - - def seed(self, seed: int = 0): - self._key = jax.random.PRNGKey(seed) - - def render(self, mode='human'): - if mode == 'rgb_array': - sys, state = self._env.sys, self._state - if state is None: - raise RuntimeError('must call reset or step before rendering') - return image.render_array(sys, state.state.take(0), 256, 256) - else: - return super().render(mode=mode) # just raise an exception diff --git a/carl/envs/brax/carl_ant.py b/carl/envs/brax/carl_ant.py index 001c4b3a..c53fd64f 100644 --- a/carl/envs/brax/carl_ant.py +++ b/carl/envs/brax/carl_ant.py 
@@ -1,103 +1,113 @@ -from __future__ import annotations +from typing import Any, Dict, List, Optional, Union -import numpy as np -import jax.numpy as jnp -from brax.envs.ant import Ant +import copy +import json +import brax +import numpy as np +from brax.envs.ant import _SYSTEM_CONFIG, Ant +from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper +from google.protobuf import json_format, text_format +from google.protobuf.json_format import MessageToDict +from numpyencoder import NumpyEncoder from carl.context.selection import AbstractSelector -# from carl.envs.carl_brax_env import CARLBraxEnv +from carl.envs.carl_env import CARLEnv +from carl.utils.trial_logger import TrialLogger from carl.utils.types import Context, Contexts -from carl.envs.brax.carl_brax_env import CARLBraxEnv DEFAULT_CONTEXT = { - "stiffness_factor": 1, - "gravity": -9.81, - "friction": 1, - "damping_factor": 1, - "actuator_strength_factor": 1, + "joint_stiffness": 5000, + "gravity": -9.8, + "friction": 0.6, + "angular_damping": -0.05, + "actuator_strength": 300, + "joint_angular_damping": 35, "torso_mass": 10, - "dt": 0.01 } CONTEXT_BOUNDS = { - "stiffness_factor": (0, np.inf, float), + "joint_stiffness": (1, np.inf, float), "gravity": (-np.inf, -0.1, float), "friction": (-np.inf, np.inf, float), - "damping_factor": (-np.inf, np.inf, float), - "actuator_strength_factor": (1, np.inf, float), + "angular_damping": (-np.inf, np.inf, float), + "actuator_strength": (1, np.inf, float), + "joint_angular_damping": (0, np.inf, float), "torso_mass": (0.1, np.inf, float), - "dt": (0.0001, 0.03, float), } +class CARLAnt(CARLEnv): + def __init__( + self, + env: Ant = Ant(), + n_envs: int = 1, + contexts: Contexts = {}, + hide_context: bool = False, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.01, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Context] = DEFAULT_CONTEXT, + 
state_context_features: Optional[List[str]] = None, + context_mask: Optional[List[str]] = None, + dict_observation_space: bool = False, + context_selector: Optional[ + Union[AbstractSelector, type[AbstractSelector]] + ] = None, + context_selector_kwargs: Optional[Dict] = None, + ): + if n_envs == 1: + env = GymWrapper(env) + else: + env = VectorGymWrapper(VectorWrapper(env, n_envs)) -class CARLAnt(CARLBraxEnv): - env_name: str = "ant" - DEFAULT_CONTEXT: Context = DEFAULT_CONTEXT + self.base_config = MessageToDict( + text_format.Parse(_SYSTEM_CONFIG, brax.Config()) + ) + if not contexts: + contexts = {0: DEFAULT_CONTEXT} + super().__init__( + env=env, + n_envs=n_envs, + contexts=contexts, + hide_context=hide_context, + add_gaussian_noise_to_context=add_gaussian_noise_to_context, + gaussian_noise_std_percentage=gaussian_noise_std_percentage, + logger=logger, + scale_context_features=scale_context_features, + default_context=default_context, + state_context_features=state_context_features, + dict_observation_space=dict_observation_space, + context_selector=context_selector, + context_selector_kwargs=context_selector_kwargs, + context_mask=context_mask, + ) + self.whitelist_gaussian_noise = list( + DEFAULT_CONTEXT.keys() + ) # allow to augment all values def _update_context(self) -> None: self.env: Ant - config = {} - config["gravity"] = jnp.array([0, 0, self.context["gravity"]]) - config["dt"] = jnp.array(self.context["dt"]) - new_mass = self.env._env.sys.link.inertia.mass.at[0].set(self.context["torso_mass"]) - # TODO: do we want to implement this? 
- #new_com = self.env.sys.link.inertia.transform - #new_inertia = self.env.sys.link.inertia.i - inertia = self.env._env.sys.link.inertia.replace(mass=new_mass) - config["link"] = self.env._env.sys.link.replace(inertia=inertia) - new_stiffness = self.context["stiffness_factor"]*self.env._env.sys.dof.stiffness - new_damping = self.context["damping_factor"]*self.env._env.sys.dof.damping - config["dof"] = self.env._env.sys.dof.replace(stiffness=new_stiffness, damping=new_damping) - new_gear = self.context["actuator_strength_factor"]*self.env._env.sys.actuator.gear - config["actuator"] = self.env._env.sys.actuator.replace(gear=new_gear) - geoms = self.env._env.sys.geoms - geoms[0] = geoms[0].replace(friction=jnp.array([self.context["friction"]])) - config["geoms"] = geoms - self.env._env.sys = self.env._env.sys.replace(**config) - - -# # NOTE: this is not up to date! -# class CARLBraxAnt(CARLBraxEnv): -# def __init__( -# self, -# env: Ant = Ant(), -# contexts: Contexts = {}, -# state_context_features: list[str] | None = None, -# dict_observation: bool = False, -# context_selector: AbstractSelector | type[AbstractSelector] | None = None, -# context_selector_kwargs: dict = None, -# ): -# super().__init__( -# env=env, -# contexts=contexts, -# state_context_features=state_context_features, -# dict_observation=dict_observation, -# context_selector=context_selector, -# context_selector_kwargs=context_selector_kwargs -# ) - -# #self.base_config = MessageToDict( -# # text_format.Parse(_SYSTEM_CONFIG_SPRING, brax.Config()) -# #) + config = copy.deepcopy(self.base_config) + config["gravity"] = {"z": self.context["gravity"]} + config["friction"] = self.context["friction"] + config["angularDamping"] = self.context["angular_damping"] + for j in range(len(config["joints"])): + config["joints"][j]["angularDamping"] = self.context[ + "joint_angular_damping" + ] + config["joints"][j]["stiffness"] = self.context["joint_stiffness"] + for a in range(len(config["actuators"])): + 
config["actuators"][a]["strength"] = self.context["actuator_strength"] + config["bodies"][0]["mass"] = self.context["torso_mass"] + # This converts the dict to a JSON String, then parses it into an empty brax config + self.env.sys = brax.System( + json_format.Parse(json.dumps(config, cls=NumpyEncoder), brax.Config()) + ) -# def _update_context(self) -> None: -# #self.env: Ant -# config = {}#copy.deepcopy(self.base_config) -# config["gravity"] = jnp.array([0, 0, self.context["gravity"]]) -# #config["friction"] = self.context["friction"] -# config["dt"] = self.context["dt"] -# #for j in range(len(config["joints"])): -# # config["joints"][j]["angularDamping"] = self.context[ -# # "joint_angular_damping" -# # ] -# # config["joints"][j]["stiffness"] = self.context["joint_stiffness"] -# #for a in range(len(config["actuators"])): -# # config["actuators"][a]["strength"] = self.context["actuator_strength"] -# #config["bodies"][0]["mass"] = self.context["torso_mass"] -# # This converts the dict to a JSON String, then parses it into an empty brax config -# #self.env.sys = brax.System( -# # json_format.Parse(json.dumps(config), brax.Config()) -# #) -# self.env.sys = self.env.sys.replace(**config) \ No newline at end of file + def __getattr__(self, name: str) -> Any: + if name in ["sys", "__getstate__"]: + return getattr(self.env._environment, name) + else: + return getattr(self, name) diff --git a/carl/envs/brax/carl_brax_env.py b/carl/envs/brax/carl_brax_env.py deleted file mode 100644 index e266d6c1..00000000 --- a/carl/envs/brax/carl_brax_env.py +++ /dev/null @@ -1,82 +0,0 @@ -from __future__ import annotations - -from typing import Any, Dict, List, Optional, Union - -from brax.envs import create -from carl.envs.brax.brax_wrappers import GymWrapper, VectorGymWrapper - -from carl.context.selection import AbstractSelector -from carl.envs.carl_env import CARLEnv -from carl.utils.trial_logger import TrialLogger -from carl.utils.types import Context, Contexts - - -class 
CARLBraxEnv(CARLEnv): - env_name: str - DEFAULT_CONTEXT: Context - - def __init__( - self, - env=None, - n_envs: int = 1, - contexts: Contexts = {}, - hide_context: bool = True, - add_gaussian_noise_to_context: bool = False, - gaussian_noise_std_percentage: float = 0.01, - logger: Optional[TrialLogger] = None, - scale_context_features: str = "no", - default_context: Optional[Context] = None, - state_context_features: Optional[List[str]] = None, - context_mask: Optional[List[str]] = None, - dict_observation_space: bool = False, - context_selector: Optional[ - Union[AbstractSelector, type[AbstractSelector]] - ] = None, - context_selector_kwargs: Optional[Dict] = None, - max_episode_length = 1000, - ): - if env is None: - batch_size = None if n_envs == 1 else n_envs # TODO check if batched env works with concat state - env = create(self.env_name, batch_size=batch_size) - - self.n_envs=n_envs - if n_envs == 1: - env = GymWrapper(env) - else: - env = VectorGymWrapper(env, n_envs) - - if not contexts: - contexts = {0: self.DEFAULT_CONTEXT} - if not default_context: - default_context = self.DEFAULT_CONTEXT - super().__init__( - env=env, - n_envs=n_envs, - contexts=contexts, - hide_context=hide_context, - add_gaussian_noise_to_context=add_gaussian_noise_to_context, - gaussian_noise_std_percentage=gaussian_noise_std_percentage, - logger=logger, - scale_context_features=scale_context_features, - default_context=default_context, - state_context_features=state_context_features, - dict_observation_space=dict_observation_space, - context_selector=context_selector, - context_selector_kwargs=context_selector_kwargs, - context_mask=context_mask, - max_episode_length=max_episode_length, - ) - self.whitelist_gaussian_noise = list( - self.DEFAULT_CONTEXT.keys() - ) # allow to augment all values - - def _update_context(self) -> None: - raise NotImplementedError - - def __getattr__(self, name: str) -> Any: - if name in ["sys", "__getstate__"]: - return getattr(self.env._environment, 
name) - else: - return getattr(self, name) - - diff --git a/carl/envs/brax/carl_double_pendulum.py b/carl/envs/brax/carl_double_pendulum.py deleted file mode 100644 index 3290307d..00000000 --- a/carl/envs/brax/carl_double_pendulum.py +++ /dev/null @@ -1,68 +0,0 @@ -from typing import Any, Dict, List, Optional, Union - -import numpy as np -import jax.numpy as jnp -from brax.envs import create -from brax.envs.inverted_double_pendulum import InvertedDoublePendulum -from carl.envs.brax.brax_wrappers import GymWrapper, VectorGymWrapper - -from carl.context.selection import AbstractSelector -from carl.envs.carl_env import CARLEnv -from carl.utils.trial_logger import TrialLogger -from carl.utils.types import Context, Contexts -from carl.envs.brax.carl_brax_env import CARLBraxEnv - -DEFAULT_CONTEXT = { - "stiffness_factor": 1, - "gravity_x": 1e-5, - "gravity_z": -9.81, - "friction": 0.8, - "damping_factor": 1, - "actuator_strength_factor": 1, - "mass_cart": 10.5, - "mass_pole_0": 4.2, - "mass_pole_1": 4.2, - "dt": 0.01 -} - -CONTEXT_BOUNDS = { - "stiffness_factor": (0, np.inf, float), - "gravity_x": (-np.inf, np.inf, float), - "gravity_z": (-np.inf, -0.1, float), - "friction": (-np.inf, np.inf, float), - "damping_factor": (-np.inf, np.inf, float), - "actuator_strength_factor": (1, np.inf, float), - "mass_cart": (0.1, np.inf, float), - "mass_pole_0": (0.1, np.inf, float), - "mass_pole_1": (0.1, np.inf, float), - "dt": (0.0001, 0.03, float), -} - - - -class CARLInvertedDoublePendulum(CARLBraxEnv): - env_name: str = "inverted_double_pendulum" - DEFAULT_CONTEXT: Context = DEFAULT_CONTEXT - - def _update_context(self) -> None: - self.env: InvertedDoublePendulum - config = {} - config["gravity"] = jnp.array([self.context["gravity_x"], 0, self.context["gravity_z"]]) - config["dt"] = jnp.array(self.context["dt"]) - new_mass = self.env._env.sys.link.inertia.mass.at[0].set(self.context["mass_cart"]) - new_mass = new_mass.at[1].set(self.context["mass_pole_0"]) - new_mass = 
new_mass.at[2].set(self.context["mass_pole_1"]) - # TODO: do we want to implement this? - #new_com = self.env.sys.link.inertia.transform - #new_inertia = self.env.sys.link.inertia.i - inertia = self.env._env.sys.link.inertia.replace(mass=new_mass) - config["link"] = self.env._env.sys.link.replace(inertia=inertia) - new_stiffness = self.context["stiffness_factor"]*self.env._env.sys.dof.stiffness - new_damping = self.context["damping_factor"]*self.env._env.sys.dof.damping - config["dof"] = self.env._env.sys.dof.replace(stiffness=new_stiffness, damping=new_damping) - new_gear = self.context["actuator_strength_factor"]*self.env._env.sys.actuator.gear - config["actuator"] = self.env._env.sys.actuator.replace(gear=new_gear) - geoms = self.env._env.sys.geoms - geoms[0] = geoms[0].replace(friction=jnp.array([self.context["friction"]])) - config["geoms"] = geoms - self.env._env.sys = self.env._env.sys.replace(**config) diff --git a/carl/envs/brax/carl_fetch.py b/carl/envs/brax/carl_fetch.py new file mode 100644 index 00000000..272e6481 --- /dev/null +++ b/carl/envs/brax/carl_fetch.py @@ -0,0 +1,127 @@ +from typing import Any, Dict, List, Optional, Union + +import copy +import json + +import brax +import numpy as np +from brax.envs.fetch import _SYSTEM_CONFIG, Fetch +from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper +from google.protobuf import json_format, text_format +from google.protobuf.json_format import MessageToDict +from numpyencoder import NumpyEncoder + +from carl.context.selection import AbstractSelector +from carl.envs.carl_env import CARLEnv +from carl.utils.trial_logger import TrialLogger +from carl.utils.types import Context, Contexts + +DEFAULT_CONTEXT = { + "joint_stiffness": 5000, + "gravity": -9.8, + "friction": 0.6, + "angular_damping": -0.05, # Angular velocity damping applied to each body + "actuator_strength": 300, + "joint_angular_damping": 35, # Damps parent and child angular velocities to be equal + "torso_mass": 1, + 
"target_radius": 2, + "target_distance": 15, +} + +CONTEXT_BOUNDS = { + "joint_stiffness": (1, np.inf, float), + "gravity": (-np.inf, -0.1, float), + "friction": (-np.inf, np.inf, float), + "angular_damping": (-np.inf, np.inf, float), + "actuator_strength": (1, np.inf, float), + "joint_angular_damping": (0, np.inf, float), + "torso_mass": (0.1, np.inf, float), + "target_radius": (0.1, np.inf, float), + "target_distance": (0.1, np.inf, float), +} + + +class CARLFetch(CARLEnv): + def __init__( + self, + env: Fetch = Fetch(), + n_envs: int = 1, + contexts: Contexts = {}, + hide_context: bool = False, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.01, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Context] = DEFAULT_CONTEXT, + state_context_features: Optional[List[str]] = None, + context_mask: Optional[List[str]] = None, + dict_observation_space: bool = False, + context_selector: Optional[ + Union[AbstractSelector, type[AbstractSelector]] + ] = None, + context_selector_kwargs: Optional[Dict] = None, + ): + if n_envs == 1: + env = GymWrapper(env) + else: + env = VectorGymWrapper(VectorWrapper(env, n_envs)) + + self.base_config = MessageToDict( + text_format.Parse(_SYSTEM_CONFIG, brax.Config()) + ) + if not contexts: + contexts = {0: DEFAULT_CONTEXT} + super().__init__( + env=env, + n_envs=n_envs, + contexts=contexts, + hide_context=hide_context, + add_gaussian_noise_to_context=add_gaussian_noise_to_context, + gaussian_noise_std_percentage=gaussian_noise_std_percentage, + logger=logger, + scale_context_features=scale_context_features, + default_context=default_context, + state_context_features=state_context_features, + dict_observation_space=dict_observation_space, + context_selector=context_selector, + context_selector_kwargs=context_selector_kwargs, + context_mask=context_mask, + ) + self.whitelist_gaussian_noise = list( + DEFAULT_CONTEXT.keys() + ) # allow to augment 
all values + + def _update_context(self) -> None: + self.env: Fetch + config = copy.deepcopy(self.base_config) + config["gravity"] = {"z": self.context["gravity"]} + config["friction"] = self.context["friction"] + config["angularDamping"] = self.context["angular_damping"] + for j in range(len(config["joints"])): + config["joints"][j]["angularDamping"] = self.context[ + "joint_angular_damping" + ] + config["joints"][j]["stiffness"] = self.context["joint_stiffness"] + for a in range(len(config["actuators"])): + config["actuators"][a]["strength"] = self.context["actuator_strength"] + config["bodies"][0]["mass"] = self.context["torso_mass"] + # This converts the dict to a JSON String, then parses it into an empty brax config + self.env.sys = brax.System( + json_format.Parse(json.dumps(config, cls=NumpyEncoder), brax.Config()) + ) + self.env.target_idx = self.env.sys.body.index["Target"] + self.env.torso_idx = self.env.sys.body.index["Torso"] + self.env.target_radius = self.context["target_radius"] + self.env.target_distance = self.context["target_distance"] + + def __getattr__(self, name: str) -> Any: + if name in [ + "sys", + "target_distance", + "target_radius", + "target_idx", + "torso_idx", + ]: + return getattr(self.env._environment, name) + else: + return getattr(self, name) diff --git a/carl/envs/brax/carl_grasp.py b/carl/envs/brax/carl_grasp.py new file mode 100644 index 00000000..f7795a42 --- /dev/null +++ b/carl/envs/brax/carl_grasp.py @@ -0,0 +1,132 @@ +from typing import Any, Dict, List, Optional, Union + +import copy +import json + +import brax +import numpy as np +from brax.envs.grasp import _SYSTEM_CONFIG, Grasp +from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper +from google.protobuf import json_format, text_format +from google.protobuf.json_format import MessageToDict +from numpyencoder import NumpyEncoder + +from carl.context.selection import AbstractSelector +from carl.envs.carl_env import CARLEnv +from carl.utils.trial_logger 
import TrialLogger +from carl.utils.types import Context, Contexts + +DEFAULT_CONTEXT = { + "joint_stiffness": 5000, + "gravity": -9.8, + "friction": 0.6, + "angular_damping": -0.05, + "actuator_strength": 300, + "joint_angular_damping": 50, + "target_radius": 1.1, + "target_distance": 10.0, + "target_height": 8.0, +} + +CONTEXT_BOUNDS = { + "joint_stiffness": (1, np.inf, float), + "gravity": (-np.inf, -0.1, float), + "friction": (-np.inf, np.inf, float), + "angular_damping": (-np.inf, np.inf, float), + "actuator_strength": (1, np.inf, float), + "joint_angular_damping": (0, np.inf, float), + "target_radius": (0.1, np.inf, float), + "target_distance": (0.1, np.inf, float), + "target_height": (0.1, np.inf, float), +} + + +class CARLGrasp(CARLEnv): + def __init__( + self, + env: Grasp = Grasp(), + n_envs: int = 1, + contexts: Contexts = {}, + hide_context: bool = False, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.01, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Context] = DEFAULT_CONTEXT, + state_context_features: Optional[List[str]] = None, + context_mask: Optional[List[str]] = None, + dict_observation_space: bool = False, + context_selector: Optional[ + Union[AbstractSelector, type[AbstractSelector]] + ] = None, + context_selector_kwargs: Optional[Dict] = None, + ): + if n_envs == 1: + env = GymWrapper(env) + else: + env = VectorGymWrapper(VectorWrapper(env, n_envs)) + + self.base_config = MessageToDict( + text_format.Parse(_SYSTEM_CONFIG, brax.Config()) + ) + if not contexts: + contexts = {0: DEFAULT_CONTEXT} + super().__init__( + env=env, + n_envs=n_envs, + contexts=contexts, + hide_context=hide_context, + add_gaussian_noise_to_context=add_gaussian_noise_to_context, + gaussian_noise_std_percentage=gaussian_noise_std_percentage, + logger=logger, + scale_context_features=scale_context_features, + default_context=default_context, + 
state_context_features=state_context_features, + dict_observation_space=dict_observation_space, + context_selector=context_selector, + context_selector_kwargs=context_selector_kwargs, + context_mask=context_mask, + ) + self.whitelist_gaussian_noise = list( + DEFAULT_CONTEXT.keys() + ) # allow to augment all values + + def _update_context(self) -> None: + self.env: Grasp + config = copy.deepcopy(self.base_config) + config["gravity"] = {"z": self.context["gravity"]} + config["friction"] = self.context["friction"] + config["angularDamping"] = self.context["angular_damping"] + for j in range(len(config["joints"])): + config["joints"][j]["angularDamping"] = self.context[ + "joint_angular_damping" + ] + config["joints"][j]["stiffness"] = self.context["joint_stiffness"] + for a in range(len(config["actuators"])): + config["actuators"][a]["strength"] = self.context["actuator_strength"] + # This converts the dict to a JSON String, then parses it into an empty brax config + self.env.sys = brax.System( + json_format.Parse(json.dumps(config, cls=NumpyEncoder), brax.Config()) + ) + self.env.object_idx = self.env.sys.body.index["Object"] + self.env.target_idx = self.env.sys.body.index["Target"] + self.env.hand_idx = self.env.sys.body.index["HandThumbProximal"] + self.env.palm_idx = self.env.sys.body.index["HandPalm"] + self.env.target_radius = self.context["target_radius"] + self.env.target_distance = self.context["target_distance"] + self.env.target_height = self.context["target_height"] + + def __getattr__(self, name: str) -> Any: + if name in [ + "sys", + "object_idx", + "target_idx", + "hand_idx", + "palm_idx", + "target_radius", + "target_distance", + "target_height", + ]: + return getattr(self.env._environment, name) + else: + return getattr(self, name) diff --git a/carl/envs/brax/carl_halfcheetah.py b/carl/envs/brax/carl_halfcheetah.py index 1cc93f8b..aa014088 100644 --- a/carl/envs/brax/carl_halfcheetah.py +++ b/carl/envs/brax/carl_halfcheetah.py @@ -1,61 +1,109 @@ from 
typing import Any, Dict, List, Optional, Union +import copy +import json + +import brax import numpy as np -import jax.numpy as jnp -from brax.envs.half_cheetah import Halfcheetah -from brax.envs import create -from carl.envs.brax.brax_wrappers import GymWrapper, VectorGymWrapper +from brax.envs.half_cheetah import _SYSTEM_CONFIG, Halfcheetah +from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper +from google.protobuf import json_format, text_format +from google.protobuf.json_format import MessageToDict +from numpyencoder import NumpyEncoder from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger from carl.utils.types import Context, Contexts -from carl.envs.brax.carl_brax_env import CARLBraxEnv DEFAULT_CONTEXT = { - "stiffness_factor": 1, - "gravity": -9.81, - "friction": 0.4, - "damping_factor": 1, - "actuator_strength_factor": 1, - "torso_mass": 6.25, - "dt": 0.01 + "joint_stiffness": 15000.0, + "gravity": -9.8, + "friction": 0.6, + "angular_damping": -0.05, + "joint_angular_damping": 20, + "torso_mass": 9.457333, } CONTEXT_BOUNDS = { - "stiffness_factor": (0, np.inf, float), + "joint_stiffness": (1, np.inf, float), "gravity": (-np.inf, -0.1, float), "friction": (-np.inf, np.inf, float), - "damping_factor": (-np.inf, np.inf, float), - "actuator_strength_factor": (1, np.inf, float), + "angular_damping": (-np.inf, np.inf, float), + "joint_angular_damping": (0, np.inf, float), "torso_mass": (0.1, np.inf, float), - "dt": (0.0001, 0.03, float), } +class CARLHalfcheetah(CARLEnv): + def __init__( + self, + env: Halfcheetah = Halfcheetah(), + n_envs: int = 1, + contexts: Contexts = {}, + hide_context: bool = False, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.01, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Context] = DEFAULT_CONTEXT, + state_context_features: 
Optional[List[str]] = None, + context_mask: Optional[List[str]] = None, + dict_observation_space: bool = False, + context_selector: Optional[ + Union[AbstractSelector, type[AbstractSelector]] + ] = None, + context_selector_kwargs: Optional[Dict] = None, + ): + if n_envs == 1: + env = GymWrapper(env) + else: + env = VectorGymWrapper(VectorWrapper(env, n_envs)) -class CARLHalfcheetah(CARLBraxEnv): - env_name: str = "halfcheetah" - DEFAULT_CONTEXT: Context = DEFAULT_CONTEXT + self.base_config = MessageToDict( + text_format.Parse(_SYSTEM_CONFIG, brax.Config()) + ) + if not contexts: + contexts = {0: DEFAULT_CONTEXT} + super().__init__( + env=env, + n_envs=n_envs, + contexts=contexts, + hide_context=hide_context, + add_gaussian_noise_to_context=add_gaussian_noise_to_context, + gaussian_noise_std_percentage=gaussian_noise_std_percentage, + logger=logger, + scale_context_features=scale_context_features, + default_context=default_context, + state_context_features=state_context_features, + dict_observation_space=dict_observation_space, + context_selector=context_selector, + context_selector_kwargs=context_selector_kwargs, + context_mask=context_mask, + ) + self.whitelist_gaussian_noise = list( + DEFAULT_CONTEXT.keys() + ) # allow to augment all values def _update_context(self) -> None: self.env: Halfcheetah - config = {} - config["gravity"] = jnp.array([0, 0, self.context["gravity"]]) - config["dt"] = jnp.array(self.context["dt"]) - new_mass = self.env._env.sys.link.inertia.mass.at[0].set(self.context["torso_mass"]) - # TODO: do we want to implement this? 
- #new_com = self.env.sys.link.inertia.transform - #new_inertia = self.env.sys.link.inertia.i - inertia = self.env._env.sys.link.inertia.replace(mass=new_mass) - config["link"] = self.env._env.sys.link.replace(inertia=inertia) - new_stiffness = self.context["stiffness_factor"]*self.env._env.sys.dof.stiffness - new_damping = self.context["damping_factor"]*self.env._env.sys.dof.damping - config["dof"] = self.env._env.sys.dof.replace(stiffness=new_stiffness, damping=new_damping) - new_gear = self.context["actuator_strength_factor"]*self.env._env.sys.actuator.gear - config["actuator"] = self.env._env.sys.actuator.replace(gear=new_gear) - geoms = self.env._env.sys.geoms - geoms[0] = geoms[0].replace(friction=jnp.array([self.context["friction"]])) - config["geoms"] = geoms - self.env._env.sys = self.env._env.sys.replace(**config) + config = copy.deepcopy(self.base_config) + config["gravity"] = {"z": self.context["gravity"]} + config["friction"] = self.context["friction"] + config["angularDamping"] = self.context["angular_damping"] + for j in range(len(config["joints"])): + config["joints"][j]["angularDamping"] = self.context[ + "joint_angular_damping" + ] + config["joints"][j]["stiffness"] = self.context["joint_stiffness"] + config["bodies"][0]["mass"] = self.context["torso_mass"] + # This converts the dict to a JSON String, then parses it into an empty brax config + self.env.sys = brax.System( + json_format.Parse(json.dumps(config, cls=NumpyEncoder), brax.Config()) + ) + def __getattr__(self, name: str) -> Any: + if name in ["sys"]: + return getattr(self.env._environment, name) + else: + return getattr(self, name) diff --git a/carl/envs/brax/carl_hopper.py b/carl/envs/brax/carl_hopper.py deleted file mode 100644 index 4fb5eaa3..00000000 --- a/carl/envs/brax/carl_hopper.py +++ /dev/null @@ -1,60 +0,0 @@ -from typing import Any, Dict, List, Optional, Union - -import numpy as np -import jax.numpy as jnp -from brax.envs.hopper import Hopper -from brax.envs import create 
-from carl.envs.brax.brax_wrappers import GymWrapper, VectorGymWrapper - -from carl.context.selection import AbstractSelector -from carl.envs.carl_env import CARLEnv -from carl.utils.trial_logger import TrialLogger -from carl.utils.types import Context, Contexts -from carl.envs.brax.carl_brax_env import CARLBraxEnv - -DEFAULT_CONTEXT = { - "stiffness_factor": 1, - "gravity": -9.81, - "friction": 1, - "damping_factor": 1, - "actuator_strength_factor": 1, - "torso_mass": 3.67, - "dt": 0.002 -} - -CONTEXT_BOUNDS = { - "stiffness_factor": (0, np.inf, float), - "gravity": (-np.inf, -0.1, float), - "friction": (-np.inf, np.inf, float), - "damping_factor": (-np.inf, np.inf, float), - "actuator_strength_factor": (1, np.inf, float), - "torso_mass": (0.1, np.inf, float), - "dt": (0.0001, 0.03, float), -} - - - -class CARLHopper(CARLBraxEnv): - env_name: str = "hopper" - DEFAULT_CONTEXT: Context = DEFAULT_CONTEXT - - def _update_context(self) -> None: - self.env: Hopper - config = {} - config["gravity"] = jnp.array([0, 0, self.context["gravity"]]) - config["dt"] = jnp.array(self.context["dt"]) - new_mass = self.env._env.sys.link.inertia.mass.at[0].set(self.context["torso_mass"]) - # TODO: do we wHopper to implement this? 
- #new_com = self.env.sys.link.inertia.transform - #new_inertia = self.env.sys.link.inertia.i - inertia = self.env._env.sys.link.inertia.replace(mass=new_mass) - config["link"] = self.env._env.sys.link.replace(inertia=inertia) - new_stiffness = self.context["stiffness_factor"]*self.env._env.sys.dof.stiffness - new_damping = self.context["damping_factor"]*self.env._env.sys.dof.damping - config["dof"] = self.env._env.sys.dof.replace(stiffness=new_stiffness, damping=new_damping) - new_gear = self.context["actuator_strength_factor"]*self.env._env.sys.actuator.gear - config["actuator"] = self.env._env.sys.actuator.replace(gear=new_gear) - geoms = self.env._env.sys.geoms - geoms[0] = geoms[0].replace(friction=jnp.array([self.context["friction"]])) - config["geoms"] = geoms - self.env._env.sys = self.env._env.sys.replace(**config) diff --git a/carl/envs/brax/carl_humanoid.py b/carl/envs/brax/carl_humanoid.py index 93fc358b..873473ca 100644 --- a/carl/envs/brax/carl_humanoid.py +++ b/carl/envs/brax/carl_humanoid.py @@ -1,59 +1,113 @@ from typing import Any, Dict, List, Optional, Union +import copy +import json + +import brax import numpy as np -import jax.numpy as jnp -from brax.envs.humanoid import Humanoid -from brax.envs import create -from carl.envs.brax.brax_wrappers import GymWrapper, VectorGymWrapper +from brax import jumpy as jp +from brax.envs.humanoid import _SYSTEM_CONFIG, Humanoid +from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper +from brax.physics import bodies +from google.protobuf import json_format, text_format +from google.protobuf.json_format import MessageToDict +from numpyencoder import NumpyEncoder from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger from carl.utils.types import Context, Contexts -from carl.envs.brax.carl_brax_env import CARLBraxEnv DEFAULT_CONTEXT = { - "stiffness_factor": 1, - "gravity": -9.81, - "friction": 1.0, - 
"damping_factor": 1, - "actuator_strength_factor": 1, - "torso_mass": 8.9, - "dt": 0.003 + "gravity": -9.8, + "friction": 0.6, + "angular_damping": -0.05, + "joint_angular_damping": 20, + "torso_mass": 8.907463, } CONTEXT_BOUNDS = { - "stiffness_factor": (0, np.inf, float), "gravity": (-np.inf, -0.1, float), "friction": (-np.inf, np.inf, float), - "damping_factor": (-np.inf, np.inf, float), - "actuator_strength_factor": (1, np.inf, float), + "angular_damping": (-np.inf, np.inf, float), + "joint_angular_damping": (0, np.inf, float), "torso_mass": (0.1, np.inf, float), - "dt": (0.0001, 0.03, float), } -class CARLHumanoid(CARLBraxEnv): - env_name: str = "humanoid" - DEFAULT_CONTEXT: Context = DEFAULT_CONTEXT +class CARLHumanoid(CARLEnv): + def __init__( + self, + env: Humanoid = Humanoid(), + n_envs: int = 1, + contexts: Contexts = {}, + hide_context: bool = False, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.01, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Context] = DEFAULT_CONTEXT, + state_context_features: Optional[List[str]] = None, + context_mask: Optional[List[str]] = None, + dict_observation_space: bool = False, + context_selector: Optional[ + Union[AbstractSelector, type[AbstractSelector]] + ] = None, + context_selector_kwargs: Optional[Dict] = None, + ): + if n_envs == 1: + env = GymWrapper(env) + else: + env = VectorGymWrapper(VectorWrapper(env, n_envs)) + + self.base_config = MessageToDict( + text_format.Parse(_SYSTEM_CONFIG, brax.Config()) + ) + if not contexts: + contexts = {0: DEFAULT_CONTEXT} + super().__init__( + env=env, + n_envs=n_envs, + contexts=contexts, + hide_context=hide_context, + add_gaussian_noise_to_context=add_gaussian_noise_to_context, + gaussian_noise_std_percentage=gaussian_noise_std_percentage, + logger=logger, + scale_context_features=scale_context_features, + default_context=default_context, + 
state_context_features=state_context_features, + dict_observation_space=dict_observation_space, + context_selector=context_selector, + context_selector_kwargs=context_selector_kwargs, + context_mask=context_mask, + ) + self.whitelist_gaussian_noise = list( + DEFAULT_CONTEXT.keys() + ) # allow to augment all values def _update_context(self) -> None: self.env: Humanoid - config = {} - config["gravity"] = jnp.array([0, 0, self.context["gravity"]]) - config["dt"] = jnp.array(self.context["dt"]) - new_mass = self.env._env.sys.link.inertia.mass.at[0].set(self.context["torso_mass"]) - # TODO: do we want to implement this? - #new_com = self.env.sys.link.inertia.transform - #new_inertia = self.env.sys.link.inertia.i - inertia = self.env._env.sys.link.inertia.replace(mass=new_mass) - config["link"] = self.env._env.sys.link.replace(inertia=inertia) - new_stiffness = self.context["stiffness_factor"]*self.env._env.sys.dof.stiffness - new_damping = self.context["damping_factor"]*self.env._env.sys.dof.damping - config["dof"] = self.env._env.sys.dof.replace(stiffness=new_stiffness, damping=new_damping) - new_gear = self.context["actuator_strength_factor"]*self.env._env.sys.actuator.gear - config["actuator"] = self.env._env.sys.actuator.replace(gear=new_gear) - geoms = self.env._env.sys.geoms - geoms[0] = geoms[0].replace(friction=jnp.array([self.context["friction"]])) - config["geoms"] = geoms - self.env._env.sys = self.env._env.sys.replace(**config) + config = copy.deepcopy(self.base_config) + config["gravity"] = {"z": self.context["gravity"]} + config["friction"] = self.context["friction"] + config["angularDamping"] = self.context["angular_damping"] + for j in range(len(config["joints"])): + config["joints"][j]["angularDamping"] = self.context[ + "joint_angular_damping" + ] + config["bodies"][0]["mass"] = self.context["torso_mass"] + # This converts the dict to a JSON String, then parses it into an empty brax config + protobuf_config = json_format.Parse( + json.dumps(config, 
cls=NumpyEncoder), brax.Config() + ) + self.env.sys = brax.System(protobuf_config) + body = bodies.Body(config=self.env.sys.config) + body = jp.take(body, body.idx[:-1]) # skip the floor body + self.env.mass = body.mass.reshape(-1, 1) + self.env.inertia = body.inertia + + def __getattr__(self, name: str) -> Any: + if name in ["sys", "body", "mass", "inertia"]: + return getattr(self.env._environment, name) + else: + return getattr(self, name) diff --git a/carl/envs/brax/carl_pusher.py b/carl/envs/brax/carl_pusher.py deleted file mode 100644 index a9cd2a34..00000000 --- a/carl/envs/brax/carl_pusher.py +++ /dev/null @@ -1,52 +0,0 @@ -from typing import Any, Dict, List, Optional, Union - -import numpy as np -import jax.numpy as jnp -from brax.envs.pusher import Pusher -from brax.envs import create -from carl.envs.brax.brax_wrappers import GymWrapper, VectorGymWrapper - -from carl.context.selection import AbstractSelector -from carl.envs.carl_env import CARLEnv -from carl.utils.trial_logger import TrialLogger -from carl.utils.types import Context, Contexts -from carl.envs.brax.carl_brax_env import CARLBraxEnv - -DEFAULT_CONTEXT = { - "stiffness_factor": 1, - "gravity": -9.81, - "friction": 0.8, - "damping_factor": 1, - "actuator_strength_factor": 1, - "dt": 0.01 -} - -CONTEXT_BOUNDS = { - "stiffness_factor": (0, np.inf, float), - "gravity": (-np.inf, -0.1, float), - "friction": (-np.inf, np.inf, float), - "damping_factor": (-np.inf, np.inf, float), - "actuator_strength_factor": (1, np.inf, float), - "dt": (0.0001, 0.03, float), -} - - - -class CARLPusher(CARLBraxEnv): - env_name: str = "pusher" - DEFAULT_CONTEXT: Context = DEFAULT_CONTEXT - - def _update_context(self) -> None: - self.env: Pusher - config = {} - config["gravity"] = jnp.array([0, 0, self.context["gravity"]]) - config["dt"] = jnp.array(self.context["dt"]) - new_stiffness = self.context["stiffness_factor"]*self.env._env.sys.dof.stiffness - new_damping = 
self.context["damping_factor"]*self.env._env.sys.dof.damping - config["dof"] = self.env._env.sys.dof.replace(stiffness=new_stiffness, damping=new_damping) - new_gear = self.context["actuator_strength_factor"]*self.env._env.sys.actuator.gear - config["actuator"] = self.env._env.sys.actuator.replace(gear=new_gear) - geoms = self.env._env.sys.geoms - geoms[0] = geoms[0].replace(friction=jnp.array([self.context["friction"]])) - config["geoms"] = geoms - self.env._env.sys = self.env._env.sys.replace(**config) diff --git a/carl/envs/brax/carl_reacher.py b/carl/envs/brax/carl_reacher.py deleted file mode 100644 index a4820b97..00000000 --- a/carl/envs/brax/carl_reacher.py +++ /dev/null @@ -1,63 +0,0 @@ -from typing import Any, Dict, List, Optional, Union - -import numpy as np -import jax.numpy as jnp -from brax.envs.reacher import Reacher -from brax.envs import create -from carl.envs.brax.brax_wrappers import GymWrapper, VectorGymWrapper - -from carl.context.selection import AbstractSelector -from carl.envs.carl_env import CARLEnv -from carl.utils.trial_logger import TrialLogger -from carl.utils.types import Context, Contexts -from carl.envs.brax.carl_brax_env import CARLBraxEnv - -DEFAULT_CONTEXT = { - "stiffness_factor": 1, - "gravity": -9.81, - "friction": 1, - "damping_factor": 1, - "actuator_strength_factor": 1, - "body_mass_0": 0.036, - "body_mass_1": 0.04, - "dt": 0.01 -} - -CONTEXT_BOUNDS = { - "stiffness_factor": (0, np.inf, float), - "gravity": (-np.inf, -0.1, float), - "friction": (-np.inf, np.inf, float), - "damping_factor": (-np.inf, np.inf, float), - "actuator_strength_factor": (1, np.inf, float), - "body_mass_0": (0.1, np.inf, float), - "body_mass_1": (0.1, np.inf, float), - "dt": (0.0001, 0.03, float), -} - - - -class CARLReacher(CARLBraxEnv): - env_name: str = "reacher" - DEFAULT_CONTEXT: Context = DEFAULT_CONTEXT - - def _update_context(self) -> None: - self.env: Reacher - config = {} - config["gravity"] = jnp.array([0, 0, self.context["gravity"]]) - 
config["dt"] = jnp.array(self.context["dt"]) - new_mass = self.env._env.sys.link.inertia.mass.at[0].set(self.context["body_mass_0"]) - new_mass = new_mass.at[1].set(self.context["body_mass_1"]) - # TODO: do we wReacher to implement this? - #new_com = self.env.sys.link.inertia.transform - #new_inertia = self.env.sys.link.inertia.i - inertia = self.env._env.sys.link.inertia.replace(mass=new_mass) - config["link"] = self.env._env.sys.link.replace(inertia=inertia) - new_stiffness = self.context["stiffness_factor"]*self.env._env.sys.dof.stiffness - new_damping = self.context["damping_factor"]*self.env._env.sys.dof.damping - config["dof"] = self.env._env.sys.dof.replace(stiffness=new_stiffness, damping=new_damping) - new_gear = self.context["actuator_strength_factor"]*self.env._env.sys.actuator.gear - config["actuator"] = self.env._env.sys.actuator.replace(gear=new_gear) - geoms = self.env._env.sys.geoms - geoms[0] = geoms[0].replace(friction=jnp.array([self.context["friction"]])) - config["geoms"] = geoms - self.env._env.sys = self.env._env.sys.replace(**config) diff --git a/carl/envs/brax/carl_ur5e.py b/carl/envs/brax/carl_ur5e.py new file mode 100644 index 00000000..02ebd518 --- /dev/null +++ b/carl/envs/brax/carl_ur5e.py @@ -0,0 +1,127 @@ +from typing import Any, Dict, List, Optional, Union + +import copy +import json + +import brax +import numpy as np +from brax.envs.ur5e import _SYSTEM_CONFIG, Ur5e +from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper +from google.protobuf import json_format, text_format +from google.protobuf.json_format import MessageToDict +from numpyencoder import NumpyEncoder + +from carl.context.selection import AbstractSelector +from carl.envs.carl_env import CARLEnv +from carl.utils.trial_logger import TrialLogger +from carl.utils.types import Context, Contexts + +DEFAULT_CONTEXT = { + "joint_stiffness": 40000, + "gravity": -9.81, + "friction": 0.6, + "angular_damping": -0.05, + "actuator_strength": 100, + 
"joint_angular_damping": 50, + "target_radius": 0.02, + "target_distance": 0.5, + "torso_mass": 1.0, +} + +CONTEXT_BOUNDS = { + "joint_stiffness": (1, np.inf, float), + "gravity": (-np.inf, -0.1, float), + "friction": (-np.inf, np.inf, float), + "angular_damping": (-np.inf, np.inf, float), + "actuator_strength": (1, np.inf, float), + "joint_angular_damping": (0, 360, float), + "target_radius": (0.01, np.inf, float), + "target_distance": (0.01, np.inf, float), + "torso_mass": (0, np.inf, float), +} + + +class CARLUr5e(CARLEnv): + def __init__( + self, + env: Ur5e = Ur5e(), + n_envs: int = 1, + contexts: Contexts = {}, + hide_context: bool = False, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.01, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Context] = DEFAULT_CONTEXT, + state_context_features: Optional[List[str]] = None, + context_mask: Optional[List[str]] = None, + dict_observation_space: bool = False, + context_selector: Optional[ + Union[AbstractSelector, type[AbstractSelector]] + ] = None, + context_selector_kwargs: Optional[Dict] = None, + ): + if n_envs == 1: + env = GymWrapper(env) + else: + env = VectorGymWrapper(VectorWrapper(env, n_envs)) + + self.base_config = MessageToDict( + text_format.Parse(_SYSTEM_CONFIG, brax.Config()) + ) + if not contexts: + contexts = {0: DEFAULT_CONTEXT} + super().__init__( + env=env, + n_envs=n_envs, + contexts=contexts, + hide_context=hide_context, + add_gaussian_noise_to_context=add_gaussian_noise_to_context, + gaussian_noise_std_percentage=gaussian_noise_std_percentage, + logger=logger, + scale_context_features=scale_context_features, + default_context=default_context, + state_context_features=state_context_features, + dict_observation_space=dict_observation_space, + context_selector=context_selector, + context_selector_kwargs=context_selector_kwargs, + context_mask=context_mask, + ) + self.whitelist_gaussian_noise = 
list( + DEFAULT_CONTEXT.keys() + ) # allow to augment all values + + def _update_context(self) -> None: + self.env: Ur5e + config = copy.deepcopy(self.base_config) + config["gravity"] = {"z": self.context["gravity"]} + config["friction"] = self.context["friction"] + config["angularDamping"] = self.context["angular_damping"] + for j in range(len(config["joints"])): + config["joints"][j]["angularDamping"] = self.context[ + "joint_angular_damping" + ] + config["joints"][j]["stiffness"] = self.context["joint_stiffness"] + for a in range(len(config["actuators"])): + config["actuators"][a]["strength"] = self.context["actuator_strength"] + config["bodies"][0]["mass"] = self.context["torso_mass"] + # This converts the dict to a JSON String, then parses it into an empty brax config + self.env.sys = brax.System( + json_format.Parse(json.dumps(config, cls=NumpyEncoder), brax.Config()) + ) + self.env.target_idx = self.env.sys.body.index["Target"] + self.env.torso_idx = self.env.sys.body.index["wrist_3_link"] + self.env.target_radius = self.context["target_radius"] + self.env.target_distance = self.context["target_distance"] + + def __getattr__(self, name: str) -> Any: + if name in [ + "sys", + "target_idx", + "torso_idx", + "target_radius", + "target_distance", + ]: + return getattr(self.env._environment, name) + else: + return getattr(self, name) diff --git a/carl/envs/carl_env.py b/carl/envs/carl_env.py index 2393143c..e0f094c1 100644 --- a/carl/envs/carl_env.py +++ b/carl/envs/carl_env.py @@ -43,7 +43,7 @@ class CARLEnv(Wrapper): contexts: Contexts Dict of contexts/instances. Key are context id, values are contexts as Dict[context feature id, context feature value]. - hide_context: bool = True + hide_context: bool = False If False, the context will be appended to the original environment's state. 
add_gaussian_noise_to_context: bool = False Wether to add Gaussian noise to the context with the relative standard deviation @@ -274,10 +274,7 @@ def reset(self, **kwargs: Dict) -> Union[ObsType, tuple[ObsType, dict]]: # type """ self.episode_counter += 1 self.step_counter = 0 - if "context_id" in kwargs.keys(): - self.context = self.contexts[kwargs["context_id"]] - else: - self._progress_instance() + self._progress_instance() self._update_context() self._log_context() return_info = kwargs.get("return_info", False) @@ -285,7 +282,6 @@ def reset(self, **kwargs: Dict) -> Union[ObsType, tuple[ObsType, dict]]: # type info_dict = dict() if return_info: state, info_dict = _ret - info_dict["context_key"] = self.context_key else: state = _ret state = self.build_context_adaptive_state(state=state) @@ -299,7 +295,7 @@ def build_context_adaptive_state( ) -> Union[Vector, Dict]: tnp: ModuleType = np if brax_spec is not None: - if type(state) == jaxlib.xla_extension.ArrayImpl: + if type(state) == jaxlib.xla_extension.DeviceArray: tnp = jnp if not self.hide_context: if context_feature_values is None: @@ -349,12 +345,7 @@ def step(self, action: Any) -> Tuple[Any, Any, bool, Dict]: """ # Step the environment - step_output = self.env.step(action) - if len(step_output) == 5: - state, reward, terminated, truncated, info = step_output - done = terminated or truncated - else: - state, reward, done, info = step_output + state, reward, done, info = self.env.step(action) if not self.hide_context: # Scale context features @@ -379,7 +370,6 @@ def step(self, action: Any) -> Tuple[Any, Any, bool, Dict]: self.step_counter += 1 if self.step_counter >= self.cutoff: done = True - info["context_key"] = self.context_key return state, reward, done, info def __getattr__(self, name: str) -> Any: @@ -498,12 +488,12 @@ def build_observation_space( else self.env.observation_space.low # type: ignore [attr-defined] ) obs_shape = obs_space.shape - if len(obs_shape) == 3 or self.hide_context: + if 
len(obs_shape) == 3 and self.hide_context: # do not touch pixel state pass else: if env_lower_bounds is None and env_upper_bounds is None: - obs_dim = obs_shape[0] if len(obs_shape) == 1 else obs_shape + obs_dim = obs_shape[0] env_lower_bounds = -np.inf * np.ones(obs_dim) env_upper_bounds = np.inf * np.ones(obs_dim) diff --git a/carl/envs/classic_control/__init__.py b/carl/envs/classic_control/__init__.py index 1c34fe42..1acb7d10 100644 --- a/carl/envs/classic_control/__init__.py +++ b/carl/envs/classic_control/__init__.py @@ -7,15 +7,13 @@ DEFAULT_CONTEXT as CARLAcrobotEnv_defaults, ) from carl.envs.classic_control.carl_acrobot import CARLAcrobotEnv - from carl.envs.classic_control.carl_cartpole import ( - DEFAULT_CONTEXT as CARLCartPoleEnv_defaults, + CONTEXT_BOUNDS as CARLCartPoleEnv_bounds, ) from carl.envs.classic_control.carl_cartpole import ( - CONTEXT_BOUNDS as CARLCartPoleEnv_bounds, + DEFAULT_CONTEXT as CARLCartPoleEnv_defaults, ) from carl.envs.classic_control.carl_cartpole import CARLCartPoleEnv - from carl.envs.classic_control.carl_mountaincar import ( CONTEXT_BOUNDS as CARLMountainCarEnv_bounds, ) @@ -23,7 +21,6 @@ DEFAULT_CONTEXT as CARLMountainCarEnv_defaults, ) from carl.envs.classic_control.carl_mountaincar import CARLMountainCarEnv - from carl.envs.classic_control.carl_mountaincarcontinuous import ( CONTEXT_BOUNDS as CARLMountainCarContinuousEnv_bounds, ) @@ -33,7 +30,6 @@ from carl.envs.classic_control.carl_mountaincarcontinuous import ( CARLMountainCarContinuousEnv, ) - from carl.envs.classic_control.carl_pendulum import ( CONTEXT_BOUNDS as CARLPendulumEnv_bounds, ) diff --git a/carl/envs/dmc/README.md b/carl/envs/dmc/README.md deleted file mode 100644 index 1ab21757..00000000 --- a/carl/envs/dmc/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# Headless Rendering -If you have problems with OpenGL, this helped: -Set this in your script -```python -os.environ['DISABLE_MUJOCO_RENDERING'] = '1' -os.environ['MUJOCO_GL'] = 'osmesa' 
-os.environ['PYOPENGL_PLATFORM'] = 'osmesa' -``` - -And set ErrorChecker to None in `OpenGL/raw/GL/_errors.py`. \ No newline at end of file diff --git a/carl/envs/dmc/__init__.py b/carl/envs/dmc/__init__.py index 2b2bef72..03225e66 100644 --- a/carl/envs/dmc/__init__.py +++ b/carl/envs/dmc/__init__.py @@ -14,7 +14,6 @@ DEFAULT_CONTEXT as CARLDmcQuadrupedEnv_defaults, ) from carl.envs.dmc.carl_dm_quadruped import CARLDmcQuadrupedEnv -from carl.envs.dmc.carl_dm_walker import CONTEXT_BOUNDS as CARLDmcWalkerEnv_bounds from carl.envs.dmc.carl_dm_walker import CONTEXT_MASK as CARLDmcWalkerEnv_mask from carl.envs.dmc.carl_dm_walker import DEFAULT_CONTEXT as CARLDmcWalkerEnv_defaults from carl.envs.dmc.carl_dm_walker import CARLDmcWalkerEnv diff --git a/carl/envs/dmc/carl_dm_finger.py b/carl/envs/dmc/carl_dm_finger.py index a30ae3a4..ed8469a5 100644 --- a/carl/envs/dmc/carl_dm_finger.py +++ b/carl/envs/dmc/carl_dm_finger.py @@ -9,7 +9,7 @@ from carl.utils.types import Context, Contexts DEFAULT_CONTEXT = { - "gravity": 9.81, # Gravity is disabled via flag + "gravity": -9.81, # Gravity is disabled via flag "friction_tangential": 1, # Scaling factor for tangential friction of all geoms (objects) "friction_torsional": 1, # Scaling factor for torsional friction of all geoms (objects) "friction_rolling": 1, # Scaling factor for rolling friction of all geoms (objects) @@ -30,7 +30,7 @@ } CONTEXT_BOUNDS = { - "gravity": (0.1, np.inf, float), + "gravity": (-np.inf, -0.1, float), "friction_tangential": (0, np.inf, float), "friction_torsional": (0, np.inf, float), "friction_rolling": (0, np.inf, float), diff --git a/carl/envs/dmc/carl_dm_fish.py b/carl/envs/dmc/carl_dm_fish.py index 973fab16..b7886baa 100644 --- a/carl/envs/dmc/carl_dm_fish.py +++ b/carl/envs/dmc/carl_dm_fish.py @@ -9,7 +9,7 @@ from carl.utils.types import Context, Contexts DEFAULT_CONTEXT = { - "gravity": 9.81, # Gravity is disabled via flag + "gravity": -9.81, # Gravity is disabled via flag "friction_tangential": 
1, # Scaling factor for tangential friction of all geoms (objects) "friction_torsional": 1, # Scaling factor for torsional friction of all geoms (objects) "friction_rolling": 1, # Scaling factor for rolling friction of all geoms (objects) @@ -26,7 +26,7 @@ } CONTEXT_BOUNDS = { - "gravity": (0.1, np.inf, float), + "gravity": (-np.inf, -0.1, float), "friction_tangential": (0, np.inf, float), "friction_torsional": (0, np.inf, float), "friction_rolling": (0, np.inf, float), diff --git a/carl/envs/dmc/carl_dm_quadruped.py b/carl/envs/dmc/carl_dm_quadruped.py index 7949b356..29554d98 100644 --- a/carl/envs/dmc/carl_dm_quadruped.py +++ b/carl/envs/dmc/carl_dm_quadruped.py @@ -9,7 +9,7 @@ from carl.utils.types import Context, Contexts DEFAULT_CONTEXT = { - "gravity": 9.81, + "gravity": -9.81, "friction_tangential": 1.0, # Scaling factor for tangential friction of all geoms (objects) "friction_torsional": 1.0, # Scaling factor for torsional friction of all geoms (objects) "friction_rolling": 1.0, # Scaling factor for rolling friction of all geoms (objects) @@ -26,7 +26,7 @@ } CONTEXT_BOUNDS = { - "gravity": (0.1, np.inf, float), + "gravity": (-np.inf, -0.1, float), "friction_tangential": (0, np.inf, float), "friction_torsional": (0, np.inf, float), "friction_rolling": (0, np.inf, float), diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py index 6b9d0662..524aa2a3 100644 --- a/carl/envs/dmc/carl_dm_walker.py +++ b/carl/envs/dmc/carl_dm_walker.py @@ -9,7 +9,7 @@ from carl.utils.types import Context, Contexts DEFAULT_CONTEXT = { - "gravity": 9.81, + "gravity": -9.81, "friction_tangential": 1.0, # Scaling factor for tangential friction of all geoms (objects) "friction_torsional": 1.0, # Scaling factor for torsional friction of all geoms (objects) "friction_rolling": 1.0, # Scaling factor for rolling friction of all geoms (objects) @@ -26,7 +26,7 @@ } CONTEXT_BOUNDS = { - "gravity": (0.1, np.inf, float), + "gravity": (-np.inf, -0.1, float), 
"friction_tangential": (0, np.inf, float), "friction_torsional": (0, np.inf, float), "friction_rolling": (0, np.inf, float), diff --git a/carl/envs/dmc/dmc_tasks/finger.py b/carl/envs/dmc/dmc_tasks/finger.py index 2c02cbb5..9bad7621 100644 --- a/carl/envs/dmc/dmc_tasks/finger.py +++ b/carl/envs/dmc/dmc_tasks/finger.py @@ -45,34 +45,25 @@ def check_constraints( limb_length_1: float, x_spinner: float = 0.2, x_finger: float = -0.2, - raise_error: bool = False, - **kwargs: Any -) -> bool: - is_okay = True +) -> None: spinner_half_length = spinner_length / 2 # Check if spinner collides with finger hinge distance_spinner_to_fingerhinge = (x_spinner - x_finger) - spinner_half_length if distance_spinner_to_fingerhinge < 0: - is_okay = False - if raise_error: - raise ValueError( - f"Distance finger to spinner ({distance_spinner_to_fingerhinge}) not big enough, " - f"spinner can't spin. Decrease spinner_length ({spinner_length})." - ) + raise ValueError( + f"Distance finger to spinner ({distance_spinner_to_fingerhinge}) not big enough, " + f"spinner can't spin. Decrease spinner_length ({spinner_length})." + ) # Check if finger can reach spinner (distance should be negative) distance_fingertip_to_spinner = (x_spinner - spinner_half_length) - ( x_finger + limb_length_0 + limb_length_1 ) if distance_fingertip_to_spinner > 0: - is_okay = False - if raise_error: - raise ValueError( - f"Finger cannot reach spinner ({distance_fingertip_to_spinner}). Increase either " - f"limb_length_0, limb_length_1 or spinner_length." - ) - - return is_okay + raise ValueError( + f"Finger cannot reach spinner ({distance_fingertip_to_spinner}). Increase either " + f"limb_length_0, limb_length_1 or spinner_length." 
+ ) def get_finger_xml_string( @@ -107,7 +98,6 @@ def get_finger_xml_string( x_spinner=x_spinner, x_finger=x_finger, spinner_length=spinner_length, - raise_error=True ) proximal_to = -limb_length_0 diff --git a/carl/envs/dmc/dmc_tasks/utils.py b/carl/envs/dmc/dmc_tasks/utils.py index f5199955..ab449618 100644 --- a/carl/envs/dmc/dmc_tasks/utils.py +++ b/carl/envs/dmc/dmc_tasks/utils.py @@ -162,9 +162,9 @@ def check_okay_to_set(context_feature: str | list[str]) -> bool: gravity = option.get("gravity") if gravity is not None: g = gravity.split(" ") - gravity = " ".join([g[0], g[1], str(-context["gravity"])]) + gravity = " ".join([g[0], g[1], str(context["gravity"])]) else: - gravity = " ".join(["0", "0", str(-context["gravity"])]) + gravity = " ".join(["0", "0", str(context["gravity"])]) option.set("gravity", gravity) if check_okay_to_set("wind"): diff --git a/carl/envs/mario/__init__.py b/carl/envs/mario/__init__.py new file mode 100644 index 00000000..c59871f8 --- /dev/null +++ b/carl/envs/mario/__init__.py @@ -0,0 +1,12 @@ +# flake8: noqa: F401 +import warnings + +try: + from carl.envs.mario.carl_mario import CARLMarioEnv +except Exception as e: + warnings.warn(f"Could not load CARLMarioEnv which is probably not installed ({e}).") + +from carl.envs.mario.carl_mario_definitions import CONTEXT_BOUNDS as CARLMarioEnv_bounds +from carl.envs.mario.carl_mario_definitions import ( + DEFAULT_CONTEXT as CARLMarioEnv_defaults, +) diff --git a/carl/envs/mario/carl_mario.py b/carl/envs/mario/carl_mario.py new file mode 100644 index 00000000..1e8e9edf --- /dev/null +++ b/carl/envs/mario/carl_mario.py @@ -0,0 +1,77 @@ +from typing import Dict, List, Optional, Union + +import gym + +from carl.context.selection import AbstractSelector +from carl.envs.carl_env import CARLEnv +from carl.envs.mario.carl_mario_definitions import ( + DEFAULT_CONTEXT, + INITIAL_HEIGHT, + INITIAL_WIDTH, +) +from carl.envs.mario.mario_env import MarioEnv +from carl.envs.mario.toad_gan import 
generate_level +from carl.utils.trial_logger import TrialLogger +from carl.utils.types import Context, Contexts + + +class CARLMarioEnv(CARLEnv): + def __init__( + self, + env: gym.Env = MarioEnv(levels=[]), + contexts: Contexts = {}, + hide_context: bool = True, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.05, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Context] = DEFAULT_CONTEXT, + state_context_features: Optional[List[str]] = None, + context_mask: Optional[List[str]] = None, + dict_observation_space: bool = False, + context_selector: Optional[ + Union[AbstractSelector, type[AbstractSelector]] + ] = None, + context_selector_kwargs: Optional[Dict] = None, + ): + if not contexts: + contexts = {0: DEFAULT_CONTEXT} + super().__init__( + env=env, + contexts=contexts, + hide_context=True, + add_gaussian_noise_to_context=add_gaussian_noise_to_context, + gaussian_noise_std_percentage=gaussian_noise_std_percentage, + logger=logger, + scale_context_features="no", + default_context=default_context, + dict_observation_space=dict_observation_space, + context_selector=context_selector, + context_selector_kwargs=context_selector_kwargs, + context_mask=context_mask, + ) + self.levels: List[str] = [] + self._update_context() + + def _update_context(self) -> None: + self.env: MarioEnv + if not self.levels: + for context in self.contexts.values(): + level = generate_level( + width=INITIAL_WIDTH, + height=INITIAL_HEIGHT, + level_index=context["level_index"], + initial_noise=context["noise"], + filter_unplayable=True, + ) + self.levels.append(level) + self.env.mario_state = self.context["mario_state"] + self.env.mario_inertia = self.context["mario_inertia"] + self.env.levels = [self.levels[self.context_index]] + + def _log_context(self) -> None: + if self.logger: + loggable_context = {k: v for k, v in self.context.items() if k != "noise"} + self.logger.write_context( + 
self.episode_counter, self.total_timestep_counter, loggable_context + ) diff --git a/carl/envs/mario/carl_mario_definitions.py b/carl/envs/mario/carl_mario_definitions.py new file mode 100644 index 00000000..8cdabafe --- /dev/null +++ b/carl/envs/mario/carl_mario_definitions.py @@ -0,0 +1,27 @@ +import numpy as np +from torch import Tensor + +try: + from carl.envs.mario.toad_gan import generate_initial_noise +except FileNotFoundError: + + def generate_initial_noise(width: int, height: int, level_index: int) -> Tensor: + return Tensor() + + +INITIAL_WIDTH = 100 +INITIAL_LEVEL_INDEX = 0 +INITIAL_HEIGHT = 16 +DEFAULT_CONTEXT = { + "level_index": INITIAL_LEVEL_INDEX, + "noise": generate_initial_noise(INITIAL_WIDTH, INITIAL_HEIGHT, INITIAL_LEVEL_INDEX), + "mario_state": 0, + "mario_inertia": 0.89, +} +CONTEXT_BOUNDS = { + "level_index": (None, None, "categorical", np.arange(0, 14)), + "noise": (-1.0, 1.0, float), + "mario_state": (None, None, "categorical", [0, 1, 2]), + "mario_inertia": (0.5, 1.5, float), +} +CATEGORICAL_CONTEXT_FEATURES = ["level_index", "mario_state"] diff --git a/carl/envs/mario/generate_sample.py b/carl/envs/mario/generate_sample.py new file mode 100644 index 00000000..39a95222 --- /dev/null +++ b/carl/envs/mario/generate_sample.py @@ -0,0 +1,122 @@ +# Code from https://github.com/Mawiszus/TOAD-GAN +from typing import Any, List, Optional, Tuple, Union + +import torch +import torch.nn as nn +from torch import Tensor +from torch.nn.functional import interpolate + + +# Generates a noise tensor. Uses torch.randn. 
+def generate_spatial_noise( + size: Union[Any, List[int], Tuple[int]], device: Union[str, torch.device] = "cpu" +) -> Tensor: + return torch.randn(size, device=device, dtype=torch.float32) + + +# Generate a sample given a TOAD-GAN and additional parameters +@torch.no_grad() # type: ignore [misc] +def generate_sample( + generators: Tensor, + noise_maps: Tensor, + reals: Tensor, + noise_amplitudes: Tensor, + num_layer: int, + token_list: Tensor, + scale_v: float = 1.0, + scale_h: float = 1.0, + current_scale: int = 0, + gen_start_scale: int = 0, + initial_noise: Optional[Tensor] = None, +) -> List[str]: + + in_s = None + images_cur: List[Tensor] = [] + images: List[Tensor] = [] + z_s: List[Tensor] = [] + + # Generate on GPU if available + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + # Main loop + for G, Z_opt, noise_amp in zip(generators, noise_maps, noise_amplitudes): + if current_scale >= len(generators): + break # should not be reached + + # Zero Padding + n_pad = int(num_layer) + m = nn.ZeroPad2d(int(n_pad)) + + # Calculate actual shape + nzx = (Z_opt.shape[2] - n_pad * 2) * scale_v + nzy = (Z_opt.shape[3] - n_pad * 2) * scale_h + + # Init images list + images_prev = images_cur + images_cur = [] + channels = len(token_list) + + # Init in_s + if in_s is None: + in_s = torch.zeros(reals[0].shape[0], channels, *reals[0].shape[2:]).to( + device + ) + elif in_s.sum() == 0: + in_s = torch.zeros(in_s.shape[0], channels, *in_s.shape[2:]).to(device) + + if current_scale == 0: # First step: Make base noise + if initial_noise is not None and len(initial_noise) > 0: + z_curr = initial_noise.float().to(device) + else: + z_curr = generate_spatial_noise( + [1, channels, int(round(nzx)), int(round(nzy))], device=device + ) + z_curr = m(z_curr) + else: # All other steps: Make added noise + if current_scale < gen_start_scale: + z_curr = z_s[current_scale] + else: + z_curr = generate_spatial_noise( + [1, channels, int(round(nzx)), int(round(nzy))], 
device=device + ) + z_curr = m(z_curr) + if (not images_prev) or current_scale == 0: # if there is no "previous" image + I_prev = in_s + else: + I_prev = images[current_scale - 1] + + # Bilinear interpolation for upscaling + I_prev = interpolate( + I_prev, + [int(round(nzx)), int(round(nzy))], + mode="bilinear", + align_corners=False, + ) + I_prev = m(I_prev) + + # Main Step + z_in = noise_amp * z_curr + I_prev + I_curr = G(z_in, I_prev, temperature=1) + + # Append results + images_cur.append(I_curr) + + if current_scale >= gen_start_scale: + images.append(I_curr) + z_s.append(z_curr) + current_scale += 1 + + return one_hot_to_ascii_level(I_curr, token_list) + + +def one_hot_to_ascii_level(level: Any, tokens: Any) -> List[str]: + """Converts a full token level tensor to an ascii level.""" + ascii_level = [] + for i in range(level.shape[2]): + line = "" + for j in range(level.shape[3]): + line += tokens[level[:, :, i, j].argmax()] + if i < level.shape[2] - 1: + line += "\n" + ascii_level.append(line) + return ascii_level diff --git a/carl/envs/mario/level_image_gen.py b/carl/envs/mario/level_image_gen.py new file mode 100644 index 00000000..17378462 --- /dev/null +++ b/carl/envs/mario/level_image_gen.py @@ -0,0 +1,418 @@ +# Code from https://github.com/Mawiszus/TOAD-GAN +from typing import Any, List, Tuple + +import os + +from PIL import Image, ImageEnhance, ImageOps + + +class LevelImageGen: + """Generates PIL Image files from Super Mario Bros. ascii levels. + Initialize once and then use LevelImageGen.render() to generate images.""" + + def __init__(self, sprite_path: str): + """sprite_path: path to the folder of sprite files, e.g. 
'mario/sprites/'""" + + # Load Graphics (assumes sprite_path points to "img" folder of Mario-AI-Framework or provided sprites folder + mariosheet = Image.open(os.path.join(sprite_path, "smallmariosheet.png")) + enemysheet = Image.open(os.path.join(sprite_path, "enemysheet.png")) + itemsheet = Image.open(os.path.join(sprite_path, "itemsheet.png")) + mapsheet = Image.open(os.path.join(sprite_path, "mapsheet.png")) + + # Cut out the actual sprites: + sprite_dict = dict() + # Mario Sheet + sprite_dict["M"] = mariosheet.crop((4 * 16, 0, 5 * 16, 16)) + + # Enemy Sheet + enemy_names = ["r", "k", "g", "y", "wings", "*", "plant"] + for i, e in enumerate(enemy_names): + sprite_dict[e] = enemysheet.crop((0, i * 2 * 16, 16, (i + 1) * 2 * 16)) + + sprite_dict["E"] = enemysheet.crop( + (16, 2 * 2 * 16, 2 * 16, 3 * 2 * 16) + ) # Set generic enemy to second goomba sprite + sprite_dict["plant"] = enemysheet.crop( + (16, (len(enemy_names) - 1) * 2 * 16, 2 * 16, len(enemy_names) * 2 * 16) + ) + + # Item Sheet + sprite_dict["shroom"] = itemsheet.crop((0, 0, 16, 16)) + sprite_dict["flower"] = itemsheet.crop((16, 0, 2 * 16, 16)) + sprite_dict["flower2"] = itemsheet.crop((0, 16, 16, 2 * 16)) + sprite_dict["1up"] = itemsheet.crop((16, 16, 2 * 16, 2 * 16)) + + # Map Sheet + map_names = [ + "-", + "X", + "#", + "B", + "b", + "b2", + "S", + "L", + "?", + "dump", + "@", + "Q", + "dump", + "!", + "D", + "o", + "o2", + "o3", + "<", + ">", + "[", + "]", + "bg_sl_l", + "bg_top", + "bg_sl_r", + "bg_m_l", + "bg_m", + "bg_m_r", + "bush_l", + "bush_m", + "bush_r", + "cloud_l", + "cloud_m", + "cloud_r", + "cloud_b_l", + "cloud_b_m", + "cloud_b_r", + "waves", + "water", + "F_top", + "F_b", + "F", + "bg_sky", + "%", + "%_l", + "%_r", + "%_m", + "|", + "1", + "2", + "C", + "U", + "T", + "t", + "dump", + "dump", + ] + + sheet_length = (7, 8) + sprite_counter = 0 + for i in range(sheet_length[0]): + for j in range(sheet_length[1]): + sprite_dict[map_names[sprite_counter]] = mapsheet.crop( + (j * 16, i * 
16, (j + 1) * 16, (i + 1) * 16) + ) + sprite_counter += 1 + + sprite_dict["@"] = sprite_dict["?"] + sprite_dict["!"] = sprite_dict["Q"] + + self.sprite_dict = sprite_dict + + def prepare_sprite_and_box( + self, ascii_level: List[str], sprite_key: str, curr_x: int, curr_y: int + ) -> Tuple[Any, Tuple[int, int, int, int]]: + """Helper to make correct sprites and sprite sizes to draw into the image. + Some sprites are bigger than one tile and the renderer needs to adjust for them.""" + + # Init default size + new_left = curr_x * 16 + new_top = curr_y * 16 + new_right = (curr_x + 1) * 16 + new_bottom = (curr_y + 1) * 16 + + # Handle sprites depending on their type: + if sprite_key == "F": # Flag Pole + actual_sprite = Image.new("RGBA", (2 * 16, curr_y * 16)) + actual_sprite.paste(self.sprite_dict["F_top"], (16, 0, 2 * 16, 16)) + for s in range(curr_y): + actual_sprite.paste( + self.sprite_dict["F_b"], (16, (s + 1) * 16, 2 * 16, (s + 2) * 16) + ) + actual_sprite.paste(self.sprite_dict["F"], (7, 1 * 16, 16 + 7, 2 * 16)) + new_left = new_left - 16 + new_top = new_top - (curr_y - 1) * 16 + + elif sprite_key in ["y", "E", "g", "k", "r"]: # enemy sprite + actual_sprite = self.sprite_dict[sprite_key] + new_top = new_top - 16 + + elif sprite_key in ["Y", "K", "R"]: # winged spiky/koopa sprite + actual_sprite = Image.new("RGBA", (2 * 16, 2 * 16)) + actual_sprite.paste( + self.sprite_dict[str.lower(sprite_key)], (16, 0, 2 * 16, 2 * 16) + ) + actual_sprite.paste(self.sprite_dict["wings"], (7, -7, 16 + 7, 2 * 16 - 7)) + new_left = new_left - 16 + new_top = new_top - 16 + + elif ( + sprite_key == "G" + ): # winged goomba sprite (untested because original has none?) 
+ actual_sprite = Image.new("RGBA", (3 * 16, 2 * 16)) + actual_sprite.paste(self.sprite_dict["wings"], (1, -5, 16 + 1, 2 * 16 - 5)) + actual_sprite.paste( + ImageOps.mirror(self.sprite_dict["wings"]), + (2 * 16 - 1, -5, 3 * 16 - 1, 2 * 16 - 5), + ) + actual_sprite.paste( + self.sprite_dict[str.lower(sprite_key)], (16, 0, 2 * 16, 2 * 16) + ) + new_left = new_left - 16 + new_top = new_top - 16 + new_right = new_right + 16 + + elif sprite_key == "%": # jump through platform + if curr_x == 0: + if ( + len(ascii_level[curr_y]) > 1 + and ascii_level[curr_y][curr_x + 1] == sprite_key + ): # middle piece + actual_sprite = self.sprite_dict["%_m"] + else: # single_piece + actual_sprite = self.sprite_dict["%"] + elif ascii_level[curr_y][curr_x - 1] == sprite_key: + if curr_x >= (len(ascii_level[curr_y]) - 1): # right end piece + actual_sprite = self.sprite_dict["%_r"] + elif ascii_level[curr_y][curr_x + 1] == sprite_key: # middle piece + actual_sprite = self.sprite_dict["%_m"] + else: # right end piece + actual_sprite = self.sprite_dict["%_r"] + else: + if curr_x >= (len(ascii_level[curr_y]) - 1): # single piece + actual_sprite = self.sprite_dict["%"] + elif ascii_level[curr_y][curr_x + 1] == sprite_key: # left end piece + actual_sprite = self.sprite_dict["%_l"] + else: # single piece + actual_sprite = self.sprite_dict[sprite_key] + + elif sprite_key == "b": # bullet bill tower + if curr_y > 0: + if ascii_level[curr_y - 1][curr_x] == sprite_key: + actual_sprite = self.sprite_dict["b2"] + else: + actual_sprite = self.sprite_dict[sprite_key] + else: + actual_sprite = self.sprite_dict[sprite_key] + + elif sprite_key == "*": # alternative bullet bill tower + if curr_y > 0: + if ascii_level[curr_y - 1][curr_x] != sprite_key: # top + actual_sprite = self.sprite_dict["B"] + elif curr_y > 1: + if ascii_level[curr_y - 2][curr_x] != sprite_key: + actual_sprite = self.sprite_dict["b"] + else: + actual_sprite = self.sprite_dict["b2"] + else: + actual_sprite = self.sprite_dict["b2"] + + 
elif sprite_key in ["T", "t"]: # Pipes + + # figure out what kind of pipe this is + if curr_y > 0 and ascii_level[curr_y - 1][curr_x] == sprite_key: + is_top = False + else: + is_top = True + + pipelength_t = 0 + while ( + curr_y - pipelength_t >= 0 + and ascii_level[curr_y - pipelength_t][curr_x] == sprite_key + ): + pipelength_t += 1 + + pipelength_b = 0 + while ( + curr_y + pipelength_b < len(ascii_level) + and ascii_level[curr_y + pipelength_b][curr_x] == sprite_key + ): + pipelength_b += 1 + + pipelength_l = 0 + while ( + curr_x - pipelength_l >= 0 + and ascii_level[curr_y][curr_x - pipelength_l] == sprite_key + ): + pipelength_l += 1 + + pipelength_r = 0 + while ( + curr_x + pipelength_r < len(ascii_level[curr_y]) + and ascii_level[curr_y][curr_x - pipelength_r] == sprite_key + ): + pipelength_r += 1 + + # Check for fall out criteria + try: + if pipelength_l % 2 == 0: # second half of a double pipe + is_left = False + is_right = True + elif pipelength_l % 2 == 1: + if ( + curr_x >= len(ascii_level[curr_y]) + or ascii_level[curr_y][curr_x + 1] != sprite_key + ): + is_left = False + is_right = False + else: + is_left = True + is_right = False + else: + is_left = False + is_right = False + + if is_left: + if ascii_level[curr_y - pipelength_t][curr_x + 1] == sprite_key: + is_left = False + is_right = False + if ascii_level[curr_y - pipelength_t + 1][curr_x + 1] != sprite_key: + is_left = False + is_right = False + if is_right: + if ascii_level[curr_y - pipelength_t][curr_x - 1] == sprite_key: + is_left = False + is_right = False + if ascii_level[curr_y - pipelength_t + 1][curr_x - 1] != sprite_key: + is_left = False + is_right = False + if curr_y + pipelength_b < len(ascii_level): + if is_left: + if ascii_level[curr_y + pipelength_b][curr_x + 1] == sprite_key: + is_left = False + is_right = False + if ( + ascii_level[curr_y + pipelength_b - 1][curr_x + 1] + != sprite_key + ): + is_left = False + is_right = False + if is_right: + if ascii_level[curr_y + 
pipelength_b][curr_x - 1] == sprite_key: + is_left = False + is_right = False + if ( + ascii_level[curr_y + pipelength_b - 1][curr_x - 1] + != sprite_key + ): + is_left = False + is_right = False + except IndexError: + # Default to single pipe + is_left = False + is_right = False + + if is_top: + if is_left: + actual_sprite = self.sprite_dict["<"] + elif is_right: + if sprite_key == "T": + actual_sprite = Image.new("RGBA", (2 * 16, 3 * 16)) + actual_sprite.paste( + self.sprite_dict["plant"], (8, 5, 16 + 8, 2 * 16 + 5) + ) + actual_sprite.paste( + self.sprite_dict["<"], (0, 2 * 16, 16, 3 * 16) + ) + actual_sprite.paste( + self.sprite_dict[">"], (16, 2 * 16, 2 * 16, 3 * 16) + ) + new_left = new_left - 16 + new_top = new_top - 2 * 16 + else: + actual_sprite = self.sprite_dict[">"] + else: + if sprite_key == "T": + actual_sprite = Image.new("RGBA", (16, 3 * 16)) + actual_sprite.paste( + self.sprite_dict["plant"], (0, 5, 16, 2 * 16 + 5) + ) + actual_sprite.paste( + self.sprite_dict["T"], (0, 2 * 16, 16, 3 * 16) + ) + new_top = new_top - 2 * 16 + else: + actual_sprite = self.sprite_dict["T"] + else: + if is_left: + actual_sprite = self.sprite_dict["["] + elif is_right: + actual_sprite = self.sprite_dict["]"] + else: + actual_sprite = self.sprite_dict["t"] + + elif sprite_key in [ + "?", + "@", + "Q", + "!", + "C", + "U", + "L", + ]: # Block/Brick hidden items + if sprite_key == "L": + i_key = "1up" + elif sprite_key in ["?", "@", "U"]: + i_key = "shroom" + else: + i_key = "o" + + mask = self.sprite_dict[i_key].getchannel(3) + mask = ImageEnhance.Brightness(mask).enhance(0.7) + actual_sprite = Image.composite( + self.sprite_dict[i_key], self.sprite_dict[sprite_key], mask=mask + ) + + elif sprite_key in ["1", "2"]: # Hidden block + if sprite_key == "1": + i_key = "1up" + else: + i_key = "o" + + mask1 = self.sprite_dict["D"].getchannel(3) + mask1 = ImageEnhance.Brightness(mask1).enhance(0.5) + tmp_sprite = Image.composite( + self.sprite_dict["D"], 
self.sprite_dict[sprite_key], mask=mask1 + ) + mask = self.sprite_dict[i_key].getchannel(3) + mask = ImageEnhance.Brightness(mask).enhance(0.7) + actual_sprite = Image.composite( + self.sprite_dict[i_key], tmp_sprite, mask=mask + ) + + else: + actual_sprite = self.sprite_dict[sprite_key] + + return actual_sprite, (new_left, new_top, new_right, new_bottom) + + def render(self, ascii_level: List[str]) -> Image: + """Renders the ascii level as a PIL Image. Assumes the Background is sky""" + len_level = len(ascii_level[-1]) + height_level = len(ascii_level) + + # Fill base image with sky tiles + dst = Image.new("RGB", (len_level * 16, height_level * 16)) + for y in range(height_level): + for x in range(len_level): + dst.paste( + self.sprite_dict["bg_sky"], + (x * 16, y * 16, (x + 1) * 16, (y + 1) * 16), + ) + + # Fill with actual tiles + for y in range(height_level): + for x in range(len_level): + curr_sprite = ascii_level[y][x] + sprite, box = self.prepare_sprite_and_box( + ascii_level, curr_sprite, x, y + ) + dst.paste(sprite, box, mask=sprite) + + return dst diff --git a/carl/envs/mario/mario_env.py b/carl/envs/mario/mario_env.py new file mode 100644 index 00000000..127a75db --- /dev/null +++ b/carl/envs/mario/mario_env.py @@ -0,0 +1,227 @@ +from typing import Any, ByteString, Deque, Dict, List, Literal, Optional, Union, cast + +import os +import random +import socket +from collections import deque + +import cv2 +import gym +import numpy as np +from gym import spaces +from gym.core import ObsType +from gym.utils import seeding +from PIL import Image +from py4j.java_gateway import GatewayParameters, JavaGateway + +from carl.envs.mario.level_image_gen import LevelImageGen + +from .mario_game import MarioGame +from .utils import get_port, load_level + + +class MarioEnv(gym.Env): + metadata = {"render.modes": ["rgb_array"]} + + def __init__( + self, + levels: List[str], + timer: int = 100, + visual: bool = False, + sticky_action_probability: float = 0.1, + frame_skip: 
int = 2, + frame_stack: int = 4, + frame_dim: int = 64, + hide_points_banner: bool = False, + sparse_rewards: bool = False, + grayscale: bool = False, + seed: int = 0, + ): + self.gateway: Any = None + self.seed(seed) + self.level_names = levels + self.levels = [load_level(name) for name in levels] + self.timer = timer + self.visual = visual + self.frame_skip = frame_skip + self.frame_stack = frame_stack + self.sticky_action_probability = sticky_action_probability + self.hide_points_banner = hide_points_banner + self.sparse_rewards = sparse_rewards + self.points_banner_height = 4 + self.grayscale = grayscale + self.last_action = None + self.width = self.height = frame_dim + self.observation_space = spaces.Box( + low=0, + high=255, + shape=[self.frame_stack if grayscale else 3, self.height, self.width], + dtype=np.uint8, + ) + self.original_obs: Deque = deque(maxlen=self.frame_skip) + self.actions = [ + [False, False, False, False, False], # noop + [False, False, True, False, False], # down + [False, True, False, False, False], # right + [False, True, False, True, False], # right speed + [False, True, False, False, True], # right jump + [False, True, False, True, True], # right speed jump + [True, False, False, False, False], # left + [True, False, False, False, True], # left jump + [True, False, False, True, True], # left speed jump + [False, False, False, False, True], # jump + ] + self.action_space = spaces.Discrete(n=len(self.actions)) + self._obs: Any = np.zeros(shape=self.observation_space.shape, dtype=np.uint8) + self.current_level_idx = 0 + self.frame_size = -1 + self.port = get_port() + self.mario_state: Literal[0, 1, 2] = 0 # normal, large, fire + self.mario_inertia = 0.89 + self._init_game() + + def reset( + self, + *, + seed: Optional[int] = None, + return_info: bool = False, + options: Optional[dict] = None, + ) -> Union[ObsType, tuple[ObsType, dict]]: + self._reset_obs() + if self.game is None: + self.game: Any = self._init_game() + 
def __getstate__(self) -> Dict:
    """Detach from the Java game process and return a picklable state.

    The py4j gateway and the frame socket are closed and the corresponding
    attributes (``gateway``, ``game``, ``socket``) are reset to ``None``.
    ``reset`` re-creates them through ``_init_game`` when it finds
    ``self.game`` to be ``None``.

    Compared to the previous version:

    * ``self.socket`` is cleared after closing. A ``socket.socket`` object
      cannot be pickled, so leaving it in the state made pickling fail.
    * Calling the method on an already detached environment is a no-op
      instead of an ``AssertionError``.
    * A shallow copy of ``__dict__`` is returned so that the caller cannot
      mutate the live environment through the state dict.

    :return: copy of the instance dictionary without live connections
    """
    if self.gateway is not None:
        self.gateway.close()
    self.gateway = None
    self.game = None

    sock = getattr(self, "socket", None)
    if sock is not None:
        sock.shutdown(1)  # 1 == socket.SHUT_WR: no further sends
        sock.close()
    self.socket = None

    return self.__dict__.copy()
def _receive(self) -> bytes:
    """Read exactly one frame (``self.frame_size`` bytes) from the socket.

    Only the number of bytes still missing is requested on every ``recv``
    call, so the read can never run into the following frame. The previous
    implementation always asked for ``frame_size`` bytes and compared the
    buffer length with ``!=``; after a short first read it could overshoot
    and then loop forever.

    :return: the raw frame bytes
    :raises ConnectionError: if the peer closes the connection before a
        complete frame arrived (``recv`` returned an empty result)
    """
    remaining = self.frame_size
    chunks = []
    while remaining > 0:
        chunk = self.socket.recv(remaining)
        if not chunk:
            # An empty result means the other side closed the stream;
            # without this check the loop would spin forever.
            raise ConnectionError(
                "Socket closed with %d of %d frame bytes missing"
                % (remaining, self.frame_size)
            )
        chunks.append(chunk)
        remaining -= len(chunk)
    # Join once instead of growing a bytes object with ``+=``.
    return b"".join(chunks)
class ConvBlock(nn.Sequential):
    """Sequential block of Conv2d -> BatchNorm2d -> LeakyReLU(0.2).

    The sub-modules are registered under the names ``"conv"``, ``"norm"``
    and ``"LeakyRelu"``.
    """

    def __init__(
        self,
        in_channel: int,
        out_channel: int,
        ker_size: Union[int, Tuple[int, int]],
        padd: Union[str, Union[int, Tuple[int, int]]],
        stride: Union[int, Tuple[int, int]],
    ):
        """
        :param in_channel: number of input channels of the convolution
        :param out_channel: number of output channels (also the BatchNorm size)
        :param ker_size: convolution kernel size
        :param padd: convolution padding
        :param stride: convolution stride
        """
        super().__init__()
        convolution = nn.Conv2d(
            in_channel,
            out_channel,
            kernel_size=ker_size,
            stride=stride,
            padding=padd,
        )
        # Registration order defines the execution order of nn.Sequential.
        layers = (
            ("conv", convolution),
            ("norm", nn.BatchNorm2d(out_channel)),
            ("LeakyRelu", nn.LeakyReLU(0.2, inplace=True)),
        )
        for layer_name, layer in layers:
            self.add_module(layer_name, layer)
Tensor + +from .conv_block import ConvBlock + + +class Level_WDiscriminator(nn.Module): + """Patch based Discriminator. Uses Namespace opt.""" + + def __init__(self, opt: Namespace): + super().__init__() + self.is_cuda = torch.cuda.is_available() + N = int(opt.nfc) + self.head = ConvBlock(opt.nc_current, N, (3, 3), opt.padd_size, 1) + self.body = nn.Sequential() + + for i in range(opt.num_layer - 2): + block = ConvBlock(N, N, (3, 3), opt.padd_size, 1) + self.body.add_module("block%d" % (i + 1), block) + + block = ConvBlock(N, N, (3, 3), opt.padd_size, 1) + self.body.add_module("block%d" % (opt.num_layer - 2), block) + + self.tail = nn.Conv2d(N, 1, kernel_size=(3, 3), stride=1, padding=opt.padd_size) + + def forward(self, x: Tensor) -> Tensor: + x = self.head(x) + x = self.body(x) + x = self.tail(x) + return x diff --git a/carl/envs/mario/models/generator.py b/carl/envs/mario/models/generator.py new file mode 100644 index 00000000..95c75eab --- /dev/null +++ b/carl/envs/mario/models/generator.py @@ -0,0 +1,44 @@ +# Code from https://github.com/Mawiszus/TOAD-GAN +from argparse import Namespace + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor + +from .conv_block import ConvBlock + + +class Level_GeneratorConcatSkip2CleanAdd(nn.Module): + """Patch based Generator. 
horizontal = 10  # when sprinting Mario can jump over a 10 tiles gap horizontally
vertical = 4  # Mario can jump over a 4 tile wall
diagonal = (
    6  # Mario can jump over 6 tiles to clear a 4 block height difference when sprinting
)

empty = "-"
ignored = ["M", "F", "|", "E", "g", "k", "r", "y", "G", "K", "R", "Y", "*", "B", "o"]


def remove_ignored(level: List[str]) -> Tuple[List, Tuple[int, int], Tuple[int, int]]:
    """
    Blank out every token listed in ``ignored`` and locate Mario and the flag.

    The input rows are not modified; cleaned copies are returned. For Mario
    ("M") and the flag ("F") the position of the tile directly *below* the
    token is reported as ``(row + 1, column)``. If a token occurs in several
    rows the last row wins, within a row the first occurrence counts.
    :param level: a level in ASCII form
    :return: the cleaned level, the position below Mario and the position below
        the flag; a position is (-1, -1) if the token does not occur
    """
    mario = (-1, -1)
    flag = (-1, -1)
    cleaned = []

    for row_idx, row in enumerate(level):
        # Positions are taken from the untouched row, before any replacement.
        mario_col = row.find("M")
        flag_col = row.find("F")
        if mario_col >= 0:
            mario = (row_idx + 1, mario_col)
        if flag_col >= 0:
            flag = (row_idx + 1, flag_col)

        blanked = row
        for token in ignored:
            blanked = blanked.replace(token, empty)
        cleaned.append(blanked)

    return cleaned, mario, flag
+ :param level: The level (slice) as a list containing the ASCII strings of each level row + :param shape: The level shape + :param has_mario: As levels are expected to be generated without Mario, this option has to be set to search for + Mario as a starting point + :param has_flag: As levels are expected to be generated without the flag, this option has to be set to determine + playability via reaching the flag + :param check_outside: If this option is set, playability will check if the player can reach the outside + :return: A numpy array where a 0 indicates an unreachable block and a 1 denotes a reachable block; + a boolean indicating if the level can be finished by the player + """ + + level, mario, flag = remove_ignored(level) + reachability_map = np.zeros(shape=shape) + index_queue = [] + + # find the starting point, either the block Mario is standing on or the first solid block Mario could stand on + found_first = False + if has_mario: + index_queue.append(mario) + else: + for i in range(shape[0] - 1, 0, -1): # start from the bottom of the level + for j in range(0, shape[1]): + tile = level[i][j] + if ( + tile != empty + and ( + reachability_map[i][j] == 1 + or not found_first + and i < shape[0] - 1 + ) + and i > 0 + and level[i - 1][j] == empty + ): + found, queue, _ = mark(level, reachability_map, i, j) + index_queue.extend(queue) + if not found_first: + found_first = found + break + if found_first: + break + + # calculate all reachable positions by applying a BFS type of algorithm + outside = False + while len(index_queue) > 0: + index = index_queue.pop() + _, queue, reached_outside = mark( + level, reachability_map, index[0], index[1], check_outside=check_outside + ) + if reached_outside: + outside = True + index_queue.extend(queue) + + # a level is playable if either the flag is reachable or if no flag is included, the rightmost side can be reached + # Bug: if the level ends with a gap, it might be playable but still wouldn't count as such + playable = 
def check_blocked(
    level: List[str], i: int, j: int, dh: int, dv: int, right: bool
) -> int:
    """
    Checks for a given position, level and direction if a blockade exists in the range specified by dh and dv.

    The inspected column is ``j + dh`` (right) or ``j - dh`` (left). A column
    outside the level counts as blocked on both sides. Previously only the
    right side was handled (through the ``IndexError``); on the left a
    negative column silently wrapped around to the right end of the row, so
    unrelated tiles decided the result.
    :param level: The level in ASCII form
    :param i: row index of the starting position (first index into ``level``)
    :param j: column index of the starting position
    :param dh: amount of blocks in the horizontal direction from the starting point the algorithm tries to jump
    :param dv: amount of blocks in the vertical direction from the starting point the algorithm tries to jump
    :param right: direction of the jump
    :return: ``dh`` if a blockade is found, ``horizontal + 1`` otherwise
    """
    blocked = horizontal + 1  # default value: nothing in the way
    boundary = j + dh if right else j - dh
    if boundary < 0:
        # left of the level, cannot pass (mirrors the IndexError case below)
        return dh
    # NOTE(review): a negative row index (i - dv < 0) still wraps around to
    # the bottom rows as before; the intended semantics for tiles above the
    # level are not clear from this function - confirm before changing.
    try:
        if level[i - dv][boundary] != empty:
            height = 1 + dv
            while height < vertical + 1:
                v = i - dv - height
                if v < 0:
                    # over maximum level height, cannot pass
                    blocked = dh
                    break
                if level[v][boundary] != empty or dh + height > 10:
                    height += 1
                else:
                    break
            if height == vertical + 1:
                # the obstacle is higher than Mario can jump
                blocked = dh
    except IndexError:
        # outside of the level, cannot pass
        blocked = dh

    return blocked
True + else: + y = min(max(boundary, 0), map.shape[1] - 1) + while i + drop < map.shape[0]: + # right and down + x = i + drop + above = x - 1 + + if ( + level[x][y] != empty + and above >= 0 + and level[above][y] == empty + and map[x][y] != 1 + ): + map[x][y] = 1 + found.append((x, y)) + found_first = True + break + drop += 1 + + return found_first, reach_outside, found + + +def mark( + level: List[str], + reachability_map: np.ndarray, + i: int, + j: int, + check_outside: bool = False, +) -> Tuple[bool, List[Tuple[int, int]], bool]: + """ + For a given position and a level this will mark all tiles reachable from the given position and collect all + these positions for further use. + :param level: The level (slice) as a list containing the ASCII strings of each level row + :param map: The current reachability map where the reachable tiles will be marked + :param i: x coordinate + :param j: y coordinate + :param check_outside: if the algorithm should indicate that the player can reach the right outside of the level + :return: A boolean indicating if any tile can be reached from this position, a list of all reachable positions and + if the outside can be reached + """ + found_first = False + reach_outside = False + found = [] + blocked_level = vertical + 1 + blocked_right = horizontal + 1 + blocked_left = horizontal + 1 + blocked_down_right = horizontal + 1 + blocked_down_left = horizontal + 1 + + # mark diagonally + for dh in range(0, horizontal + 1): + # check down as far as possible, Mario can fall down the whole level until he hits a solid block + if blocked_down_right == horizontal + 1: + blocked_down_right = check_blocked(level, i, j, dh, 0, right=True) + if blocked_down_right >= dh: + found_rechable, found_outside, positions = check_down( + level, reachability_map, i, j, dh, check_outside, right=True + ) + if found_rechable: + found_first = True + if found_outside: + reach_outside = True + found.extend(positions) + + if blocked_down_left == horizontal + 1: + 
blocked_down_left = check_blocked(level, i, j, dh, 0, right=False) + if blocked_down_left >= dh: + found_rechable, found_outside, positions = check_down( + level, reachability_map, i, j, dh, check_outside, right=False + ) + if found_rechable: + found_first = True + if found_outside: + reach_outside = True + found.extend(positions) + + for dv in range(0, vertical + 1): + if dh != 0 or dv != 0: + if dv >= blocked_level: + break + + # check if vertical path is blocked + if dh == 0: + if level[i - dv][j] != empty: + blocked_level = dv + continue + + # check if horizontal right path is blocked + if blocked_right == horizontal + 1: + blocked_right = check_blocked(level, i, j, dh, dv, right=True) + + if dh <= blocked_right and dh + dv <= 10: + # right and up + x = min(max(i - dv, 0), reachability_map.shape[0] - 1) + right = j + dh + if right > reachability_map.shape[1] - 1 and check_outside: + reach_outside = True + y = min(max(right, 0), reachability_map.shape[1] - 1) + above = x - 1 + if ( + level[x][y] != empty + and above >= 0 + and level[above][y] == empty + and reachability_map[x][y] != 1 + ): + reachability_map[x][y] = 1 + found.append((x, y)) + found_first = True + + # check if horizontal left path is blocked + if blocked_left == horizontal + 1: + blocked_left = check_blocked(level, i, j, dh, dv, right=False) + + if dh <= blocked_left and dh + dv <= 10: + # left and up + x = min(max(i - dv, 0), reachability_map.shape[0] - 1) + y = min(max(j - dh, 0), reachability_map.shape[1] - 1) + above = x - 1 + if ( + level[x][y] != empty + and above >= 0 + and level[above][y] == empty + and reachability_map[x][y] != 1 + ): + reachability_map[x][y] = 1 + found.append((x, y)) + found_first = True + + return found_first, found, reach_outside diff --git a/carl/envs/mario/sprites/README.md b/carl/envs/mario/sprites/README.md new file mode 100644 index 00000000..6e2eda67 --- /dev/null +++ b/carl/envs/mario/sprites/README.md @@ -0,0 +1,4 @@ +## Notice + +This folder contains the 
sprite images from https://github.com/amidos2006/Mario-AI-Framework/tree/master/img. +They are necessary for the Mario-AI-Framework and our level preview renderer. \ No newline at end of file diff --git a/carl/envs/mario/sprites/enemysheet.png b/carl/envs/mario/sprites/enemysheet.png new file mode 100755 index 00000000..7846c9b6 Binary files /dev/null and b/carl/envs/mario/sprites/enemysheet.png differ diff --git a/carl/envs/mario/sprites/favicon.png b/carl/envs/mario/sprites/favicon.png new file mode 100644 index 00000000..6bdeb815 Binary files /dev/null and b/carl/envs/mario/sprites/favicon.png differ diff --git a/carl/envs/mario/sprites/firemariosheet.png b/carl/envs/mario/sprites/firemariosheet.png new file mode 100755 index 00000000..3e8431bd Binary files /dev/null and b/carl/envs/mario/sprites/firemariosheet.png differ diff --git a/carl/envs/mario/sprites/font.gif b/carl/envs/mario/sprites/font.gif new file mode 100755 index 00000000..213d7a01 Binary files /dev/null and b/carl/envs/mario/sprites/font.gif differ diff --git a/carl/envs/mario/sprites/frameworkAD.gif b/carl/envs/mario/sprites/frameworkAD.gif new file mode 100644 index 00000000..51881dcf Binary files /dev/null and b/carl/envs/mario/sprites/frameworkAD.gif differ diff --git a/carl/envs/mario/sprites/itemsheet.png b/carl/envs/mario/sprites/itemsheet.png new file mode 100755 index 00000000..cc5c4b84 Binary files /dev/null and b/carl/envs/mario/sprites/itemsheet.png differ diff --git a/carl/envs/mario/sprites/mapsheet.png b/carl/envs/mario/sprites/mapsheet.png new file mode 100755 index 00000000..6a5d77d4 Binary files /dev/null and b/carl/envs/mario/sprites/mapsheet.png differ diff --git a/carl/envs/mario/sprites/mariosheet.png b/carl/envs/mario/sprites/mariosheet.png new file mode 100755 index 00000000..fa9877cf Binary files /dev/null and b/carl/envs/mario/sprites/mariosheet.png differ diff --git a/carl/envs/mario/sprites/particlesheet.png b/carl/envs/mario/sprites/particlesheet.png new file mode 
@dataclass(init=False, repr=False, eq=False)
class TOADGAN:
    """Container for the parts of one trained TOAD-GAN generator.

    The attribute names are exactly the keys of ``vars(instance)``.

    The class used to be decorated with a bare ``@dataclass`` although it
    declared no fields. That generated an ``__eq__`` comparing two empty
    tuples (any two instances compared equal), set ``__hash__`` to ``None``
    and produced the repr ``TOADGAN()``. The fields are now declared and the
    generated ``__init__``/``__repr__``/``__eq__`` are switched off, so
    instances keep the hand-written constructor, compare by identity and
    stay hashable. Field-wise equality is deliberately not enabled.
    """

    # NOTE(review): the annotations mirror the constructor signature; the
    # ``[-1]`` indexing below suggests ``reals`` is a sequence of tensors
    # rather than a single Tensor - confirm against the loader.
    generators: Tensor
    noise_maps: Tensor
    reals: Tensor
    noise_amplitudes: Tensor
    token_list: Tensor
    num_layer: int

    def __init__(
        self,
        Gs: Tensor,
        Zs: Tensor,
        reals: Tensor,
        NoiseAmp: Tensor,
        token_list: Tensor,
        num_layers: int,
    ):
        """
        :param Gs: stored as ``generators``
        :param Zs: stored as ``noise_maps``
        :param reals: stored as ``reals``
        :param NoiseAmp: stored as ``noise_amplitudes``
        :param token_list: stored as ``token_list``
        :param num_layers: stored as ``num_layer`` (singular)
        """
        self.generators = Gs
        self.noise_maps = Zs
        self.reals = reals
        self.noise_amplitudes = NoiseAmp
        self.token_list = token_list
        self.num_layer = num_layers

    @property
    def original_height(self) -> int:
        """Second to last dimension of the last entry of ``reals``."""
        return self.reals[-1].shape[-2]

    @property
    def original_width(self) -> int:
        """Last dimension of the last entry of ``reals``."""
        return self.reals[-1].shape[-1]
def generate_level(
    width: int,
    height: int,
    level_index: int,
    initial_noise: Optional[torch.Tensor] = None,
    filter_unplayable: bool = True,
) -> str:
    """Sample a level of the requested size from a stored TOAD-GAN.

    Samples are drawn until one is accepted. With ``filter_unplayable`` set,
    a sample is accepted when ``reachability_map`` (with
    ``check_outside=True``) reports it as playable; the 100th sample is
    accepted without being checked. Without the flag the first sample is
    returned.

    :param width: requested level width, relative to the generator's
        ``original_width``
    :param height: requested level height, relative to the generator's
        ``original_height``
    :param level_index: index handed to ``load_generator``
    :param initial_noise: forwarded unchanged to ``generate_sample``
    :param filter_unplayable: reject samples that are not playable
    :return: the rows of the accepted sample joined without a separator
    """
    toad_gan = load_generator(level_index)
    level = None
    tries = 0
    while True:
        tries += 1
        level = generate_sample(
            **vars(toad_gan),
            scale_h=width / toad_gan.original_width,
            scale_v=height / toad_gan.original_height,
            initial_noise=initial_noise,
        )
        # Unfiltered mode or attempt budget used up: keep this sample.
        if not (filter_unplayable and tries < 100):
            break
        _, playable = reachability_map(
            level, shape=(height, width), check_outside=True
        )
        if playable:
            break
    assert level
    return "".join(level)
def find_free_port() -> int:
    """Return a TCP port number that was free at the time of the call.

    A temporary IPv4 stream socket is bound to port 0 so that the operating
    system picks the port; the socket is closed again before returning.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with closing(probe):
        probe.bind(("", 0))
        probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # AF_INET addresses are (host, port) pairs.
        _host, port = probe.getsockname()
    return port
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] - - n_cfs = np.sum(n_context_features) - n_dynami_changing = np.sum(n_cfs_d) # 129 + n_cfs = 131 + n_dynami_changing = 129 n_reward_changing = 7 - n_float_cfs = 114 + 14 + 14 + 14 + 18 + n_float_cfs = 114 percentage_float_cfs = n_float_cfs / n_cfs - print("percentage float", percentage_float_cfs) env_types = { "classic_control": [ @@ -380,14 +373,6 @@ ) data = pd.concat(data) - asvals = list(data["action_space_size"].unique()) - asvals.sort() - print("unique action sizes", asvals) - print("number of context features", np.sum(n_context_features)) - print("perc changing dynamic", n_dynami_changing / n_cfs, n_dynami_changing, n_cfs) - print(len(n_cfs_d), len(n_context_features)) - print("reward changing", len(n_cfs_r), np.sum(n_cfs_r), np.sum(n_cfs_r)/n_cfs) - # normalize values cols = [c for c in data.columns if c not in ["env_type", "env_name"]] # type: ignore [attr-defined] max_values_per_col = [] @@ -398,8 +383,6 @@ max_values_per_col.append(max_val) data[col] /= max_val - - cols_plot = [ "state_space_size", "action_space_size", diff --git a/changelog.md b/changelog.md index f5e94fdd..d5696ace 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,4 @@ # 0.2.1 -- Make sampling of contexts deterministic with seed - Add Finger (DMC) env - Readd RNA env (#78) diff --git a/docs/source/environments/data/context_definitions/CARLAcrobotEnv.csv b/docs/source/environments/data/context_definitions/CARLAcrobotEnv.csv new file mode 100644 index 00000000..b54b4d2f --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLAcrobotEnv.csv @@ -0,0 +1,11 @@ +Context Feature,Default,Bounds +link_length_1,1.0,"(0.1, 10, )" +link_length_2,1.0,"(0.1, 10, )" +link_mass_1,1.0,"(0.1, 10, )" +link_mass_2,1.0,"(0.1, 10, )" +link_com_1,0.5,"(0, 1, )" +link_com_2,0.5,"(0, 1, )" +link_moi,1.0,"(0.1, 10, )" +max_velocity_1,12.566370614359172,"(1.2566370614359172, 125.66370614359172, )" +max_velocity_2,28.274333882308138,"(2.827433388230814, 
282.7433388230814, )" +torque_noise_max,0.0,"(-1.0, 1.0, )" diff --git a/docs/source/environments/data/context_definitions/CARLAnt.csv b/docs/source/environments/data/context_definitions/CARLAnt.csv new file mode 100644 index 00000000..9352bcae --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLAnt.csv @@ -0,0 +1,8 @@ +Context Feature,Default,Bounds +joint_stiffness,5000.0,"(1, inf, )" +gravity,-9.8,"(-inf, -0.1, )" +friction,0.6,"(-inf, inf, )" +angular_damping,-0.05,"(-inf, inf, )" +actuator_strength,300.0,"(1, inf, )" +joint_angular_damping,35.0,"(0, inf, )" +torso_mass,10.0,"(0.1, inf, )" diff --git a/docs/source/environments/data/context_definitions/CARLBipedalWalkerEnv.csv b/docs/source/environments/data/context_definitions/CARLBipedalWalkerEnv.csv new file mode 100644 index 00000000..b1d8a28e --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLBipedalWalkerEnv.csv @@ -0,0 +1,21 @@ +Context Feature,Default,Bounds +FPS,50.0,"(1, 500, )" +SCALE,30.0,"(1, 100, )" +GRAVITY_X,0.0,"(-20, 20, )" +GRAVITY_Y,-10.0,"(-20, -0.01, )" +FRICTION,2.5,"(0, 10, )" +TERRAIN_STEP,0.4666666666666667,"(0.25, 1, )" +TERRAIN_LENGTH,200.0,"(100, 500, )" +TERRAIN_HEIGHT,5.0,"(3, 10, )" +TERRAIN_GRASS,10.0,"(5, 15, )" +TERRAIN_STARTPAD,20.0,"(10, 30, )" +MOTORS_TORQUE,80.0,"(0, 200, )" +SPEED_HIP,4.0,"(1e-06, 15, )" +SPEED_KNEE,6.0,"(1e-06, 15, )" +LIDAR_RANGE,5.333333333333333,"(0.5, 20, )" +LEG_DOWN,-0.26666666666666666,"(-2, -0.25, )" +LEG_W,0.26666666666666666,"(0.25, 0.5, )" +LEG_H,1.1333333333333333,"(0.25, 2, )" +INITIAL_RANDOM,5.0,"(0, 50, )" +VIEWPORT_W,600.0,"(400, 1000, )" +VIEWPORT_H,400.0,"(200, 800, )" diff --git a/docs/source/environments/data/context_definitions/CARLCartPoleEnv.csv b/docs/source/environments/data/context_definitions/CARLCartPoleEnv.csv new file mode 100644 index 00000000..1fcb4d0d --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLCartPoleEnv.csv @@ -0,0 +1,7 @@ +Context 
Feature,Default,Bounds +gravity,9.8,"(0.1, inf, )" +masscart,1.0,"(0.1, 10, )" +masspole,0.1,"(0.01, 1, )" +pole_length,0.5,"(0.05, 5, )" +force_magnifier,10.0,"(1, 100, )" +update_interval,0.02,"(0.002, 0.2, )" diff --git a/docs/source/environments/data/context_definitions/CARLFetch.csv b/docs/source/environments/data/context_definitions/CARLFetch.csv new file mode 100644 index 00000000..84a11cc5 --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLFetch.csv @@ -0,0 +1,10 @@ +Context Feature,Default,Bounds +joint_stiffness,5000.0,"(1, inf, )" +gravity,-9.8,"(-inf, -0.1, )" +friction,0.6,"(-inf, inf, )" +angular_damping,-0.05,"(-inf, inf, )" +actuator_strength,300.0,"(1, inf, )" +joint_angular_damping,35.0,"(0, inf, )" +torso_mass,1.0,"(0.1, inf, )" +target_radius,2.0,"(0.1, inf, )" +target_distance,15.0,"(0.1, inf, )" diff --git a/docs/source/environments/data/context_definitions/CARLGrasp.csv b/docs/source/environments/data/context_definitions/CARLGrasp.csv new file mode 100644 index 00000000..3bf2cac4 --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLGrasp.csv @@ -0,0 +1,10 @@ +Context Feature,Default,Bounds +joint_stiffness,5000.0,"(1, inf, )" +gravity,-9.8,"(-inf, -0.1, )" +friction,0.6,"(-inf, inf, )" +angular_damping,-0.05,"(-inf, inf, )" +actuator_strength,300.0,"(1, inf, )" +joint_angular_damping,50.0,"(0, inf, )" +target_radius,1.1,"(0.1, inf, )" +target_distance,10.0,"(0.1, inf, )" +target_height,8.0,"(0.1, inf, )" diff --git a/docs/source/environments/data/context_definitions/CARLHalfcheetah.csv b/docs/source/environments/data/context_definitions/CARLHalfcheetah.csv new file mode 100644 index 00000000..57186d5c --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLHalfcheetah.csv @@ -0,0 +1,7 @@ +Context Feature,Default,Bounds +joint_stiffness,15000.0,"(1, inf, )" +gravity,-9.8,"(-inf, -0.1, )" +friction,0.6,"(-inf, inf, )" +angular_damping,-0.05,"(-inf, inf, )" 
+joint_angular_damping,20.0,"(0, inf, )" +torso_mass,9.457333,"(0.1, inf, )" diff --git a/docs/source/environments/data/context_definitions/CARLHumanoid.csv b/docs/source/environments/data/context_definitions/CARLHumanoid.csv new file mode 100644 index 00000000..bef83ec5 --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLHumanoid.csv @@ -0,0 +1,6 @@ +Context Feature,Default,Bounds +gravity,-9.8,"(-inf, -0.1, )" +friction,0.6,"(-inf, inf, )" +angular_damping,-0.05,"(-inf, inf, )" +joint_angular_damping,20.0,"(0, inf, )" +torso_mass,8.907463,"(0.1, inf, )" diff --git a/docs/source/environments/data/context_definitions/CARLLunarLanderEnv.csv b/docs/source/environments/data/context_definitions/CARLLunarLanderEnv.csv new file mode 100644 index 00000000..eba5d5e8 --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLLunarLanderEnv.csv @@ -0,0 +1,17 @@ +Context Feature,Default,Bounds +FPS,50.0,"(1, 500, )" +SCALE,30.0,"(1, 100, )" +MAIN_ENGINE_POWER,13.0,"(0, 50, )" +SIDE_ENGINE_POWER,0.6,"(0, 50, )" +INITIAL_RANDOM,1000.0,"(0, 2000, )" +GRAVITY_X,0.0,"(-20, 20, )" +GRAVITY_Y,-10.0,"(-20, -0.01, )" +LEG_AWAY,20.0,"(0, 50, )" +LEG_DOWN,18.0,"(0, 50, )" +LEG_W,2.0,"(1, 10, )" +LEG_H,8.0,"(1, 20, )" +LEG_SPRING_TORQUE,40.0,"(0, 100, )" +SIDE_ENGINE_HEIGHT,14.0,"(1, 20, )" +SIDE_ENGINE_AWAY,12.0,"(1, 20, )" +VIEWPORT_W,600.0,"(400, 1000, )" +VIEWPORT_H,400.0,"(200, 800, )" diff --git a/docs/source/environments/data/context_definitions/CARLMarioEnv.csv b/docs/source/environments/data/context_definitions/CARLMarioEnv.csv new file mode 100644 index 00000000..e7fbff60 --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLMarioEnv.csv @@ -0,0 +1,5 @@ +Context Feature,Default,Bounds +level_index,0,"(None, None, 'categorical', array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]))" +noise,None *,"(-1.0, 1.0, )" +mario_state,0,"(None, None, 'categorical', [0, 1, 2])" +mario_inertia,0.89,"(0.5, 1.5, )" diff --git 
a/docs/source/environments/data/context_definitions/CARLMountainCarContinuousEnv.csv b/docs/source/environments/data/context_definitions/CARLMountainCarContinuousEnv.csv new file mode 100644 index 00000000..736fd40f --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLMountainCarContinuousEnv.csv @@ -0,0 +1,11 @@ +Context Feature,Default,Bounds +min_position,-1.2,"(-inf, inf, )" +max_position,0.6,"(-inf, inf, )" +max_speed,0.07,"(0, inf, )" +goal_position,0.45,"(-inf, inf, )" +goal_velocity,0.0,"(-inf, inf, )" +power,0.0015,"(-inf, inf, )" +min_position_start,-0.6,"(-inf, inf, )" +max_position_start,-0.4,"(-inf, inf, )" +min_velocity_start,0.0,"(-inf, inf, )" +max_velocity_start,0.0,"(-inf, inf, )" diff --git a/docs/source/environments/data/context_definitions/CARLMountainCarEnv.csv b/docs/source/environments/data/context_definitions/CARLMountainCarEnv.csv new file mode 100644 index 00000000..1cf3d747 --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLMountainCarEnv.csv @@ -0,0 +1,12 @@ +Context Feature,Default,Bounds +min_position,-1.2,"(-inf, inf, )" +max_position,0.6,"(-inf, inf, )" +max_speed,0.07,"(0, inf, )" +goal_position,0.5,"(-inf, inf, )" +goal_velocity,0.0,"(-inf, inf, )" +force,0.001,"(-inf, inf, )" +gravity,0.0025,"(0, inf, )" +start_position,-0.5,"(-1.5, 0.5, )" +start_position_std,0.1,"(0.1, inf, )" +start_velocity,0.0,"(-inf, inf, )" +start_velocity_std,0.0,"(0.1, inf, )" diff --git a/docs/source/environments/data/context_definitions/CARLPendulumEnv.csv b/docs/source/environments/data/context_definitions/CARLPendulumEnv.csv new file mode 100644 index 00000000..9a69ae61 --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLPendulumEnv.csv @@ -0,0 +1,6 @@ +Context Feature,Default,Bounds +max_speed,8.0,"(-inf, inf, )" +dt,0.05,"(0, inf, )" +g,10.0,"(0, inf, )" +m,1.0,"(1e-06, inf, )" +l,1.0,"(1e-06, inf, )" diff --git a/docs/source/environments/data/context_definitions/CARLRnaDesignEnv.csv 
b/docs/source/environments/data/context_definitions/CARLRnaDesignEnv.csv new file mode 100644 index 00000000..f303c13d --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLRnaDesignEnv.csv @@ -0,0 +1,6 @@ +Context Feature,Default,Bounds +mutation_threshold,5,"(0.1, inf, )" +reward_exponent,1,"(0.1, inf, )" +state_radius,5,"(1, inf, )" +dataset,eterna,"('eterna', 'rfam_taneda', None)" +target_structure_ids,,"(0, inf, [, ])" diff --git a/docs/source/environments/data/context_definitions/CARLUr5e.csv b/docs/source/environments/data/context_definitions/CARLUr5e.csv new file mode 100644 index 00000000..5b681908 --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLUr5e.csv @@ -0,0 +1,10 @@ +Context Feature,Default,Bounds +joint_stiffness,40000.0,"(1, inf, )" +gravity,-9.81,"(-inf, -0.1, )" +friction,0.6,"(-inf, inf, )" +angular_damping,-0.05,"(-inf, inf, )" +actuator_strength,100.0,"(1, inf, )" +joint_angular_damping,50.0,"(0, 360, )" +target_radius,0.02,"(0.01, inf, )" +target_distance,0.5,"(0.01, inf, )" +torso_mass,1.0,"(0, inf, )" diff --git a/docs/source/environments/data/context_definitions/CARLVehicleRacingEnv.csv b/docs/source/environments/data/context_definitions/CARLVehicleRacingEnv.csv new file mode 100644 index 00000000..8c55a66a --- /dev/null +++ b/docs/source/environments/data/context_definitions/CARLVehicleRacingEnv.csv @@ -0,0 +1,3 @@ +Context Feature,Default,Bounds +VEHICLE,0,"(None, None, 'categorical', array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]))" diff --git a/docs/source/environments/data/screenshots/ant.png b/docs/source/environments/data/screenshots/ant.png new file mode 100644 index 00000000..4883ada3 Binary files /dev/null and b/docs/source/environments/data/screenshots/ant.png differ diff --git a/docs/source/environments/data/screenshots/fetch.png b/docs/source/environments/data/screenshots/fetch.png new file mode 100644 index 
00000000..5e5889f5 Binary files /dev/null and b/docs/source/environments/data/screenshots/fetch.png differ diff --git a/docs/source/environments/data/screenshots/grasp.png b/docs/source/environments/data/screenshots/grasp.png new file mode 100644 index 00000000..d0648f9c Binary files /dev/null and b/docs/source/environments/data/screenshots/grasp.png differ diff --git a/docs/source/environments/data/screenshots/halfcheetah.png b/docs/source/environments/data/screenshots/halfcheetah.png new file mode 100644 index 00000000..3f99a44e Binary files /dev/null and b/docs/source/environments/data/screenshots/halfcheetah.png differ diff --git a/docs/source/environments/data/screenshots/humanoid.png b/docs/source/environments/data/screenshots/humanoid.png new file mode 100644 index 00000000..2f0b6cc6 Binary files /dev/null and b/docs/source/environments/data/screenshots/humanoid.png differ diff --git a/docs/source/environments/data/screenshots/learna.png b/docs/source/environments/data/screenshots/learna.png new file mode 100644 index 00000000..9030d232 Binary files /dev/null and b/docs/source/environments/data/screenshots/learna.png differ diff --git a/docs/source/environments/data/screenshots/supermario.png b/docs/source/environments/data/screenshots/supermario.png new file mode 100644 index 00000000..b05858dc Binary files /dev/null and b/docs/source/environments/data/screenshots/supermario.png differ diff --git a/docs/source/environments/data/screenshots/ur5e.png b/docs/source/environments/data/screenshots/ur5e.png new file mode 100644 index 00000000..86e1dc0d Binary files /dev/null and b/docs/source/environments/data/screenshots/ur5e.png differ diff --git a/docs/source/environments/data/screenshots/vehicleracing.png b/docs/source/environments/data/screenshots/vehicleracing.png new file mode 100644 index 00000000..7bcd3c3e Binary files /dev/null and b/docs/source/environments/data/screenshots/vehicleracing.png differ diff --git 
a/docs/source/environments/data/tab_overview_environments.csv b/docs/source/environments/data/tab_overview_environments.csv new file mode 100644 index 00000000..2dec5e5a --- /dev/null +++ b/docs/source/environments/data/tab_overview_environments.csv @@ -0,0 +1,17 @@ +Env. Family,Name,# Context Features,Action Space,Obs. Space +classic_control,CARLMountainCarEnv,11,Discrete(3),"Box(-inf, inf, (13,), float32)" +classic_control,CARLPendulumEnv,5,"Box(-2.0, 2.0, (1,), float32)","Box(-inf, inf, (8,), float32)" +classic_control,CARLAcrobotEnv,10,Discrete(3),"Box(-28.274333953857422, 282.74334716796875, (16,), float32)" +classic_control,CARLCartPoleEnv,6,Discrete(2),"Box(-3.4028234663852886e+38, inf, (10,), float32)" +classic_control,CARLMountainCarContinuousEnv,10,"Box(-1.0, 1.0, (1,), float32)","Box(-inf, inf, (12,), float32)" +box2d,CARLLunarLanderEnv,16,Discrete(4),"Box(-inf, inf, (24,), float32)" +box2d,CARLVehicleRacingEnv,1,"Box(-1.0, 1.0, (3,), float32)","Box(0, 255, (96, 96, 3), uint8)" +box2d,CARLBipedalWalkerEnv,20,"Box(-1.0, 1.0, (4,), float32)","Box(-inf, inf, (44,), float32)" +brax,CARLAnt,7,"Box(-1.0, 1.0, (8,), float32)","Box(-inf, inf, (94,), float32)" +brax,CARLHalfcheetah,6,"Box(-1.0, 1.0, (6,), float32)","Box(-inf, inf, (29,), float32)" +brax,CARLHumanoid,5,"Box(-1.0, 1.0, (17,), float32)","Box(-inf, inf, (304,), float32)" +brax,CARLFetch,9,"Box(-1.0, 1.0, (10,), float32)","Box(-inf, inf, (110,), float32)" +brax,CARLGrasp,9,"Box(-1.0, 1.0, (19,), float32)","Box(-inf, inf, (141,), float32)" +brax,CARLUr5e,9,"Box(-1.0, 1.0, (6,), float32)","Box(-inf, inf, (75,), float32)" +RNA,CARLRnaDesignEnv,5,Discrete(4),"Box(-inf, inf, (11,), float32)" +Mario,CARLMarioEnv,4,Discrete(10),"Box(0, 255, (4, 64, 64), uint8)" diff --git a/docs/source/figures/CARL_logo.png b/docs/source/figures/CARL_logo.png new file mode 100644 index 00000000..bc753ad1 Binary files /dev/null and b/docs/source/figures/CARL_logo.png differ diff --git a/docs/source/figures/concept.png 
b/docs/source/figures/concept.png new file mode 100644 index 00000000..65c88e92 Binary files /dev/null and b/docs/source/figures/concept.png differ diff --git a/docs/source/figures/experiments/CARLMarioEnv_mean_ep_rew_over_step_visible_inertia.png b/docs/source/figures/experiments/CARLMarioEnv_mean_ep_rew_over_step_visible_inertia.png new file mode 100644 index 00000000..c3f4c03a Binary files /dev/null and b/docs/source/figures/experiments/CARLMarioEnv_mean_ep_rew_over_step_visible_inertia.png differ diff --git a/docs/source/figures/experiments/gravity_distribution_exp1.png b/docs/source/figures/experiments/gravity_distribution_exp1.png new file mode 100644 index 00000000..b6244dc5 Binary files /dev/null and b/docs/source/figures/experiments/gravity_distribution_exp1.png differ diff --git a/docs/source/figures/experiments/gravity_sampled_gravities.png b/docs/source/figures/experiments/gravity_sampled_gravities.png new file mode 100644 index 00000000..4f75aba0 Binary files /dev/null and b/docs/source/figures/experiments/gravity_sampled_gravities.png differ diff --git a/docs/source/figures/experiments/policytransfer_hiddenvisible_exp1.png b/docs/source/figures/experiments/policytransfer_hiddenvisible_exp1.png new file mode 100644 index 00000000..f156f9b1 Binary files /dev/null and b/docs/source/figures/experiments/policytransfer_hiddenvisible_exp1.png differ diff --git a/docs/source/figures/logo.png b/docs/source/figures/logo.png new file mode 100644 index 00000000..65c88e92 Binary files /dev/null and b/docs/source/figures/logo.png differ diff --git a/docs/source/figures/radar_env_space.png b/docs/source/figures/radar_env_space.png new file mode 100644 index 00000000..85a16f53 Binary files /dev/null and b/docs/source/figures/radar_env_space.png differ diff --git a/environment.yaml b/environment.yaml deleted file mode 100644 index 7159b54d..00000000 --- a/environment.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: carl -channels: - - pytorch - - defaults - - conda-forge - 
- nvidia -dependencies: - - python=3.9 - - pylint - - rope - - torchvision - - cudatoolkit=11.3 - - cuda-nvcc - - torchaudio - - pytorch - - pybox2d - - pip - - pip: - - gym - - scipy - - ConfigArgParse - - numpy - - pandas - - xvfbwrapper - - matplotlib - - optuna - - dataclasses - - numpyencoder - - pyglet - - pytablewriter - - PyYAML - - tabulate - - brax - - protobuf - - RNA - - Pillow - - py4j - - ray - - seaborn - - stable_baselines3 - - sb3_contrib - - pytest - - debugpy - - hydra-core - - hydra-submitit-launcher - - hydra-optuna-sweeper - - hydra_colorlog - - coax - - wandb \ No newline at end of file diff --git a/examples/try_dm_control.py b/examples/try_dm_control.py index be259a43..48ff1825 100644 --- a/examples/try_dm_control.py +++ b/examples/try_dm_control.py @@ -11,11 +11,6 @@ from carl.envs import CARLDmcWalkerEnv from carl.envs import CARLDmcWalkerEnv_defaults as walker_default from carl.envs import CARLDmcWalkerEnv_mask as walker_mask -from carl.envs import CARLDmcFingerEnv - -import os -os.environ["MUJOCO_GL"] = "glfw" -# os.environ["SDL_VIDEODRIVER"] = "dummy" if __name__ == "__main__": # Load one task: @@ -37,30 +32,14 @@ hide_context=False, dict_observation_space=True, ) - carl_env = CARLDmcFingerEnv() - # carl_env = CARLDmcWalkerEnv() - # carl_env = CARLDmcQuadrupedEnv() - # carl_env = CARLDmcFishEnv() action = carl_env.action_space.sample() - s = carl_env.reset() state, reward, done, info = carl_env.step(action=action) print("state", state, type(state)) - def render(env, **render_kwargs): - frame = carl_env.render(mode="rgb_array", **render_kwargs) - plt.imshow(frame) - plt.axis("off") - plt.tight_layout() - plt.savefig(f"dm_render_{type(env).__name__}.png", dpi=300, bbox_inches='tight',transparent=True, pad_inches=0) - + render = lambda: plt.imshow(carl_env.render(mode="rgb_array")) s = carl_env.reset() - render( - carl_env, - camera_id=1, - height=400, - width=400, - ) - + render() + # plt.savefig("dm_render.png") action = 
carl_env.action_space.sample() state, reward, done, info = carl_env.step(action=action) print("state", state, type(state)) diff --git a/examples/vary_initial_state_distributions.ipynb b/examples/vary_initial_state_distributions.ipynb index 5c5ba3f8..08a2e42d 100644 --- a/examples/vary_initial_state_distributions.ipynb +++ b/examples/vary_initial_state_distributions.ipynb @@ -2,12 +2,6 @@ "cells": [ { "cell_type": "markdown", - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - }, "source": [ "# Initial State Distributions\n", "For the classic control environments the initial state distributions are narrow. With CARL (Contextually Adapative RL)\n", @@ -16,17 +10,17 @@ "\n", "## Collect Initial States\n", "We collect the initial states, once of the classic environments and once of the contextually extended environments." - ] - }, - { - "cell_type": "code", - "execution_count": 3, + ], "metadata": { "collapsed": false, "pycharm": { - "name": "#%%\n" + "name": "#%% md\n" } - }, + } + }, + { + "cell_type": "code", + "execution_count": 3, "outputs": [ { "name": "stderr", @@ -151,18 +145,11 @@ ], "source": [ "import sys\n", - "\n", "sys.path.append(\"..\")\n", "import importlib\n", "import carl.envs\n", - "\n", "importlib.reload(carl.envs)\n", - "from carl.envs import (\n", - " CARLCartPoleEnv,\n", - " CARLAcrobotEnv,\n", - " CARLMountainCarEnv,\n", - " CARLPendulumEnv,\n", - ")\n", + "from carl.envs import CARLCartPoleEnv, CARLAcrobotEnv, CARLMountainCarEnv, CARLPendulumEnv\n", "import numpy as np\n", "from tqdm import tqdm\n", "from typing import Dict\n", @@ -195,10 +182,10 @@ "contexts_mountaincar = dict()\n", "for i in range(n_contexts):\n", " # Sample for Acrobot\n", - " initial_angle_lower = np.random.uniform(-np.pi, 0)\n", - " initial_angle_upper = np.random.uniform(min(0, initial_angle_lower), np.pi)\n", - " initial_velocity_lower = np.random.uniform(-1, 0)\n", - " initial_velocity_upper = np.random.uniform(min(0, 
initial_velocity_lower), 1)\n", + " initial_angle_lower = np.random.uniform(- np.pi, 0)\n", + " initial_angle_upper = np.random.uniform(min(0,initial_angle_lower), np.pi)\n", + " initial_velocity_lower = np.random.uniform(- 1, 0)\n", + " initial_velocity_upper = np.random.uniform(min(0,initial_velocity_lower), 1)\n", " context = {\n", " \"initial_angle_lower\": initial_angle_lower,\n", " \"initial_angle_upper\": initial_angle_upper,\n", @@ -209,7 +196,7 @@ "\n", " # Sample for CartPole\n", " initial_state_lower = np.random.uniform(-2, 0)\n", - " initial_state_upper = np.random.uniform(min(0, initial_state_lower), 2)\n", + " initial_state_upper = np.random.uniform(min(0,initial_state_lower), 2)\n", " context = {\n", " \"initial_state_lower\": initial_state_lower,\n", " \"initial_state_upper\": initial_state_upper,\n", @@ -250,39 +237,37 @@ "# Collect initial states\n", "renders, states = get_renders(env_specs=env_specs)\n", "renders_classic, states_classic = get_renders(env_specs=env_specs_classic)" - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "collapsed": false, "pycharm": { - "name": "#%% md\n" + "name": "#%%\n" } - }, + } + }, + { + "cell_type": "markdown", "source": [ "## Classic vs. Contextual Initial State Distributions\n", "The contextually extended CARL environments show a much higher variety in the initial state distributions\n", "thus aiming for generalization starting from the first state.\n", "The figure displays the average of 50 initial states." - ] - }, - { - "cell_type": "code", - "execution_count": 4, + ], "metadata": { "collapsed": false, "pycharm": { - "name": "#%%\n" + "name": "#%% md\n" } - }, + } + }, + { + "cell_type": "code", + "execution_count": 4, "outputs": [ { "data": { - "image/png": "", - "text/plain": [ - "
" - ] + "text/plain": "
", + "image/png": "\n" }, "metadata": {}, "output_type": "display_data" @@ -295,7 +280,10 @@ "\n", "title = f\"Initial State Distributions ($n_{{initial states}} = {n_initial_states}$)\"\n", "\n", - "data = {\"classic\": renders_classic, \"contextual\": renders}\n", + "data = {\n", + " \"classic\": renders_classic,\n", + " \"contextual\": renders\n", + "}\n", "nrows = len(data)\n", "ncols = len(data[\"classic\"])\n", "enlarge = 3\n", @@ -305,7 +293,7 @@ " for j, (env_name, _renders) in enumerate(renders.items()):\n", " ax = axes[i, j]\n", " _renders = np.array(_renders)\n", - " render = np.mean(_renders, axis=0) / 255\n", + " render = np.mean(_renders, axis=0)/255\n", " ax.imshow(render)\n", " # ax.axis('off')\n", " ax.set_xticks([])\n", @@ -319,12 +307,18 @@ "\n", "fig.set_tight_layout(True)\n", "plt.show()" - ] + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } } ], "metadata": { "kernelspec": { - "display_name": "Python 3.9.5 ('testvenv': venv)", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -338,14 +332,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" - }, - "vscode": { - "interpreter": { - "hash": "a36dcd8ce3fbd929ee047bf0480a76de49c6a54240010afb1cc974faf11ebb5e" - } + "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/setup.py b/setup.py index da867e13..ae5cd938 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,6 @@ def read_file(filepath: str) -> str: extras_require = { "box2d": [ - "swig", "gym[box2d]==0.24.1", ], "brax": [ @@ -33,12 +32,9 @@ def read_file(filepath: str) -> str: "dm_control>=1.0.3", ], "mario": [ - "torch", - "py4j", - "Pillow", - "opencv-python", - "xvfbwrapper", - "jdk4py" + "torch>=1.9.0", + "Pillow>=8.3.1", + "py4j>=0.10.9.2", ], "dev": [ "pytest>=6.1.1", @@ -55,13 +51,6 @@ def read_file(filepath: str) -> str: "sphinx-gallery>=0.10.0", "image>=1.5.33", 
"sphinx-autoapi>=1.8.4", - "gym[box2d]==0.24.1", - "brax>=0.0.10", - "protobuf>=3.17.3", - "dm_control>=1.0.3", - "torch>=1.9.0", - "Pillow>=8.3.1", - "py4j>=0.10.9.2" ] } diff --git a/test/test_all_envs.py b/test/test_all_envs.py index a39c51a5..141b06b4 100644 --- a/test/test_all_envs.py +++ b/test/test_all_envs.py @@ -16,7 +16,6 @@ def test_init_all_envs(self): env = ( # noqa: F841 local variable is assigned to but never used var() ) - obs = env.reset() except Exception as e: print(f"Cannot instantiate {var} environment.") raise e diff --git a/test/test_evaluation_protocol.py b/test/test_evaluation_protocol.py deleted file mode 100644 index aac1d6bf..00000000 --- a/test/test_evaluation_protocol.py +++ /dev/null @@ -1,57 +0,0 @@ -import unittest - -from experiments.evaluation_protocol.evaluation_protocol import ( - ContextFeature, - EvaluationProtocol, -) -from experiments.evaluation_protocol.plot_evaluate_on_protocol import ( - plot_evaluation_protocol, -) - - -class TestEvaluationProtocol(unittest.TestCase): - def test_context_creation(self): - cf0 = ContextFeature("g", 9.0, 9.5, 10.0, 11.0) - cf1 = ContextFeature("l", 0.4, 0.5, 0.6, 0.8) - seed = None - n_contexts = 100 - context_features = [cf0, cf1] - modes = ["A", "B", "C"] - for mode in modes: - ep = EvaluationProtocol( - context_features=context_features, mode=mode, seed=seed - ) - contexts_train = ep.create_train_contexts(n=n_contexts) - contexts_ES = ep.create_contexts_extrapolation_single( - n=n_contexts - ) # covers two quadrants - contexts_EA = ep.create_contexts_extrapolation_all(n=n_contexts) - contexts_I = ep.create_contexts_interpolation( - n=n_contexts, contexts_forbidden=contexts_train - ) - contexts_IC = ep.create_contexts_interpolation_combinatorial( - n=n_contexts, contexts_forbidden=contexts_train - ) - contexts_dict = { - "train": contexts_train, - "test_interpolation": contexts_I, - "test_interpolation_combinatorial": contexts_IC, - "test_extrapolation_single": contexts_ES, - 
"test_extrapolation_all": contexts_EA, - } - for c_id, C in contexts_dict.items(): - if len(C) != 0: - self.assertTrue( - len(C) == n_contexts, - msg=f"Number of contexts {len(C)} not equal to desired number {n_contexts} for {c_id}.", - ) - - def test_plot(self): - cf0 = ContextFeature("g", 9.0, 9.5, 10.0, 11.0) - cf1 = ContextFeature("l", 0.4, 0.5, 0.6, 0.8) - seed = 1 - n_contexts = 100 - context_features = [cf0, cf1] - plot_evaluation_protocol( - context_features=context_features, seed=seed, n_contexts=n_contexts - ) diff --git a/test/test_sampling.py b/test/test_sampling.py deleted file mode 100644 index f622a9a6..00000000 --- a/test/test_sampling.py +++ /dev/null @@ -1,34 +0,0 @@ -import unittest -from carl.context.sampling import get_default_context_and_bounds -from experiments.carlbench.context_sampling import ContextSampler - - -class TestSampling(unittest.TestCase): - def test_get_default_context_and_bounds(self): - env_name = "CARLPendulumEnv" - env_defaults, env_bounds = get_default_context_and_bounds(env_name=env_name) - defaults = {"max_speed": 8.0, "dt": 0.05, "g": 10.0, "m": 1.0, "l": 1.0} - self.assertDictEqual(env_defaults, defaults) - - def test_context_sampler(self): - env_name = "CARLPendulumEnv" - cs = ContextSampler( - env_name=env_name, - difficulty="easy", - n_samples=1, - context_feature_names=["m", "l", "g"], - seed=455, - ) - contexts = cs.sample_contexts() - true_dict = { - 0: { - "max_speed": 8.0, - "dt": 0.05, - "g": 9.748400206554313, - "m": 0.8727822986909317, - "l": 0.9215523401261485, - } - } - for context_sampled, context_true in zip(contexts.values(), true_dict.values()): - for k in context_sampled.keys(): - self.assertAlmostEqual(context_sampled[k], context_true[k])