ray-project · sven1977 · Oct 28, 2024 · May 14, 2024 · May 14, 2024 · May 14, 2024
@@ -4,7 +4,7 @@
 try:
     import gymnasium as gym
 
-    env = gym.make("ALE/Pong-v5")
+    env = gym.make("ale_py:ALE/Pong-v5")
     obs, infos = env.reset()
 except Exception:
     import gym

@@ -285,7 +285,7 @@ in roughly 5min. It can be run like this on a single g5.24xlarge (or g6.24xlarge
 .. code-block:: bash
 
     $ cd ray/rllib/tuned_examples/ppo
-    $ python atari_ppo.py --env ALE/Pong-v5 --num-gpus=4 --num-env-runners=95
+    $ python atari_ppo.py --env=ale_py:ALE/Pong-v5 --num-gpus=4 --num-env-runners=95
 
 Note that some of the files in this folder are used for RLlib's daily or weekly
 release tests as well.

diff --git a/python/requirements.txt b/python/requirements.txt
@@ -37,7 +37,7 @@ colorful
 rich
 opentelemetry-sdk
 fastapi
-gymnasium==0.28.1
+gymnasium==1.0.0a2
 virtualenv!=20.21.1,>=20.0.24
 opentelemetry-api
 opencensus

diff --git a/python/requirements/ml/rllib-test-requirements.txt b/python/requirements/ml/rllib-test-requirements.txt
@@ -3,22 +3,18 @@
 # Environment adapters.
 # ---------------------
 # Atari
-gymnasium==0.28.1
 imageio==2.31.1
-ale_py==0.8.1
+ale_py==0.9.0
 # For testing MuJoCo envs with gymnasium.
 mujoco==2.3.6
 dm_control==1.0.12
 
 # For tests on PettingZoo's multi-agent envs.
-pettingzoo==1.23.1
-# When installing pettingzoo, chess is missing, even though its a dependancy
-# TODO: remove if a future pettingzoo and/or ray version fixes this dependancy issue.
-chess==1.7.0
+pettingzoo==1.24.3
 pymunk==6.2.1
-supersuit==3.8.0
 tinyscaler==1.2.6
 shimmy
+supersuit==3.9.0
 
 # Kaggle envs.
 kaggle_environments==1.7.11
@@ -29,12 +25,6 @@ mlagents_envs==0.28.0
 
 # For tests on minigrid.
 minigrid
-# For tests on RecSim and Kaggle envs.
-# Explicitly depends on `tensorflow` and doesn't accept `tensorflow-macos`
-recsim==0.2.4; (sys_platform != 'darwin' or platform_machine != 'arm64')
-# recsim depends on dopamine-rl, but dopamine-rl pins gym <= 0.25.2, which break some envs
-dopamine-rl==4.0.5; (sys_platform != 'darwin' or platform_machine != 'arm64')
-tensorflow_estimator
 # DeepMind's OpenSpiel
 open-spiel==1.4
 

diff --git a/python/requirements_compiled.txt b/python/requirements_compiled.txt
@@ -75,10 +75,10 @@ aiosqlite==0.19.0
     # via ypy-websocket
 alabaster==0.7.13
     # via sphinx
-ale-py==0.8.1
+ale-py==0.9.0
     # via
     #   -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
-    #   gym
+    #   gymnasium
 alembic==1.12.1
     # via
     #   aim
@@ -274,8 +274,6 @@ charset-normalizer==3.3.2
     # via
     #   aiohttp
     #   requests
-chess==1.7.0
-    # via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
 chex==0.1.7
     # via optax
 clang-format==12.0.1
@@ -306,7 +304,6 @@ cloudpickle==2.2.0
     #   -r /ray/ci/../python/requirements/test-requirements.txt
     #   dask
     #   distributed
-    #   gym
     #   gymnasium
     #   hyperopt
     #   mlagents-envs
@@ -701,13 +698,7 @@ gsutil==5.27
     # via -r /ray/ci/../python/requirements/docker/ray-docker-requirements.txt
 gunicorn==20.1.0
     # via mlflow
-gym==0.26.2
-    # via
-    #   dopamine-rl
-    #   recsim
-gym-notices==0.0.8
-    # via gym
-gymnasium==0.28.1
+gymnasium==1.0.0a2
     # via
     #   -r /ray/ci/../python/requirements.txt
     #   -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
@@ -1256,7 +1247,6 @@ numpy==1.24.4
     #   flax
     #   gpy
     #   gradio
-    #   gym
     #   gymnasium
     #   h5py
     #   hebo
@@ -1302,7 +1292,6 @@ numpy==1.24.4
     #   pytorch-lightning
     #   pywavelets
     #   raydp
-    #   recsim
     #   scikit-image
     #   scikit-learn
     #   scipy
@@ -1501,7 +1490,7 @@ pbr==6.0.0
     #   sarif-om
 peewee==3.17.0
     # via semgrep
-pettingzoo==1.23.1
+pettingzoo==1.24.3
     # via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
 pexpect==4.8.0
     # via
@@ -1871,8 +1860,6 @@ querystring-parser==1.2.4
     #   tune-sklearn
 raydp==1.7.0b20231020.dev0
     # via -r /ray/ci/../python/requirements/ml/data-test-requirements.txt
-recsim==0.2.4 ; sys_platform != "darwin" or platform_machine != "arm64"
-    # via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
 redis==4.4.2
     # via -r /ray/ci/../python/requirements/test-requirements.txt
 regex==2023.10.3
@@ -2177,7 +2164,7 @@ statsmodels==0.14.0
     # via
     #   hpbandster
     #   statsforecast
-supersuit==3.8.0
+supersuit==3.9.0
     # via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
 sympy==1.12
     # via

@@ -295,7 +295,7 @@ def get_packages(self):
 
     setup_spec.extras["rllib"] = setup_spec.extras["tune"] + [
         "dm_tree",
-        "gymnasium==0.28.1",
+        "gymnasium==1.0.0a2",
         "lz4",
         "scikit-image",
         "pyyaml",

@@ -113,7 +113,7 @@ aiosignal==1.3.1 \
     # via
     #   -c release/ray_release/byod/requirements_compiled.txt
     #   aiohttp
-ale-py==0.8.1 \
+ale-py==0.9.0 \
     --hash=sha256:0006d80dfe7745eb5a93444492337203c8bc7eb594a2c24c6a651c5c5b0eaf09 \
     --hash=sha256:0856ca777473ec4ae8a59f3af9580259adb0fd4a47d586a125a440c62e82fc10 \
     --hash=sha256:0ffecb5c956749596030e464827642945162170a132d093c3d4fa2d7e5725c18 \
@@ -1231,17 +1231,6 @@ gsutil==5.27 \
     # via
     #   -c release/ray_release/byod/requirements_compiled.txt
     #   -r release/ray_release/byod/requirements_byod_3.9.in
-gym[atari]==0.26.2 \
-    --hash=sha256:e0d882f4b54f0c65f203104c24ab8a38b039f1289986803c7d02cdbe214fbcc4
-    # via
-    #   -c release/ray_release/byod/requirements_compiled.txt
-    #   -r release/ray_release/byod/requirements_byod_3.9.in
-gym-notices==0.0.8 \
-    --hash=sha256:ad25e200487cafa369728625fe064e88ada1346618526102659b4640f2b4b911 \
-    --hash=sha256:e5f82e00823a166747b4c2a07de63b6560b1acb880638547e0cabf825a01e463
-    # via
-    #   -c release/ray_release/byod/requirements_compiled.txt
-    #   gym
 h5py==3.10.0 \
     --hash=sha256:012ab448590e3c4f5a8dd0f3533255bc57f80629bf7c5054cf4c87b30085063c \
     --hash=sha256:212bb997a91e6a895ce5e2f365ba764debeaef5d2dca5c6fb7098d66607adf99 \
@@ -1707,7 +1696,6 @@ numpy==1.24.4 \
     # via
     #   -c release/ray_release/byod/requirements_compiled.txt
     #   ale-py
-    #   gym
     #   h5py
     #   lightgbm
     #   ml-dtypes

@@ -3110,7 +3110,7 @@ def is_atari(self) -> bool:
         # Not yet determined, try to figure this out.
         if self._is_atari is None:
             # Atari envs are usually specified via a string like "PongNoFrameskip-v4"
-            # or "ALE/Breakout-v5".
+            # or "ale_py:ALE/Breakout-v5".
             # We do NOT attempt to auto-detect Atari env for other specified types like
             # a callable, to avoid running heavy logics in validate().
             # For these cases, users can explicitly set `environment(atari=True)`.

@@ -64,7 +64,7 @@ def test_dreamerv3_compilation(self):
         for env in [
             "FrozenLake-v1",
             "CartPole-v1",
-            "ALE/MsPacman-v5",
+            "ale_py:ALE/MsPacman-v5",
             "Pendulum-v1",
         ]:
             print("Env={}".format(env))

@@ -12,6 +12,7 @@
 from typing import List, Tuple
 
 import gymnasium as gym
+from gymnasium.wrappers.vector import DictInfoToList
 import numpy as np
 import tree  # pip install dm_tree
 
@@ -73,7 +74,7 @@ def __init__(
 
         # Create the gym.vector.Env object.
         # Atari env.
-        if self.config.env.startswith("ALE/"):
+        if "ALE/" in self.config.env:
             # TODO (sven): This import currently causes a Tune test to fail. Either way,
             #  we need to figure out how to properly setup the CI environment with
             #  the correct versions of all gymnasium-related packages.
@@ -160,11 +161,15 @@ def _entry_point():
                     env_descriptor=self.config.env,
                 ),
             )
-            # Create the vectorized gymnasium env.
-            self.env = gym.vector.make(
-                "dreamerv3-custom-env-v0",
-                num_envs=self.config.num_envs_per_env_runner,
-                asynchronous=False,  # self.config.remote_worker_envs,
+            # Wrap in `DictInfoToList` to get infos as a list of per-env dicts.
+            self.env = DictInfoToList(
+                gym.make_vec(
+                    "dreamerv3-custom-env-v0",
+                    num_envs=self.config.num_envs_per_env_runner,
+                    vectorization_mode=(
+                        "async" if self.config.remote_worker_envs else "sync"
+                    ),
+                )
             )
         self.num_envs = self.env.num_envs
         assert self.num_envs == self.config.num_envs_per_env_runner

@@ -160,7 +160,7 @@ def test_ppo_compilation_w_connectors(self):
         num_iterations = 2
 
         for fw in framework_iterator(config):
-            for env in ["FrozenLake-v1", "ALE/MsPacman-v5"]:
+            for env in ["FrozenLake-v1", "ale_py:ALE/MsPacman-v5"]:
                 print("Env={}".format(env))
                 for lstm in [False, True]:
                     print("LSTM={}".format(lstm))
@@ -226,7 +226,7 @@ def test_ppo_compilation_and_schedule_mixins(self):
         num_iterations = 2
 
         for fw in framework_iterator(config):
-            for env in ["FrozenLake-v1", "ALE/MsPacman-v5"]:
+            for env in ["FrozenLake-v1", "ale_py:ALE/MsPacman-v5"]:
                 print("Env={}".format(env))
                 for lstm in [False, True]:
                     print("LSTM={}".format(lstm))

@@ -140,7 +140,7 @@ def tearDownClass(cls):
     def test_rollouts(self):
         # TODO: Add FrozenLake-v1 to cover LSTM case.
         frameworks = ["torch", "tf2"]
-        env_names = ["CartPole-v1", "Pendulum-v1", "ALE/Breakout-v5"]
+        env_names = ["CartPole-v1", "Pendulum-v1", "ale_py:ALE/Breakout-v5"]
         fwd_fns = ["forward_exploration", "forward_inference"]
         lstm = [True, False]
         config_combinations = [frameworks, env_names, fwd_fns, lstm]
@@ -181,7 +181,7 @@ def test_rollouts(self):
     def test_forward_train(self):
         # TODO: Add FrozenLake-v1 to cover LSTM case.
         frameworks = ["tf2", "torch"]
-        env_names = ["CartPole-v1", "Pendulum-v1", "ALE/Breakout-v5"]
+        env_names = ["CartPole-v1", "Pendulum-v1", "ale_py:ALE/Breakout-v5"]
         lstm = [False, True]
         config_combinations = [frameworks, env_names, lstm]
         for config in itertools.product(*config_combinations):

diff --git a/rllib/algorithms/ppo/tests/test_ppo_with_env_runner.py b/rllib/algorithms/ppo/tests/test_ppo_with_env_runner.py
@@ -103,7 +103,7 @@ def test_ppo_compilation_and_schedule_mixins(self):
                 # "CliffWalking-v0",
                 "CartPole-v1",
                 "Pendulum-v1",
-            ]:  # "ALE/Breakout-v5"]:
+            ]:  # "ale_py:ALE/Breakout-v5"]:
                 print("Env={}".format(env))
                 for lstm in [False]:
                     print("LSTM={}".format(lstm))

diff --git a/rllib/algorithms/ppo/tests/test_ppo_with_rl_module.py b/rllib/algorithms/ppo/tests/test_ppo_with_rl_module.py
@@ -99,7 +99,7 @@ def test_ppo_compilation_and_schedule_mixins(self):
 
         for fw in framework_iterator(config, frameworks=("tf2", "torch")):
             # TODO (Kourosh) Bring back "FrozenLake-v1"
-            for env in ["CartPole-v1", "Pendulum-v1", "ALE/Breakout-v5"]:
+            for env in ["CartPole-v1", "Pendulum-v1", "ale_py:ALE/Breakout-v5"]:
                 print("Env={}".format(env))
                 for lstm in [False]:
                     print("LSTM={}".format(lstm))

@@ -145,11 +145,11 @@ def test_rollout_fragment_length(self):
     def test_detect_atari_env(self):
         """Tests that we can properly detect Atari envs."""
         config = AlgorithmConfig().environment(
-            env="ALE/Breakout-v5", env_config={"frameskip": 1}
+            env="ale_py:ALE/Breakout-v5", env_config={"frameskip": 1}
         )
         self.assertTrue(config.is_atari)
 
-        config = AlgorithmConfig().environment(env="ALE/Pong-v5")
+        config = AlgorithmConfig().environment(env="ale_py:ALE/Pong-v5")
         self.assertTrue(config.is_atari)
 
         config = AlgorithmConfig().environment(env="CartPole-v1")
@@ -158,7 +158,7 @@ def test_detect_atari_env(self):
 
         config = AlgorithmConfig().environment(
             env=lambda ctx: gym.make(
-                "ALE/Breakout-v5",
+                "ale_py:ALE/Breakout-v5",
                 frameskip=1,
             )
         )