From d34b0d9606cb0dbd7425790dfe73ea5cc472811b Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Sun, 12 Mar 2023 17:31:26 -0700 Subject: [PATCH 01/11] Remove conversions to config Signed-off-by: Artur Niederfahrenhorst --- rllib/algorithms/a3c/a3c_tf_policy.py | 2 -- rllib/algorithms/a3c/a3c_torch_policy.py | 2 -- rllib/algorithms/appo/appo_tf_policy.py | 4 ---- rllib/algorithms/ars/ars.py | 2 +- rllib/algorithms/ars/tests/test_ars.py | 2 +- rllib/algorithms/bandit/bandit_torch_policy.py | 2 -- rllib/algorithms/ddpg/ddpg_tf_policy.py | 4 ---- rllib/algorithms/dreamer/dreamer_torch_policy.py | 3 --- rllib/algorithms/impala/impala_tf_policy.py | 4 ---- rllib/algorithms/impala/tf/impala_tf_policy_rlm.py | 3 --- rllib/algorithms/maddpg/maddpg_tf_policy.py | 1 - rllib/algorithms/maml/maml_tf_policy.py | 1 - rllib/algorithms/marwil/marwil_tf_policy.py | 4 ---- rllib/algorithms/marwil/marwil_torch_policy.py | 4 ---- rllib/algorithms/mbmpo/mbmpo_torch_policy.py | 1 - rllib/algorithms/ppo/ppo_tf_policy.py | 1 - rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py | 1 - rllib/algorithms/ppo/torch/ppo_torch_policy_rlm.py | 1 - rllib/algorithms/qmix/qmix_policy.py | 1 - rllib/algorithms/simple_q/simple_q_tf_policy.py | 6 ------ rllib/algorithms/simple_q/simple_q_torch_policy.py | 3 --- rllib/policy/eager_tf_policy.py | 7 +++---- rllib/policy/policy_template.py | 8 +++----- rllib/policy/tf_policy_template.py | 4 +--- 24 files changed, 9 insertions(+), 62 deletions(-) diff --git a/rllib/algorithms/a3c/a3c_tf_policy.py b/rllib/algorithms/a3c/a3c_tf_policy.py index b093c10fb3f21..a85a73e25bf60 100644 --- a/rllib/algorithms/a3c/a3c_tf_policy.py +++ b/rllib/algorithms/a3c/a3c_tf_policy.py @@ -58,8 +58,6 @@ def __init__( # First thing first, enable eager execution if necessary. base.enable_eager_execution_if_necessary() - config = dict(ray.rllib.algorithms.a3c.a3c.A3CConfig().to_dict(), **config) - # Initialize base class. base.__init__( self, diff --git a/rllib/algorithms/a3c/a3c_torch_policy.py b/rllib/algorithms/a3c/a3c_torch_policy.py index 8fd542059af9e..81836e765e9e8 100644 --- a/rllib/algorithms/a3c/a3c_torch_policy.py +++ b/rllib/algorithms/a3c/a3c_torch_policy.py @@ -30,8 +30,6 @@ class A3CTorchPolicy( """PyTorch Policy class used with A3C.""" def __init__(self, observation_space, action_space, config): - config = dict(ray.rllib.algorithms.a3c.a3c.A3CConfig().to_dict(), **config) - TorchPolicyV2.__init__( self, observation_space, diff --git a/rllib/algorithms/appo/appo_tf_policy.py b/rllib/algorithms/appo/appo_tf_policy.py index dc1aea59f7816..1bf98b7220d62 100644 --- a/rllib/algorithms/appo/appo_tf_policy.py +++ b/rllib/algorithms/appo/appo_tf_policy.py @@ -83,10 +83,6 @@ def __init__( # First thing first, enable eager execution if necessary. base.enable_eager_execution_if_necessary() - config = dict( - ray.rllib.algorithms.appo.appo.APPOConfig().to_dict(), **config - ) - # Although this is a no-op, we call __init__ here to make it clear # that base.__init__ will use the make_model() call. 
VTraceClipGradients.__init__(self) diff --git a/rllib/algorithms/ars/ars.py b/rllib/algorithms/ars/ars.py index 3ac9067b669b2..96c97279b9fff 100644 --- a/rllib/algorithms/ars/ars.py +++ b/rllib/algorithms/ars/ars.py @@ -386,7 +386,7 @@ def setup(self, config: AlgorithmConfig): self._policy_class = get_policy_class(self.config) self.policy = self._policy_class( - env.observation_space, env.action_space, self.config.to_dict() + env.observation_space, env.action_space, self.config ) self.optimizer = optimizers.SGD(self.policy, self.config.sgd_stepsize) diff --git a/rllib/algorithms/ars/tests/test_ars.py b/rllib/algorithms/ars/tests/test_ars.py index f2da7c84735bc..22058d79373d3 100644 --- a/rllib/algorithms/ars/tests/test_ars.py +++ b/rllib/algorithms/ars/tests/test_ars.py @@ -8,7 +8,7 @@ class TestARS(unittest.TestCase): @classmethod def setUpClass(cls): - ray.init(num_cpus=3) + ray.init(num_cpus=3, local_mode=True) @classmethod def tearDownClass(cls): diff --git a/rllib/algorithms/bandit/bandit_torch_policy.py b/rllib/algorithms/bandit/bandit_torch_policy.py index 60a39fb6b9389..4f50104ddf7c8 100644 --- a/rllib/algorithms/bandit/bandit_torch_policy.py +++ b/rllib/algorithms/bandit/bandit_torch_policy.py @@ -23,8 +23,6 @@ class BanditTorchPolicy(TorchPolicyV2): def __init__(self, observation_space, action_space, config): - config = dict(ray.rllib.algorithms.bandit.bandit.DEFAULT_CONFIG, **config) - TorchPolicyV2.__init__( self, observation_space, diff --git a/rllib/algorithms/ddpg/ddpg_tf_policy.py b/rllib/algorithms/ddpg/ddpg_tf_policy.py index 2a08b49599fa7..5db1728ad9546 100644 --- a/rllib/algorithms/ddpg/ddpg_tf_policy.py +++ b/rllib/algorithms/ddpg/ddpg_tf_policy.py @@ -89,10 +89,6 @@ def __init__( # First thing first, enable eager execution if necessary. base.enable_eager_execution_if_necessary() - config = dict( - ray.rllib.algorithms.ddpg.ddpg.DDPGConfig().to_dict(), **config - ) - # Validate action space for DDPG validate_spaces(self, observation_space, action_space) diff --git a/rllib/algorithms/dreamer/dreamer_torch_policy.py b/rllib/algorithms/dreamer/dreamer_torch_policy.py index 79c029dd32456..0ae11b4673529 100644 --- a/rllib/algorithms/dreamer/dreamer_torch_policy.py +++ b/rllib/algorithms/dreamer/dreamer_torch_policy.py @@ -32,9 +32,6 @@ class DreamerTorchPolicy(TorchPolicyV2): def __init__(self, observation_space, action_space, config): - - config = dict(ray.rllib.algorithms.dreamer.DreamerConfig().to_dict(), **config) - TorchPolicyV2.__init__( self, observation_space, diff --git a/rllib/algorithms/impala/impala_tf_policy.py b/rllib/algorithms/impala/impala_tf_policy.py index f3c762b7ad638..415434ee6b399 100644 --- a/rllib/algorithms/impala/impala_tf_policy.py +++ b/rllib/algorithms/impala/impala_tf_policy.py @@ -286,10 +286,6 @@ def __init__( # First thing first, enable eager execution if necessary. base.enable_eager_execution_if_necessary() - config = dict( - ray.rllib.algorithms.impala.impala.ImpalaConfig().to_dict(), **config - ) - # Initialize base class. 
base.__init__( self, diff --git a/rllib/algorithms/impala/tf/impala_tf_policy_rlm.py b/rllib/algorithms/impala/tf/impala_tf_policy_rlm.py index ba8a7310d92fe..efab8a5fad4fc 100644 --- a/rllib/algorithms/impala/tf/impala_tf_policy_rlm.py +++ b/rllib/algorithms/impala/tf/impala_tf_policy_rlm.py @@ -30,9 +30,6 @@ class ImpalaTfPolicyWithRLModule( EagerTFPolicyV2, ): def __init__(self, observation_space, action_space, config): - config = dict( - ray.rllib.algorithms.impala.impala.ImpalaConfig().to_dict(), **config - ) validate_config(config) EagerTFPolicyV2.enable_eager_execution_if_necessary() EagerTFPolicyV2.__init__(self, observation_space, action_space, config) diff --git a/rllib/algorithms/maddpg/maddpg_tf_policy.py b/rllib/algorithms/maddpg/maddpg_tf_policy.py index 8b37e4bbf1634..5cbe8d6da5fe8 100644 --- a/rllib/algorithms/maddpg/maddpg_tf_policy.py +++ b/rllib/algorithms/maddpg/maddpg_tf_policy.py @@ -43,7 +43,6 @@ def postprocess_trajectory( class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): def __init__(self, obs_space, act_space, config): # _____ Initial Configuration - config = dict(ray.rllib.algorithms.maddpg.maddpg.DEFAULT_CONFIG, **config) self.config = config self.global_step = tf1.train.get_or_create_global_step() diff --git a/rllib/algorithms/maml/maml_tf_policy.py b/rllib/algorithms/maml/maml_tf_policy.py index 4484a691849e5..b77d51968957b 100644 --- a/rllib/algorithms/maml/maml_tf_policy.py +++ b/rllib/algorithms/maml/maml_tf_policy.py @@ -380,7 +380,6 @@ def __init__( # First thing first, enable eager execution if necessary. base.enable_eager_execution_if_necessary() - config = dict(ray.rllib.algorithms.maml.maml.DEFAULT_CONFIG, **config) validate_config(config) # Initialize base class. diff --git a/rllib/algorithms/marwil/marwil_tf_policy.py b/rllib/algorithms/marwil/marwil_tf_policy.py index 84511bb040303..422a0dd4f7cbf 100644 --- a/rllib/algorithms/marwil/marwil_tf_policy.py +++ b/rllib/algorithms/marwil/marwil_tf_policy.py @@ -174,10 +174,6 @@ def __init__( # First thing first, enable eager execution if necessary. base.enable_eager_execution_if_necessary() - config = dict( - ray.rllib.algorithms.marwil.marwil.MARWILConfig().to_dict(), **config - ) - # Initialize base class. base.__init__( self, diff --git a/rllib/algorithms/marwil/marwil_torch_policy.py b/rllib/algorithms/marwil/marwil_torch_policy.py index 1b0a63ea730a3..7578ac8abba27 100644 --- a/rllib/algorithms/marwil/marwil_torch_policy.py +++ b/rllib/algorithms/marwil/marwil_torch_policy.py @@ -21,10 +21,6 @@ class MARWILTorchPolicy(ValueNetworkMixin, PostprocessAdvantages, TorchPolicyV2) """PyTorch policy class used with Marwil.""" def __init__(self, observation_space, action_space, config): - config = dict( - ray.rllib.algorithms.marwil.marwil.MARWILConfig().to_dict(), **config - ) - TorchPolicyV2.__init__( self, observation_space, diff --git a/rllib/algorithms/mbmpo/mbmpo_torch_policy.py b/rllib/algorithms/mbmpo/mbmpo_torch_policy.py index af969eb508769..821248f672942 100644 --- a/rllib/algorithms/mbmpo/mbmpo_torch_policy.py +++ b/rllib/algorithms/mbmpo/mbmpo_torch_policy.py @@ -34,7 +34,6 @@ def __init__(self, observation_space, action_space, config): "or using the multi-agent API." 
) - config = dict(ray.rllib.algorithms.mbmpo.mbmpo.DEFAULT_CONFIG, **config) super().__init__(observation_space, action_space, config) def make_model_and_action_dist( diff --git a/rllib/algorithms/ppo/ppo_tf_policy.py b/rllib/algorithms/ppo/ppo_tf_policy.py index 905d2515d3b48..8319b4d9e0a22 100644 --- a/rllib/algorithms/ppo/ppo_tf_policy.py +++ b/rllib/algorithms/ppo/ppo_tf_policy.py @@ -74,7 +74,6 @@ def __init__( # First thing first, enable eager execution if necessary. base.enable_eager_execution_if_necessary() - config = dict(ray.rllib.algorithms.ppo.ppo.PPOConfig().to_dict(), **config) # TODO: Move into Policy API, if needed at all here. Why not move this into # `PPOConfig`?. validate_config(config) diff --git a/rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py b/rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py index aeffe2397eacb..b1eebfb1b2e32 100644 --- a/rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py +++ b/rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py @@ -59,7 +59,6 @@ class PPOTfPolicyWithRLModule( """ def __init__(self, observation_space, action_space, config): - config = dict(ray.rllib.algorithms.ppo.ppo.PPOConfig().to_dict(), **config) # TODO: Move into Policy API, if needed at all here. Why not move this into # `PPOConfig`?. validate_config(config) diff --git a/rllib/algorithms/ppo/torch/ppo_torch_policy_rlm.py b/rllib/algorithms/ppo/torch/ppo_torch_policy_rlm.py index c2c0169285593..9aea0cfce358c 100644 --- a/rllib/algorithms/ppo/torch/ppo_torch_policy_rlm.py +++ b/rllib/algorithms/ppo/torch/ppo_torch_policy_rlm.py @@ -61,7 +61,6 @@ class PPOTorchPolicyWithRLModule( """ def __init__(self, observation_space, action_space, config): - config = dict(ray.rllib.algorithms.ppo.ppo.PPOConfig().to_dict(), **config) # TODO: Move into Policy API, if needed at all here. Why not move this into # `PPOConfig`?. validate_config(config) diff --git a/rllib/algorithms/qmix/qmix_policy.py b/rllib/algorithms/qmix/qmix_policy.py index 63ae4e724c685..edc4af906cec7 100644 --- a/rllib/algorithms/qmix/qmix_policy.py +++ b/rllib/algorithms/qmix/qmix_policy.py @@ -174,7 +174,6 @@ def __init__(self, obs_space, action_space, config): raise ImportError("Could not import PyTorch, which QMix requires.") _validate(obs_space, action_space) - config = dict(ray.rllib.algorithms.qmix.qmix.DEFAULT_CONFIG, **config) self.framework = "torch" self.n_agents = len(obs_space.original_space.spaces) diff --git a/rllib/algorithms/simple_q/simple_q_tf_policy.py b/rllib/algorithms/simple_q/simple_q_tf_policy.py index 49c3275ab304b..11735443a664c 100644 --- a/rllib/algorithms/simple_q/simple_q_tf_policy.py +++ b/rllib/algorithms/simple_q/simple_q_tf_policy.py @@ -54,12 +54,6 @@ def __init__( ): # First thing first, enable eager execution if necessary. base.enable_eager_execution_if_necessary() - - config = dict( - ray.rllib.algorithms.simple_q.simple_q.SimpleQConfig().to_dict(), - **config, - ) - # Initialize base class. 
base.__init__( self, diff --git a/rllib/algorithms/simple_q/simple_q_torch_policy.py b/rllib/algorithms/simple_q/simple_q_torch_policy.py index 2bbc3de8e250c..d8034ccdea9fa 100644 --- a/rllib/algorithms/simple_q/simple_q_torch_policy.py +++ b/rllib/algorithms/simple_q/simple_q_torch_policy.py @@ -35,9 +35,6 @@ class SimpleQTorchPolicy( """PyTorch policy class used with SimpleQTrainer.""" def __init__(self, observation_space, action_space, config): - config = dict( - ray.rllib.algorithms.simple_q.simple_q.SimpleQConfig().to_dict(), **config - ) TorchPolicyV2.__init__( self, observation_space, diff --git a/rllib/policy/eager_tf_policy.py b/rllib/policy/eager_tf_policy.py index ffa8059e617fb..968c3d5ebf065 100644 --- a/rllib/policy/eager_tf_policy.py +++ b/rllib/policy/eager_tf_policy.py @@ -6,9 +6,11 @@ import logging import os import threading -import tree # pip install dm_tree from typing import Dict, List, Optional, Tuple, Union +import tree # pip install dm_tree + +from ray.rllib.algorithms.algorithm import AlgorithmConfig from ray.rllib.evaluation.episode import Episode from ray.rllib.models.catalog import ModelCatalog from ray.rllib.models.repeated_values import RepeatedValues @@ -386,9 +388,6 @@ def __init__(self, observation_space, action_space, config): ) self._max_seq_len = config["model"]["max_seq_len"] - if get_default_config: - config = dict(get_default_config(), **config) - if validate_spaces: validate_spaces(self, observation_space, action_space, config) diff --git a/rllib/policy/policy_template.py b/rllib/policy/policy_template.py index ce2d4c63db54c..0e01aa8b88cca 100644 --- a/rllib/policy/policy_template.py +++ b/rllib/policy/policy_template.py @@ -1,4 +1,3 @@ -import gymnasium as gym from typing import ( Any, Callable, @@ -11,6 +10,9 @@ Union, ) +import gymnasium as gym + +from ray.rllib.algorithms.algorithm import AlgorithmConfig from ray.rllib.models.catalog import ModelCatalog from ray.rllib.models.jax.jax_modelv2 import JAXModelV2 from ray.rllib.models.modelv2 import ModelV2 @@ -250,10 +252,6 @@ def build_policy_class( class policy_cls(base): def __init__(self, obs_space, action_space, config): - # Set up the config from possible default-config fn and given - # config arg. - if get_default_config: - config = dict(get_default_config(), **config) self.config = config # Set the DL framework for this Policy. 
diff --git a/rllib/policy/tf_policy_template.py b/rllib/policy/tf_policy_template.py index 84e242e25aa8a..02359f0c9cd1a 100644 --- a/rllib/policy/tf_policy_template.py +++ b/rllib/policy/tf_policy_template.py @@ -9,6 +9,7 @@ from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.tf_policy import TFPolicy from ray.rllib.utils import add_mixins, force_list +from ray.rllib.algorithms.algorithm import AlgorithmConfig from ray.rllib.utils.annotations import override, DeveloperAPI from ray.rllib.utils.deprecation import deprecation_warning, DEPRECATED_VALUE from ray.rllib.utils.framework import try_import_tf @@ -232,9 +233,6 @@ def __init__( existing_model=None, existing_inputs=None, ): - if get_default_config: - config = dict(get_default_config(), **config) - if validate_spaces: validate_spaces(self, obs_space, action_space, config) From ce43b2b76e64be0ab445ffbed7ba01ec7bea4e75 Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Sun, 12 Mar 2023 17:42:41 -0700 Subject: [PATCH 02/11] Remove all DEFAULT CONFIGS Signed-off-by: Artur Niederfahrenhorst --- rllib/agents/__init__.py | 8 --- rllib/agents/a3c/__init__.py | 22 -------- rllib/agents/a3c/a2c.py | 4 -- rllib/agents/a3c/a3c.py | 6 -- rllib/agents/alpha_star/__init__.py | 18 ------ rllib/agents/ars/__init__.py | 14 ----- rllib/agents/bandit/__init__.py | 19 ------- rllib/agents/callbacks.py | 13 ----- rllib/agents/cql/__init__.py | 13 ----- rllib/agents/ddpg/__init__.py | 22 -------- rllib/agents/ddpg/apex.py | 4 -- rllib/agents/ddpg/ddpg.py | 6 -- rllib/agents/ddpg/td3.py | 4 -- rllib/agents/dqn/__init__.py | 55 ------------------- rllib/agents/dqn/apex.py | 4 -- rllib/agents/dqn/dqn.py | 6 -- rllib/agents/dqn/simple_q.py | 4 -- rllib/agents/dreamer/__init__.py | 18 ------ rllib/agents/es/__init__.py | 10 ---- rllib/agents/impala/__init__.py | 17 ------ rllib/agents/maddpg/__init__.py | 19 ------- rllib/agents/maml/__init__.py | 10 ---- rllib/agents/marwil/__init__.py | 31 ----------- rllib/agents/mbmpo/__init__.py | 11 ---- rllib/agents/mock.py | 13 ----- rllib/agents/pg/__init__.py | 15 ----- rllib/agents/ppo/__init__.py | 24 -------- rllib/agents/ppo/appo.py | 4 -- rllib/agents/ppo/ddppo.py | 4 -- rllib/agents/ppo/ppo.py | 6 -- rllib/agents/qmix/__init__.py | 11 ---- rllib/agents/sac/__init__.py | 23 -------- rllib/agents/slateq/__init__.py | 21 ------- rllib/agents/trainer.py | 11 ---- rllib/agents/trainer_config.py | 10 ---- rllib/algorithms/a2c/__init__.py | 4 +- rllib/algorithms/a2c/a2c.py | 18 ------ rllib/algorithms/a3c/a3c_tf_policy.py | 1 - rllib/algorithms/a3c/a3c_torch_policy.py | 1 - rllib/algorithms/apex_ddpg/__init__.py | 2 - rllib/algorithms/apex_ddpg/apex_ddpg.py | 19 +------ rllib/algorithms/apex_dqn/__init__.py | 2 - rllib/algorithms/apex_dqn/apex_dqn.py | 19 +------ rllib/algorithms/appo/appo_tf_policy.py | 1 - .../algorithms/bandit/bandit_torch_policy.py | 1 - rllib/algorithms/bc/__init__.py | 4 +- rllib/algorithms/bc/bc.py | 18 ------ rllib/algorithms/cql/cql.py | 18 ------ rllib/algorithms/ddpg/ddpg_tf_policy.py | 2 - .../dreamer/dreamer_torch_policy.py | 1 - rllib/algorithms/impala/impala_tf_policy.py | 1 - .../impala/tf/impala_tf_policy_rlm.py | 1 - rllib/algorithms/maddpg/maddpg_tf_policy.py | 1 - rllib/algorithms/maml/maml_tf_policy.py | 1 - rllib/algorithms/marwil/marwil_tf_policy.py | 1 - .../algorithms/marwil/marwil_torch_policy.py | 1 - rllib/algorithms/mbmpo/mbmpo_torch_policy.py | 1 - rllib/algorithms/ppo/ppo_tf_policy.py | 1 - rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py | 1 - 
.../ppo/torch/ppo_torch_policy_rlm.py | 1 - rllib/algorithms/qmix/qmix_policy.py | 1 - rllib/algorithms/r2d2/__init__.py | 3 +- rllib/algorithms/r2d2/r2d2.py | 18 ------ rllib/algorithms/r2d2/r2d2_tf_policy.py | 2 +- rllib/algorithms/r2d2/r2d2_torch_policy.py | 2 +- rllib/algorithms/sac/__init__.py | 10 +--- .../algorithms/simple_q/simple_q_tf_policy.py | 1 - .../simple_q/simple_q_torch_policy.py | 1 - rllib/algorithms/td3/__init__.py | 3 +- rllib/algorithms/td3/td3.py | 18 ------ rllib/policy/eager_tf_policy.py | 1 - rllib/policy/policy_template.py | 1 - rllib/policy/tf_policy_template.py | 1 - rllib/tests/test_gpus.py | 6 +- 74 files changed, 14 insertions(+), 655 deletions(-) delete mode 100644 rllib/agents/__init__.py delete mode 100644 rllib/agents/a3c/__init__.py delete mode 100644 rllib/agents/a3c/a2c.py delete mode 100644 rllib/agents/a3c/a3c.py delete mode 100644 rllib/agents/alpha_star/__init__.py delete mode 100644 rllib/agents/ars/__init__.py delete mode 100644 rllib/agents/bandit/__init__.py delete mode 100644 rllib/agents/callbacks.py delete mode 100644 rllib/agents/cql/__init__.py delete mode 100644 rllib/agents/ddpg/__init__.py delete mode 100644 rllib/agents/ddpg/apex.py delete mode 100644 rllib/agents/ddpg/ddpg.py delete mode 100644 rllib/agents/ddpg/td3.py delete mode 100644 rllib/agents/dqn/__init__.py delete mode 100644 rllib/agents/dqn/apex.py delete mode 100644 rllib/agents/dqn/dqn.py delete mode 100644 rllib/agents/dqn/simple_q.py delete mode 100644 rllib/agents/dreamer/__init__.py delete mode 100644 rllib/agents/es/__init__.py delete mode 100644 rllib/agents/impala/__init__.py delete mode 100644 rllib/agents/maddpg/__init__.py delete mode 100644 rllib/agents/maml/__init__.py delete mode 100644 rllib/agents/marwil/__init__.py delete mode 100644 rllib/agents/mbmpo/__init__.py delete mode 100644 rllib/agents/mock.py delete mode 100644 rllib/agents/pg/__init__.py delete mode 100644 rllib/agents/ppo/__init__.py delete mode 100644 rllib/agents/ppo/appo.py delete mode 100644 rllib/agents/ppo/ddppo.py delete mode 100644 rllib/agents/ppo/ppo.py delete mode 100644 rllib/agents/qmix/__init__.py delete mode 100644 rllib/agents/sac/__init__.py delete mode 100644 rllib/agents/slateq/__init__.py delete mode 100644 rllib/agents/trainer.py delete mode 100644 rllib/agents/trainer_config.py diff --git a/rllib/agents/__init__.py b/rllib/agents/__init__.py deleted file mode 100644 index b4dd902f35944..0000000000000 --- a/rllib/agents/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from ray.rllib.algorithms.algorithm import Algorithm as Trainer, with_common_config -from ray.rllib.algorithms.algorithm_config import AlgorithmConfig as TrainerConfig - -__all__ = [ - "Trainer", - "TrainerConfig", - "with_common_config", -] diff --git a/rllib/agents/a3c/__init__.py b/rllib/agents/a3c/__init__.py deleted file mode 100644 index f65efd3382464..0000000000000 --- a/rllib/agents/a3c/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -import ray.rllib.agents.a3c.a2c as a2c # noqa -from ray.rllib.algorithms.a2c.a2c import ( - A2CConfig, - A2C as A2CTrainer, - A2C_DEFAULT_CONFIG, -) -from ray.rllib.algorithms.a3c.a3c import A3CConfig, A3C as A3CTrainer, DEFAULT_CONFIG -from ray.rllib.utils.deprecation import deprecation_warning - - -__all__ = [ - "A2CConfig", - "A2C_DEFAULT_CONFIG", # deprecated - "A2CTrainer", - "A3CConfig", - "A3CTrainer", - "DEFAULT_CONFIG", # A3C default config (deprecated) -] - -deprecation_warning( - "ray.rllib.agents.a3c", "ray.rllib.algorithms.[a3c|a2c]", error=True -) diff --git 
a/rllib/agents/a3c/a2c.py b/rllib/agents/a3c/a2c.py deleted file mode 100644 index 5e611c5e7b45f..0000000000000 --- a/rllib/agents/a3c/a2c.py +++ /dev/null @@ -1,4 +0,0 @@ -from ray.rllib.algorithms.a2c import ( # noqa - A2C as A2CTrainer, - A2C_DEFAULT_CONFIG, -) diff --git a/rllib/agents/a3c/a3c.py b/rllib/agents/a3c/a3c.py deleted file mode 100644 index ee04180546c35..0000000000000 --- a/rllib/agents/a3c/a3c.py +++ /dev/null @@ -1,6 +0,0 @@ -from ray.rllib.algorithms.a3c import ( # noqa - a3c_tf_policy, - a3c_torch_policy, - A3C as A3CTrainer, - DEFAULT_CONFIG, -) diff --git a/rllib/agents/alpha_star/__init__.py b/rllib/agents/alpha_star/__init__.py deleted file mode 100644 index 0d363be259ad1..0000000000000 --- a/rllib/agents/alpha_star/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -from ray.rllib.algorithms.alpha_star.alpha_star import ( - AlphaStarConfig, - AlphaStarTrainer, - DEFAULT_CONFIG, -) - -__all__ = [ - "AlphaStarConfig", - "AlphaStarTrainer", - "DEFAULT_CONFIG", -] - - -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning( - "ray.rllib.agents.alpha_star", "ray.rllib.algorithms.alpha_star", error=True -) diff --git a/rllib/agents/ars/__init__.py b/rllib/agents/ars/__init__.py deleted file mode 100644 index 9b8b653cf67b9..0000000000000 --- a/rllib/agents/ars/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -from ray.rllib.algorithms.ars.ars import ARS as ARSTrainer, DEFAULT_CONFIG -from ray.rllib.algorithms.ars.ars_tf_policy import ARSTFPolicy -from ray.rllib.algorithms.ars.ars_torch_policy import ARSTorchPolicy - -__all__ = [ - "ARSTFPolicy", - "ARSTorchPolicy", - "ARSTrainer", - "DEFAULT_CONFIG", -] - -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning("ray.rllib.agents.ars", "ray.rllib.algorithms.ars", error=True) diff --git a/rllib/agents/bandit/__init__.py b/rllib/agents/bandit/__init__.py deleted file mode 100644 index 20ae010205e03..0000000000000 --- a/rllib/agents/bandit/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from ray.rllib.algorithms.bandit.bandit import ( - BanditLinTS as BanditLinTSTrainer, - BanditLinUCB as BanditLinUCBTrainer, - BanditLinTSConfig, - BanditLinUCBConfig, -) - -__all__ = [ - "BanditLinTSTrainer", - "BanditLinUCBTrainer", - "BanditLinTSConfig", - "BanditLinUCBConfig", -] - -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning( - "ray.rllib.agents.bandits", "ray.rllib.algorithms.bandits", error=True -) diff --git a/rllib/agents/callbacks.py b/rllib/agents/callbacks.py deleted file mode 100644 index ad974993dd83c..0000000000000 --- a/rllib/agents/callbacks.py +++ /dev/null @@ -1,13 +0,0 @@ -from ray.rllib.algorithms.callbacks import ( # noqa - DefaultCallbacks, - MemoryTrackingCallbacks, - MultiCallbacks, - RE3UpdateCallbacks, -) -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning( - old="ray.rllib.agents.callbacks", - new="ray.rllib.algorithms.callbacks", - error=True, -) diff --git a/rllib/agents/cql/__init__.py b/rllib/agents/cql/__init__.py deleted file mode 100644 index e75f0d190988e..0000000000000 --- a/rllib/agents/cql/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from ray.rllib.algorithms.cql.cql import CQL as CQLTrainer, CQL_DEFAULT_CONFIG -from ray.rllib.algorithms.cql.cql_tf_policy import CQLTFPolicy -from ray.rllib.algorithms.cql.cql_torch_policy import CQLTorchPolicy -from ray.rllib.utils.deprecation import deprecation_warning - -__all__ = [ - "CQL_DEFAULT_CONFIG", - "CQLTFPolicy", - "CQLTorchPolicy", - "CQLTrainer", -] - 
-deprecation_warning("ray.rllib.agents.cql", "ray.rllib.algorithms.cql", error=True) diff --git a/rllib/agents/ddpg/__init__.py b/rllib/agents/ddpg/__init__.py deleted file mode 100644 index dedf41e734056..0000000000000 --- a/rllib/agents/ddpg/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -import ray.rllib.agents.ddpg.apex as apex # noqa -import ray.rllib.agents.ddpg.td3 as td3 # noqa -from ray.rllib.algorithms.apex_ddpg.apex_ddpg import ApexDDPG as ApexDDPGTrainer -from ray.rllib.algorithms.ddpg.ddpg import ( - DDPGConfig, - DDPG as DDPGTrainer, - DEFAULT_CONFIG, -) -from ray.rllib.algorithms.td3.td3 import TD3 as TD3Trainer - - -__all__ = [ - "ApexDDPGTrainer", - "DDPGConfig", - "DDPGTrainer", - "DEFAULT_CONFIG", - "TD3Trainer", -] - -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning("ray.rllib.agents.ddpg", "ray.rllib.algorithms.ddpg", error=True) diff --git a/rllib/agents/ddpg/apex.py b/rllib/agents/ddpg/apex.py deleted file mode 100644 index b254e05bd4217..0000000000000 --- a/rllib/agents/ddpg/apex.py +++ /dev/null @@ -1,4 +0,0 @@ -from ray.rllib.algorithms.apex_ddpg import ( # noqa - ApexDDPG as ApexDDPGTrainer, - APEX_DDPG_DEFAULT_CONFIG, -) diff --git a/rllib/agents/ddpg/ddpg.py b/rllib/agents/ddpg/ddpg.py deleted file mode 100644 index 5f5042f1f2d5c..0000000000000 --- a/rllib/agents/ddpg/ddpg.py +++ /dev/null @@ -1,6 +0,0 @@ -from ray.rllib.algorithms.ddpg import ( # noqa - ddpg_tf_policy, - ddpg_torch_policy, - DDPG as DDPGTrainer, - DEFAULT_CONFIG, -) diff --git a/rllib/agents/ddpg/td3.py b/rllib/agents/ddpg/td3.py deleted file mode 100644 index 14bc1b89bdb6e..0000000000000 --- a/rllib/agents/ddpg/td3.py +++ /dev/null @@ -1,4 +0,0 @@ -from ray.rllib.algorithms.td3 import ( # noqa - TD3 as TD3Trainer, - TD3_DEFAULT_CONFIG, -) diff --git a/rllib/agents/dqn/__init__.py b/rllib/agents/dqn/__init__.py deleted file mode 100644 index 481cc9dee385b..0000000000000 --- a/rllib/agents/dqn/__init__.py +++ /dev/null @@ -1,55 +0,0 @@ -import ray.rllib.agents.dqn.apex as apex # noqa -import ray.rllib.agents.dqn.simple_q as simple_q # noqa -from ray.rllib.algorithms.apex_dqn.apex_dqn import APEX_DEFAULT_CONFIG -from ray.rllib.algorithms.apex_dqn.apex_dqn import ApexDQN as ApexTrainer -from ray.rllib.algorithms.apex_dqn.apex_dqn import ApexDQNConfig -from ray.rllib.algorithms.dqn.dqn import DEFAULT_CONFIG -from ray.rllib.algorithms.dqn.dqn import DQN as DQNTrainer -from ray.rllib.algorithms.dqn.dqn import DQNConfig -from ray.rllib.algorithms.dqn.dqn_tf_policy import DQNTFPolicy -from ray.rllib.algorithms.dqn.dqn_torch_policy import DQNTorchPolicy -from ray.rllib.algorithms.r2d2.r2d2 import R2D2 as R2D2Trainer -from ray.rllib.algorithms.r2d2.r2d2 import R2D2_DEFAULT_CONFIG, R2D2Config -from ray.rllib.algorithms.r2d2.r2d2_tf_policy import R2D2TFPolicy -from ray.rllib.algorithms.r2d2.r2d2_torch_policy import R2D2TorchPolicy -from ray.rllib.algorithms.simple_q.simple_q import ( - DEFAULT_CONFIG as SIMPLE_Q_DEFAULT_CONFIG, -) -from ray.rllib.algorithms.simple_q.simple_q import SimpleQ as SimpleQTrainer -from ray.rllib.algorithms.simple_q.simple_q import SimpleQConfig -from ray.rllib.algorithms.simple_q.simple_q_tf_policy import ( - SimpleQTF1Policy, - SimpleQTF2Policy, -) -from ray.rllib.algorithms.simple_q.simple_q_torch_policy import SimpleQTorchPolicy -from ray.rllib.utils.deprecation import deprecation_warning - -__all__ = [ - "ApexDQNConfig", - "ApexTrainer", - "DQNConfig", - "DQNTFPolicy", - "DQNTorchPolicy", - "DQNTrainer", - "R2D2Config", - "R2D2TFPolicy", - 
"R2D2TorchPolicy", - "R2D2Trainer", - "SimpleQConfig", - "SimpleQTF1Policy", - "SimpleQTF2Policy", - "SimpleQTorchPolicy", - "SimpleQTrainer", - # Deprecated. - "APEX_DEFAULT_CONFIG", - "DEFAULT_CONFIG", - "R2D2_DEFAULT_CONFIG", - "SIMPLE_Q_DEFAULT_CONFIG", -] - - -deprecation_warning( - "ray.rllib.agents.dqn", - "ray.rllib.algorithms.[dqn|simple_q|r2d2|apex_dqn]", - error=True, -) diff --git a/rllib/agents/dqn/apex.py b/rllib/agents/dqn/apex.py deleted file mode 100644 index 0bcab0901ea2e..0000000000000 --- a/rllib/agents/dqn/apex.py +++ /dev/null @@ -1,4 +0,0 @@ -from ray.rllib.algorithms.apex_dqn import ( # noqa - ApexDQN as ApexTrainer, - APEX_DEFAULT_CONFIG, -) diff --git a/rllib/agents/dqn/dqn.py b/rllib/agents/dqn/dqn.py deleted file mode 100644 index 44002c81570a1..0000000000000 --- a/rllib/agents/dqn/dqn.py +++ /dev/null @@ -1,6 +0,0 @@ -from ray.rllib.algorithms.dqn import ( # noqa - dqn_tf_policy, - dqn_torch_policy, - DQN as DQNTrainer, - DEFAULT_CONFIG, -) diff --git a/rllib/agents/dqn/simple_q.py b/rllib/agents/dqn/simple_q.py deleted file mode 100644 index 5343238a4f8bb..0000000000000 --- a/rllib/agents/dqn/simple_q.py +++ /dev/null @@ -1,4 +0,0 @@ -from ray.rllib.algorithms.simple_q import ( # noqa - SimpleQ as SimpleQTrainer, - DEFAULT_CONFIG, -) diff --git a/rllib/agents/dreamer/__init__.py b/rllib/agents/dreamer/__init__.py deleted file mode 100644 index 2f84b0fcd69a0..0000000000000 --- a/rllib/agents/dreamer/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -from ray.rllib.algorithms.dreamer.dreamer import ( - DreamerConfig, - Dreamer as DREAMERTrainer, - DEFAULT_CONFIG, -) - -__all__ = [ - "DreamerConfig", - "DREAMERTrainer", - "DEFAULT_CONFIG", -] - - -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning( - "ray.rllib.agents.dreamer", "ray.rllib.algorithms.dreamer", error=True -) diff --git a/rllib/agents/es/__init__.py b/rllib/agents/es/__init__.py deleted file mode 100644 index c870361ebc841..0000000000000 --- a/rllib/agents/es/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from ray.rllib.algorithms.es.es import ES as ESTrainer, DEFAULT_CONFIG -from ray.rllib.algorithms.es.es_tf_policy import ESTFPolicy -from ray.rllib.algorithms.es.es_torch_policy import ESTorchPolicy - -__all__ = ["ESTFPolicy", "ESTorchPolicy", "ESTrainer", "DEFAULT_CONFIG"] - - -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning("ray.rllib.agents.es", "ray.rllib.algorithms.es", error=True) diff --git a/rllib/agents/impala/__init__.py b/rllib/agents/impala/__init__.py deleted file mode 100644 index 45b7ddc1e103c..0000000000000 --- a/rllib/agents/impala/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -from ray.rllib.algorithms.impala.impala import ( - DEFAULT_CONFIG, - ImpalaConfig, - Impala as ImpalaTrainer, -) -from ray.rllib.utils.deprecation import deprecation_warning - - -__all__ = [ - "ImpalaConfig", - "ImpalaTrainer", - "DEFAULT_CONFIG", -] - -deprecation_warning( - "ray.rllib.agents.impala", "ray.rllib.algorithms.impala", error=True -) diff --git a/rllib/agents/maddpg/__init__.py b/rllib/agents/maddpg/__init__.py deleted file mode 100644 index f1adbbae80593..0000000000000 --- a/rllib/agents/maddpg/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from ray.rllib.algorithms.maddpg.maddpg import ( - MADDPG as MADDPGTrainer, - MADDPGTFPolicy, - DEFAULT_CONFIG, -) - -__all__ = [ - "MADDPGTrainer", - "MADDPGTFPolicy", - "DEFAULT_CONFIG", -] - -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning( - "ray.rllib.agents.maddpg", - 
"ray.rllib.algorithms.maddpg", - error=True, -) diff --git a/rllib/agents/maml/__init__.py b/rllib/agents/maml/__init__.py deleted file mode 100644 index 1fc8410b3a69b..0000000000000 --- a/rllib/agents/maml/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from ray.rllib.algorithms.maml.maml import MAML as MAMLTrainer, DEFAULT_CONFIG - -__all__ = [ - "MAMLTrainer", - "DEFAULT_CONFIG", -] - -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning("ray.rllib.agents.maml", "ray.rllib.algorithms.maml", error=True) diff --git a/rllib/agents/marwil/__init__.py b/rllib/agents/marwil/__init__.py deleted file mode 100644 index 2a603fc4abfec..0000000000000 --- a/rllib/agents/marwil/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -from ray.rllib.algorithms.bc.bc import BCConfig, BC as BCTrainer, BC_DEFAULT_CONFIG -from ray.rllib.algorithms.marwil.marwil import ( - DEFAULT_CONFIG, - MARWILConfig, - MARWIL as MARWILTrainer, -) -from ray.rllib.algorithms.marwil.marwil_tf_policy import ( - MARWILTF1Policy, - MARWILTF2Policy, -) -from ray.rllib.algorithms.marwil.marwil_torch_policy import MARWILTorchPolicy - -__all__ = [ - "BCConfig", - "BCTrainer", - "MARWILConfig", - "MARWILTF1Policy", - "MARWILTF2Policy", - "MARWILTorchPolicy", - "MARWILTrainer", - # Deprecated. - "BC_DEFAULT_CONFIG", - "DEFAULT_CONFIG", -] - - -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning( - "ray.rllib.agents.marwil", "ray.rllib.algorithms.[marwil|bc]", error=True -) diff --git a/rllib/agents/mbmpo/__init__.py b/rllib/agents/mbmpo/__init__.py deleted file mode 100644 index 830843c8ea3b4..0000000000000 --- a/rllib/agents/mbmpo/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from ray.rllib.algorithms.mbmpo.mbmpo import MBMPO as MBMPOTrainer, DEFAULT_CONFIG - -__all__ = [ - "MBMPOTrainer", - "DEFAULT_CONFIG", -] - - -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning("ray.rllib.agents.mbmpo", "ray.rllib.algorithms.mbmpo", error=True) diff --git a/rllib/agents/mock.py b/rllib/agents/mock.py deleted file mode 100644 index 8e90604a6e7f9..0000000000000 --- a/rllib/agents/mock.py +++ /dev/null @@ -1,13 +0,0 @@ -from ray.rllib.algorithms.mock import ( # noqa - _MockTrainer, - _ParameterTuningTrainer, - _SigmoidFakeData, -) - -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning( - old="ray.rllib.agents.mock", - new="ray.rllib.algorithms.mock", - error=True, -) diff --git a/rllib/agents/pg/__init__.py b/rllib/agents/pg/__init__.py deleted file mode 100644 index de270c5104316..0000000000000 --- a/rllib/agents/pg/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from ray.rllib.algorithms.pg.pg import DEFAULT_CONFIG, PGConfig, PG as PGTrainer -from ray.rllib.algorithms.pg.pg_torch_policy import PGTorchPolicy -from ray.rllib.algorithms.pg.utils import post_process_advantages - -__all__ = [ - "DEFAULT_CONFIG", - "post_process_advantages", - "PGConfig", - "PGTorchPolicy", - "PGTrainer", -] - -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning("ray.rllib.agents.pg", "ray.rllib.algorithms.pg", error=True) diff --git a/rllib/agents/ppo/__init__.py b/rllib/agents/ppo/__init__.py deleted file mode 100644 index 649a1cfb3993f..0000000000000 --- a/rllib/agents/ppo/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -import ray.rllib.agents.ppo.appo as appo # noqa -from ray.rllib.algorithms.ppo.ppo import PPOConfig, PPO as PPOTrainer, DEFAULT_CONFIG -from ray.rllib.algorithms.ppo.ppo_tf_policy import PPOTF1Policy, PPOTF2Policy -from 
ray.rllib.algorithms.ppo.ppo_torch_policy import PPOTorchPolicy -from ray.rllib.algorithms.appo.appo import APPOConfig, APPO as APPOTrainer -from ray.rllib.algorithms.appo.appo_tf_policy import APPOTF1Policy, APPOTF2Policy -from ray.rllib.algorithms.appo.appo_torch_policy import APPOTorchPolicy -from ray.rllib.algorithms.ddppo.ddppo import DDPPOConfig, DDPPO as DDPPOTrainer - -__all__ = [ - "APPOConfig", - "APPOTF1Policy", - "APPOTF2Policy", - "APPOTorchPolicy", - "APPOTrainer", - "DDPPOConfig", - "DDPPOTrainer", - "DEFAULT_CONFIG", - "PPOConfig", - "PPOTF1Policy", - "PPOTF2Policy", - "PPOTorchPolicy", - "PPOTrainer", -] diff --git a/rllib/agents/ppo/appo.py b/rllib/agents/ppo/appo.py deleted file mode 100644 index 3ead65bfc099a..0000000000000 --- a/rllib/agents/ppo/appo.py +++ /dev/null @@ -1,4 +0,0 @@ -from ray.rllib.algorithms.appo import ( # noqa - APPO as APPOTrainer, - DEFAULT_CONFIG, -) diff --git a/rllib/agents/ppo/ddppo.py b/rllib/agents/ppo/ddppo.py deleted file mode 100644 index fa9fe0d90a1a1..0000000000000 --- a/rllib/agents/ppo/ddppo.py +++ /dev/null @@ -1,4 +0,0 @@ -from ray.rllib.algorithms.ddppo import ( # noqa - DDPPO as DDPPOTrainer, - DEFAULT_CONFIG, -) diff --git a/rllib/agents/ppo/ppo.py b/rllib/agents/ppo/ppo.py deleted file mode 100644 index a62ccd73bed3b..0000000000000 --- a/rllib/agents/ppo/ppo.py +++ /dev/null @@ -1,6 +0,0 @@ -from ray.rllib.algorithms.ppo import ( # noqa - ppo_tf_policy, - ppo_torch_policy, - PPO as PPOTrainer, - DEFAULT_CONFIG, -) diff --git a/rllib/agents/qmix/__init__.py b/rllib/agents/qmix/__init__.py deleted file mode 100644 index 05415e2cc2ce2..0000000000000 --- a/rllib/agents/qmix/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from ray.rllib.algorithms.qmix.qmix import ( - QMixConfig, - QMix as QMixTrainer, - DEFAULT_CONFIG, -) - -__all__ = ["QMixConfig", "QMixTrainer", "DEFAULT_CONFIG"] - -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning("ray.rllib.agents.qmix", "ray.rllib.algorithms.qmix", error=True) diff --git a/rllib/agents/sac/__init__.py b/rllib/agents/sac/__init__.py deleted file mode 100644 index 22921fe5ecdff..0000000000000 --- a/rllib/agents/sac/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -from ray.rllib.algorithms.sac.sac import SAC as SACTrainer, DEFAULT_CONFIG -from ray.rllib.algorithms.sac.sac_tf_policy import SACTFPolicy -from ray.rllib.algorithms.sac.sac_torch_policy import SACTorchPolicy - -from ray.rllib.algorithms.sac.rnnsac import ( - RNNSAC as RNNSACTrainer, - DEFAULT_CONFIG as RNNSAC_DEFAULT_CONFIG, -) -from ray.rllib.algorithms.sac.rnnsac import RNNSACTorchPolicy - -__all__ = [ - "DEFAULT_CONFIG", - "SACTFPolicy", - "SACTorchPolicy", - "SACTrainer", - "RNNSAC_DEFAULT_CONFIG", - "RNNSACTorchPolicy", - "RNNSACTrainer", -] - -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning("ray.rllib.agents.sac", "ray.rllib.algorithms.sac", error=True) diff --git a/rllib/agents/slateq/__init__.py b/rllib/agents/slateq/__init__.py deleted file mode 100644 index 95690986a6d27..0000000000000 --- a/rllib/agents/slateq/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from ray.rllib.algorithms.slateq.slateq import ( - SlateQConfig, - SlateQ as SlateQTrainer, - DEFAULT_CONFIG, -) -from ray.rllib.algorithms.slateq.slateq_tf_policy import SlateQTFPolicy -from ray.rllib.algorithms.slateq.slateq_torch_policy import SlateQTorchPolicy - -__all__ = [ - "DEFAULT_CONFIG", - "SlateQConfig", - "SlateQTFPolicy", - "SlateQTorchPolicy", - "SlateQTrainer", -] - -from ray.rllib.utils.deprecation 
import deprecation_warning - -deprecation_warning( - "ray.rllib.agents.slateq", "ray.rllib.algorithms.slateq", error=True -) diff --git a/rllib/agents/trainer.py b/rllib/agents/trainer.py deleted file mode 100644 index efe45ba950cef..0000000000000 --- a/rllib/agents/trainer.py +++ /dev/null @@ -1,11 +0,0 @@ -from ray.rllib.algorithms.algorithm import ( # noqa - Algorithm, - COMMON_CONFIG, - with_common_config, -) -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning(old="rllib.agents::Trainer", new="rllib.algorithms::Algorithm") - -# Alias. -Trainer = Algorithm diff --git a/rllib/agents/trainer_config.py b/rllib/agents/trainer_config.py deleted file mode 100644 index 6ecad310af564..0000000000000 --- a/rllib/agents/trainer_config.py +++ /dev/null @@ -1,10 +0,0 @@ -from ray.rllib.algorithms.algorithm_config import ( # noqa - AlgorithmConfig as TrainerConfig, -) -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning( - old="ray.rllib.agents.trainer_config::TrainerConfig", - new="ray.rllib.algorithms.algorithm_config::AlgorithmConfig", - error=True, -) diff --git a/rllib/algorithms/a2c/__init__.py b/rllib/algorithms/a2c/__init__.py index dfc7de5a6fa98..8509d972b0b2c 100644 --- a/rllib/algorithms/a2c/__init__.py +++ b/rllib/algorithms/a2c/__init__.py @@ -1,3 +1,3 @@ -from ray.rllib.algorithms.a2c.a2c import A2CConfig, A2C, A2C_DEFAULT_CONFIG +from ray.rllib.algorithms.a2c.a2c import A2CConfig, A2C -__all__ = ["A2CConfig", "A2C", "A2C_DEFAULT_CONFIG"] +__all__ = ["A2CConfig", "A2C"] diff --git a/rllib/algorithms/a2c/a2c.py b/rllib/algorithms/a2c/a2c.py index c2b666b99325c..92ebcee5f99ac 100644 --- a/rllib/algorithms/a2c/a2c.py +++ b/rllib/algorithms/a2c/a2c.py @@ -10,7 +10,6 @@ ) from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.metrics import ( APPLY_GRADS_TIMER, COMPUTE_GRADS_TIMER, @@ -241,20 +240,3 @@ def training_step(self) -> ResultDict: train_results = {DEFAULT_POLICY_ID: info} return train_results - - -# Deprecated: Use ray.rllib.algorithms.a2c.A2CConfig instead! 
-class _deprecated_default_config(dict): - def __init__(self): - super().__init__(A2CConfig().to_dict()) - - @Deprecated( - old="ray.rllib.agents.a3c.a2c.A2C_DEFAULT_CONFIG", - new="ray.rllib.algorithms.a2c.a2c.A2CConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -A2C_DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/a3c/a3c_tf_policy.py b/rllib/algorithms/a3c/a3c_tf_policy.py index a85a73e25bf60..d3c5649478f96 100644 --- a/rllib/algorithms/a3c/a3c_tf_policy.py +++ b/rllib/algorithms/a3c/a3c_tf_policy.py @@ -1,7 +1,6 @@ """Note: Keep in sync with changes to VTraceTFPolicy.""" from typing import Dict, List, Optional, Type, Union -import ray from ray.rllib.evaluation.episode import Episode from ray.rllib.evaluation.postprocessing import ( compute_gae_for_sample_batch, diff --git a/rllib/algorithms/a3c/a3c_torch_policy.py b/rllib/algorithms/a3c/a3c_torch_policy.py index 81836e765e9e8..d94a07273e6cd 100644 --- a/rllib/algorithms/a3c/a3c_torch_policy.py +++ b/rllib/algorithms/a3c/a3c_torch_policy.py @@ -1,6 +1,5 @@ from typing import Dict, List, Optional, Type, Union -import ray from ray.rllib.evaluation.episode import Episode from ray.rllib.evaluation.postprocessing import ( compute_gae_for_sample_batch, diff --git a/rllib/algorithms/apex_ddpg/__init__.py b/rllib/algorithms/apex_ddpg/__init__.py index 9d4e62225f158..e5de02cf71bcc 100644 --- a/rllib/algorithms/apex_ddpg/__init__.py +++ b/rllib/algorithms/apex_ddpg/__init__.py @@ -1,11 +1,9 @@ from ray.rllib.algorithms.apex_ddpg.apex_ddpg import ( ApexDDPG, ApexDDPGConfig, - APEX_DDPG_DEFAULT_CONFIG, ) __all__ = [ "ApexDDPG", "ApexDDPGConfig", - "APEX_DDPG_DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/apex_ddpg/apex_ddpg.py b/rllib/algorithms/apex_ddpg/apex_ddpg.py index c3358e7a42958..0900de2a29218 100644 --- a/rllib/algorithms/apex_ddpg/apex_ddpg.py +++ b/rllib/algorithms/apex_ddpg/apex_ddpg.py @@ -4,7 +4,7 @@ from ray.rllib.algorithms.apex_dqn.apex_dqn import ApexDQN from ray.rllib.algorithms.ddpg.ddpg import DDPG, DDPGConfig from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import DEPRECATED_VALUE, Deprecated +from ray.rllib.utils.deprecation import DEPRECATED_VALUE from ray.rllib.utils.typing import ( ResultDict, ) @@ -147,20 +147,3 @@ def setup(self, config: AlgorithmConfig): def training_step(self) -> ResultDict: """Use APEX-DQN's training iteration function.""" return ApexDQN.training_step(self) - - -# Deprecated: Use ray.rllib.algorithms.apex_ddpg.ApexDDPGConfig instead! 
-class _deprecated_default_config(dict): - def __init__(self): - super().__init__(ApexDDPGConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.ddpg.apex.APEX_DDPG_DEFAULT_CONFIG", - new="ray.rllib.algorithms.apex_ddpg.apex_ddpg::ApexDDPGConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -APEX_DDPG_DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/apex_dqn/__init__.py b/rllib/algorithms/apex_dqn/__init__.py index 57e718b7295e7..e8385b3e302ee 100644 --- a/rllib/algorithms/apex_dqn/__init__.py +++ b/rllib/algorithms/apex_dqn/__init__.py @@ -1,11 +1,9 @@ from ray.rllib.algorithms.apex_dqn.apex_dqn import ( ApexDQN, ApexDQNConfig, - APEX_DEFAULT_CONFIG, ) __all__ = [ "ApexDQN", "ApexDQNConfig", - "APEX_DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/apex_dqn/apex_dqn.py b/rllib/algorithms/apex_dqn/apex_dqn.py index 5eea96eccc8e9..498113fef90ef 100644 --- a/rllib/algorithms/apex_dqn/apex_dqn.py +++ b/rllib/algorithms/apex_dqn/apex_dqn.py @@ -29,7 +29,7 @@ from ray.rllib.utils.actor_manager import FaultTolerantActorManager from ray.rllib.utils.actors import create_colocated_actors from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import DEPRECATED_VALUE, Deprecated +from ray.rllib.utils.deprecation import DEPRECATED_VALUE from ray.rllib.utils.metrics import ( LAST_TARGET_UPDATE_TS, NUM_AGENT_STEPS_SAMPLED, @@ -753,20 +753,3 @@ def default_resource_request( ), strategy=cf.placement_strategy, ) - - -# Deprecated: Use ray.rllib.algorithms.apex_dqn.ApexDQNConfig instead! -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(ApexDQNConfig().to_dict()) - - @Deprecated( - old="ray.rllib.agents.dqn.apex.APEX_DEFAULT_CONFIG", - new="ray.rllib.algorithms.apex_dqn.apex_dqn.ApexDQNConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -APEX_DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/appo/appo_tf_policy.py b/rllib/algorithms/appo/appo_tf_policy.py index 1bf98b7220d62..9519cf28cfb83 100644 --- a/rllib/algorithms/appo/appo_tf_policy.py +++ b/rllib/algorithms/appo/appo_tf_policy.py @@ -10,7 +10,6 @@ import gymnasium as gym from typing import Dict, List, Optional, Type, Union -import ray from ray.rllib.algorithms.appo.utils import make_appo_models from ray.rllib.algorithms.impala import vtrace_tf as vtrace from ray.rllib.algorithms.impala.impala_tf_policy import ( diff --git a/rllib/algorithms/bandit/bandit_torch_policy.py b/rllib/algorithms/bandit/bandit_torch_policy.py index 4f50104ddf7c8..f3bfa87144028 100644 --- a/rllib/algorithms/bandit/bandit_torch_policy.py +++ b/rllib/algorithms/bandit/bandit_torch_policy.py @@ -2,7 +2,6 @@ import time from gymnasium import spaces -import ray from ray.rllib.algorithms.bandit.bandit_torch_model import ( DiscreteLinearModelThompsonSampling, DiscreteLinearModelUCB, diff --git a/rllib/algorithms/bc/__init__.py b/rllib/algorithms/bc/__init__.py index 5f2a3ed086e8b..d746e04e508f8 100644 --- a/rllib/algorithms/bc/__init__.py +++ b/rllib/algorithms/bc/__init__.py @@ -1,8 +1,6 @@ -from ray.rllib.algorithms.bc.bc import BCConfig, BC, BC_DEFAULT_CONFIG +from ray.rllib.algorithms.bc.bc import BCConfig, BC __all__ = [ "BCConfig", "BC", - # Deprecated. 
- "BC_DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/bc/bc.py b/rllib/algorithms/bc/bc.py index 905c2813fc80b..97e546b5a62cf 100644 --- a/rllib/algorithms/bc/bc.py +++ b/rllib/algorithms/bc/bc.py @@ -1,7 +1,6 @@ from ray.rllib.algorithms.algorithm_config import AlgorithmConfig from ray.rllib.algorithms.marwil.marwil import MARWIL, MARWILConfig from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated class BCConfig(MARWILConfig): @@ -74,20 +73,3 @@ class BC(MARWIL): @override(MARWIL) def get_default_config(cls) -> AlgorithmConfig: return BCConfig() - - -# Deprecated: Use ray.rllib.algorithms.bc.BCConfig instead! -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(BCConfig().to_dict()) - - @Deprecated( - old="ray.rllib.agents.marwil.bc::DEFAULT_CONFIG", - new="ray.rllib.algorithms.bc.bc::BCConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -BC_DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/cql/cql.py b/rllib/algorithms/cql/cql.py index 262be7c17c18c..c4aeaae91aad5 100644 --- a/rllib/algorithms/cql/cql.py +++ b/rllib/algorithms/cql/cql.py @@ -20,7 +20,6 @@ from ray.rllib.utils.deprecation import ( DEPRECATED_VALUE, deprecation_warning, - Deprecated, ) from ray.rllib.utils.framework import try_import_tf, try_import_tfp from ray.rllib.utils.metrics import ( @@ -213,20 +212,3 @@ def training_step(self) -> ResultDict: # Return all collected metrics for the iteration. return train_results - - -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(CQLConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.cql.cql::DEFAULT_CONFIG", - new="ray.rllib.algorithms.cql.cql::CQLConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() -CQL_DEFAULT_CONFIG = DEFAULT_CONFIG diff --git a/rllib/algorithms/ddpg/ddpg_tf_policy.py b/rllib/algorithms/ddpg/ddpg_tf_policy.py index 5db1728ad9546..7a744a8835c84 100644 --- a/rllib/algorithms/ddpg/ddpg_tf_policy.py +++ b/rllib/algorithms/ddpg/ddpg_tf_policy.py @@ -3,8 +3,6 @@ import gymnasium as gym from typing import Dict, Tuple, List, Type, Union, Optional, Any -import ray -import ray.experimental.tf_utils from ray.rllib.algorithms.ddpg.utils import make_ddpg_models, validate_spaces from ray.rllib.algorithms.dqn.dqn_tf_policy import ( postprocess_nstep_and_prio, diff --git a/rllib/algorithms/dreamer/dreamer_torch_policy.py b/rllib/algorithms/dreamer/dreamer_torch_policy.py index 0ae11b4673529..3039194b0feda 100644 --- a/rllib/algorithms/dreamer/dreamer_torch_policy.py +++ b/rllib/algorithms/dreamer/dreamer_torch_policy.py @@ -5,7 +5,6 @@ ) import logging -import ray import numpy as np from typing import Dict, Optional diff --git a/rllib/algorithms/impala/impala_tf_policy.py b/rllib/algorithms/impala/impala_tf_policy.py index 415434ee6b399..57b83aa37cc45 100644 --- a/rllib/algorithms/impala/impala_tf_policy.py +++ b/rllib/algorithms/impala/impala_tf_policy.py @@ -7,7 +7,6 @@ import gymnasium as gym from typing import Dict, List, Type, Union -import ray from ray.rllib.algorithms.impala import vtrace_tf as vtrace from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.models.tf.tf_action_dist import Categorical, TFActionDistribution diff --git a/rllib/algorithms/impala/tf/impala_tf_policy_rlm.py b/rllib/algorithms/impala/tf/impala_tf_policy_rlm.py index efab8a5fad4fc..ce8d4f33026be 100644 
--- a/rllib/algorithms/impala/tf/impala_tf_policy_rlm.py +++ b/rllib/algorithms/impala/tf/impala_tf_policy_rlm.py @@ -1,7 +1,6 @@ import logging from typing import Dict, List, Union -import ray from ray.rllib.algorithms.ppo.ppo_tf_policy import validate_config from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.policy.sample_batch import SampleBatch diff --git a/rllib/algorithms/maddpg/maddpg_tf_policy.py b/rllib/algorithms/maddpg/maddpg_tf_policy.py index 5cbe8d6da5fe8..fa3c911b4ebcc 100644 --- a/rllib/algorithms/maddpg/maddpg_tf_policy.py +++ b/rllib/algorithms/maddpg/maddpg_tf_policy.py @@ -1,4 +1,3 @@ -import ray from ray.rllib.algorithms.dqn.dqn_tf_policy import minimize_and_clip from ray.rllib.evaluation.postprocessing import adjust_nstep from ray.rllib.models import ModelCatalog diff --git a/rllib/algorithms/maml/maml_tf_policy.py b/rllib/algorithms/maml/maml_tf_policy.py index b77d51968957b..3e8aba4782918 100644 --- a/rllib/algorithms/maml/maml_tf_policy.py +++ b/rllib/algorithms/maml/maml_tf_policy.py @@ -1,7 +1,6 @@ import logging from typing import Dict, List, Type, Union -import ray from ray.rllib.algorithms.ppo.ppo_tf_policy import validate_config from ray.rllib.evaluation.postprocessing import ( Postprocessing, diff --git a/rllib/algorithms/marwil/marwil_tf_policy.py b/rllib/algorithms/marwil/marwil_tf_policy.py index 422a0dd4f7cbf..af8e07c63650b 100644 --- a/rllib/algorithms/marwil/marwil_tf_policy.py +++ b/rllib/algorithms/marwil/marwil_tf_policy.py @@ -1,7 +1,6 @@ import logging from typing import Any, Dict, List, Optional, Type, Union -import ray from ray.rllib.evaluation.episode import Episode from ray.rllib.evaluation.postprocessing import compute_advantages, Postprocessing from ray.rllib.models.action_dist import ActionDistribution diff --git a/rllib/algorithms/marwil/marwil_torch_policy.py b/rllib/algorithms/marwil/marwil_torch_policy.py index 7578ac8abba27..219a0b176d918 100644 --- a/rllib/algorithms/marwil/marwil_torch_policy.py +++ b/rllib/algorithms/marwil/marwil_torch_policy.py @@ -1,6 +1,5 @@ from typing import Dict, List, Type, Union -import ray from ray.rllib.algorithms.marwil.marwil_tf_policy import PostprocessAdvantages from ray.rllib.evaluation.postprocessing import Postprocessing from ray.rllib.models.modelv2 import ModelV2 diff --git a/rllib/algorithms/mbmpo/mbmpo_torch_policy.py b/rllib/algorithms/mbmpo/mbmpo_torch_policy.py index 821248f672942..112e65cd24609 100644 --- a/rllib/algorithms/mbmpo/mbmpo_torch_policy.py +++ b/rllib/algorithms/mbmpo/mbmpo_torch_policy.py @@ -2,7 +2,6 @@ import logging from typing import Tuple, Type -import ray from ray.rllib.algorithms.maml.maml_torch_policy import MAMLTorchPolicy from ray.rllib.models.catalog import ModelCatalog from ray.rllib.models.modelv2 import ModelV2 diff --git a/rllib/algorithms/ppo/ppo_tf_policy.py b/rllib/algorithms/ppo/ppo_tf_policy.py index 8319b4d9e0a22..a00f8c037eb6e 100644 --- a/rllib/algorithms/ppo/ppo_tf_policy.py +++ b/rllib/algorithms/ppo/ppo_tf_policy.py @@ -5,7 +5,6 @@ import logging from typing import Dict, List, Type, Union -import ray from ray.rllib.evaluation.postprocessing import ( Postprocessing, compute_gae_for_sample_batch, diff --git a/rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py b/rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py index b1eebfb1b2e32..b5e759ebab0d0 100644 --- a/rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py +++ b/rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py @@ -1,7 +1,6 @@ import logging from typing import Dict, List, Union -import ray from 
ray.rllib.algorithms.ppo.ppo_tf_policy import validate_config from ray.rllib.evaluation.postprocessing import ( Postprocessing, diff --git a/rllib/algorithms/ppo/torch/ppo_torch_policy_rlm.py b/rllib/algorithms/ppo/torch/ppo_torch_policy_rlm.py index 9aea0cfce358c..3e8495967ec78 100644 --- a/rllib/algorithms/ppo/torch/ppo_torch_policy_rlm.py +++ b/rllib/algorithms/ppo/torch/ppo_torch_policy_rlm.py @@ -1,7 +1,6 @@ import logging from typing import Dict, List, Type, Union -import ray from ray.rllib.algorithms.ppo.ppo_tf_policy import validate_config from ray.rllib.evaluation.postprocessing import ( Postprocessing, diff --git a/rllib/algorithms/qmix/qmix_policy.py b/rllib/algorithms/qmix/qmix_policy.py index edc4af906cec7..3d549c9f61d5f 100644 --- a/rllib/algorithms/qmix/qmix_policy.py +++ b/rllib/algorithms/qmix/qmix_policy.py @@ -4,7 +4,6 @@ import tree # pip install dm_tree from typing import Dict, List, Optional, Tuple -import ray from ray.rllib.algorithms.qmix.mixers import VDNMixer, QMixer from ray.rllib.algorithms.qmix.model import RNNModel, _get_size from ray.rllib.env.multi_agent_env import ENV_STATE diff --git a/rllib/algorithms/r2d2/__init__.py b/rllib/algorithms/r2d2/__init__.py index d19bdec5affca..a5b6044d35f0e 100644 --- a/rllib/algorithms/r2d2/__init__.py +++ b/rllib/algorithms/r2d2/__init__.py @@ -1,4 +1,4 @@ -from ray.rllib.algorithms.r2d2.r2d2 import R2D2, R2D2Config, R2D2_DEFAULT_CONFIG +from ray.rllib.algorithms.r2d2.r2d2 import R2D2, R2D2Config from ray.rllib.algorithms.r2d2.r2d2_tf_policy import R2D2TFPolicy from ray.rllib.algorithms.r2d2.r2d2_torch_policy import R2D2TorchPolicy @@ -7,5 +7,4 @@ "R2D2Config", "R2D2TFPolicy", "R2D2TorchPolicy", - "R2D2_DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/r2d2/r2d2.py b/rllib/algorithms/r2d2/r2d2.py index a67d0071f301e..545f9469c47ae 100644 --- a/rllib/algorithms/r2d2/r2d2.py +++ b/rllib/algorithms/r2d2/r2d2.py @@ -7,7 +7,6 @@ from ray.rllib.algorithms.r2d2.r2d2_torch_policy import R2D2TorchPolicy from ray.rllib.policy.policy import Policy from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.deprecation import DEPRECATED_VALUE logger = logging.getLogger(__name__) @@ -221,20 +220,3 @@ def get_default_policy_class( return R2D2TorchPolicy else: return R2D2TFPolicy - - -# Deprecated: Use ray.rllib.algorithms.r2d2.r2d2.R2D2Config instead! 
-class _deprecated_default_config(dict): - def __init__(self): - super().__init__(R2D2Config().to_dict()) - - @Deprecated( - old="ray.rllib.agents.dqn.r2d2::R2D2_DEFAULT_CONFIG", - new="ray.rllib.algorithms.r2d2.r2d2::R2D2Config(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -R2D2_DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/r2d2/r2d2_tf_policy.py b/rllib/algorithms/r2d2/r2d2_tf_policy.py index 0c1b22ec397fc..299513bd43b11 100644 --- a/rllib/algorithms/r2d2/r2d2_tf_policy.py +++ b/rllib/algorithms/r2d2/r2d2_tf_policy.py @@ -333,7 +333,7 @@ def setup_late_mixins( R2D2TFPolicy = build_tf_policy( name="R2D2TFPolicy", loss_fn=r2d2_loss, - get_default_config=lambda: ray.rllib.algorithms.r2d2.r2d2.R2D2_DEFAULT_CONFIG, + get_default_config=lambda: ray.rllib.algorithms.r2d2.r2d2.R2D2Config(), postprocess_fn=postprocess_nstep_and_prio, stats_fn=build_q_stats, make_model=build_r2d2_model, diff --git a/rllib/algorithms/r2d2/r2d2_torch_policy.py b/rllib/algorithms/r2d2/r2d2_torch_policy.py index abfcc8c9f0dd9..0e4f6cc4ac413 100644 --- a/rllib/algorithms/r2d2/r2d2_torch_policy.py +++ b/rllib/algorithms/r2d2/r2d2_torch_policy.py @@ -314,7 +314,7 @@ def extra_action_out_fn( name="R2D2TorchPolicy", framework="torch", loss_fn=r2d2_loss, - get_default_config=lambda: ray.rllib.algorithms.r2d2.r2d2.R2D2_DEFAULT_CONFIG, + get_default_config=lambda: ray.rllib.algorithms.r2d2.r2d2.R2D2Config(), make_model_and_action_dist=build_r2d2_model_and_distribution, action_distribution_fn=get_distribution_inputs_and_class, stats_fn=build_q_stats, diff --git a/rllib/algorithms/sac/__init__.py b/rllib/algorithms/sac/__init__.py index 77addbb84554e..222b4429b63b5 100644 --- a/rllib/algorithms/sac/__init__.py +++ b/rllib/algorithms/sac/__init__.py @@ -1,11 +1,8 @@ -from ray.rllib.algorithms.sac.sac import SAC, DEFAULT_CONFIG, SACConfig +from ray.rllib.algorithms.sac.sac import SAC, SACConfig from ray.rllib.algorithms.sac.sac_tf_policy import SACTFPolicy from ray.rllib.algorithms.sac.sac_torch_policy import SACTorchPolicy -from ray.rllib.algorithms.sac.rnnsac import ( - RNNSAC, - DEFAULT_CONFIG as RNNSAC_DEFAULT_CONFIG, -) +from ray.rllib.algorithms.sac.rnnsac import RNNSAC from ray.rllib.algorithms.sac.rnnsac import RNNSACTorchPolicy, RNNSACConfig __all__ = [ @@ -16,7 +13,4 @@ "RNNSACTorchPolicy", "RNNSAC", "RNNSACConfig", - # Deprecated. 
- "DEFAULT_CONFIG", - "RNNSAC_DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/simple_q/simple_q_tf_policy.py b/rllib/algorithms/simple_q/simple_q_tf_policy.py index 11735443a664c..e892d5ebc959f 100644 --- a/rllib/algorithms/simple_q/simple_q_tf_policy.py +++ b/rllib/algorithms/simple_q/simple_q_tf_policy.py @@ -3,7 +3,6 @@ import logging from typing import Dict, List, Tuple, Type, Union -import ray from ray.rllib.algorithms.simple_q.utils import make_q_models from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.models.tf.tf_action_dist import Categorical, TFActionDistribution diff --git a/rllib/algorithms/simple_q/simple_q_torch_policy.py b/rllib/algorithms/simple_q/simple_q_torch_policy.py index d8034ccdea9fa..091d346f8344c 100644 --- a/rllib/algorithms/simple_q/simple_q_torch_policy.py +++ b/rllib/algorithms/simple_q/simple_q_torch_policy.py @@ -3,7 +3,6 @@ import logging from typing import Any, Dict, List, Tuple, Type, Union -import ray from ray.rllib.algorithms.simple_q.utils import make_q_models from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.models.torch.torch_action_dist import ( diff --git a/rllib/algorithms/td3/__init__.py b/rllib/algorithms/td3/__init__.py index 12884c091036a..e17240ba6b511 100644 --- a/rllib/algorithms/td3/__init__.py +++ b/rllib/algorithms/td3/__init__.py @@ -1,7 +1,6 @@ -from ray.rllib.algorithms.td3.td3 import TD3, TD3Config, TD3_DEFAULT_CONFIG +from ray.rllib.algorithms.td3.td3 import TD3, TD3Config __all__ = [ "TD3", "TD3Config", - "TD3_DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/td3/td3.py b/rllib/algorithms/td3/td3.py index 2319b3502ddcf..bc8fe25d81434 100644 --- a/rllib/algorithms/td3/td3.py +++ b/rllib/algorithms/td3/td3.py @@ -6,7 +6,6 @@ from ray.rllib.algorithms.algorithm_config import AlgorithmConfig from ray.rllib.algorithms.ddpg.ddpg import DDPG, DDPGConfig from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.deprecation import DEPRECATED_VALUE @@ -107,20 +106,3 @@ class TD3(DDPG): @override(DDPG) def get_default_config(cls) -> AlgorithmConfig: return TD3Config() - - -# Deprecated: Use ray.rllib.algorithms.ddpg..td3.TD3Config instead! 
-class _deprecated_default_config(dict): - def __init__(self): - super().__init__(TD3Config().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.ddpg.td3::TD3_DEFAULT_CONFIG", - new="ray.rllib.algorithms.td3.td3::TD3Config(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -TD3_DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/policy/eager_tf_policy.py b/rllib/policy/eager_tf_policy.py index 968c3d5ebf065..fef25bf1e5aa5 100644 --- a/rllib/policy/eager_tf_policy.py +++ b/rllib/policy/eager_tf_policy.py @@ -10,7 +10,6 @@ import tree # pip install dm_tree -from ray.rllib.algorithms.algorithm import AlgorithmConfig from ray.rllib.evaluation.episode import Episode from ray.rllib.models.catalog import ModelCatalog from ray.rllib.models.repeated_values import RepeatedValues diff --git a/rllib/policy/policy_template.py b/rllib/policy/policy_template.py index 0e01aa8b88cca..d9f9aff306e5f 100644 --- a/rllib/policy/policy_template.py +++ b/rllib/policy/policy_template.py @@ -12,7 +12,6 @@ import gymnasium as gym -from ray.rllib.algorithms.algorithm import AlgorithmConfig from ray.rllib.models.catalog import ModelCatalog from ray.rllib.models.jax.jax_modelv2 import JAXModelV2 from ray.rllib.models.modelv2 import ModelV2 diff --git a/rllib/policy/tf_policy_template.py b/rllib/policy/tf_policy_template.py index 02359f0c9cd1a..4658cae6021a6 100644 --- a/rllib/policy/tf_policy_template.py +++ b/rllib/policy/tf_policy_template.py @@ -9,7 +9,6 @@ from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.tf_policy import TFPolicy from ray.rllib.utils import add_mixins, force_list -from ray.rllib.algorithms.algorithm import AlgorithmConfig from ray.rllib.utils.annotations import override, DeveloperAPI from ray.rllib.utils.deprecation import deprecation_warning, DEPRECATED_VALUE from ray.rllib.utils.framework import try_import_tf diff --git a/rllib/tests/test_gpus.py b/rllib/tests/test_gpus.py index 984808d4b9061..e77c8cfdd9bd0 100644 --- a/rllib/tests/test_gpus.py +++ b/rllib/tests/test_gpus.py @@ -2,7 +2,7 @@ import ray from ray import air -from ray.rllib.algorithms.a2c.a2c import A2C, A2C_DEFAULT_CONFIG +from ray.rllib.algorithms.a2c.a2c import A2C, A2CConfig from ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.test_utils import framework_iterator from ray import tune @@ -18,7 +18,7 @@ def test_gpus_in_non_local_mode(self): actual_gpus = torch.cuda.device_count() print(f"Actual GPUs found (by torch): {actual_gpus}") - config = A2C_DEFAULT_CONFIG.copy() + config = A2CConfig() config["num_workers"] = 2 config["env"] = "CartPole-v1" @@ -88,7 +88,7 @@ def test_gpus_in_local_mode(self): actual_gpus_available = torch.cuda.device_count() - config = A2C_DEFAULT_CONFIG.copy() + config = A2CConfig() config["num_workers"] = 2 config["env"] = "CartPole-v1" From f32b5eb852aeff45b0de31f184b95dd3bccecf42 Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Sun, 12 Mar 2023 18:01:30 -0700 Subject: [PATCH 03/11] lint Signed-off-by: Artur Niederfahrenhorst --- rllib/algorithms/a3c/a3c_tf_policy.py | 1 - rllib/algorithms/a3c/a3c_torch_policy.py | 1 - rllib/algorithms/appo/appo_tf_policy.py | 1 - rllib/algorithms/bandit/bandit_torch_policy.py | 1 - rllib/algorithms/ddpg/ddpg_tf_policy.py | 2 -- rllib/algorithms/dreamer/dreamer_torch_policy.py | 1 - rllib/algorithms/impala/impala_tf_policy.py | 1 - rllib/algorithms/impala/tf/impala_tf_policy_rlm.py | 1 - rllib/algorithms/maddpg/maddpg_tf_policy.py | 1 - 
rllib/algorithms/maml/maml_tf_policy.py | 1 - rllib/algorithms/marwil/marwil_tf_policy.py | 1 - rllib/algorithms/marwil/marwil_torch_policy.py | 1 - rllib/algorithms/mbmpo/mbmpo_torch_policy.py | 1 - rllib/algorithms/ppo/ppo_tf_policy.py | 1 - rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py | 1 - rllib/algorithms/ppo/torch/ppo_torch_policy_rlm.py | 1 - rllib/algorithms/qmix/qmix_policy.py | 1 - rllib/algorithms/simple_q/simple_q_tf_policy.py | 1 - rllib/algorithms/simple_q/simple_q_torch_policy.py | 1 - rllib/policy/eager_tf_policy.py | 1 - rllib/policy/policy_template.py | 1 - rllib/policy/tf_policy_template.py | 1 - 22 files changed, 23 deletions(-) diff --git a/rllib/algorithms/a3c/a3c_tf_policy.py b/rllib/algorithms/a3c/a3c_tf_policy.py index a85a73e25bf60..d3c5649478f96 100644 --- a/rllib/algorithms/a3c/a3c_tf_policy.py +++ b/rllib/algorithms/a3c/a3c_tf_policy.py @@ -1,7 +1,6 @@ """Note: Keep in sync with changes to VTraceTFPolicy.""" from typing import Dict, List, Optional, Type, Union -import ray from ray.rllib.evaluation.episode import Episode from ray.rllib.evaluation.postprocessing import ( compute_gae_for_sample_batch, diff --git a/rllib/algorithms/a3c/a3c_torch_policy.py b/rllib/algorithms/a3c/a3c_torch_policy.py index 81836e765e9e8..d94a07273e6cd 100644 --- a/rllib/algorithms/a3c/a3c_torch_policy.py +++ b/rllib/algorithms/a3c/a3c_torch_policy.py @@ -1,6 +1,5 @@ from typing import Dict, List, Optional, Type, Union -import ray from ray.rllib.evaluation.episode import Episode from ray.rllib.evaluation.postprocessing import ( compute_gae_for_sample_batch, diff --git a/rllib/algorithms/appo/appo_tf_policy.py b/rllib/algorithms/appo/appo_tf_policy.py index 1bf98b7220d62..9519cf28cfb83 100644 --- a/rllib/algorithms/appo/appo_tf_policy.py +++ b/rllib/algorithms/appo/appo_tf_policy.py @@ -10,7 +10,6 @@ import gymnasium as gym from typing import Dict, List, Optional, Type, Union -import ray from ray.rllib.algorithms.appo.utils import make_appo_models from ray.rllib.algorithms.impala import vtrace_tf as vtrace from ray.rllib.algorithms.impala.impala_tf_policy import ( diff --git a/rllib/algorithms/bandit/bandit_torch_policy.py b/rllib/algorithms/bandit/bandit_torch_policy.py index 4f50104ddf7c8..f3bfa87144028 100644 --- a/rllib/algorithms/bandit/bandit_torch_policy.py +++ b/rllib/algorithms/bandit/bandit_torch_policy.py @@ -2,7 +2,6 @@ import time from gymnasium import spaces -import ray from ray.rllib.algorithms.bandit.bandit_torch_model import ( DiscreteLinearModelThompsonSampling, DiscreteLinearModelUCB, diff --git a/rllib/algorithms/ddpg/ddpg_tf_policy.py b/rllib/algorithms/ddpg/ddpg_tf_policy.py index 5db1728ad9546..7a744a8835c84 100644 --- a/rllib/algorithms/ddpg/ddpg_tf_policy.py +++ b/rllib/algorithms/ddpg/ddpg_tf_policy.py @@ -3,8 +3,6 @@ import gymnasium as gym from typing import Dict, Tuple, List, Type, Union, Optional, Any -import ray -import ray.experimental.tf_utils from ray.rllib.algorithms.ddpg.utils import make_ddpg_models, validate_spaces from ray.rllib.algorithms.dqn.dqn_tf_policy import ( postprocess_nstep_and_prio, diff --git a/rllib/algorithms/dreamer/dreamer_torch_policy.py b/rllib/algorithms/dreamer/dreamer_torch_policy.py index 0ae11b4673529..3039194b0feda 100644 --- a/rllib/algorithms/dreamer/dreamer_torch_policy.py +++ b/rllib/algorithms/dreamer/dreamer_torch_policy.py @@ -5,7 +5,6 @@ ) import logging -import ray import numpy as np from typing import Dict, Optional diff --git a/rllib/algorithms/impala/impala_tf_policy.py 
b/rllib/algorithms/impala/impala_tf_policy.py index 415434ee6b399..57b83aa37cc45 100644 --- a/rllib/algorithms/impala/impala_tf_policy.py +++ b/rllib/algorithms/impala/impala_tf_policy.py @@ -7,7 +7,6 @@ import gymnasium as gym from typing import Dict, List, Type, Union -import ray from ray.rllib.algorithms.impala import vtrace_tf as vtrace from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.models.tf.tf_action_dist import Categorical, TFActionDistribution diff --git a/rllib/algorithms/impala/tf/impala_tf_policy_rlm.py b/rllib/algorithms/impala/tf/impala_tf_policy_rlm.py index efab8a5fad4fc..ce8d4f33026be 100644 --- a/rllib/algorithms/impala/tf/impala_tf_policy_rlm.py +++ b/rllib/algorithms/impala/tf/impala_tf_policy_rlm.py @@ -1,7 +1,6 @@ import logging from typing import Dict, List, Union -import ray from ray.rllib.algorithms.ppo.ppo_tf_policy import validate_config from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.policy.sample_batch import SampleBatch diff --git a/rllib/algorithms/maddpg/maddpg_tf_policy.py b/rllib/algorithms/maddpg/maddpg_tf_policy.py index 5cbe8d6da5fe8..fa3c911b4ebcc 100644 --- a/rllib/algorithms/maddpg/maddpg_tf_policy.py +++ b/rllib/algorithms/maddpg/maddpg_tf_policy.py @@ -1,4 +1,3 @@ -import ray from ray.rllib.algorithms.dqn.dqn_tf_policy import minimize_and_clip from ray.rllib.evaluation.postprocessing import adjust_nstep from ray.rllib.models import ModelCatalog diff --git a/rllib/algorithms/maml/maml_tf_policy.py b/rllib/algorithms/maml/maml_tf_policy.py index b77d51968957b..3e8aba4782918 100644 --- a/rllib/algorithms/maml/maml_tf_policy.py +++ b/rllib/algorithms/maml/maml_tf_policy.py @@ -1,7 +1,6 @@ import logging from typing import Dict, List, Type, Union -import ray from ray.rllib.algorithms.ppo.ppo_tf_policy import validate_config from ray.rllib.evaluation.postprocessing import ( Postprocessing, diff --git a/rllib/algorithms/marwil/marwil_tf_policy.py b/rllib/algorithms/marwil/marwil_tf_policy.py index 422a0dd4f7cbf..af8e07c63650b 100644 --- a/rllib/algorithms/marwil/marwil_tf_policy.py +++ b/rllib/algorithms/marwil/marwil_tf_policy.py @@ -1,7 +1,6 @@ import logging from typing import Any, Dict, List, Optional, Type, Union -import ray from ray.rllib.evaluation.episode import Episode from ray.rllib.evaluation.postprocessing import compute_advantages, Postprocessing from ray.rllib.models.action_dist import ActionDistribution diff --git a/rllib/algorithms/marwil/marwil_torch_policy.py b/rllib/algorithms/marwil/marwil_torch_policy.py index 7578ac8abba27..219a0b176d918 100644 --- a/rllib/algorithms/marwil/marwil_torch_policy.py +++ b/rllib/algorithms/marwil/marwil_torch_policy.py @@ -1,6 +1,5 @@ from typing import Dict, List, Type, Union -import ray from ray.rllib.algorithms.marwil.marwil_tf_policy import PostprocessAdvantages from ray.rllib.evaluation.postprocessing import Postprocessing from ray.rllib.models.modelv2 import ModelV2 diff --git a/rllib/algorithms/mbmpo/mbmpo_torch_policy.py b/rllib/algorithms/mbmpo/mbmpo_torch_policy.py index 821248f672942..112e65cd24609 100644 --- a/rllib/algorithms/mbmpo/mbmpo_torch_policy.py +++ b/rllib/algorithms/mbmpo/mbmpo_torch_policy.py @@ -2,7 +2,6 @@ import logging from typing import Tuple, Type -import ray from ray.rllib.algorithms.maml.maml_torch_policy import MAMLTorchPolicy from ray.rllib.models.catalog import ModelCatalog from ray.rllib.models.modelv2 import ModelV2 diff --git a/rllib/algorithms/ppo/ppo_tf_policy.py b/rllib/algorithms/ppo/ppo_tf_policy.py index 8319b4d9e0a22..a00f8c037eb6e 
100644 --- a/rllib/algorithms/ppo/ppo_tf_policy.py +++ b/rllib/algorithms/ppo/ppo_tf_policy.py @@ -5,7 +5,6 @@ import logging from typing import Dict, List, Type, Union -import ray from ray.rllib.evaluation.postprocessing import ( Postprocessing, compute_gae_for_sample_batch, diff --git a/rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py b/rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py index b1eebfb1b2e32..b5e759ebab0d0 100644 --- a/rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py +++ b/rllib/algorithms/ppo/tf/ppo_tf_policy_rlm.py @@ -1,7 +1,6 @@ import logging from typing import Dict, List, Union -import ray from ray.rllib.algorithms.ppo.ppo_tf_policy import validate_config from ray.rllib.evaluation.postprocessing import ( Postprocessing, diff --git a/rllib/algorithms/ppo/torch/ppo_torch_policy_rlm.py b/rllib/algorithms/ppo/torch/ppo_torch_policy_rlm.py index 9aea0cfce358c..3e8495967ec78 100644 --- a/rllib/algorithms/ppo/torch/ppo_torch_policy_rlm.py +++ b/rllib/algorithms/ppo/torch/ppo_torch_policy_rlm.py @@ -1,7 +1,6 @@ import logging from typing import Dict, List, Type, Union -import ray from ray.rllib.algorithms.ppo.ppo_tf_policy import validate_config from ray.rllib.evaluation.postprocessing import ( Postprocessing, diff --git a/rllib/algorithms/qmix/qmix_policy.py b/rllib/algorithms/qmix/qmix_policy.py index edc4af906cec7..3d549c9f61d5f 100644 --- a/rllib/algorithms/qmix/qmix_policy.py +++ b/rllib/algorithms/qmix/qmix_policy.py @@ -4,7 +4,6 @@ import tree # pip install dm_tree from typing import Dict, List, Optional, Tuple -import ray from ray.rllib.algorithms.qmix.mixers import VDNMixer, QMixer from ray.rllib.algorithms.qmix.model import RNNModel, _get_size from ray.rllib.env.multi_agent_env import ENV_STATE diff --git a/rllib/algorithms/simple_q/simple_q_tf_policy.py b/rllib/algorithms/simple_q/simple_q_tf_policy.py index 11735443a664c..e892d5ebc959f 100644 --- a/rllib/algorithms/simple_q/simple_q_tf_policy.py +++ b/rllib/algorithms/simple_q/simple_q_tf_policy.py @@ -3,7 +3,6 @@ import logging from typing import Dict, List, Tuple, Type, Union -import ray from ray.rllib.algorithms.simple_q.utils import make_q_models from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.models.tf.tf_action_dist import Categorical, TFActionDistribution diff --git a/rllib/algorithms/simple_q/simple_q_torch_policy.py b/rllib/algorithms/simple_q/simple_q_torch_policy.py index d8034ccdea9fa..091d346f8344c 100644 --- a/rllib/algorithms/simple_q/simple_q_torch_policy.py +++ b/rllib/algorithms/simple_q/simple_q_torch_policy.py @@ -3,7 +3,6 @@ import logging from typing import Any, Dict, List, Tuple, Type, Union -import ray from ray.rllib.algorithms.simple_q.utils import make_q_models from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.models.torch.torch_action_dist import ( diff --git a/rllib/policy/eager_tf_policy.py b/rllib/policy/eager_tf_policy.py index 968c3d5ebf065..fef25bf1e5aa5 100644 --- a/rllib/policy/eager_tf_policy.py +++ b/rllib/policy/eager_tf_policy.py @@ -10,7 +10,6 @@ import tree # pip install dm_tree -from ray.rllib.algorithms.algorithm import AlgorithmConfig from ray.rllib.evaluation.episode import Episode from ray.rllib.models.catalog import ModelCatalog from ray.rllib.models.repeated_values import RepeatedValues diff --git a/rllib/policy/policy_template.py b/rllib/policy/policy_template.py index 0e01aa8b88cca..d9f9aff306e5f 100644 --- a/rllib/policy/policy_template.py +++ b/rllib/policy/policy_template.py @@ -12,7 +12,6 @@ import gymnasium as gym -from ray.rllib.algorithms.algorithm 
import AlgorithmConfig from ray.rllib.models.catalog import ModelCatalog from ray.rllib.models.jax.jax_modelv2 import JAXModelV2 from ray.rllib.models.modelv2 import ModelV2 diff --git a/rllib/policy/tf_policy_template.py b/rllib/policy/tf_policy_template.py index 02359f0c9cd1a..4658cae6021a6 100644 --- a/rllib/policy/tf_policy_template.py +++ b/rllib/policy/tf_policy_template.py @@ -9,7 +9,6 @@ from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.tf_policy import TFPolicy from ray.rllib.utils import add_mixins, force_list -from ray.rllib.algorithms.algorithm import AlgorithmConfig from ray.rllib.utils.annotations import override, DeveloperAPI from ray.rllib.utils.deprecation import deprecation_warning, DEPRECATED_VALUE from ray.rllib.utils.framework import try_import_tf From 8c4f99fa02a88e9149b7daf18324d30a6b859e59 Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Sun, 12 Mar 2023 18:27:19 -0700 Subject: [PATCH 04/11] Delete more occurrences of legacy DEFAULT_CONFIG objects Signed-off-by: Artur Niederfahrenhorst --- rllib/algorithms/a3c/__init__.py | 4 ++-- rllib/algorithms/a3c/a3c.py | 18 ----------------- rllib/algorithms/algorithm_config.py | 6 +++--- rllib/algorithms/alpha_star/__init__.py | 2 -- rllib/algorithms/alpha_star/alpha_star.py | 18 ----------------- rllib/algorithms/alpha_zero/__init__.py | 2 -- rllib/algorithms/alpha_zero/alpha_zero.py | 19 +----------------- rllib/algorithms/appo/__init__.py | 3 +-- rllib/algorithms/appo/appo.py | 18 ----------------- rllib/algorithms/ars/__init__.py | 3 +-- rllib/algorithms/ars/ars.py | 18 ----------------- rllib/algorithms/ars/ars_torch_policy.py | 2 +- rllib/algorithms/bandit/bandit.py | 18 ----------------- rllib/algorithms/bandit/bandit_tf_policy.py | 2 +- rllib/algorithms/cql/__init__.py | 3 +-- rllib/algorithms/cql/cql_tf_policy.py | 2 +- rllib/algorithms/cql/cql_torch_policy.py | 2 +- rllib/algorithms/ddpg/__init__.py | 3 +-- rllib/algorithms/ddpg/ddpg.py | 18 ----------------- rllib/algorithms/ddppo/__init__.py | 3 +-- rllib/algorithms/ddppo/ddppo.py | 18 ----------------- rllib/algorithms/dqn/__init__.py | 3 +-- rllib/algorithms/dqn/dqn.py | 17 ---------------- rllib/algorithms/dqn/dqn_tf_policy.py | 2 +- rllib/algorithms/dqn/dqn_torch_policy.py | 2 +- rllib/algorithms/dreamer/__init__.py | 2 -- rllib/algorithms/dreamer/dreamer.py | 18 ----------------- rllib/algorithms/es/__init__.py | 4 ++-- rllib/algorithms/es/es.py | 17 ---------------- rllib/algorithms/es/es_torch_policy.py | 2 +- rllib/algorithms/impala/__init__.py | 3 +-- rllib/algorithms/impala/impala.py | 18 ----------------- rllib/algorithms/maddpg/__init__.py | 3 +-- rllib/algorithms/maddpg/maddpg.py | 19 +----------------- rllib/algorithms/maml/__init__.py | 3 +-- rllib/algorithms/maml/maml.py | 19 +----------------- rllib/algorithms/maml/maml_torch_policy.py | 2 +- rllib/algorithms/marwil/__init__.py | 3 --- rllib/algorithms/marwil/marwil.py | 19 +----------------- rllib/algorithms/mbmpo/__init__.py | 3 +-- rllib/algorithms/mbmpo/mbmpo.py | 19 +----------------- rllib/algorithms/pg/__init__.py | 4 +--- rllib/algorithms/pg/pg.py | 18 ----------------- rllib/algorithms/ppo/__init__.py | 3 +-- rllib/algorithms/ppo/ppo.py | 20 +------------------ rllib/algorithms/qmix/__init__.py | 4 ++-- rllib/algorithms/qmix/qmix.py | 18 ----------------- rllib/algorithms/sac/rnnsac.py | 18 +---------------- rllib/algorithms/sac/rnnsac_torch_policy.py | 2 +- rllib/algorithms/sac/sac.py | 18 ----------------- rllib/algorithms/sac/sac_tf_policy.py | 2 +-
rllib/algorithms/sac/sac_torch_policy.py | 2 +- rllib/algorithms/simple_q/__init__.py | 2 -- rllib/algorithms/simple_q/simple_q.py | 19 +----------------- rllib/algorithms/slateq/__init__.py | 2 -- rllib/algorithms/slateq/slateq.py | 19 +----------------- rllib/algorithms/slateq/slateq_tf_policy.py | 2 +- .../algorithms/slateq/slateq_torch_policy.py | 2 +- rllib/tests/test_local.py | 4 ++-- rllib/tests/test_nested_action_spaces.py | 4 ++-- rllib/tests/test_placement_groups.py | 8 ++++---- 61 files changed, 51 insertions(+), 480 deletions(-) diff --git a/rllib/algorithms/a3c/__init__.py b/rllib/algorithms/a3c/__init__.py index 415afda039c7d..e003029d470fd 100644 --- a/rllib/algorithms/a3c/__init__.py +++ b/rllib/algorithms/a3c/__init__.py @@ -1,3 +1,3 @@ -from ray.rllib.algorithms.a3c.a3c import A3CConfig, A3C, DEFAULT_CONFIG +from ray.rllib.algorithms.a3c.a3c import A3CConfig, A3C -__all__ = ["A3CConfig", "A3C", "DEFAULT_CONFIG"] +__all__ = ["A3CConfig", "A3C"] diff --git a/rllib/algorithms/a3c/a3c.py b/rllib/algorithms/a3c/a3c.py index e6a119dd10c6e..4440266fa200c 100644 --- a/rllib/algorithms/a3c/a3c.py +++ b/rllib/algorithms/a3c/a3c.py @@ -6,7 +6,6 @@ from ray.rllib.evaluation.rollout_worker import RolloutWorker from ray.rllib.policy.policy import Policy from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.metrics import ( APPLY_GRADS_TIMER, GRAD_WAIT_TIMER, @@ -251,20 +250,3 @@ def sample_and_compute_grads(worker: RolloutWorker) -> Dict[str, Any]: ) return learner_info_builder.finalize() - - -# Deprecated: Use ray.rllib.algorithms.a3c.A3CConfig instead! -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(A3CConfig().to_dict()) - - @Deprecated( - old="ray.rllib.agents.a3c.a3c.DEFAULT_CONFIG", - new="ray.rllib.algorithms.a3c.a3c.A3CConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py index ed80e72353dd2..aa75f842f9e93 100644 --- a/rllib/algorithms/algorithm_config.py +++ b/rllib/algorithms/algorithm_config.py @@ -159,9 +159,9 @@ def from_dict(cls, config_dict: dict) -> "AlgorithmConfig": """Creates an AlgorithmConfig from a legacy python config dict. Examples: - >>> from ray.rllib.algorithms.ppo.ppo import DEFAULT_CONFIG, PPOConfig - >>> ppo_config = PPOConfig.from_dict(DEFAULT_CONFIG) - >>> ppo = ppo_config.build(env="Pendulum-v1") + >>> from ray.rllib.algorithms.ppo.ppo import PPOConfig # doctest: +SKIP + >>> ppo_config = PPOConfig.from_dict({...}) # doctest: +SKIP + >>> ppo = ppo_config.build(env="Pendulum-v1") # doctest: +SKIP Args: config_dict: The legacy formatted python config dict for some algorithm. 
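Note on the pattern repeated across the files in this commit: each algorithm loses its module-level DEFAULT_CONFIG dict (and the _deprecated_default_config shim behind it), and call sites construct the typed AlgorithmConfig subclasses directly instead, as the updated from_dict docstring above shows. A minimal before/after sketch, assuming PPO as in that docstring; the environment name and worker count below are illustrative values, not taken from this diff:

    # Sketch only, not part of the patch.
    # Old style (now deleted): copy the module-level dict and mutate keys:
    #     from ray.rllib.algorithms.ppo.ppo import DEFAULT_CONFIG
    #     config = DEFAULT_CONFIG.copy()
    #     config["num_workers"] = 2        # illustrative value
    #     config["env"] = "CartPole-v1"    # illustrative value
    # New style: chain the AlgorithmConfig setters, then build the Algorithm:
    from ray.rllib.algorithms.ppo import PPOConfig

    config = (
        PPOConfig()
        .environment(env="CartPole-v1")       # illustrative value
        .rollouts(num_rollout_workers=2)      # illustrative value
    )
    algo = config.build()

    # Code that still needs the legacy dict shape can call config.to_dict(),
    # the same conversion the removed shims wrapped.
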
diff --git a/rllib/algorithms/alpha_star/__init__.py b/rllib/algorithms/alpha_star/__init__.py index b85e5dcf36328..53e1e3563561f 100644 --- a/rllib/algorithms/alpha_star/__init__.py +++ b/rllib/algorithms/alpha_star/__init__.py @@ -1,11 +1,9 @@ from ray.rllib.algorithms.alpha_star.alpha_star import ( AlphaStar, AlphaStarConfig, - DEFAULT_CONFIG, ) __all__ = [ "AlphaStar", "AlphaStarConfig", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/alpha_star/alpha_star.py b/rllib/algorithms/alpha_star/alpha_star.py index e829bde1900a2..02d05fcc4324b 100644 --- a/rllib/algorithms/alpha_star/alpha_star.py +++ b/rllib/algorithms/alpha_star/alpha_star.py @@ -21,7 +21,6 @@ from ray.rllib.policy.sample_batch import MultiAgentBatch from ray.rllib.utils import deep_update from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.from_config import from_config from ray.rllib.utils.metrics import ( LAST_TARGET_UPDATE_TS, @@ -635,20 +634,3 @@ def __setstate__(self, state: dict) -> None: state_copy = state.copy() self.league_builder.__setstate__(state.pop("league_builder", {})) super().__setstate__(state_copy) - - -# Deprecated: Use ray.rllib.algorithms.alpha_star.AlphaStarConfig instead! -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(AlphaStarConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.alpha_star.alpha_star.DEFAULT_CONFIG", - new="ray.rllib.algorithms.alpha_star.alpha_star.AlphaStarConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/alpha_zero/__init__.py b/rllib/algorithms/alpha_zero/__init__.py index e334948951d4e..6fbfd3af71bbf 100644 --- a/rllib/algorithms/alpha_zero/__init__.py +++ b/rllib/algorithms/alpha_zero/__init__.py @@ -1,7 +1,6 @@ from ray.rllib.algorithms.alpha_zero.alpha_zero import ( AlphaZero, AlphaZeroConfig, - DEFAULT_CONFIG, ) from ray.rllib.algorithms.alpha_zero.alpha_zero_policy import AlphaZeroPolicy @@ -9,5 +8,4 @@ "AlphaZero", "AlphaZeroConfig", "AlphaZeroPolicy", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/alpha_zero/alpha_zero.py b/rllib/algorithms/alpha_zero/alpha_zero.py index 2bec09df274e9..0119cd0af7b34 100644 --- a/rllib/algorithms/alpha_zero/alpha_zero.py +++ b/rllib/algorithms/alpha_zero/alpha_zero.py @@ -16,7 +16,7 @@ from ray.rllib.models.torch.torch_action_dist import TorchCategorical from ray.rllib.policy.policy import Policy from ray.rllib.policy.sample_batch import concat_samples -from ray.rllib.utils.annotations import Deprecated, override +from ray.rllib.utils.annotations import override from ray.rllib.utils.deprecation import DEPRECATED_VALUE from ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.metrics import ( @@ -400,20 +400,3 @@ def training_step(self) -> ResultDict: # Return all collected metrics for the iteration. return train_results - - -# Deprecated: Use ray.rllib.algorithms.alpha_zero.AlphaZeroConfig instead! 
-class _deprecated_default_config(dict): - def __init__(self): - super().__init__(AlphaZeroConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.alpha_zero.alpha_zero.DEFAULT_CONFIG", - new="ray.rllib.algorithms.alpha_zero.alpha_zero.AlphaZeroConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/appo/__init__.py b/rllib/algorithms/appo/__init__.py index cc03908e2a3dc..acefcd6c95aeb 100644 --- a/rllib/algorithms/appo/__init__.py +++ b/rllib/algorithms/appo/__init__.py @@ -1,4 +1,4 @@ -from ray.rllib.algorithms.appo.appo import APPO, APPOConfig, DEFAULT_CONFIG +from ray.rllib.algorithms.appo.appo import APPO, APPOConfig from ray.rllib.algorithms.appo.appo_tf_policy import APPOTF1Policy, APPOTF2Policy from ray.rllib.algorithms.appo.appo_torch_policy import APPOTorchPolicy @@ -8,5 +8,4 @@ "APPOTF1Policy", "APPOTF2Policy", "APPOTorchPolicy", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/appo/appo.py b/rllib/algorithms/appo/appo.py index 46b948c83bc3e..eba5fdbb01b58 100644 --- a/rllib/algorithms/appo/appo.py +++ b/rllib/algorithms/appo/appo.py @@ -18,7 +18,6 @@ from ray.rllib.execution.common import _get_shared_metrics, STEPS_SAMPLED_COUNTER from ray.rllib.policy.policy import Policy from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.metrics import ( LAST_TARGET_UPDATE_TS, NUM_AGENT_STEPS_SAMPLED, @@ -294,20 +293,3 @@ def get_default_policy_class( from ray.rllib.algorithms.appo.appo_tf_policy import APPOTF2Policy return APPOTF2Policy - - -# Deprecated: Use ray.rllib.algorithms.appo.APPOConfig instead! -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(APPOConfig().to_dict()) - - @Deprecated( - old="ray.rllib.agents.ppo.appo::DEFAULT_CONFIG", - new="ray.rllib.algorithms.appo.appo::APPOConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/ars/__init__.py b/rllib/algorithms/ars/__init__.py index 92997a1ce125b..bdac9d1752a65 100644 --- a/rllib/algorithms/ars/__init__.py +++ b/rllib/algorithms/ars/__init__.py @@ -1,4 +1,4 @@ -from ray.rllib.algorithms.ars.ars import ARS, ARSConfig, DEFAULT_CONFIG +from ray.rllib.algorithms.ars.ars import ARS, ARSConfig from ray.rllib.algorithms.ars.ars_tf_policy import ARSTFPolicy from ray.rllib.algorithms.ars.ars_torch_policy import ARSTorchPolicy @@ -7,5 +7,4 @@ "ARSConfig", "ARSTFPolicy", "ARSTorchPolicy", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/ars/ars.py b/rllib/algorithms/ars/ars.py index 96c97279b9fff..1d27a13f09632 100644 --- a/rllib/algorithms/ars/ars.py +++ b/rllib/algorithms/ars/ars.py @@ -21,7 +21,6 @@ from ray.rllib.utils import FilterManager from ray.rllib.utils.actor_manager import FaultAwareApply from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.metrics import ( NUM_AGENT_STEPS_SAMPLED, NUM_AGENT_STEPS_TRAINED, @@ -605,20 +604,3 @@ def __setstate__(self, state): FilterManager.synchronize( {DEFAULT_POLICY_ID: self.policy.observation_filter}, self.workers ) - - -# Deprecated: Use ray.rllib.algorithms.ars.ARSConfig instead! 
-class _deprecated_default_config(dict): - def __init__(self): - super().__init__(ARSConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.ars.ars.DEFAULT_CONFIG", - new="ray.rllib.algorithms.ars.ars.ARSConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/ars/ars_torch_policy.py b/rllib/algorithms/ars/ars_torch_policy.py index aae154035506f..b5d6497894f11 100644 --- a/rllib/algorithms/ars/ars_torch_policy.py +++ b/rllib/algorithms/ars/ars_torch_policy.py @@ -13,7 +13,7 @@ name="ARSTorchPolicy", framework="torch", loss_fn=None, - get_default_config=lambda: ray.rllib.algorithms.ars.ars.DEFAULT_CONFIG, + get_default_config=lambda: ray.rllib.algorithms.ars.ars.ARSConfig(), before_init=before_init, after_init=after_init, make_model_and_action_dist=make_model_and_action_dist, diff --git a/rllib/algorithms/bandit/bandit.py b/rllib/algorithms/bandit/bandit.py index 2129a719e2a49..b5e278bd090f2 100644 --- a/rllib/algorithms/bandit/bandit.py +++ b/rllib/algorithms/bandit/bandit.py @@ -7,7 +7,6 @@ from ray.rllib.algorithms.bandit.bandit_torch_policy import BanditTorchPolicy from ray.rllib.policy.policy import Policy from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated logger = logging.getLogger(__name__) @@ -121,20 +120,3 @@ def get_default_policy_class( return BanditTFPolicy else: raise NotImplementedError("Only `framework=[torch|tf2]` supported!") - - -# Deprecated: Use ray.rllib.algorithms.bandit.BanditLinUCBConfig instead! -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(BanditLinUCBConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.bandit.bandit.DEFAULT_CONFIG", - new="ray.rllib.algorithms.bandit.bandit.BanditLin[UCB|TS]Config(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/bandit/bandit_tf_policy.py b/rllib/algorithms/bandit/bandit_tf_policy.py index a9fde50dc17ee..8527407ec57f3 100644 --- a/rllib/algorithms/bandit/bandit_tf_policy.py +++ b/rllib/algorithms/bandit/bandit_tf_policy.py @@ -149,7 +149,7 @@ def after_init(policy, *args): BanditTFPolicy = build_tf_policy( name="BanditTFPolicy", - get_default_config=lambda: ray.rllib.algorithms.bandit.bandit.DEFAULT_CONFIG, + get_default_config=lambda: ray.rllib.algorithms.bandit.bandit.BanditConfig(), validate_spaces=validate_spaces, make_model=make_model, loss_fn=None, diff --git a/rllib/algorithms/cql/__init__.py b/rllib/algorithms/cql/__init__.py index 10c7c3e7450c0..91b0cc69acd7d 100644 --- a/rllib/algorithms/cql/__init__.py +++ b/rllib/algorithms/cql/__init__.py @@ -1,9 +1,8 @@ -from ray.rllib.algorithms.cql.cql import CQL, DEFAULT_CONFIG, CQLConfig +from ray.rllib.algorithms.cql.cql import CQL, CQLConfig from ray.rllib.algorithms.cql.cql_torch_policy import CQLTorchPolicy __all__ = [ "CQL", "CQLTorchPolicy", "CQLConfig", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/cql/cql_tf_policy.py b/rllib/algorithms/cql/cql_tf_policy.py index a134ad7540d48..2aaecf01e2be0 100644 --- a/rllib/algorithms/cql/cql_tf_policy.py +++ b/rllib/algorithms/cql/cql_tf_policy.py @@ -411,7 +411,7 @@ def apply_gradients_fn(policy, optimizer, grads_and_vars): CQLTFPolicy = build_tf_policy( name="CQLTFPolicy", loss_fn=cql_loss, - get_default_config=lambda: ray.rllib.algorithms.cql.cql.DEFAULT_CONFIG, + 
get_default_config=lambda: ray.rllib.algorithms.cql.cql.CQLConfig(), validate_spaces=validate_spaces, stats_fn=cql_stats, postprocess_fn=postprocess_trajectory, diff --git a/rllib/algorithms/cql/cql_torch_policy.py b/rllib/algorithms/cql/cql_torch_policy.py index 7a6bf60e77e2e..ec8b1ab5a5e96 100644 --- a/rllib/algorithms/cql/cql_torch_policy.py +++ b/rllib/algorithms/cql/cql_torch_policy.py @@ -390,7 +390,7 @@ def apply_gradients_fn(policy, gradients): name="CQLTorchPolicy", framework="torch", loss_fn=cql_loss, - get_default_config=lambda: ray.rllib.algorithms.cql.cql.DEFAULT_CONFIG, + get_default_config=lambda: ray.rllib.algorithms.cql.cql.CQLConfig(), stats_fn=cql_stats, postprocess_fn=postprocess_trajectory, extra_grad_process_fn=apply_grad_clipping, diff --git a/rllib/algorithms/ddpg/__init__.py b/rllib/algorithms/ddpg/__init__.py index 04639c7c3079d..cd6e7a9c6e50e 100644 --- a/rllib/algorithms/ddpg/__init__.py +++ b/rllib/algorithms/ddpg/__init__.py @@ -1,8 +1,7 @@ -from ray.rllib.algorithms.ddpg.ddpg import DDPG, DDPGConfig, DEFAULT_CONFIG +from ray.rllib.algorithms.ddpg.ddpg import DDPG, DDPGConfig __all__ = [ "DDPG", "DDPGConfig", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/ddpg/ddpg.py b/rllib/algorithms/ddpg/ddpg.py index fe3cbb07cc1e5..2baaa3be19227 100644 --- a/rllib/algorithms/ddpg/ddpg.py +++ b/rllib/algorithms/ddpg/ddpg.py @@ -6,7 +6,6 @@ from ray.rllib.policy.policy import Policy from ray.rllib.utils.annotations import override from ray.rllib.utils.deprecation import DEPRECATED_VALUE -from ray.rllib.utils.deprecation import Deprecated logger = logging.getLogger(__name__) @@ -312,20 +311,3 @@ def get_default_policy_class( from ray.rllib.algorithms.ddpg.ddpg_tf_policy import DDPGTF2Policy return DDPGTF2Policy - - -# Deprecated: Use ray.rllib.algorithms.ddpg.DDPGConfig instead! -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(DDPGConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.ddpg.ddpg::DEFAULT_CONFIG", - new="ray.rllib.algorithms.ddpg.ddpg.DDPGConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/ddppo/__init__.py b/rllib/algorithms/ddppo/__init__.py index b33460d78c4c4..7f50fd14cafb6 100644 --- a/rllib/algorithms/ddppo/__init__.py +++ b/rllib/algorithms/ddppo/__init__.py @@ -1,7 +1,6 @@ -from ray.rllib.algorithms.ddppo.ddppo import DDPPOConfig, DDPPO, DEFAULT_CONFIG +from ray.rllib.algorithms.ddppo.ddppo import DDPPOConfig, DDPPO __all__ = [ "DDPPOConfig", "DDPPO", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/ddppo/ddppo.py b/rllib/algorithms/ddppo/ddppo.py index f33db502db8c4..7d90ac9284701 100644 --- a/rllib/algorithms/ddppo/ddppo.py +++ b/rllib/algorithms/ddppo/ddppo.py @@ -25,7 +25,6 @@ from ray.rllib.evaluation.postprocessing import Postprocessing from ray.rllib.evaluation.rollout_worker import RolloutWorker from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.metrics import ( LEARN_ON_BATCH_TIMER, NUM_AGENT_STEPS_SAMPLED, @@ -358,20 +357,3 @@ def _sample_and_train_torch_distributed(worker: RolloutWorker): "sample_time": sample_time, "learn_on_batch_time": learn_on_batch_time, } - - -# Deprecated: Use ray.rllib.algorithms.ddppo.DDPPOConfig instead! 
-class _deprecated_default_config(dict): - def __init__(self): - super().__init__(DDPPOConfig().to_dict()) - - @Deprecated( - old="ray.rllib.agents.ppo.ddppo::DEFAULT_CONFIG", - new="ray.rllib.algorithms.ddppo.ddppo::DDPPOConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/dqn/__init__.py b/rllib/algorithms/dqn/__init__.py index 1a1b949de1056..f6046b5850286 100644 --- a/rllib/algorithms/dqn/__init__.py +++ b/rllib/algorithms/dqn/__init__.py @@ -1,4 +1,4 @@ -from ray.rllib.algorithms.dqn.dqn import DQN, DQNConfig, DEFAULT_CONFIG +from ray.rllib.algorithms.dqn.dqn import DQN, DQNConfig from ray.rllib.algorithms.dqn.dqn_tf_policy import DQNTFPolicy from ray.rllib.algorithms.dqn.dqn_torch_policy import DQNTorchPolicy @@ -7,5 +7,4 @@ "DQNConfig", "DQNTFPolicy", "DQNTorchPolicy", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/dqn/dqn.py b/rllib/algorithms/dqn/dqn.py index 00ece2b5790db..cef8eb27814e6 100644 --- a/rllib/algorithms/dqn/dqn.py +++ b/rllib/algorithms/dqn/dqn.py @@ -477,23 +477,6 @@ def training_step(self) -> ResultDict: return train_results -# Deprecated: Use ray.rllib.algorithms.dqn.DQNConfig instead! -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(DQNConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.dqn.dqn.DEFAULT_CONFIG", - new="ray.rllib.algorithms.dqn.dqn.DQNConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() - - @Deprecated(new="Sub-class directly from `DQN` and override its methods", error=True) class GenericOffPolicyTrainer(SimpleQ): pass diff --git a/rllib/algorithms/dqn/dqn_tf_policy.py b/rllib/algorithms/dqn/dqn_tf_policy.py index 70111d613e705..43a0482b13494 100644 --- a/rllib/algorithms/dqn/dqn_tf_policy.py +++ b/rllib/algorithms/dqn/dqn_tf_policy.py @@ -479,7 +479,7 @@ def postprocess_nstep_and_prio( DQNTFPolicy = build_tf_policy( name="DQNTFPolicy", - get_default_config=lambda: ray.rllib.algorithms.dqn.dqn.DEFAULT_CONFIG, + get_default_config=lambda: ray.rllib.algorithms.dqn.dqn.DQNConfig(), make_model=build_q_model, action_distribution_fn=get_distribution_inputs_and_class, loss_fn=build_q_losses, diff --git a/rllib/algorithms/dqn/dqn_torch_policy.py b/rllib/algorithms/dqn/dqn_torch_policy.py index ae745a39c0351..a711792b40396 100644 --- a/rllib/algorithms/dqn/dqn_torch_policy.py +++ b/rllib/algorithms/dqn/dqn_torch_policy.py @@ -487,7 +487,7 @@ def extra_action_out_fn( name="DQNTorchPolicy", framework="torch", loss_fn=build_q_losses, - get_default_config=lambda: ray.rllib.algorithms.dqn.dqn.DEFAULT_CONFIG, + get_default_config=lambda: ray.rllib.algorithms.dqn.dqn.DQNConfig(), make_model_and_action_dist=build_q_model_and_distribution, action_distribution_fn=get_distribution_inputs_and_class, stats_fn=build_q_stats, diff --git a/rllib/algorithms/dreamer/__init__.py b/rllib/algorithms/dreamer/__init__.py index 6b8f494367045..6e5bc9b6403e0 100644 --- a/rllib/algorithms/dreamer/__init__.py +++ b/rllib/algorithms/dreamer/__init__.py @@ -1,11 +1,9 @@ from ray.rllib.algorithms.dreamer.dreamer import ( Dreamer, DreamerConfig, - DEFAULT_CONFIG, ) __all__ = [ "Dreamer", "DreamerConfig", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/dreamer/dreamer.py b/rllib/algorithms/dreamer/dreamer.py index bb6114ed30717..63b3d2243800f 100644 --- a/rllib/algorithms/dreamer/dreamer.py +++ 
b/rllib/algorithms/dreamer/dreamer.py @@ -19,7 +19,6 @@ synchronous_parallel_sample, ) from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.metrics import ( NUM_AGENT_STEPS_SAMPLED, NUM_ENV_STEPS_SAMPLED, @@ -397,20 +396,3 @@ def training_step(self) -> ResultDict: self.local_replay_buffer.add(batch) return fetches - - -# Deprecated: Use ray.rllib.algorithms.dreamer.DreamerConfig instead! -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(DreamerConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.dreamer.dreamer.DEFAULT_CONFIG", - new="ray.rllib.algorithms.dreamer.dreamer.DreamerConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/es/__init__.py b/rllib/algorithms/es/__init__.py index fa10d62897357..3533ac47c7c50 100644 --- a/rllib/algorithms/es/__init__.py +++ b/rllib/algorithms/es/__init__.py @@ -1,5 +1,5 @@ -from ray.rllib.algorithms.es.es import ES, ESConfig, DEFAULT_CONFIG +from ray.rllib.algorithms.es.es import ES, ESConfig from ray.rllib.algorithms.es.es_tf_policy import ESTFPolicy from ray.rllib.algorithms.es.es_torch_policy import ESTorchPolicy -__all__ = ["ES", "ESConfig", "ESTFPolicy", "ESTorchPolicy", "DEFAULT_CONFIG"] +__all__ = ["ES", "ESConfig", "ESTFPolicy", "ESTorchPolicy"] diff --git a/rllib/algorithms/es/es.py b/rllib/algorithms/es/es.py index fd6febc1144c6..18f58897746e8 100644 --- a/rllib/algorithms/es/es.py +++ b/rllib/algorithms/es/es.py @@ -605,20 +605,3 @@ def __setstate__(self, state): FilterManager.synchronize( {DEFAULT_POLICY_ID: self.policy.observation_filter}, self.workers ) - - -# Deprecated: Use ray.rllib.algorithms.es.ESConfig instead! 
-class _deprecated_default_config(dict): - def __init__(self): - super().__init__(ESConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.es.es.DEFAULT_CONFIG", - new="ray.rllib.algorithms.es.es.ESConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/es/es_torch_policy.py b/rllib/algorithms/es/es_torch_policy.py index 12b61f7f8af20..4028702e0ef2e 100644 --- a/rllib/algorithms/es/es_torch_policy.py +++ b/rllib/algorithms/es/es_torch_policy.py @@ -125,7 +125,7 @@ def make_model_and_action_dist(policy, observation_space, action_space, config): name="ESTorchPolicy", framework="torch", loss_fn=None, - get_default_config=lambda: ray.rllib.algorithms.es.es.DEFAULT_CONFIG, + get_default_config=lambda: ray.rllib.algorithms.es.es.ESConfig(), before_init=before_init, after_init=after_init, make_model_and_action_dist=make_model_and_action_dist, diff --git a/rllib/algorithms/impala/__init__.py b/rllib/algorithms/impala/__init__.py index 626022747bf0d..408d069c6ac50 100644 --- a/rllib/algorithms/impala/__init__.py +++ b/rllib/algorithms/impala/__init__.py @@ -1,4 +1,4 @@ -from ray.rllib.algorithms.impala.impala import Impala, ImpalaConfig, DEFAULT_CONFIG +from ray.rllib.algorithms.impala.impala import Impala, ImpalaConfig from ray.rllib.algorithms.impala.impala_tf_policy import ( ImpalaTF1Policy, ImpalaTF2Policy, @@ -11,5 +11,4 @@ "ImpalaTF1Policy", "ImpalaTF2Policy", "ImpalaTorchPolicy", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/impala/impala.py b/rllib/algorithms/impala/impala.py index c8d95bb931411..006a6f7268071 100644 --- a/rllib/algorithms/impala/impala.py +++ b/rllib/algorithms/impala/impala.py @@ -36,7 +36,6 @@ from ray.rllib.utils.metrics import ALL_MODULES from ray.rllib.utils.deprecation import ( DEPRECATED_VALUE, - Deprecated, deprecation_warning, ) from ray.rllib.utils.metrics import ( @@ -1159,20 +1158,3 @@ def process_episodes(self, batch: SampleBatchType) -> SampleBatchType: def get_host(self) -> str: return platform.node() - - -# Deprecated: Use ray.rllib.algorithms.impala.ImpalaConfig instead! 
-class _deprecated_default_config(dict): - def __init__(self): - super().__init__(ImpalaConfig().to_dict()) - - @Deprecated( - old="ray.rllib.agents.impala.impala::DEFAULT_CONFIG", - new="ray.rllib.algorithms.impala.impala::IMPALAConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/maddpg/__init__.py b/rllib/algorithms/maddpg/__init__.py index c6636817f43f1..1722f775f1efe 100644 --- a/rllib/algorithms/maddpg/__init__.py +++ b/rllib/algorithms/maddpg/__init__.py @@ -1,7 +1,6 @@ from ray.rllib.algorithms.maddpg.maddpg import ( MADDPG, MADDPGConfig, - DEFAULT_CONFIG, ) -__all__ = ["MADDPGConfig", "MADDPG", "DEFAULT_CONFIG"] +__all__ = ["MADDPGConfig", "MADDPG"] diff --git a/rllib/algorithms/maddpg/maddpg.py b/rllib/algorithms/maddpg/maddpg.py index 6ed5f4c25efd2..d9cc96fa88e3e 100644 --- a/rllib/algorithms/maddpg/maddpg.py +++ b/rllib/algorithms/maddpg/maddpg.py @@ -17,7 +17,7 @@ from ray.rllib.algorithms.maddpg.maddpg_tf_policy import MADDPGTFPolicy from ray.rllib.policy.policy import Policy from ray.rllib.policy.sample_batch import SampleBatch, MultiAgentBatch -from ray.rllib.utils.annotations import Deprecated, override +from ray.rllib.utils.annotations import override from ray.rllib.utils.deprecation import DEPRECATED_VALUE logger = logging.getLogger(__name__) @@ -310,20 +310,3 @@ def get_default_policy_class( cls, config: AlgorithmConfig ) -> Optional[Type[Policy]]: return MADDPGTFPolicy - - -# Deprecated: Use ray.rllib.algorithms.maddpg.MADDPG instead! -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(MADDPGConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.maddpg.maddpg.DEFAULT_CONFIG", - new="ray.rllib.algorithms.maddpg.maddpg.MADDPGConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/maml/__init__.py b/rllib/algorithms/maml/__init__.py index e7c844068f4aa..0fb24a5499e57 100644 --- a/rllib/algorithms/maml/__init__.py +++ b/rllib/algorithms/maml/__init__.py @@ -1,7 +1,6 @@ -from ray.rllib.algorithms.maml.maml import MAML, MAMLConfig, DEFAULT_CONFIG +from ray.rllib.algorithms.maml.maml import MAML, MAMLConfig __all__ = [ "MAML", "MAMLConfig", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/maml/maml.py b/rllib/algorithms/maml/maml.py index 0c871f0f68e0c..fc9488e72d03d 100644 --- a/rllib/algorithms/maml/maml.py +++ b/rllib/algorithms/maml/maml.py @@ -20,7 +20,7 @@ from ray.rllib.execution.metric_ops import CollectMetrics from ray.rllib.evaluation.metrics import collect_metrics from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated, DEPRECATED_VALUE +from ray.rllib.utils.deprecation import DEPRECATED_VALUE from ray.rllib.utils.metrics.learner_info import LEARNER_INFO from ray.rllib.utils.sgd import standardized from ray.util.iter import from_actors, LocalIterator @@ -378,20 +378,3 @@ def inner_adaptation_steps(itr): ) ) return train_op - - -# Deprecated: Use ray.rllib.algorithms.qmix.qmix.QMixConfig instead! 
-class _deprecated_default_config(dict): - def __init__(self): - super().__init__(MAMLConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.maml.maml.DEFAULT_CONFIG", - new="ray.rllib.algorithms.maml.maml.MAMLConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/maml/maml_torch_policy.py b/rllib/algorithms/maml/maml_torch_policy.py index 5eb68f0c5e035..0285347624029 100644 --- a/rllib/algorithms/maml/maml_torch_policy.py +++ b/rllib/algorithms/maml/maml_torch_policy.py @@ -300,7 +300,7 @@ class MAMLTorchPolicy(ValueNetworkMixin, KLCoeffMixin, TorchPolicyV2): """PyTorch policy class used with MAML.""" def __init__(self, observation_space, action_space, config): - config = dict(ray.rllib.algorithms.maml.maml.DEFAULT_CONFIG, **config) + config = dict(ray.rllib.algorithms.maml.maml.MAMLConfig(), **config) validate_config(config) TorchPolicyV2.__init__( diff --git a/rllib/algorithms/marwil/__init__.py b/rllib/algorithms/marwil/__init__.py index 02fe7b01d09b8..7a6e5d6b07200 100644 --- a/rllib/algorithms/marwil/__init__.py +++ b/rllib/algorithms/marwil/__init__.py @@ -1,5 +1,4 @@ from ray.rllib.algorithms.marwil.marwil import ( - DEFAULT_CONFIG, MARWIL, MARWILConfig, ) @@ -15,6 +14,4 @@ "MARWILTF1Policy", "MARWILTF2Policy", "MARWILTorchPolicy", - # Deprecated. - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/marwil/marwil.py b/rllib/algorithms/marwil/marwil.py index 74c222abafb62..d46ee82b5bff0 100644 --- a/rllib/algorithms/marwil/marwil.py +++ b/rllib/algorithms/marwil/marwil.py @@ -11,7 +11,7 @@ ) from ray.rllib.policy.policy import Policy from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated, deprecation_warning +from ray.rllib.utils.deprecation import deprecation_warning from ray.rllib.utils.metrics import ( NUM_AGENT_STEPS_SAMPLED, NUM_ENV_STEPS_SAMPLED, @@ -266,20 +266,3 @@ def training_step(self) -> ResultDict: self.workers.local_worker().set_global_vars(global_vars) return train_results - - -# Deprecated: Use ray.rllib.algorithms.marwil.MARWILConfig instead! 
-class _deprecated_default_config(dict): - def __init__(self): - super().__init__(MARWILConfig().to_dict()) - - @Deprecated( - old="ray.rllib.agents.marwil.marwil::DEFAULT_CONFIG", - new="ray.rllib.algorithms.marwil.marwil::MARWILConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/mbmpo/__init__.py b/rllib/algorithms/mbmpo/__init__.py index 90eb1c43a9055..16401cbf8364b 100644 --- a/rllib/algorithms/mbmpo/__init__.py +++ b/rllib/algorithms/mbmpo/__init__.py @@ -1,7 +1,6 @@ -from ray.rllib.algorithms.mbmpo.mbmpo import MBMPO, MBMPOConfig, DEFAULT_CONFIG +from ray.rllib.algorithms.mbmpo.mbmpo import MBMPO, MBMPOConfig __all__ = [ "MBMPO", "MBMPOConfig", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/mbmpo/mbmpo.py b/rllib/algorithms/mbmpo/mbmpo.py index cf03cebdce88b..cc35b5703d366 100644 --- a/rllib/algorithms/mbmpo/mbmpo.py +++ b/rllib/algorithms/mbmpo/mbmpo.py @@ -29,7 +29,7 @@ concat_samples, convert_ma_batch_to_sample_batch, ) -from ray.rllib.utils.annotations import Deprecated, override +from ray.rllib.utils.annotations import override from ray.rllib.utils.deprecation import DEPRECATED_VALUE from ray.rllib.utils.metrics.learner_info import LEARNER_INFO from ray.rllib.utils.sgd import standardized @@ -598,20 +598,3 @@ def validate_env(env: EnvType, env_context: EnvContext) -> None: f"Env {env} doest not have a `reward()` method, needed for " "MB-MPO! This `reward()` method should return " ) - - -# Deprecated: Use ray.rllib.algorithms.mbmpo.MBMPOConfig instead! -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(MBMPOConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.mbmpo.mbmpo.DEFAULT_CONFIG", - new="ray.rllib.algorithms.mbmpo.mbmpo.MBMPOConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/pg/__init__.py b/rllib/algorithms/pg/__init__.py index 945c492c8168d..19115c7becdad 100644 --- a/rllib/algorithms/pg/__init__.py +++ b/rllib/algorithms/pg/__init__.py @@ -1,11 +1,10 @@ -from ray.rllib.algorithms.pg.pg import PG, PGConfig, DEFAULT_CONFIG +from ray.rllib.algorithms.pg.pg import PG, PGConfig from ray.rllib.algorithms.pg.pg_tf_policy import PGTF1Policy, PGTF2Policy from ray.rllib.algorithms.pg.pg_torch_policy import PGTorchPolicy from ray.rllib.algorithms.pg.utils import post_process_advantages __all__ = [ - "DEFAULT_CONFIG", "post_process_advantages", "PG", "PGConfig", @@ -13,5 +12,4 @@ "PGTF2Policy", "PGTorchPolicy", "post_process_advantages", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/pg/pg.py b/rllib/algorithms/pg/pg.py index dab6c3d38463e..508c1ceb51e4c 100644 --- a/rllib/algorithms/pg/pg.py +++ b/rllib/algorithms/pg/pg.py @@ -4,7 +4,6 @@ from ray.rllib.algorithms.algorithm_config import AlgorithmConfig, NotProvided from ray.rllib.policy.policy import Policy from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated class PGConfig(AlgorithmConfig): @@ -132,20 +131,3 @@ def get_default_policy_class( from ray.rllib.algorithms.pg.pg_tf_policy import PGTF2Policy return PGTF2Policy - - -# Deprecated: Use ray.rllib.algorithms.pg.PGConfig instead! 
-class _deprecated_default_config(dict): - def __init__(self): - super().__init__(PGConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.pg.default_config::DEFAULT_CONFIG", - new="ray.rllib.algorithms.pg.pg::PGConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/ppo/__init__.py b/rllib/algorithms/ppo/__init__.py index c592b38f782dc..a54946f41ebc0 100644 --- a/rllib/algorithms/ppo/__init__.py +++ b/rllib/algorithms/ppo/__init__.py @@ -1,4 +1,4 @@ -from ray.rllib.algorithms.ppo.ppo import PPOConfig, PPO, DEFAULT_CONFIG +from ray.rllib.algorithms.ppo.ppo import PPOConfig, PPO from ray.rllib.algorithms.ppo.ppo_tf_policy import PPOTF1Policy, PPOTF2Policy from ray.rllib.algorithms.ppo.ppo_torch_policy import PPOTorchPolicy @@ -8,5 +8,4 @@ "PPOTF2Policy", "PPOTorchPolicy", "PPO", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/ppo/ppo.py b/rllib/algorithms/ppo/ppo.py index 0d59f3d8d40ec..18a06a5d660b6 100644 --- a/rllib/algorithms/ppo/ppo.py +++ b/rllib/algorithms/ppo/ppo.py @@ -30,7 +30,6 @@ from ray.rllib.policy.policy import Policy from ray.rllib.utils.annotations import override from ray.rllib.utils.deprecation import ( - Deprecated, DEPRECATED_VALUE, deprecation_warning, ) @@ -215,7 +214,7 @@ def training( """ if vf_share_layers != DEPRECATED_VALUE: deprecation_warning( - old="ppo.DEFAULT_CONFIG['vf_share_layers']", + old="PPOConfig().vf_share_layers", new="PPOConfig().training(model={'vf_share_layers': ...})", error=True, ) @@ -517,20 +516,3 @@ def training_step(self) -> ResultDict: self.workers.local_worker().set_global_vars(global_vars) return train_results - - -# Deprecated: Use ray.rllib.algorithms.ppo.PPOConfig instead! -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(PPOConfig().to_dict()) - - @Deprecated( - old="ray.rllib.agents.ppo.ppo::DEFAULT_CONFIG", - new="ray.rllib.algorithms.ppo.ppo::PPOConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/qmix/__init__.py b/rllib/algorithms/qmix/__init__.py index 77033456c1222..9781e470668eb 100644 --- a/rllib/algorithms/qmix/__init__.py +++ b/rllib/algorithms/qmix/__init__.py @@ -1,3 +1,3 @@ -from ray.rllib.algorithms.qmix.qmix import QMix, QMixConfig, DEFAULT_CONFIG +from ray.rllib.algorithms.qmix.qmix import QMix, QMixConfig -__all__ = ["QMix", "QMixConfig", "DEFAULT_CONFIG"] +__all__ = ["QMix", "QMixConfig"] diff --git a/rllib/algorithms/qmix/qmix.py b/rllib/algorithms/qmix/qmix.py index 79f02332b8ce5..5c00a6a4ac9d6 100644 --- a/rllib/algorithms/qmix/qmix.py +++ b/rllib/algorithms/qmix/qmix.py @@ -13,7 +13,6 @@ ) from ray.rllib.policy.policy import Policy from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.metrics import ( LAST_TARGET_UPDATE_TS, NUM_AGENT_STEPS_SAMPLED, @@ -321,20 +320,3 @@ def training_step(self) -> ResultDict: # Return all collected metrics for the iteration. return train_results - - -# Deprecated: Use ray.rllib.algorithms.qmix.qmix.QMixConfig instead! 
-class _deprecated_default_config(dict): - def __init__(self): - super().__init__(QMixConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.qmix.qmix.DEFAULT_CONFIG", - new="ray.rllib.algorithms.qmix.qmix.QMixConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/sac/rnnsac.py b/rllib/algorithms/sac/rnnsac.py index 0704a7da2b1a5..176e389f4aef3 100644 --- a/rllib/algorithms/sac/rnnsac.py +++ b/rllib/algorithms/sac/rnnsac.py @@ -8,7 +8,7 @@ from ray.rllib.algorithms.sac.rnnsac_torch_policy import RNNSACTorchPolicy from ray.rllib.policy.policy import Policy from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import DEPRECATED_VALUE, Deprecated +from ray.rllib.utils.deprecation import DEPRECATED_VALUE class RNNSACConfig(SACConfig): @@ -124,19 +124,3 @@ def get_default_policy_class( cls, config: AlgorithmConfig ) -> Optional[Type[Policy]]: return RNNSACTorchPolicy - - -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(RNNSACConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.sac.rnnsac.DEFAULT_CONFIG", - new="ray.rllib.algorithms.sac.rnnsac.RNNSACConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/sac/rnnsac_torch_policy.py b/rllib/algorithms/sac/rnnsac_torch_policy.py index 32a562e8be61b..085c287594cbc 100644 --- a/rllib/algorithms/sac/rnnsac_torch_policy.py +++ b/rllib/algorithms/sac/rnnsac_torch_policy.py @@ -478,7 +478,7 @@ def reduce_mean_valid(t): RNNSACTorchPolicy = SACTorchPolicy.with_updates( name="RNNSACPolicy", - get_default_config=lambda: ray.rllib.algorithms.sac.rnnsac.DEFAULT_CONFIG, + get_default_config=lambda: ray.rllib.algorithms.sac.rnnsac.RNNSACConfig(), action_distribution_fn=action_distribution_fn, make_model_and_action_dist=build_sac_model_and_action_dist, loss_fn=actor_critic_loss, diff --git a/rllib/algorithms/sac/sac.py b/rllib/algorithms/sac/sac.py index b26bc26e0698a..f5939edcce157 100644 --- a/rllib/algorithms/sac/sac.py +++ b/rllib/algorithms/sac/sac.py @@ -10,7 +10,6 @@ from ray.rllib.utils.deprecation import ( DEPRECATED_VALUE, deprecation_warning, - Deprecated, ) from ray.rllib.utils.framework import try_import_tf, try_import_tfp @@ -359,20 +358,3 @@ def get_default_policy_class( return SACTorchPolicy else: return SACTFPolicy - - -# Deprecated: Use ray.rllib.algorithms.sac.SACConfig instead! -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(SACConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.sac.sac::DEFAULT_CONFIG", - new="ray.rllib.algorithms.sac.sac::SACConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/sac/sac_tf_policy.py b/rllib/algorithms/sac/sac_tf_policy.py index 32c9ec805d676..a2a72cd96f563 100644 --- a/rllib/algorithms/sac/sac_tf_policy.py +++ b/rllib/algorithms/sac/sac_tf_policy.py @@ -777,7 +777,7 @@ def validate_spaces( # above. 
SACTFPolicy = build_tf_policy( name="SACTFPolicy", - get_default_config=lambda: ray.rllib.algorithms.sac.sac.DEFAULT_CONFIG, + get_default_config=lambda: ray.rllib.algorithms.sac.sac.SACConfig(), make_model=build_sac_model, postprocess_fn=postprocess_trajectory, action_distribution_fn=get_distribution_inputs_and_class, diff --git a/rllib/algorithms/sac/sac_torch_policy.py b/rllib/algorithms/sac/sac_torch_policy.py index 4bb56d2825d40..aa79c7b7bd50b 100644 --- a/rllib/algorithms/sac/sac_torch_policy.py +++ b/rllib/algorithms/sac/sac_torch_policy.py @@ -503,7 +503,7 @@ def setup_late_mixins( name="SACTorchPolicy", framework="torch", loss_fn=actor_critic_loss, - get_default_config=lambda: ray.rllib.algorithms.sac.sac.DEFAULT_CONFIG, + get_default_config=lambda: ray.rllib.algorithms.sac.sac.SACConfig(), stats_fn=stats, postprocess_fn=postprocess_trajectory, extra_grad_process_fn=apply_grad_clipping, diff --git a/rllib/algorithms/simple_q/__init__.py b/rllib/algorithms/simple_q/__init__.py index 9ea347708547e..5ff44c34450ca 100644 --- a/rllib/algorithms/simple_q/__init__.py +++ b/rllib/algorithms/simple_q/__init__.py @@ -1,5 +1,4 @@ from ray.rllib.algorithms.simple_q.simple_q import ( - DEFAULT_CONFIG, SimpleQ, SimpleQConfig, ) @@ -15,5 +14,4 @@ "SimpleQTF1Policy", "SimpleQTF2Policy", "SimpleQTorchPolicy", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/simple_q/simple_q.py b/rllib/algorithms/simple_q/simple_q.py index 38ec3ba48c8be..2f9a8e60bd126 100644 --- a/rllib/algorithms/simple_q/simple_q.py +++ b/rllib/algorithms/simple_q/simple_q.py @@ -24,7 +24,7 @@ from ray.rllib.policy.policy import Policy from ray.rllib.utils import deep_update from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import DEPRECATED_VALUE, Deprecated +from ray.rllib.utils.deprecation import DEPRECATED_VALUE from ray.rllib.utils.metrics import ( LAST_TARGET_UPDATE_TS, NUM_AGENT_STEPS_SAMPLED, @@ -379,20 +379,3 @@ def training_step(self) -> ResultDict: # Return all collected metrics for the iteration. return train_results - - -# Deprecated: Use ray.rllib.algorithms.simple_q.simple_q.SimpleQConfig instead! 
-class _deprecated_default_config(dict): - def __init__(self): - super().__init__(SimpleQConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.dqn.simple_q::DEFAULT_CONFIG", - new="ray.rllib.algorithms.simple_q.simple_q::SimpleQConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/slateq/__init__.py b/rllib/algorithms/slateq/__init__.py index 10733353c83dc..203fb486bcfe2 100644 --- a/rllib/algorithms/slateq/__init__.py +++ b/rllib/algorithms/slateq/__init__.py @@ -1,7 +1,6 @@ from ray.rllib.algorithms.slateq.slateq import ( SlateQ, SlateQConfig, - DEFAULT_CONFIG, ) from ray.rllib.algorithms.slateq.slateq_tf_policy import SlateQTFPolicy from ray.rllib.algorithms.slateq.slateq_torch_policy import SlateQTorchPolicy @@ -11,5 +10,4 @@ "SlateQConfig", "SlateQTFPolicy", "SlateQTorchPolicy", - "DEFAULT_CONFIG", ] diff --git a/rllib/algorithms/slateq/slateq.py b/rllib/algorithms/slateq/slateq.py index a068c85634e18..c912f98c4fcc5 100644 --- a/rllib/algorithms/slateq/slateq.py +++ b/rllib/algorithms/slateq/slateq.py @@ -21,7 +21,7 @@ from ray.rllib.algorithms.slateq.slateq_torch_policy import SlateQTorchPolicy from ray.rllib.policy.policy import Policy from ray.rllib.utils.annotations import override -from ray.rllib.utils.deprecation import Deprecated, DEPRECATED_VALUE +from ray.rllib.utils.deprecation import DEPRECATED_VALUE logger = logging.getLogger(__name__) @@ -241,20 +241,3 @@ def get_default_policy_class( return SlateQTorchPolicy else: return SlateQTFPolicy - - -# Deprecated: Use ray.rllib.algorithms.slateq.SlateQConfig instead! -class _deprecated_default_config(dict): - def __init__(self): - super().__init__(SlateQConfig().to_dict()) - - @Deprecated( - old="ray.rllib.algorithms.slateq.slateq::DEFAULT_CONFIG", - new="ray.rllib.algorithms.slateq.slateq::SlateQConfig(...)", - error=True, - ) - def __getitem__(self, item): - return super().__getitem__(item) - - -DEFAULT_CONFIG = _deprecated_default_config() diff --git a/rllib/algorithms/slateq/slateq_tf_policy.py b/rllib/algorithms/slateq/slateq_tf_policy.py index 91a8763203086..c6145bced515a 100644 --- a/rllib/algorithms/slateq/slateq_tf_policy.py +++ b/rllib/algorithms/slateq/slateq_tf_policy.py @@ -364,7 +364,7 @@ def rmsprop_optimizer( SlateQTFPolicy = build_tf_policy( name="SlateQTFPolicy", - get_default_config=lambda: ray.rllib.algorithms.slateq.slateq.DEFAULT_CONFIG, + get_default_config=lambda: ray.rllib.algorithms.slateq.slateq.SlateQConfig(), # Build model, loss functions, and optimizers make_model=build_slateq_model, loss_fn=build_slateq_losses, diff --git a/rllib/algorithms/slateq/slateq_torch_policy.py b/rllib/algorithms/slateq/slateq_torch_policy.py index f46ea2c86c229..ea21d062d961e 100644 --- a/rllib/algorithms/slateq/slateq_torch_policy.py +++ b/rllib/algorithms/slateq/slateq_torch_policy.py @@ -421,7 +421,7 @@ def setup_late_mixins( SlateQTorchPolicy = build_policy_class( name="SlateQTorchPolicy", framework="torch", - get_default_config=lambda: ray.rllib.algorithms.slateq.slateq.DEFAULT_CONFIG, + get_default_config=lambda: ray.rllib.algorithms.slateq.slateq.SlateQConfig(), before_init=setup_early, after_init=setup_late_mixins, loss_fn=build_slateq_losses, diff --git a/rllib/tests/test_local.py b/rllib/tests/test_local.py index ec884eba20afa..2cd97459d702d 100644 --- a/rllib/tests/test_local.py +++ b/rllib/tests/test_local.py @@ -1,7 +1,7 @@ import unittest import ray -from 
ray.rllib.algorithms.pg import PG, DEFAULT_CONFIG +from ray.rllib.algorithms.pg import PG, PGConfig from ray.rllib.utils.test_utils import framework_iterator @@ -13,7 +13,7 @@ def tearDown(self) -> None: ray.shutdown() def test_local(self): - cf = DEFAULT_CONFIG.copy() + cf = PGConfig() cf["model"]["fcnet_hiddens"] = [10] cf["num_workers"] = 2 diff --git a/rllib/tests/test_nested_action_spaces.py b/rllib/tests/test_nested_action_spaces.py index 054416f533eba..4ebaaf3b7a875 100644 --- a/rllib/tests/test_nested_action_spaces.py +++ b/rllib/tests/test_nested_action_spaces.py @@ -7,7 +7,7 @@ import ray from ray.rllib.algorithms.bc import BC -from ray.rllib.algorithms.pg import PG, DEFAULT_CONFIG +from ray.rllib.algorithms.pg import PG, PGConfig from ray.rllib.examples.env.random_env import RandomEnv from ray.rllib.offline.json_reader import JsonReader from ray.rllib.policy.sample_batch import convert_ma_batch_to_sample_batch @@ -59,7 +59,7 @@ def tearDownClass(cls): ray.shutdown() def test_nested_action_spaces(self): - config = DEFAULT_CONFIG.copy() + config = PGConfig() config["env"] = RandomEnv # Write output to check, whether actions are written correctly. tmp_dir = os.popen("mktemp -d").read()[:-1] diff --git a/rllib/tests/test_placement_groups.py b/rllib/tests/test_placement_groups.py index 63e52cdef1685..d50f2aa93234f 100644 --- a/rllib/tests/test_placement_groups.py +++ b/rllib/tests/test_placement_groups.py @@ -5,7 +5,7 @@ from ray import air from ray import tune from ray.tune import Callback -from ray.rllib.algorithms.pg import PG, DEFAULT_CONFIG +from ray.rllib.algorithms.pg import PG, PGConfig from ray.tune.experiment import Trial from ray.tune.execution.placement_groups import PlacementGroupFactory @@ -32,7 +32,7 @@ def tearDown(self) -> None: ray.shutdown() def test_overriding_default_resource_request(self): - config = DEFAULT_CONFIG.copy() + config = PGConfig() config["model"]["fcnet_hiddens"] = [10] config["num_workers"] = 2 # 3 Trials: Can only run 2 at a time (num_cpus=6; needed: 3). 
@@ -66,7 +66,7 @@ def default_resource_request(cls, config):
         ).fit()
 
     def test_default_resource_request(self):
-        config = DEFAULT_CONFIG.copy()
+        config = PGConfig()
         config["model"]["fcnet_hiddens"] = [10]
         config["num_workers"] = 2
         config["num_cpus_per_worker"] = 2
@@ -88,7 +88,7 @@ def test_default_resource_request(self):
         ).fit()
 
     def test_default_resource_request_plus_manual_leads_to_error(self):
-        config = DEFAULT_CONFIG.copy()
+        config = PGConfig()
         config["model"]["fcnet_hiddens"] = [10]
         config["num_workers"] = 0
         config["env"] = "CartPole-v1"

From bcd62377905b9b617ae6c79253bfae49dccd0107 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst
Date: Mon, 13 Mar 2023 10:12:27 -0700
Subject: [PATCH 05/11] Fix missing config in test_multi_agent_env and missing
 configs in multi_agent_two_trainers

Signed-off-by: Artur Niederfahrenhorst

---
 rllib/env/tests/test_multi_agent_env.py    |  3 +-
 rllib/examples/multi_agent_two_trainers.py | 74 ++++++++++++----------
 2 files changed, 42 insertions(+), 35 deletions(-)

diff --git a/rllib/env/tests/test_multi_agent_env.py b/rllib/env/tests/test_multi_agent_env.py
index 85a6b6051bcaa..23c66160a2006 100644
--- a/rllib/env/tests/test_multi_agent_env.py
+++ b/rllib/env/tests/test_multi_agent_env.py
@@ -7,6 +7,7 @@
 import ray
 from ray.tune.registry import register_env
 from ray.rllib.algorithms.algorithm_config import AlgorithmConfig
+from ray.rllib.algorithms.dqn.dqn import DQNConfig
 from ray.rllib.algorithms.dqn.dqn_tf_policy import DQNTFPolicy
 from ray.rllib.algorithms.pg import PGConfig
 from ray.rllib.algorithms.ppo import PPOConfig
@@ -447,7 +448,7 @@ def compute_actions_from_input_dict(
         ev = RolloutWorker(
             env_creator=lambda _: MultiAgentCartPole({"num_agents": 2}),
             default_policy_class=ModelBasedPolicy,
-            config=AlgorithmConfig()
+            config=DQNConfig()
             .rollouts(
                 rollout_fragment_length=5,
                 num_rollout_workers=0,
diff --git a/rllib/examples/multi_agent_two_trainers.py b/rllib/examples/multi_agent_two_trainers.py
index ee4d28f3078e6..5b1f060468fa2 100644
--- a/rllib/examples/multi_agent_two_trainers.py
+++ b/rllib/examples/multi_agent_two_trainers.py
@@ -9,6 +9,7 @@
 """
 
 import argparse
+
 import gymnasium as gym
 import os
 
@@ -77,29 +78,7 @@ def select_policy(algorithm, framework):
     else:
         raise ValueError("Unknown algorithm: ", algorithm)
 
-    # You can also have multiple policies per algorithm, but here we just
-    # show one each for PPO and DQN.
-    policies = {
-        "ppo_policy": (
-            select_policy("PPO", args.framework),
-            obs_space,
-            act_space,
-            {},
-        ),
-        "dqn_policy": (
-            select_policy("DQN", args.framework),
-            obs_space,
-            act_space,
-            {},
-        ),
-    }
-
-    def policy_mapping_fn(agent_id, episode, worker, **kwargs):
-        if agent_id % 2 == 0:
-            return "ppo_policy"
-        else:
-            return "dqn_policy"
-
+    # Construct two independent Algorithm configs
     ppo_config = (
         PPOConfig()
         .environment("multi_agent_cartpole")
@@ -112,15 +91,9 @@ def policy_mapping_fn(agent_id, episode, worker, **kwargs):
             vf_loss_coeff=0.01,
             num_sgd_iter=6,
         )
-        .multi_agent(
-            policies=policies,
-            policy_mapping_fn=policy_mapping_fn,
-            policies_to_train=["ppo_policy"],
-        )
         # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
         .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
     )
-    ppo = ppo_config.build()
 
     dqn_config = (
         DQNConfig()
@@ -134,14 +107,47 @@ def policy_mapping_fn(agent_id, episode, worker, **kwargs):
             n_step=3,
             gamma=0.95,
         )
-        .multi_agent(
-            policies=policies,
-            policy_mapping_fn=policy_mapping_fn,
-            policies_to_train=["dqn_policy"],
-        )
         # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
         .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
     )
+
+    # Specify two policies, each with their own config created above
+    # You can also have multiple policies per algorithm, but here we just
+    # show one each for PPO and DQN.
+    policies = {
+        "ppo_policy": (
+            select_policy("PPO", args.framework),
+            obs_space,
+            act_space,
+            ppo_config,
+        ),
+        "dqn_policy": (
+            select_policy("DQN", args.framework),
+            obs_space,
+            act_space,
+            dqn_config,
+        ),
+    }
+
+    def policy_mapping_fn(agent_id, episode, worker, **kwargs):
+        if agent_id % 2 == 0:
+            return "ppo_policy"
+        else:
+            return "dqn_policy"
+
+    # Add multi-agent configuration options to both configs and build them.
+    ppo_config.multi_agent(
+        policies=policies,
+        policy_mapping_fn=policy_mapping_fn,
+        policies_to_train=["ppo_policy"],
+    )
+    ppo = ppo_config.build()
+
+    dqn_config.multi_agent(
+        policies=policies,
+        policy_mapping_fn=policy_mapping_fn,
+        policies_to_train=["dqn_policy"],
+    )
     dqn = dqn_config.build()
 
     # You should see both the printed X and Y approach 200 as this trains:

From fb3034b4029758f1d7da86dd84016cbbf51c570e Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst
Date: Mon, 13 Mar 2023 12:59:47 -0700
Subject: [PATCH 06/11] Use .get() to read _enable_rl_module_api for backward
 compatibility

Signed-off-by: Artur Niederfahrenhorst

---
 rllib/evaluation/postprocessing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rllib/evaluation/postprocessing.py b/rllib/evaluation/postprocessing.py
index 273a89a62e2cb..a3d801f334f33 100644
--- a/rllib/evaluation/postprocessing.py
+++ b/rllib/evaluation/postprocessing.py
@@ -188,7 +188,7 @@ def compute_gae_for_sample_batch(
             policy.model.view_requirements, index="last"
         )
 
-    if policy.config["_enable_rl_module_api"]:
+    if policy.config.get("_enable_rl_module_api"):
         # Note: During sampling you are using the parameters at the beginning of
         # the sampling process. If I'll be using this advantages during training
         # should it not be the latest parameters during training for this to be

From 2fa6ccf2c4cf0ed78032a17a269dec18447ad960 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst
Date: Mon, 13 Mar 2023 13:17:22 -0700
Subject: [PATCH 07/11] Fix test_nested_action_spaces and remove V1 backward
 compatibility test

Signed-off-by: Artur Niederfahrenhorst

---
 .../backward_compat/test_backward_compat.py | 28 -------------------
 rllib/tests/test_nested_action_spaces.py    |  8 +++---
 2 files changed, 4 insertions(+), 32 deletions(-)

diff --git a/rllib/tests/backward_compat/test_backward_compat.py b/rllib/tests/backward_compat/test_backward_compat.py
index 77f421af106c2..c5e2d19a46aeb 100644
--- a/rllib/tests/backward_compat/test_backward_compat.py
+++ b/rllib/tests/backward_compat/test_backward_compat.py
@@ -81,34 +81,6 @@ def test_old_checkpoint_formats(self):
             print(algo.train())
             algo.stop()
 
-    def test_v1_policy_from_checkpoint(self):
-        """Tests, whether we can load Policy checkpoints for different frameworks."""
-
-        # We wouldn't need this test once we get rid of V1 policy implementations.
- - rllib_dir = Path(__file__).parent.parent.parent - print(f"rllib dir={rllib_dir} exists={os.path.isdir(rllib_dir)}") - - for fw in framework_iterator(with_eager_tracing=True): - path_to_checkpoint = os.path.join( - rllib_dir, - "tests", - "backward_compat", - "checkpoints", - "v1.0", - "dqn_frozenlake_" + fw, - "policies", - "default_policy", - ) - - print( - f"path_to_checkpoint={path_to_checkpoint} " - f"exists={os.path.isdir(path_to_checkpoint)}" - ) - - policy = Policy.from_checkpoint(path_to_checkpoint) - self.assertTrue(isinstance(policy, Policy)) - def test_old_algorithm_config_dicts(self): """Tests, whether we can build Algorithm objects with old config dicts.""" diff --git a/rllib/tests/test_nested_action_spaces.py b/rllib/tests/test_nested_action_spaces.py index 4ebaaf3b7a875..221fd06256f93 100644 --- a/rllib/tests/test_nested_action_spaces.py +++ b/rllib/tests/test_nested_action_spaces.py @@ -7,7 +7,7 @@ import ray from ray.rllib.algorithms.bc import BC -from ray.rllib.algorithms.pg import PG, PGConfig +from ray.rllib.algorithms.pg import PGConfig from ray.rllib.examples.env.random_env import RandomEnv from ray.rllib.offline.json_reader import JsonReader from ray.rllib.policy.sample_batch import convert_ma_batch_to_sample_batch @@ -76,7 +76,7 @@ def test_nested_action_spaces(self): config["actions_in_input_normalized"] = True # Remove lr schedule from config, not needed here, and not supported by BC. - del config["lr_schedule"] + del config.lr_schedule for _ in framework_iterator(config): for name, action_space in SPACES.items(): config["env_config"] = { @@ -86,7 +86,7 @@ def test_nested_action_spaces(self): print(f"A={action_space} flatten={flatten}") shutil.rmtree(config["output"]) config["_disable_action_flattening"] = not flatten - pg = PG(config) + pg = config.build() pg.train() pg.stop() @@ -117,7 +117,7 @@ def test_nested_action_spaces(self): ioctx.config["input_config"]["paths"], ioctx ) config["input_config"] = {"paths": config["output"]} - del config["output"] + del config.output bc = BC(config=config) bc.train() bc.stop() From 4d1e82483f33c06581f163628cbbdd0cc6a0a5a7 Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Mon, 13 Mar 2023 14:34:55 -0700 Subject: [PATCH 08/11] Fix a couple of tests using old config dicts Signed-off-by: Artur Niederfahrenhorst --- rllib/tests/test_gpus.py | 27 ++++++++--------- rllib/tests/test_local.py | 14 ++++----- rllib/tests/test_placement_groups.py | 45 +++++++++++++++++----------- 3 files changed, 46 insertions(+), 40 deletions(-) diff --git a/rllib/tests/test_gpus.py b/rllib/tests/test_gpus.py index e77c8cfdd9bd0..177d1e4f57ab4 100644 --- a/rllib/tests/test_gpus.py +++ b/rllib/tests/test_gpus.py @@ -18,9 +18,7 @@ def test_gpus_in_non_local_mode(self): actual_gpus = torch.cuda.device_count() print(f"Actual GPUs found (by torch): {actual_gpus}") - config = A2CConfig() - config["num_workers"] = 2 - config["env"] = "CartPole-v1" + config = A2CConfig().rollouts(num_rollout_workers=2).environment("CartPole-v1") # Expect errors when we run a config w/ num_gpus>0 w/o a GPU # and _fake_gpus=False. 
@@ -32,9 +30,11 @@ def test_gpus_in_non_local_mode(self): ) for num_gpus_per_worker in per_worker: for fake_gpus in [False] + ([] if num_gpus == 0 else [True]): - config["num_gpus"] = num_gpus - config["num_gpus_per_worker"] = num_gpus_per_worker - config["_fake_gpus"] = fake_gpus + config.resources( + num_gpus=num_gpus, + num_gpus_per_worker=num_gpus_per_worker, + _fake_gpus=fake_gpus, + ) print( f"\n------------\nnum_gpus={num_gpus} " @@ -65,8 +65,8 @@ def test_gpus_in_non_local_mode(self): # expect no error. else: print("direct RLlib") - trainer = A2C(config, env="CartPole-v1") - trainer.stop() + algo = config.build() + algo.stop() # Cannot run through ray.tune.Tuner().fit() w/ fake GPUs # as it would simply wait infinitely for the # resources to become available (even though, we @@ -88,22 +88,19 @@ def test_gpus_in_local_mode(self): actual_gpus_available = torch.cuda.device_count() - config = A2CConfig() - config["num_workers"] = 2 - config["env"] = "CartPole-v1" + config = A2CConfig().rollouts(num_rollout_workers=2).environment("CartPole-v1") # Expect no errors in local mode. for num_gpus in [0, 0.1, 1, actual_gpus_available + 4]: print(f"num_gpus={num_gpus}") for fake_gpus in [False, True]: print(f"_fake_gpus={fake_gpus}") - config["num_gpus"] = num_gpus - config["_fake_gpus"] = fake_gpus + config.resources(num_gpus=num_gpus, _fake_gpus=fake_gpus) frameworks = ("tf", "torch") if num_gpus > 1 else ("tf2", "tf", "torch") for _ in framework_iterator(config, frameworks=frameworks): print("direct RLlib") - trainer = A2C(config, env="CartPole-v1") - trainer.stop() + algo = config.build() + algo.stop() print("via ray.tune.Tuner().fit()") tune.Tuner( "A2C", diff --git a/rllib/tests/test_local.py b/rllib/tests/test_local.py index 2cd97459d702d..0d35cb06eff9d 100644 --- a/rllib/tests/test_local.py +++ b/rllib/tests/test_local.py @@ -1,7 +1,7 @@ import unittest import ray -from ray.rllib.algorithms.pg import PG, PGConfig +from ray.rllib.algorithms.pg import PGConfig from ray.rllib.utils.test_utils import framework_iterator @@ -13,14 +13,14 @@ def tearDown(self) -> None: ray.shutdown() def test_local(self): - cf = PGConfig() - cf["model"]["fcnet_hiddens"] = [10] - cf["num_workers"] = 2 + cf = PGConfig().environment("CartPole-v1") + cf.model["fcnet_hiddens"] = [10] + cf.num_rollout_workers = 2 for _ in framework_iterator(cf): - agent = PG(cf, "CartPole-v1") - print(agent.train()) - agent.stop() + algo = cf.build() + print(algo.train()) + algo.stop() if __name__ == "__main__": diff --git a/rllib/tests/test_placement_groups.py b/rllib/tests/test_placement_groups.py index d50f2aa93234f..54ca5e8f97ae0 100644 --- a/rllib/tests/test_placement_groups.py +++ b/rllib/tests/test_placement_groups.py @@ -32,13 +32,16 @@ def tearDown(self) -> None: ray.shutdown() def test_overriding_default_resource_request(self): - config = PGConfig() - config["model"]["fcnet_hiddens"] = [10] - config["num_workers"] = 2 # 3 Trials: Can only run 2 at a time (num_cpus=6; needed: 3). - config["lr"] = tune.grid_search([0.1, 0.01, 0.001]) - config["env"] = "CartPole-v1" - config["framework"] = "tf" + config = ( + PGConfig() + .training( + model={"fcnet_hiddens": [10]}, lr=tune.grid_search([0.1, 0.01, 0.001]) + ) + .environment("CartPole-v1") + .rollouts(num_rollout_workers=2) + .framework("tf") + ) # Create an Algorithm with an overridden default_resource_request # method that returns a PlacementGroupFactory. 
@@ -66,15 +69,19 @@ def default_resource_request(cls, config): ).fit() def test_default_resource_request(self): - config = PGConfig() - config["model"]["fcnet_hiddens"] = [10] - config["num_workers"] = 2 - config["num_cpus_per_worker"] = 2 + config = ( + PGConfig() + .rollouts( + num_rollout_workers=2, + ) + .training( + model={"fcnet_hiddens": [10]}, lr=tune.grid_search([0.1, 0.01, 0.001]) + ) + .environment("CartPole-v1") + .framework("torch") + .resources(placement_strategy="SPREAD", num_cpus_per_worker=2) + ) # 3 Trials: Can only run 1 at a time (num_cpus=6; needed: 5). - config["lr"] = tune.grid_search([0.1, 0.01, 0.001]) - config["env"] = "CartPole-v1" - config["framework"] = "torch" - config["placement_strategy"] = "SPREAD" tune.Tuner( PG, @@ -88,10 +95,12 @@ def test_default_resource_request(self): ).fit() def test_default_resource_request_plus_manual_leads_to_error(self): - config = PGConfig() - config["model"]["fcnet_hiddens"] = [10] - config["num_workers"] = 0 - config["env"] = "CartPole-v1" + config = ( + PGConfig() + .training(model={"fcnet_hiddens": [10]}) + .environment("CartPole-v1") + .rollouts(num_rollout_workers=0) + ) try: tune.Tuner( From cd0f20d5d5e1bde94f5a87a5a25412cb2be7d2e4 Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Mon, 13 Mar 2023 14:39:40 -0700 Subject: [PATCH 09/11] remove local_mode Signed-off-by: Artur Niederfahrenhorst --- rllib/algorithms/ars/tests/test_ars.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rllib/algorithms/ars/tests/test_ars.py b/rllib/algorithms/ars/tests/test_ars.py index 22058d79373d3..f2da7c84735bc 100644 --- a/rllib/algorithms/ars/tests/test_ars.py +++ b/rllib/algorithms/ars/tests/test_ars.py @@ -8,7 +8,7 @@ class TestARS(unittest.TestCase): @classmethod def setUpClass(cls): - ray.init(num_cpus=3, local_mode=True) + ray.init(num_cpus=3) @classmethod def tearDownClass(cls): From 977577b2951f8a388f4677ac07257d9124af468b Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Mon, 13 Mar 2023 16:16:54 -0700 Subject: [PATCH 10/11] fix test_gpus Signed-off-by: Artur Niederfahrenhorst --- rllib/tests/test_gpus.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rllib/tests/test_gpus.py b/rllib/tests/test_gpus.py index 177d1e4f57ab4..3d01901a5db63 100644 --- a/rllib/tests/test_gpus.py +++ b/rllib/tests/test_gpus.py @@ -2,7 +2,7 @@ import ray from ray import air -from ray.rllib.algorithms.a2c.a2c import A2C, A2CConfig +from ray.rllib.algorithms.a2c.a2c import A2CConfig from ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.test_utils import framework_iterator from ray import tune @@ -59,7 +59,7 @@ def test_gpus_in_non_local_mode(self): self.assertRaisesRegex( RuntimeError, "Found 0 GPUs on your machine", - lambda: A2C(config, env="CartPole-v1"), + lambda: config.build(), ) # If actual_gpus >= num_gpus or faked, # expect no error. 
From f7316427e22c586054c6ed3d6453d9a9382c95c6 Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Wed, 15 Mar 2023 13:46:13 -0700 Subject: [PATCH 11/11] Sven's nit Signed-off-by: Artur Niederfahrenhorst --- rllib/tests/test_nested_action_spaces.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rllib/tests/test_nested_action_spaces.py b/rllib/tests/test_nested_action_spaces.py index 221fd06256f93..6943bd5a2bee9 100644 --- a/rllib/tests/test_nested_action_spaces.py +++ b/rllib/tests/test_nested_action_spaces.py @@ -76,7 +76,7 @@ def test_nested_action_spaces(self): config["actions_in_input_normalized"] = True # Remove lr schedule from config, not needed here, and not supported by BC. - del config.lr_schedule + config.lr_schedule = None for _ in framework_iterator(config): for name, action_space in SPACES.items(): config["env_config"] = { @@ -117,7 +117,7 @@ def test_nested_action_spaces(self): ioctx.config["input_config"]["paths"], ioctx ) config["input_config"] = {"paths": config["output"]} - del config.output + config.output = None bc = BC(config=config) bc.train() bc.stop()
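
Closing note on the DEFAULT_CONFIG removals earlier in this series: each
deleted shim was a dict subclass whose __getitem__ was wrapped with
@Deprecated(..., error=True), so every key access already raised with a
pointer to the replacement Config class; deleting the shims only turns that
runtime error into an import error. A self-contained sketch of the mechanism
in plain Python — the decorator below is a simplified stand-in for RLlib's
Deprecated decorator, and the dict contents stand in for PGConfig().to_dict():

    def deprecated(old, new, error):
        # Simplified stand-in for RLlib's @Deprecated decorator.
        def wrap(method):
            def wrapper(*args, **kwargs):
                msg = f"`{old}` has been deprecated. Use `{new}` instead."
                if error:
                    raise DeprecationWarning(msg)
                return method(*args, **kwargs)
            return wrapper
        return wrap

    class _deprecated_default_config(dict):
        def __init__(self):
            # Placeholder contents; the real shim used PGConfig().to_dict().
            super().__init__({"gamma": 0.99, "lr": 0.0004})

        @deprecated(
            old="ray.rllib.algorithms.pg.default_config::DEFAULT_CONFIG",
            new="ray.rllib.algorithms.pg.pg::PGConfig(...)",
            error=True,
        )
        def __getitem__(self, item):
            return super().__getitem__(item)

    DEFAULT_CONFIG = _deprecated_default_config()
    # Constructing the shim is fine, but any read now raises:
    #   DEFAULT_CONFIG["gamma"]  -> DeprecationWarning pointing at PGConfig(...)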