From 03ea4f6663fafaf64b8d10ac8db8e962302be561 Mon Sep 17 00:00:00 2001 From: Sven Mika Date: Sun, 10 Nov 2024 17:34:57 +0100 Subject: [PATCH] [RLlib] New API stack: On by default for BC/MARWIL/CQL. (#48599) --- rllib/BUILD | 5 +- rllib/algorithms/algorithm_config.py | 61 ++++++++----------- rllib/algorithms/appo/appo.py | 8 +-- rllib/algorithms/appo/tests/test_appo.py | 4 +- .../bc/tests/test_bc_old_api_stack.py | 4 ++ rllib/algorithms/cql/cql.py | 10 --- ...{test_cql.py => test_cql_old_api_stack.py} | 4 ++ rllib/algorithms/dqn/dqn.py | 21 +++---- rllib/algorithms/impala/impala.py | 8 +-- rllib/algorithms/marwil/marwil.py | 22 +++---- .../marwil/tests/test_marwil_old_api_stack.py | 12 ++++ rllib/algorithms/ppo/ppo.py | 10 +-- rllib/algorithms/sac/sac.py | 26 ++++---- rllib/algorithms/tests/test_algorithm.py | 7 ++- rllib/env/single_agent_env_runner.py | 2 +- rllib/env/tests/test_env_runner_group.py | 54 ++++++++-------- rllib/examples/offline_rl/offline_rl.py | 4 ++ rllib/offline/estimators/tests/utils.py | 4 ++ .../offline/tests/test_feature_importance.py | 10 ++- rllib/utils/torch_utils.py | 12 +--- 20 files changed, 135 insertions(+), 153 deletions(-) rename rllib/algorithms/cql/tests/{test_cql.py => test_cql_old_api_stack.py} (96%) diff --git a/rllib/BUILD b/rllib/BUILD index 71d8ed4b234c..1b281b62ba65 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -948,12 +948,13 @@ py_test( ) # CQL +# @OldAPIStack py_test( - name = "test_cql", + name = "test_cql_old_api_stack", tags = ["team:rllib", "algorithms_dir"], size = "large", data = ["tests/data/pendulum/small.json"], - srcs = ["algorithms/cql/tests/test_cql.py"] + srcs = ["algorithms/cql/tests/test_cql_old_api_stack.py"] ) # DQN diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py index 564d226bd631..542240a00dac 100644 --- a/rllib/algorithms/algorithm_config.py +++ b/rllib/algorithms/algorithm_config.py @@ -82,25 +82,6 @@ Space = gym.Space -"""TODO(jungong, sven): in "offline_data" we can potentially unify all input types -under input and input_config keys. E.g. -input: sample -input_config { -env: CartPole-v1 -} -or: -input: json_reader -input_config { -path: /tmp/ -} -or: -input: dataset -input_config { -format: parquet -path: /tmp/ -} -""" - if TYPE_CHECKING: from ray.rllib.algorithms.algorithm import Algorithm @@ -131,12 +112,13 @@ class AlgorithmConfig(_Config): from ray.rllib.algorithms.callbacks import MemoryTrackingCallbacks # Construct a generic config object, specifying values within different # sub-categories, e.g. "training". - config = (PPOConfig().training(gamma=0.9, lr=0.01) - .environment(env="CartPole-v1") - .resources(num_gpus=0) - .env_runners(num_env_runners=0) - .callbacks(MemoryTrackingCallbacks) - ) + config = ( + PPOConfig() + .training(gamma=0.9, lr=0.01) + .environment(env="CartPole-v1") + .env_runners(num_env_runners=0) + .callbacks(MemoryTrackingCallbacks) + ) # A config object can be used to construct the respective Algorithm. rllib_algo = config.build() @@ -321,10 +303,6 @@ def __init__(self, algo_class: Optional[type] = None): # Default setting for skipping `nan` gradient updates. 
self.torch_skip_nan_gradients = False - # `self.api_stack()` - self.enable_rl_module_and_learner = False - self.enable_env_runner_and_connector_v2 = False - # `self.environment()` self.env = None self.env_config = {} @@ -425,7 +403,19 @@ def __init__(self, algo_class: Optional[type] = None): self.explore = True # This is not compatible with RLModules, which have a method # `forward_exploration` to specify custom exploration behavior. - self.exploration_config = {} + if not hasattr(self, "exploration_config"): + # Helper to keep track of the original exploration config when dis-/enabling + # rl modules. + self._prior_exploration_config = None + self.exploration_config = {} + + # `self.api_stack()` + self.enable_rl_module_and_learner = True + self.enable_env_runner_and_connector_v2 = True + self.api_stack( + enable_rl_module_and_learner=True, + enable_env_runner_and_connector_v2=True, + ) # `self.multi_agent()` # TODO (sven): Prepare multi-agent setup for logging each agent's and each @@ -549,9 +539,6 @@ def __init__(self, algo_class: Optional[type] = None): # `self.rl_module()` self._model_config = {} self._rl_module_spec = None - # Helper to keep track of the original exploration config when dis-/enabling - # rl modules. - self.__prior_exploration_config = None # Module ID specific config overrides. self.algorithm_config_overrides_per_module = {} # Cached, actual AlgorithmConfig objects derived from @@ -1612,13 +1599,13 @@ def api_stack( self.enable_rl_module_and_learner = enable_rl_module_and_learner if enable_rl_module_and_learner is True and self.exploration_config: - self.__prior_exploration_config = self.exploration_config + self._prior_exploration_config = self.exploration_config self.exploration_config = {} elif enable_rl_module_and_learner is False and not self.exploration_config: - if self.__prior_exploration_config is not None: - self.exploration_config = self.__prior_exploration_config - self.__prior_exploration_config = None + if self._prior_exploration_config is not None: + self.exploration_config = self._prior_exploration_config + self._prior_exploration_config = None else: logger.warning( "config.enable_rl_module_and_learner was set to False, but no " diff --git a/rllib/algorithms/appo/appo.py b/rllib/algorithms/appo/appo.py index a623627122aa..99d32099b16b 100644 --- a/rllib/algorithms/appo/appo.py +++ b/rllib/algorithms/appo/appo.py @@ -88,8 +88,6 @@ class APPOConfig(IMPALAConfig): def __init__(self, algo_class=None): """Initializes a APPOConfig instance.""" - super().__init__(algo_class=algo_class or APPO) - self.exploration_config = { # The Exploration class to use. In the simplest case, this is the name # (str) of any class present in the `rllib.utils.exploration` package. @@ -100,6 +98,8 @@ def __init__(self, algo_class=None): # Add constructor kwargs here (if any). 
} + super().__init__(algo_class=algo_class or APPO) + # fmt: off # __sphinx_doc_begin__ # APPO specific settings: @@ -138,10 +138,6 @@ def __init__(self, algo_class=None): self.vf_loss_coeff = 0.5 self.entropy_coeff = 0.01 self.tau = 1.0 - self.api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) # __sphinx_doc_end__ # fmt: on diff --git a/rllib/algorithms/appo/tests/test_appo.py b/rllib/algorithms/appo/tests/test_appo.py index 988cb7968044..e58eea2c782d 100644 --- a/rllib/algorithms/appo/tests/test_appo.py +++ b/rllib/algorithms/appo/tests/test_appo.py @@ -4,9 +4,7 @@ import ray.rllib.algorithms.appo as appo from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID -from ray.rllib.utils.metrics import ( - LEARNER_RESULTS, -) +from ray.rllib.utils.metrics import LEARNER_RESULTS from ray.rllib.utils.test_utils import ( check_train_results, check_train_results_new_api_stack, diff --git a/rllib/algorithms/bc/tests/test_bc_old_api_stack.py b/rllib/algorithms/bc/tests/test_bc_old_api_stack.py index d564121fe028..335a751376ad 100644 --- a/rllib/algorithms/bc/tests/test_bc_old_api_stack.py +++ b/rllib/algorithms/bc/tests/test_bc_old_api_stack.py @@ -37,6 +37,10 @@ def test_bc_compilation_and_learning_from_offline_file(self): config = ( bc.BCConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .evaluation( evaluation_interval=3, evaluation_num_env_runners=1, diff --git a/rllib/algorithms/cql/cql.py b/rllib/algorithms/cql/cql.py index b16f67264234..865c9c85c14f 100644 --- a/rllib/algorithms/cql/cql.py +++ b/rllib/algorithms/cql/cql.py @@ -108,19 +108,9 @@ def __init__(self, algo_class=None): # Changes to Algorithm's/SACConfig's default: - # `.api_stack()` - self.api_stack( - enable_rl_module_and_learner=False, - enable_env_runner_and_connector_v2=False, - ) # .reporting() self.min_sample_timesteps_per_iteration = 0 self.min_train_timesteps_per_iteration = 100 - # `.api_stack()` - self.api_stack( - enable_rl_module_and_learner=False, - enable_env_runner_and_connector_v2=False, - ) # fmt: on # __sphinx_doc_end__ diff --git a/rllib/algorithms/cql/tests/test_cql.py b/rllib/algorithms/cql/tests/test_cql_old_api_stack.py similarity index 96% rename from rllib/algorithms/cql/tests/test_cql.py rename to rllib/algorithms/cql/tests/test_cql_old_api_stack.py index 60ce30a74f1c..1321741253a8 100644 --- a/rllib/algorithms/cql/tests/test_cql.py +++ b/rllib/algorithms/cql/tests/test_cql_old_api_stack.py @@ -39,6 +39,10 @@ def test_cql_compilation(self): config = ( cql.CQLConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment( env="Pendulum-v1", ) diff --git a/rllib/algorithms/dqn/dqn.py b/rllib/algorithms/dqn/dqn.py index 622718055e37..d62cb3242e44 100644 --- a/rllib/algorithms/dqn/dqn.py +++ b/rllib/algorithms/dqn/dqn.py @@ -134,18 +134,19 @@ class DQNConfig(AlgorithmConfig): def __init__(self, algo_class=None): """Initializes a DQNConfig instance.""" - super().__init__(algo_class=algo_class or DQN) - - # Overrides of AlgorithmConfig defaults - # `env_runners()` - # Set to `self.n_step`, if 'auto'. 
- self.rollout_fragment_length: Union[int, str] = "auto" self.exploration_config = { "type": "EpsilonGreedy", "initial_epsilon": 1.0, "final_epsilon": 0.02, "epsilon_timesteps": 10000, } + + super().__init__(algo_class=algo_class or DQN) + + # Overrides of AlgorithmConfig defaults + # `env_runners()` + # Set to `self.n_step`, if 'auto'. + self.rollout_fragment_length: Union[int, str] = "auto" # New stack uses `epsilon` as either a constant value or a scheduler # defined like this. # TODO (simon): Ensure that users can understand how to provide epsilon. @@ -174,7 +175,6 @@ def __init__(self, algo_class=None): self.target_network_update_freq = 500 self.num_steps_sampled_before_learning_starts = 1000 self.store_buffer_in_checkpoints = False - self.lr_schedule = None self.adam_epsilon = 1e-8 self.tau = 1.0 @@ -203,14 +203,11 @@ def __init__(self, algo_class=None): # Beta parameter for sampling from prioritized replay buffer. "beta": 0.4, } - # `.api_stack()` - self.api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) # fmt: on # __sphinx_doc_end__ + self.lr_schedule = None # @OldAPIStack + # Deprecated self.buffer_size = DEPRECATED_VALUE self.prioritized_replay = DEPRECATED_VALUE diff --git a/rllib/algorithms/impala/impala.py b/rllib/algorithms/impala/impala.py index 3e0692ddd188..78e511931471 100644 --- a/rllib/algorithms/impala/impala.py +++ b/rllib/algorithms/impala/impala.py @@ -123,8 +123,6 @@ class IMPALAConfig(AlgorithmConfig): def __init__(self, algo_class=None): """Initializes a IMPALAConfig instance.""" - super().__init__(algo_class=algo_class or IMPALA) - self.exploration_config = { # @OldAPIstack # The Exploration class to use. In the simplest case, this is the name # (str) of any class present in the `rllib.utils.exploration` package. @@ -135,6 +133,8 @@ def __init__(self, algo_class=None): # Add constructor kwargs here (if any). } + super().__init__(algo_class=algo_class or IMPALA) + # fmt: off # __sphinx_doc_begin__ @@ -170,10 +170,6 @@ def __init__(self, algo_class=None): self.num_env_runners = 2 self.lr = 0.0005 self.min_time_s_per_iteration = 10 - self.api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) # __sphinx_doc_end__ # fmt: on diff --git a/rllib/algorithms/marwil/marwil.py b/rllib/algorithms/marwil/marwil.py index c562113cf96a..21dbdbfbe181 100644 --- a/rllib/algorithms/marwil/marwil.py +++ b/rllib/algorithms/marwil/marwil.py @@ -137,6 +137,16 @@ class MARWILConfig(AlgorithmConfig): def __init__(self, algo_class=None): """Initializes a MARWILConfig instance.""" + self.exploration_config = { + # The Exploration class to use. In the simplest case, this is the name + # (str) of any class present in the `rllib.utils.exploration` package. + # You can also provide the python class directly or the full location + # of your class (e.g. "ray.rllib.utils.exploration.epsilon_greedy. + # EpsilonGreedy"). + "type": "StochasticSampling", + # Add constructor kwargs here (if any). + } + super().__init__(algo_class=algo_class or MARWIL) # fmt: off @@ -165,18 +175,6 @@ def __init__(self, algo_class=None): self.lr = 1e-4 self.lambda_ = 1.0 self.train_batch_size = 2000 - # TODO (Artur): MARWIL should not need an exploration config as an offline - # algorithm. However, the current implementation of the CRR algorithm - # requires it. Investigate. - self.exploration_config = { - # The Exploration class to use. 
In the simplest case, this is the name - # (str) of any class present in the `rllib.utils.exploration` package. - # You can also provide the python class directly or the full location - # of your class (e.g. "ray.rllib.utils.exploration.epsilon_greedy. - # EpsilonGreedy"). - "type": "StochasticSampling", - # Add constructor kwargs here (if any). - } # Materialize only the data in raw format, but not the mapped data b/c # MARWIL uses a connector to calculate values and therefore the module diff --git a/rllib/algorithms/marwil/tests/test_marwil_old_api_stack.py b/rllib/algorithms/marwil/tests/test_marwil_old_api_stack.py index bffcbe06db5f..bb1fabfed7ee 100644 --- a/rllib/algorithms/marwil/tests/test_marwil_old_api_stack.py +++ b/rllib/algorithms/marwil/tests/test_marwil_old_api_stack.py @@ -49,6 +49,10 @@ def test_marwil_compilation_and_learning_from_offline_file(self): config = ( marwil.MARWILConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .env_runners(num_env_runners=2) .environment(env="CartPole-v1") .evaluation( @@ -111,6 +115,10 @@ def test_marwil_cont_actions_from_offline_file(self): config = ( marwil.MARWILConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .env_runners(num_env_runners=1) .evaluation( evaluation_num_env_runners=1, @@ -148,6 +156,10 @@ def test_marwil_loss_function(self): config = ( marwil.MARWILConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .env_runners(num_env_runners=0) .offline_data(input_=[data_file]) ) # Learn from offline data. diff --git a/rllib/algorithms/ppo/ppo.py b/rllib/algorithms/ppo/ppo.py index 792c313bc48f..1bb785643a70 100644 --- a/rllib/algorithms/ppo/ppo.py +++ b/rllib/algorithms/ppo/ppo.py @@ -110,8 +110,6 @@ class PPOConfig(AlgorithmConfig): def __init__(self, algo_class=None): """Initializes a PPOConfig instance.""" - super().__init__(algo_class=algo_class or PPO) - self.exploration_config = { # The Exploration class to use. In the simplest case, this is the name # (str) of any class present in the `rllib.utils.exploration` package. @@ -122,6 +120,8 @@ def __init__(self, algo_class=None): # Add constructor kwargs here (if any). } + super().__init__(algo_class=algo_class or PPO) + # fmt: off # __sphinx_doc_begin__ self.lr = 5e-5 @@ -146,12 +146,6 @@ def __init__(self, algo_class=None): # Override some of AlgorithmConfig's default values with PPO-specific values. self.num_env_runners = 2 - - # `.api_stack()` - self.api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) # __sphinx_doc_end__ # fmt: on diff --git a/rllib/algorithms/sac/sac.py b/rllib/algorithms/sac/sac.py index 35a9b9cece32..bcdfa0e69edf 100644 --- a/rllib/algorithms/sac/sac.py +++ b/rllib/algorithms/sac/sac.py @@ -48,7 +48,18 @@ class SACConfig(AlgorithmConfig): """ def __init__(self, algo_class=None): + self.exploration_config = { + # The Exploration class to use. In the simplest case, this is the name + # (str) of any class present in the `rllib.utils.exploration` package. + # You can also provide the python class directly or the full location + # of your class (e.g. "ray.rllib.utils.exploration.epsilon_greedy. + # EpsilonGreedy"). + "type": "StochasticSampling", + # Add constructor kwargs here (if any). + } + super().__init__(algo_class=algo_class or SAC) + # fmt: off # __sphinx_doc_begin__ # SAC-specific config settings. 
@@ -105,15 +116,6 @@ def __init__(self, algo_class=None): # .env_runners() # Set to `self.n_step`, if 'auto'. self.rollout_fragment_length = "auto" - self.exploration_config = { - # The Exploration class to use. In the simplest case, this is the name - # (str) of any class present in the `rllib.utils.exploration` package. - # You can also provide the python class directly or the full location - # of your class (e.g. "ray.rllib.utils.exploration.epsilon_greedy. - # EpsilonGreedy"). - "type": "StochasticSampling", - # Add constructor kwargs here (if any). - } self.train_batch_size_per_learner = 256 self.train_batch_size = 256 # @OldAPIstack # Number of timesteps to collect from rollout workers before we start @@ -124,12 +126,6 @@ def __init__(self, algo_class=None): # .reporting() self.min_time_s_per_iteration = 1 self.min_sample_timesteps_per_iteration = 100 - - # `.api_stack()` - self.api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) # __sphinx_doc_end__ # fmt: on diff --git a/rllib/algorithms/tests/test_algorithm.py b/rllib/algorithms/tests/test_algorithm.py index 2175eb62091f..45ba63b769b3 100644 --- a/rllib/algorithms/tests/test_algorithm.py +++ b/rllib/algorithms/tests/test_algorithm.py @@ -526,8 +526,13 @@ def test_no_env_but_eval_workers_do_have_env(self): offline_rl_config = ( BCConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment( - observation_space=env.observation_space, action_space=env.action_space + observation_space=env.observation_space, + action_space=env.action_space, ) .evaluation( evaluation_interval=1, diff --git a/rllib/env/single_agent_env_runner.py b/rllib/env/single_agent_env_runner.py index b6a2dcd161bc..ef56b54fb5ad 100644 --- a/rllib/env/single_agent_env_runner.py +++ b/rllib/env/single_agent_env_runner.py @@ -575,7 +575,7 @@ def assert_healthy(self): AssertionError: If the EnvRunner Actor has NOT been properly initialized. """ # Make sure, we have built our gym.vector.Env and RLModule properly. - assert self.env and self.module + assert self.env and hasattr(self, "module") def make_env(self) -> None: """Creates a vectorized gymnasium env and stores it in `self.env`. 
diff --git a/rllib/env/tests/test_env_runner_group.py b/rllib/env/tests/test_env_runner_group.py index 234c32e015c8..aeabb4fb501c 100644 --- a/rllib/env/tests/test_env_runner_group.py +++ b/rllib/env/tests/test_env_runner_group.py @@ -1,11 +1,9 @@ -import gymnasium as gym import unittest import ray -from ray.rllib.algorithms.algorithm_config import AlgorithmConfig +from ray.rllib.algorithms.ppo import PPOConfig +from ray.rllib.core.rl_module.rl_module import RLModule from ray.rllib.env.env_runner_group import EnvRunnerGroup -from ray.rllib.examples._old_api_stack.policy.random_policy import RandomPolicy -from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID class TestEnvRunnerGroup(unittest.TestCase): @@ -20,67 +18,67 @@ def tearDownClass(cls): def test_foreach_worker(self): """Test to make sure basic sychronous calls to remote workers work.""" ws = EnvRunnerGroup( - env_creator=lambda _: gym.make("CartPole-v1"), - default_policy_class=RandomPolicy, - config=AlgorithmConfig().env_runners(num_env_runners=2), + config=( + PPOConfig().environment("CartPole-v1").env_runners(num_env_runners=2) + ), num_env_runners=2, ) - policies = ws.foreach_worker( - lambda w: w.get_policy(DEFAULT_POLICY_ID), + modules = ws.foreach_worker( + lambda w: w.module, local_env_runner=True, ) # 3 policies including the one from the local worker. - self.assertEqual(len(policies), 3) - for p in policies: - self.assertIsInstance(p, RandomPolicy) + self.assertEqual(len(modules), 3) + for m in modules: + self.assertIsInstance(m, RLModule) - policies = ws.foreach_worker( - lambda w: w.get_policy(DEFAULT_POLICY_ID), + modules = ws.foreach_worker( + lambda w: w.module, local_env_runner=False, ) # 2 policies from only the remote workers. - self.assertEqual(len(policies), 2) + self.assertEqual(len(modules), 2) ws.stop() def test_foreach_worker_return_obj_refss(self): """Test to make sure return_obj_refs parameter works.""" ws = EnvRunnerGroup( - env_creator=lambda _: gym.make("CartPole-v1"), - default_policy_class=RandomPolicy, - config=AlgorithmConfig().env_runners(num_env_runners=2), + config=( + PPOConfig().environment("CartPole-v1").env_runners(num_env_runners=2) + ), num_env_runners=2, ) - policy_refs = ws.foreach_worker( - lambda w: w.get_policy(DEFAULT_POLICY_ID), + module_refs = ws.foreach_worker( + lambda w: isinstance(w.module, RLModule), local_env_runner=False, return_obj_refs=True, ) # 2 policy references from remote workers. - self.assertEqual(len(policy_refs), 2) - self.assertTrue(isinstance(policy_refs[0], ray.ObjectRef)) - self.assertTrue(isinstance(policy_refs[1], ray.ObjectRef)) + self.assertEqual(len(module_refs), 2) + self.assertTrue(isinstance(module_refs[0], ray.ObjectRef)) + self.assertTrue(isinstance(module_refs[1], ray.ObjectRef)) ws.stop() def test_foreach_worker_async(self): """Test to make sure basic asychronous calls to remote workers work.""" ws = EnvRunnerGroup( - env_creator=lambda _: gym.make("CartPole-v1"), - default_policy_class=RandomPolicy, - config=AlgorithmConfig().env_runners(num_env_runners=2), + config=( + PPOConfig().environment("CartPole-v1").env_runners(num_env_runners=2) + ), num_env_runners=2, ) # Fired async request against both remote workers. self.assertEqual( ws.foreach_worker_async( - lambda w: w.get_policy(DEFAULT_POLICY_ID), + lambda w: isinstance(w.module, RLModule), ), 2, ) @@ -92,7 +90,7 @@ def test_foreach_worker_async(self): # First is the id of the remote worker. self.assertTrue(p[0] in [1, 2]) # Next is the actual policy. 
- self.assertIsInstance(p[1], RandomPolicy) + self.assertTrue(p[1]) ws.stop() diff --git a/rllib/examples/offline_rl/offline_rl.py b/rllib/examples/offline_rl/offline_rl.py index 6d19252bca27..5679fc1ac63b 100644 --- a/rllib/examples/offline_rl/offline_rl.py +++ b/rllib/examples/offline_rl/offline_rl.py @@ -57,6 +57,10 @@ # See rllib/tuned_examples/cql/pendulum-cql.yaml for comparison. config = ( cql.CQLConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .framework(framework="torch") .env_runners(num_env_runners=0) .training( diff --git a/rllib/offline/estimators/tests/utils.py b/rllib/offline/estimators/tests/utils.py index baf74ae51d21..b7366e8609a3 100644 --- a/rllib/offline/estimators/tests/utils.py +++ b/rllib/offline/estimators/tests/utils.py @@ -43,6 +43,10 @@ def get_cliff_walking_wall_policy_and_data( config = ( AlgorithmConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .debugging(seed=seed) .env_runners(batch_mode="complete_episodes") .experimental(_disable_preprocessor_api=True) diff --git a/rllib/offline/tests/test_feature_importance.py b/rllib/offline/tests/test_feature_importance.py index e6696bdb7e24..c19953aa4403 100644 --- a/rllib/offline/tests/test_feature_importance.py +++ b/rllib/offline/tests/test_feature_importance.py @@ -14,7 +14,15 @@ def tearDown(self): ray.shutdown() def test_feat_importance_cartpole(self): - config = MARWILConfig().environment("CartPole-v1").framework("torch") + config = ( + MARWILConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) + .environment("CartPole-v1") + .framework("torch") + ) algo = config.build() policy = algo.env_runner.get_policy() sample_batch = synchronous_parallel_sample(worker_set=algo.env_runner_group) diff --git a/rllib/utils/torch_utils.py b/rllib/utils/torch_utils.py index 462d0fe9ff69..0d360d4d1488 100644 --- a/rllib/utils/torch_utils.py +++ b/rllib/utils/torch_utils.py @@ -10,7 +10,7 @@ import tree # pip install dm_tree from ray.rllib.models.repeated_values import RepeatedValues -from ray.rllib.utils.annotations import Deprecated, PublicAPI, DeveloperAPI +from ray.rllib.utils.annotations import PublicAPI, DeveloperAPI from ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.numpy import SMALL_NUMBER from ray.rllib.utils.typing import ( @@ -95,11 +95,6 @@ def apply_grad_clipping( return {"grad_gnorm": grad_gnorm} -@Deprecated(old="ray.rllib.utils.torch_utils.atanh", new="torch.math.atanh", error=True) -def atanh(x: TensorType) -> TensorType: - pass - - @PublicAPI def clip_gradients( gradients_dict: "ParamDict", @@ -236,11 +231,6 @@ def concat_multi_gpu_td_errors( } -@Deprecated(new="ray/rllib/utils/numpy.py::convert_to_numpy", error=True) -def convert_to_non_torch_type(stats: TensorStructType) -> TensorStructType: - pass - - @PublicAPI def convert_to_torch_tensor( x: TensorStructType,
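
A minimal usage sketch of what this change means for user code (not part of the patch itself): BC, MARWIL, and CQL now run on the new API stack by default, so scripts that still depend on the old API stack have to opt out explicitly, exactly as the updated *_old_api_stack tests above do. The environment, framework, and offline input path below are illustrative placeholders, not taken from this patch.

import ray.rllib.algorithms.cql as cql

config = (
    cql.CQLConfig()
    # The new API stack (RLModule/Learner + EnvRunner/ConnectorV2) is now the
    # default; disable it explicitly to keep running on the old stack.
    .api_stack(
        enable_rl_module_and_learner=False,
        enable_env_runner_and_connector_v2=False,
    )
    .environment(env="Pendulum-v1")
    .framework(framework="torch")
    .env_runners(num_env_runners=0)
    # CQL is an offline algorithm; point this at real recorded data
    # (the path here is a placeholder).
    .offline_data(input_="/tmp/pendulum-out")
)
algo = config.build()

New-API-stack users need no change: the explicit api_stack(enable_rl_module_and_learner=True, enable_env_runner_and_connector_v2=True) calls removed from the APPO/IMPALA/PPO/SAC/DQN config constructors above are now redundant, because AlgorithmConfig.__init__ enables both flags by default.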