From 03ea4f6663fafaf64b8d10ac8db8e962302be561 Mon Sep 17 00:00:00 2001 From: Sven Mika Date: Sun, 10 Nov 2024 17:34:57 +0100 Subject: [PATCH] [RLlib] New API stack: On by default for BC/MARWIL/CQL. (#48599) --- rllib/BUILD | 5 +- rllib/algorithms/algorithm_config.py | 61 ++++++++----------- rllib/algorithms/appo/appo.py | 8 +-- rllib/algorithms/appo/tests/test_appo.py | 4 +- .../bc/tests/test_bc_old_api_stack.py | 4 ++ rllib/algorithms/cql/cql.py | 10 --- ...{test_cql.py => test_cql_old_api_stack.py} | 4 ++ rllib/algorithms/dqn/dqn.py | 21 +++---- rllib/algorithms/impala/impala.py | 8 +-- rllib/algorithms/marwil/marwil.py | 22 +++---- .../marwil/tests/test_marwil_old_api_stack.py | 12 ++++ rllib/algorithms/ppo/ppo.py | 10 +-- rllib/algorithms/sac/sac.py | 26 ++++---- rllib/algorithms/tests/test_algorithm.py | 7 ++- rllib/env/single_agent_env_runner.py | 2 +- rllib/env/tests/test_env_runner_group.py | 54 ++++++++-------- rllib/examples/offline_rl/offline_rl.py | 4 ++ rllib/offline/estimators/tests/utils.py | 4 ++ .../offline/tests/test_feature_importance.py | 10 ++- rllib/utils/torch_utils.py | 12 +--- 20 files changed, 135 insertions(+), 153 deletions(-) rename rllib/algorithms/cql/tests/{test_cql.py => test_cql_old_api_stack.py} (96%) diff --git a/rllib/BUILD b/rllib/BUILD index 71d8ed4b234c..1b281b62ba65 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -948,12 +948,13 @@ py_test( ) # CQL +# @OldAPIStack py_test( - name = "test_cql", + name = "test_cql_old_api_stack", tags = ["team:rllib", "algorithms_dir"], size = "large", data = ["tests/data/pendulum/small.json"], - srcs = ["algorithms/cql/tests/test_cql.py"] + srcs = ["algorithms/cql/tests/test_cql_old_api_stack.py"] ) # DQN diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py index 564d226bd631..542240a00dac 100644 --- a/rllib/algorithms/algorithm_config.py +++ b/rllib/algorithms/algorithm_config.py @@ -82,25 +82,6 @@ Space = gym.Space -"""TODO(jungong, sven): in "offline_data" we can potentially unify all input types -under input and input_config keys. E.g. -input: sample -input_config { -env: CartPole-v1 -} -or: -input: json_reader -input_config { -path: /tmp/ -} -or: -input: dataset -input_config { -format: parquet -path: /tmp/ -} -""" - if TYPE_CHECKING: from ray.rllib.algorithms.algorithm import Algorithm @@ -131,12 +112,13 @@ class AlgorithmConfig(_Config): from ray.rllib.algorithms.callbacks import MemoryTrackingCallbacks # Construct a generic config object, specifying values within different # sub-categories, e.g. "training". - config = (PPOConfig().training(gamma=0.9, lr=0.01) - .environment(env="CartPole-v1") - .resources(num_gpus=0) - .env_runners(num_env_runners=0) - .callbacks(MemoryTrackingCallbacks) - ) + config = ( + PPOConfig() + .training(gamma=0.9, lr=0.01) + .environment(env="CartPole-v1") + .env_runners(num_env_runners=0) + .callbacks(MemoryTrackingCallbacks) + ) # A config object can be used to construct the respective Algorithm. rllib_algo = config.build() @@ -321,10 +303,6 @@ def __init__(self, algo_class: Optional[type] = None): # Default setting for skipping `nan` gradient updates. 
self.torch_skip_nan_gradients = False - # `self.api_stack()` - self.enable_rl_module_and_learner = False - self.enable_env_runner_and_connector_v2 = False - # `self.environment()` self.env = None self.env_config = {} @@ -425,7 +403,19 @@ def __init__(self, algo_class: Optional[type] = None): self.explore = True # This is not compatible with RLModules, which have a method # `forward_exploration` to specify custom exploration behavior. - self.exploration_config = {} + if not hasattr(self, "exploration_config"): + # Helper to keep track of the original exploration config when dis-/enabling + # rl modules. + self._prior_exploration_config = None + self.exploration_config = {} + + # `self.api_stack()` + self.enable_rl_module_and_learner = True + self.enable_env_runner_and_connector_v2 = True + self.api_stack( + enable_rl_module_and_learner=True, + enable_env_runner_and_connector_v2=True, + ) # `self.multi_agent()` # TODO (sven): Prepare multi-agent setup for logging each agent's and each @@ -549,9 +539,6 @@ def __init__(self, algo_class: Optional[type] = None): # `self.rl_module()` self._model_config = {} self._rl_module_spec = None - # Helper to keep track of the original exploration config when dis-/enabling - # rl modules. - self.__prior_exploration_config = None # Module ID specific config overrides. self.algorithm_config_overrides_per_module = {} # Cached, actual AlgorithmConfig objects derived from @@ -1612,13 +1599,13 @@ def api_stack( self.enable_rl_module_and_learner = enable_rl_module_and_learner if enable_rl_module_and_learner is True and self.exploration_config: - self.__prior_exploration_config = self.exploration_config + self._prior_exploration_config = self.exploration_config self.exploration_config = {} elif enable_rl_module_and_learner is False and not self.exploration_config: - if self.__prior_exploration_config is not None: - self.exploration_config = self.__prior_exploration_config - self.__prior_exploration_config = None + if self._prior_exploration_config is not None: + self.exploration_config = self._prior_exploration_config + self._prior_exploration_config = None else: logger.warning( "config.enable_rl_module_and_learner was set to False, but no " diff --git a/rllib/algorithms/appo/appo.py b/rllib/algorithms/appo/appo.py index a623627122aa..99d32099b16b 100644 --- a/rllib/algorithms/appo/appo.py +++ b/rllib/algorithms/appo/appo.py @@ -88,8 +88,6 @@ class APPOConfig(IMPALAConfig): def __init__(self, algo_class=None): """Initializes a APPOConfig instance.""" - super().__init__(algo_class=algo_class or APPO) - self.exploration_config = { # The Exploration class to use. In the simplest case, this is the name # (str) of any class present in the `rllib.utils.exploration` package. @@ -100,6 +98,8 @@ def __init__(self, algo_class=None): # Add constructor kwargs here (if any). 
} + super().__init__(algo_class=algo_class or APPO) + # fmt: off # __sphinx_doc_begin__ # APPO specific settings: @@ -138,10 +138,6 @@ def __init__(self, algo_class=None): self.vf_loss_coeff = 0.5 self.entropy_coeff = 0.01 self.tau = 1.0 - self.api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) # __sphinx_doc_end__ # fmt: on diff --git a/rllib/algorithms/appo/tests/test_appo.py b/rllib/algorithms/appo/tests/test_appo.py index 988cb7968044..e58eea2c782d 100644 --- a/rllib/algorithms/appo/tests/test_appo.py +++ b/rllib/algorithms/appo/tests/test_appo.py @@ -4,9 +4,7 @@ import ray.rllib.algorithms.appo as appo from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID -from ray.rllib.utils.metrics import ( - LEARNER_RESULTS, -) +from ray.rllib.utils.metrics import LEARNER_RESULTS from ray.rllib.utils.test_utils import ( check_train_results, check_train_results_new_api_stack, diff --git a/rllib/algorithms/bc/tests/test_bc_old_api_stack.py b/rllib/algorithms/bc/tests/test_bc_old_api_stack.py index d564121fe028..335a751376ad 100644 --- a/rllib/algorithms/bc/tests/test_bc_old_api_stack.py +++ b/rllib/algorithms/bc/tests/test_bc_old_api_stack.py @@ -37,6 +37,10 @@ def test_bc_compilation_and_learning_from_offline_file(self): config = ( bc.BCConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .evaluation( evaluation_interval=3, evaluation_num_env_runners=1, diff --git a/rllib/algorithms/cql/cql.py b/rllib/algorithms/cql/cql.py index b16f67264234..865c9c85c14f 100644 --- a/rllib/algorithms/cql/cql.py +++ b/rllib/algorithms/cql/cql.py @@ -108,19 +108,9 @@ def __init__(self, algo_class=None): # Changes to Algorithm's/SACConfig's default: - # `.api_stack()` - self.api_stack( - enable_rl_module_and_learner=False, - enable_env_runner_and_connector_v2=False, - ) # .reporting() self.min_sample_timesteps_per_iteration = 0 self.min_train_timesteps_per_iteration = 100 - # `.api_stack()` - self.api_stack( - enable_rl_module_and_learner=False, - enable_env_runner_and_connector_v2=False, - ) # fmt: on # __sphinx_doc_end__ diff --git a/rllib/algorithms/cql/tests/test_cql.py b/rllib/algorithms/cql/tests/test_cql_old_api_stack.py similarity index 96% rename from rllib/algorithms/cql/tests/test_cql.py rename to rllib/algorithms/cql/tests/test_cql_old_api_stack.py index 60ce30a74f1c..1321741253a8 100644 --- a/rllib/algorithms/cql/tests/test_cql.py +++ b/rllib/algorithms/cql/tests/test_cql_old_api_stack.py @@ -39,6 +39,10 @@ def test_cql_compilation(self): config = ( cql.CQLConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment( env="Pendulum-v1", ) diff --git a/rllib/algorithms/dqn/dqn.py b/rllib/algorithms/dqn/dqn.py index 622718055e37..d62cb3242e44 100644 --- a/rllib/algorithms/dqn/dqn.py +++ b/rllib/algorithms/dqn/dqn.py @@ -134,18 +134,19 @@ class DQNConfig(AlgorithmConfig): def __init__(self, algo_class=None): """Initializes a DQNConfig instance.""" - super().__init__(algo_class=algo_class or DQN) - - # Overrides of AlgorithmConfig defaults - # `env_runners()` - # Set to `self.n_step`, if 'auto'. 
- self.rollout_fragment_length: Union[int, str] = "auto" self.exploration_config = { "type": "EpsilonGreedy", "initial_epsilon": 1.0, "final_epsilon": 0.02, "epsilon_timesteps": 10000, } + + super().__init__(algo_class=algo_class or DQN) + + # Overrides of AlgorithmConfig defaults + # `env_runners()` + # Set to `self.n_step`, if 'auto'. + self.rollout_fragment_length: Union[int, str] = "auto" # New stack uses `epsilon` as either a constant value or a scheduler # defined like this. # TODO (simon): Ensure that users can understand how to provide epsilon. @@ -174,7 +175,6 @@ def __init__(self, algo_class=None): self.target_network_update_freq = 500 self.num_steps_sampled_before_learning_starts = 1000 self.store_buffer_in_checkpoints = False - self.lr_schedule = None self.adam_epsilon = 1e-8 self.tau = 1.0 @@ -203,14 +203,11 @@ def __init__(self, algo_class=None): # Beta parameter for sampling from prioritized replay buffer. "beta": 0.4, } - # `.api_stack()` - self.api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) # fmt: on # __sphinx_doc_end__ + self.lr_schedule = None # @OldAPIStack + # Deprecated self.buffer_size = DEPRECATED_VALUE self.prioritized_replay = DEPRECATED_VALUE diff --git a/rllib/algorithms/impala/impala.py b/rllib/algorithms/impala/impala.py index 3e0692ddd188..78e511931471 100644 --- a/rllib/algorithms/impala/impala.py +++ b/rllib/algorithms/impala/impala.py @@ -123,8 +123,6 @@ class IMPALAConfig(AlgorithmConfig): def __init__(self, algo_class=None): """Initializes a IMPALAConfig instance.""" - super().__init__(algo_class=algo_class or IMPALA) - self.exploration_config = { # @OldAPIstack # The Exploration class to use. In the simplest case, this is the name # (str) of any class present in the `rllib.utils.exploration` package. @@ -135,6 +133,8 @@ def __init__(self, algo_class=None): # Add constructor kwargs here (if any). } + super().__init__(algo_class=algo_class or IMPALA) + # fmt: off # __sphinx_doc_begin__ @@ -170,10 +170,6 @@ def __init__(self, algo_class=None): self.num_env_runners = 2 self.lr = 0.0005 self.min_time_s_per_iteration = 10 - self.api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) # __sphinx_doc_end__ # fmt: on diff --git a/rllib/algorithms/marwil/marwil.py b/rllib/algorithms/marwil/marwil.py index c562113cf96a..21dbdbfbe181 100644 --- a/rllib/algorithms/marwil/marwil.py +++ b/rllib/algorithms/marwil/marwil.py @@ -137,6 +137,16 @@ class MARWILConfig(AlgorithmConfig): def __init__(self, algo_class=None): """Initializes a MARWILConfig instance.""" + self.exploration_config = { + # The Exploration class to use. In the simplest case, this is the name + # (str) of any class present in the `rllib.utils.exploration` package. + # You can also provide the python class directly or the full location + # of your class (e.g. "ray.rllib.utils.exploration.epsilon_greedy. + # EpsilonGreedy"). + "type": "StochasticSampling", + # Add constructor kwargs here (if any). + } + super().__init__(algo_class=algo_class or MARWIL) # fmt: off @@ -165,18 +175,6 @@ def __init__(self, algo_class=None): self.lr = 1e-4 self.lambda_ = 1.0 self.train_batch_size = 2000 - # TODO (Artur): MARWIL should not need an exploration config as an offline - # algorithm. However, the current implementation of the CRR algorithm - # requires it. Investigate. - self.exploration_config = { - # The Exploration class to use. 
In the simplest case, this is the name - # (str) of any class present in the `rllib.utils.exploration` package. - # You can also provide the python class directly or the full location - # of your class (e.g. "ray.rllib.utils.exploration.epsilon_greedy. - # EpsilonGreedy"). - "type": "StochasticSampling", - # Add constructor kwargs here (if any). - } # Materialize only the data in raw format, but not the mapped data b/c # MARWIL uses a connector to calculate values and therefore the module diff --git a/rllib/algorithms/marwil/tests/test_marwil_old_api_stack.py b/rllib/algorithms/marwil/tests/test_marwil_old_api_stack.py index bffcbe06db5f..bb1fabfed7ee 100644 --- a/rllib/algorithms/marwil/tests/test_marwil_old_api_stack.py +++ b/rllib/algorithms/marwil/tests/test_marwil_old_api_stack.py @@ -49,6 +49,10 @@ def test_marwil_compilation_and_learning_from_offline_file(self): config = ( marwil.MARWILConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .env_runners(num_env_runners=2) .environment(env="CartPole-v1") .evaluation( @@ -111,6 +115,10 @@ def test_marwil_cont_actions_from_offline_file(self): config = ( marwil.MARWILConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .env_runners(num_env_runners=1) .evaluation( evaluation_num_env_runners=1, @@ -148,6 +156,10 @@ def test_marwil_loss_function(self): config = ( marwil.MARWILConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .env_runners(num_env_runners=0) .offline_data(input_=[data_file]) ) # Learn from offline data. diff --git a/rllib/algorithms/ppo/ppo.py b/rllib/algorithms/ppo/ppo.py index 792c313bc48f..1bb785643a70 100644 --- a/rllib/algorithms/ppo/ppo.py +++ b/rllib/algorithms/ppo/ppo.py @@ -110,8 +110,6 @@ class PPOConfig(AlgorithmConfig): def __init__(self, algo_class=None): """Initializes a PPOConfig instance.""" - super().__init__(algo_class=algo_class or PPO) - self.exploration_config = { # The Exploration class to use. In the simplest case, this is the name # (str) of any class present in the `rllib.utils.exploration` package. @@ -122,6 +120,8 @@ def __init__(self, algo_class=None): # Add constructor kwargs here (if any). } + super().__init__(algo_class=algo_class or PPO) + # fmt: off # __sphinx_doc_begin__ self.lr = 5e-5 @@ -146,12 +146,6 @@ def __init__(self, algo_class=None): # Override some of AlgorithmConfig's default values with PPO-specific values. self.num_env_runners = 2 - - # `.api_stack()` - self.api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) # __sphinx_doc_end__ # fmt: on diff --git a/rllib/algorithms/sac/sac.py b/rllib/algorithms/sac/sac.py index 35a9b9cece32..bcdfa0e69edf 100644 --- a/rllib/algorithms/sac/sac.py +++ b/rllib/algorithms/sac/sac.py @@ -48,7 +48,18 @@ class SACConfig(AlgorithmConfig): """ def __init__(self, algo_class=None): + self.exploration_config = { + # The Exploration class to use. In the simplest case, this is the name + # (str) of any class present in the `rllib.utils.exploration` package. + # You can also provide the python class directly or the full location + # of your class (e.g. "ray.rllib.utils.exploration.epsilon_greedy. + # EpsilonGreedy"). + "type": "StochasticSampling", + # Add constructor kwargs here (if any). + } + super().__init__(algo_class=algo_class or SAC) + # fmt: off # __sphinx_doc_begin__ # SAC-specific config settings. 
@@ -105,15 +116,6 @@ def __init__(self, algo_class=None): # .env_runners() # Set to `self.n_step`, if 'auto'. self.rollout_fragment_length = "auto" - self.exploration_config = { - # The Exploration class to use. In the simplest case, this is the name - # (str) of any class present in the `rllib.utils.exploration` package. - # You can also provide the python class directly or the full location - # of your class (e.g. "ray.rllib.utils.exploration.epsilon_greedy. - # EpsilonGreedy"). - "type": "StochasticSampling", - # Add constructor kwargs here (if any). - } self.train_batch_size_per_learner = 256 self.train_batch_size = 256 # @OldAPIstack # Number of timesteps to collect from rollout workers before we start @@ -124,12 +126,6 @@ def __init__(self, algo_class=None): # .reporting() self.min_time_s_per_iteration = 1 self.min_sample_timesteps_per_iteration = 100 - - # `.api_stack()` - self.api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) # __sphinx_doc_end__ # fmt: on diff --git a/rllib/algorithms/tests/test_algorithm.py b/rllib/algorithms/tests/test_algorithm.py index 2175eb62091f..45ba63b769b3 100644 --- a/rllib/algorithms/tests/test_algorithm.py +++ b/rllib/algorithms/tests/test_algorithm.py @@ -526,8 +526,13 @@ def test_no_env_but_eval_workers_do_have_env(self): offline_rl_config = ( BCConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment( - observation_space=env.observation_space, action_space=env.action_space + observation_space=env.observation_space, + action_space=env.action_space, ) .evaluation( evaluation_interval=1, diff --git a/rllib/env/single_agent_env_runner.py b/rllib/env/single_agent_env_runner.py index b6a2dcd161bc..ef56b54fb5ad 100644 --- a/rllib/env/single_agent_env_runner.py +++ b/rllib/env/single_agent_env_runner.py @@ -575,7 +575,7 @@ def assert_healthy(self): AssertionError: If the EnvRunner Actor has NOT been properly initialized. """ # Make sure, we have built our gym.vector.Env and RLModule properly. - assert self.env and self.module + assert self.env and hasattr(self, "module") def make_env(self) -> None: """Creates a vectorized gymnasium env and stores it in `self.env`. 
diff --git a/rllib/env/tests/test_env_runner_group.py b/rllib/env/tests/test_env_runner_group.py index 234c32e015c8..aeabb4fb501c 100644 --- a/rllib/env/tests/test_env_runner_group.py +++ b/rllib/env/tests/test_env_runner_group.py @@ -1,11 +1,9 @@ -import gymnasium as gym import unittest import ray -from ray.rllib.algorithms.algorithm_config import AlgorithmConfig +from ray.rllib.algorithms.ppo import PPOConfig +from ray.rllib.core.rl_module.rl_module import RLModule from ray.rllib.env.env_runner_group import EnvRunnerGroup -from ray.rllib.examples._old_api_stack.policy.random_policy import RandomPolicy -from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID class TestEnvRunnerGroup(unittest.TestCase): @@ -20,67 +18,67 @@ def tearDownClass(cls): def test_foreach_worker(self): """Test to make sure basic sychronous calls to remote workers work.""" ws = EnvRunnerGroup( - env_creator=lambda _: gym.make("CartPole-v1"), - default_policy_class=RandomPolicy, - config=AlgorithmConfig().env_runners(num_env_runners=2), + config=( + PPOConfig().environment("CartPole-v1").env_runners(num_env_runners=2) + ), num_env_runners=2, ) - policies = ws.foreach_worker( - lambda w: w.get_policy(DEFAULT_POLICY_ID), + modules = ws.foreach_worker( + lambda w: w.module, local_env_runner=True, ) # 3 policies including the one from the local worker. - self.assertEqual(len(policies), 3) - for p in policies: - self.assertIsInstance(p, RandomPolicy) + self.assertEqual(len(modules), 3) + for m in modules: + self.assertIsInstance(m, RLModule) - policies = ws.foreach_worker( - lambda w: w.get_policy(DEFAULT_POLICY_ID), + modules = ws.foreach_worker( + lambda w: w.module, local_env_runner=False, ) # 2 policies from only the remote workers. - self.assertEqual(len(policies), 2) + self.assertEqual(len(modules), 2) ws.stop() def test_foreach_worker_return_obj_refss(self): """Test to make sure return_obj_refs parameter works.""" ws = EnvRunnerGroup( - env_creator=lambda _: gym.make("CartPole-v1"), - default_policy_class=RandomPolicy, - config=AlgorithmConfig().env_runners(num_env_runners=2), + config=( + PPOConfig().environment("CartPole-v1").env_runners(num_env_runners=2) + ), num_env_runners=2, ) - policy_refs = ws.foreach_worker( - lambda w: w.get_policy(DEFAULT_POLICY_ID), + module_refs = ws.foreach_worker( + lambda w: isinstance(w.module, RLModule), local_env_runner=False, return_obj_refs=True, ) # 2 policy references from remote workers. - self.assertEqual(len(policy_refs), 2) - self.assertTrue(isinstance(policy_refs[0], ray.ObjectRef)) - self.assertTrue(isinstance(policy_refs[1], ray.ObjectRef)) + self.assertEqual(len(module_refs), 2) + self.assertTrue(isinstance(module_refs[0], ray.ObjectRef)) + self.assertTrue(isinstance(module_refs[1], ray.ObjectRef)) ws.stop() def test_foreach_worker_async(self): """Test to make sure basic asychronous calls to remote workers work.""" ws = EnvRunnerGroup( - env_creator=lambda _: gym.make("CartPole-v1"), - default_policy_class=RandomPolicy, - config=AlgorithmConfig().env_runners(num_env_runners=2), + config=( + PPOConfig().environment("CartPole-v1").env_runners(num_env_runners=2) + ), num_env_runners=2, ) # Fired async request against both remote workers. self.assertEqual( ws.foreach_worker_async( - lambda w: w.get_policy(DEFAULT_POLICY_ID), + lambda w: isinstance(w.module, RLModule), ), 2, ) @@ -92,7 +90,7 @@ def test_foreach_worker_async(self): # First is the id of the remote worker. self.assertTrue(p[0] in [1, 2]) # Next is the actual policy. 
- self.assertIsInstance(p[1], RandomPolicy) + self.assertTrue(p[1]) ws.stop() diff --git a/rllib/examples/offline_rl/offline_rl.py b/rllib/examples/offline_rl/offline_rl.py index 6d19252bca27..5679fc1ac63b 100644 --- a/rllib/examples/offline_rl/offline_rl.py +++ b/rllib/examples/offline_rl/offline_rl.py @@ -57,6 +57,10 @@ # See rllib/tuned_examples/cql/pendulum-cql.yaml for comparison. config = ( cql.CQLConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .framework(framework="torch") .env_runners(num_env_runners=0) .training( diff --git a/rllib/offline/estimators/tests/utils.py b/rllib/offline/estimators/tests/utils.py index baf74ae51d21..b7366e8609a3 100644 --- a/rllib/offline/estimators/tests/utils.py +++ b/rllib/offline/estimators/tests/utils.py @@ -43,6 +43,10 @@ def get_cliff_walking_wall_policy_and_data( config = ( AlgorithmConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .debugging(seed=seed) .env_runners(batch_mode="complete_episodes") .experimental(_disable_preprocessor_api=True) diff --git a/rllib/offline/tests/test_feature_importance.py b/rllib/offline/tests/test_feature_importance.py index e6696bdb7e24..c19953aa4403 100644 --- a/rllib/offline/tests/test_feature_importance.py +++ b/rllib/offline/tests/test_feature_importance.py @@ -14,7 +14,15 @@ def tearDown(self): ray.shutdown() def test_feat_importance_cartpole(self): - config = MARWILConfig().environment("CartPole-v1").framework("torch") + config = ( + MARWILConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) + .environment("CartPole-v1") + .framework("torch") + ) algo = config.build() policy = algo.env_runner.get_policy() sample_batch = synchronous_parallel_sample(worker_set=algo.env_runner_group) diff --git a/rllib/utils/torch_utils.py b/rllib/utils/torch_utils.py index 462d0fe9ff69..0d360d4d1488 100644 --- a/rllib/utils/torch_utils.py +++ b/rllib/utils/torch_utils.py @@ -10,7 +10,7 @@ import tree # pip install dm_tree from ray.rllib.models.repeated_values import RepeatedValues -from ray.rllib.utils.annotations import Deprecated, PublicAPI, DeveloperAPI +from ray.rllib.utils.annotations import PublicAPI, DeveloperAPI from ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.numpy import SMALL_NUMBER from ray.rllib.utils.typing import ( @@ -95,11 +95,6 @@ def apply_grad_clipping( return {"grad_gnorm": grad_gnorm} -@Deprecated(old="ray.rllib.utils.torch_utils.atanh", new="torch.math.atanh", error=True) -def atanh(x: TensorType) -> TensorType: - pass - - @PublicAPI def clip_gradients( gradients_dict: "ParamDict", @@ -236,11 +231,6 @@ def concat_multi_gpu_td_errors( } -@Deprecated(new="ray/rllib/utils/numpy.py::convert_to_numpy", error=True) -def convert_to_non_torch_type(stats: TensorStructType) -> TensorStructType: - pass - - @PublicAPI def convert_to_torch_tensor( x: TensorStructType,
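
A minimal usage sketch of what this change means for user code (not part of the patch itself): BC, MARWIL, and CQL now run on the new API stack by default, so scripts that still depend on the old API stack have to opt out explicitly, exactly as the updated *_old_api_stack tests above do. The environment, framework, and offline input path below are illustrative placeholders, not taken from this patch.

import ray.rllib.algorithms.cql as cql

config = (
    cql.CQLConfig()
    # The new API stack (RLModule/Learner + EnvRunner/ConnectorV2) is now the
    # default; disable it explicitly to keep running on the old stack.
    .api_stack(
        enable_rl_module_and_learner=False,
        enable_env_runner_and_connector_v2=False,
    )
    .environment(env="Pendulum-v1")
    .framework(framework="torch")
    .env_runners(num_env_runners=0)
    # CQL is an offline algorithm; point this at real recorded data
    # (the path here is a placeholder).
    .offline_data(input_="/tmp/pendulum-out")
)
algo = config.build()

New-API-stack users need no change: the explicit api_stack(enable_rl_module_and_learner=True, enable_env_runner_and_connector_v2=True) calls removed from the APPO/IMPALA/PPO/SAC/DQN config constructors above are now redundant, because AlgorithmConfig.__init__ enables both flags by default.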