diff --git a/rllib/BUILD b/rllib/BUILD index 684873b15da6..bed02f025a09 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -1898,13 +1898,6 @@ py_test( srcs = ["utils/postprocessing/tests/test_value_predictions.py"] ) -py_test( - name = "test_random_encoder", - tags = ["team:rllib", "utils"], - size = "large", - srcs = ["utils/exploration/tests/test_random_encoder.py"] -) - py_test( name = "test_torch_utils", tags = ["team:rllib", "utils", "gpu"], diff --git a/rllib/algorithms/appo/tests/test_appo.py b/rllib/algorithms/appo/tests/test_appo.py index e9c114bbc2e7..505319218577 100644 --- a/rllib/algorithms/appo/tests/test_appo.py +++ b/rllib/algorithms/appo/tests/test_appo.py @@ -4,11 +4,7 @@ import ray.rllib.algorithms.appo as appo from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID from ray.rllib.utils.metrics.learner_info import LEARNER_INFO, LEARNER_STATS_KEY -from ray.rllib.utils.test_utils import ( - check_compute_single_action, - check_train_results, - framework_iterator, -) +from ray.rllib.utils.test_utils import check_compute_single_action, check_train_results class TestAPPO(unittest.TestCase): @@ -25,15 +21,14 @@ def test_appo_compilation(self): config = appo.APPOConfig().env_runners(num_env_runners=1) num_iterations = 2 - for _ in framework_iterator(config): - algo = config.build(env="CartPole-v1") - for i in range(num_iterations): - results = algo.train() - print(results) - check_train_results(results) + algo = config.build(env="CartPole-v1") + for i in range(num_iterations): + results = algo.train() + print(results) + check_train_results(results) - check_compute_single_action(algo) - algo.stop() + check_compute_single_action(algo) + algo.stop() def test_appo_compilation_use_kl_loss(self): """Test whether APPO can be built with kl_loss enabled.""" @@ -42,14 +37,13 @@ def test_appo_compilation_use_kl_loss(self): ) num_iterations = 2 - for _ in framework_iterator(config): - algo = config.build(env="CartPole-v1") - for i in range(num_iterations): - results = algo.train() - check_train_results(results) - print(results) - check_compute_single_action(algo) - algo.stop() + algo = config.build(env="CartPole-v1") + for i in range(num_iterations): + results = algo.train() + check_train_results(results) + print(results) + check_compute_single_action(algo) + algo.stop() def test_appo_two_optimizers_two_lrs(self): # Not explicitly setting this should cause a warning, but not fail. @@ -71,14 +65,13 @@ def test_appo_two_optimizers_two_lrs(self): num_iterations = 2 # Only supported for tf so far. - for _ in framework_iterator(config, frameworks=("torch", "tf2", "tf")): - algo = config.build(env="CartPole-v1") - for i in range(num_iterations): - results = algo.train() - check_train_results(results) - print(results) - check_compute_single_action(algo) - algo.stop() + algo = config.build(env="CartPole-v1") + for i in range(num_iterations): + results = algo.train() + check_train_results(results) + print(results) + check_compute_single_action(algo) + algo.stop() def test_appo_entropy_coeff_schedule(self): # Initial lr, doesn't really matter because of the schedule below. @@ -122,19 +115,18 @@ def _step_n_times(algo, n: int): "entropy_coeff" ] - for _ in framework_iterator(config, frameworks=("torch", "tf")): - algo = config.build(env="CartPole-v1") + algo = config.build(env="CartPole-v1") - coeff = _step_n_times(algo, 10) # 200 timesteps - # Should be close to the starting coeff of 0.01. 
- self.assertLessEqual(coeff, 0.01) - self.assertGreaterEqual(coeff, 0.001) + coeff = _step_n_times(algo, 10) # 200 timesteps + # Should be close to the starting coeff of 0.01. + self.assertLessEqual(coeff, 0.01) + self.assertGreaterEqual(coeff, 0.001) - coeff = _step_n_times(algo, 20) # 400 timesteps - # Should have annealed to the final coeff of 0.0001. - self.assertLessEqual(coeff, 0.001) + coeff = _step_n_times(algo, 20) # 400 timesteps + # Should have annealed to the final coeff of 0.0001. + self.assertLessEqual(coeff, 0.001) - algo.stop() + algo.stop() def test_appo_learning_rate_schedule(self): config = ( @@ -173,15 +165,14 @@ def _step_n_times(algo, n: int): "cur_lr" ] - for _ in framework_iterator(config): - algo = config.build(env="CartPole-v1") + algo = config.build(env="CartPole-v1") - lr1 = _step_n_times(algo, 10) # 200 timesteps - lr2 = _step_n_times(algo, 10) # 200 timesteps + lr1 = _step_n_times(algo, 10) # 200 timesteps + lr2 = _step_n_times(algo, 10) # 200 timesteps - self.assertGreater(lr1, lr2) + self.assertGreater(lr1, lr2) - algo.stop() + algo.stop() def test_appo_model_variables(self): config = ( @@ -202,13 +193,12 @@ def test_appo_model_variables(self): ) ) - for _ in framework_iterator(config, frameworks=["tf2", "torch"]): - algo = config.build(env="CartPole-v1") - state = algo.get_policy(DEFAULT_POLICY_ID).get_state() - # Weights and Biases for the single hidden layer, the output layer - # of the policy and value networks. So 6 tensors in total. - # We should not get the tensors from the target model here. - self.assertEqual(len(state["weights"]), 6) + algo = config.build(env="CartPole-v1") + state = algo.get_policy(DEFAULT_POLICY_ID).get_state() + # Weights and Biases for the single hidden layer, the output layer + # of the policy and value networks. So 6 tensors in total. + # We should not get the tensors from the target model here. + self.assertEqual(len(state["weights"]), 6) if __name__ == "__main__": diff --git a/rllib/algorithms/bc/tests/test_bc_old_api_stack.py b/rllib/algorithms/bc/tests/test_bc_old_api_stack.py index 98621e8be8bc..d564121fe028 100644 --- a/rllib/algorithms/bc/tests/test_bc_old_api_stack.py +++ b/rllib/algorithms/bc/tests/test_bc_old_api_stack.py @@ -11,7 +11,6 @@ from ray.rllib.utils.test_utils import ( check_compute_single_action, check_train_results, - framework_iterator, ) @@ -50,48 +49,42 @@ def test_bc_compilation_and_learning_from_offline_file(self): num_iterations = 350 min_return_to_reach = 75.0 - # Test for the following frameworks. - frameworks_to_test = ("torch", "tf") - - for _ in framework_iterator(config, frameworks=frameworks_to_test): - for recurrent in [True, False]: - # We only test recurrent networks with RLModules. - if recurrent: - # TODO (Artur): We read input data without a time-dimensions. - # In order for a recurrent offline learning RL Module to - # work, the input data needs to be transformed do add a - # time-dimension. - continue - - config.training(model={"use_lstm": recurrent}) - algo = config.build(env="CartPole-v1") - learnt = False - for i in range(num_iterations): - results = algo.train() - check_train_results(results) - print(results) - - eval_results = results.get("evaluation") - if eval_results: - mean_return = eval_results[ENV_RUNNER_RESULTS][ - EPISODE_RETURN_MEAN - ] - print("iter={} R={}".format(i, mean_return)) - # Learn until good reward is reached in the actual env. 
- if mean_return > min_return_to_reach: - print("learnt!") - learnt = True - break - - if not learnt: - raise ValueError( - "`BC` did not reach {} reward from expert offline " - "data!".format(min_return_to_reach) - ) - - check_compute_single_action(algo, include_prev_action_reward=True) - - algo.stop() + for recurrent in [True, False]: + # We only test recurrent networks with RLModules. + if recurrent: + # TODO (Artur): We read input data without a time-dimensions. + # In order for a recurrent offline learning RL Module to + # work, the input data needs to be transformed do add a + # time-dimension. + continue + + config.training(model={"use_lstm": recurrent}) + algo = config.build(env="CartPole-v1") + learnt = False + for i in range(num_iterations): + results = algo.train() + check_train_results(results) + print(results) + + eval_results = results.get("evaluation") + if eval_results: + mean_return = eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN] + print("iter={} R={}".format(i, mean_return)) + # Learn until good reward is reached in the actual env. + if mean_return > min_return_to_reach: + print("learnt!") + learnt = True + break + + if not learnt: + raise ValueError( + "`BC` did not reach {} reward from expert offline " + "data!".format(min_return_to_reach) + ) + + check_compute_single_action(algo, include_prev_action_reward=True) + + algo.stop() if __name__ == "__main__": diff --git a/rllib/algorithms/cql/tests/test_cql.py b/rllib/algorithms/cql/tests/test_cql.py index 5ed4f007f52a..60ce30a74f1c 100644 --- a/rllib/algorithms/cql/tests/test_cql.py +++ b/rllib/algorithms/cql/tests/test_cql.py @@ -1,23 +1,17 @@ -import numpy as np from pathlib import Path import os import unittest import ray from ray.rllib.algorithms import cql -from ray.rllib.utils.framework import try_import_tf, try_import_torch +from ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.metrics import ( ENV_RUNNER_RESULTS, EPISODE_RETURN_MEAN, EVALUATION_RESULTS, ) -from ray.rllib.utils.test_utils import ( - check_compute_single_action, - check_train_results, - framework_iterator, -) +from ray.rllib.utils.test_utils import check_compute_single_action, check_train_results -tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() @@ -75,77 +69,51 @@ def test_cql_compilation(self): ) num_iterations = 4 - # Test for tf/torch frameworks. - for fw in framework_iterator(config): - algo = config.build() - for i in range(num_iterations): - results = algo.train() - check_train_results(results) - print(results) - eval_results = results.get(EVALUATION_RESULTS) - if eval_results: - print( - f"iter={algo.iteration} " - f"R={eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN]}" - ) - check_compute_single_action(algo) - - # Get policy and model. - pol = algo.get_policy() - cql_model = pol.model - if fw == "tf": - pol.get_session().__enter__() - - # Example on how to do evaluation on the trained Algorithm - # using the data from CQL's global replay buffer. - # Get a sample (MultiAgentBatch). - - batch = algo.env_runner.input_reader.next() - multi_agent_batch = batch.as_multi_agent() - # All experiences have been buffered for `default_policy` - batch = multi_agent_batch.policy_batches["default_policy"] - - if fw == "torch": - obs = torch.from_numpy(batch["obs"]) - else: - obs = batch["obs"] - batch["actions"] = batch["actions"].astype(np.float32) - - # Pass the observations through our model to get the - # features, which then to pass through the Q-head. 
- model_out, _ = cql_model({"obs": obs}) - # The estimated Q-values from the (historic) actions in the batch. - if fw == "torch": - q_values_old = cql_model.get_q_values( - model_out, torch.from_numpy(batch["actions"]) - ) - else: - q_values_old = cql_model.get_q_values( - tf.convert_to_tensor(model_out), batch["actions"] + algo = config.build() + for i in range(num_iterations): + results = algo.train() + check_train_results(results) + print(results) + eval_results = results.get(EVALUATION_RESULTS) + if eval_results: + print( + f"iter={algo.iteration} " + f"R={eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN]}" ) + check_compute_single_action(algo) - # The estimated Q-values for the new actions computed - # by our policy. - actions_new = pol.compute_actions_from_input_dict({"obs": obs})[0] - if fw == "torch": - q_values_new = cql_model.get_q_values( - model_out, torch.from_numpy(actions_new) - ) - else: - q_values_new = cql_model.get_q_values(model_out, actions_new) + # Get policy and model. + pol = algo.get_policy() + cql_model = pol.model - if fw == "tf": - q_values_old, q_values_new = pol.get_session().run( - [q_values_old, q_values_new] - ) + # Example on how to do evaluation on the trained Algorithm + # using the data from CQL's global replay buffer. + # Get a sample (MultiAgentBatch). + + batch = algo.env_runner.input_reader.next() + multi_agent_batch = batch.as_multi_agent() + # All experiences have been buffered for `default_policy` + batch = multi_agent_batch.policy_batches["default_policy"] + + obs = torch.from_numpy(batch["obs"]) + + # Pass the observations through our model to get the + # features, which then to pass through the Q-head. + model_out, _ = cql_model({"obs": obs}) + # The estimated Q-values from the (historic) actions in the batch. + q_values_old = cql_model.get_q_values( + model_out, torch.from_numpy(batch["actions"]) + ) - print(f"Q-val batch={q_values_old}") - print(f"Q-val policy={q_values_new}") + # The estimated Q-values for the new actions computed + # by our policy. + actions_new = pol.compute_actions_from_input_dict({"obs": obs})[0] + q_values_new = cql_model.get_q_values(model_out, torch.from_numpy(actions_new)) - if fw == "tf": - pol.get_session().__exit__(None, None, None) + print(f"Q-val batch={q_values_old}") + print(f"Q-val policy={q_values_new}") - algo.stop() + algo.stop() if __name__ == "__main__": diff --git a/rllib/algorithms/dqn/tests/test_dqn.py b/rllib/algorithms/dqn/tests/test_dqn.py index b2472d24e03f..c442f731b816 100644 --- a/rllib/algorithms/dqn/tests/test_dqn.py +++ b/rllib/algorithms/dqn/tests/test_dqn.py @@ -8,7 +8,6 @@ check, check_compute_single_action, check_train_results, - framework_iterator, ) @@ -31,32 +30,31 @@ def test_dqn_compilation(self): .training(num_steps_sampled_before_learning_starts=0) ) - for _ in framework_iterator(config): - # Double-dueling DQN. - print("Double-dueling") - algo = config.build() - for i in range(num_iterations): - results = algo.train() - check_train_results(results) - print(results) - - check_compute_single_action(algo) - algo.stop() - - # Rainbow. - print("Rainbow") - rainbow_config = deepcopy(config).training( - num_atoms=10, noisy=True, double_q=True, dueling=True, n_step=5 - ) - algo = rainbow_config.build() - for i in range(num_iterations): - results = algo.train() - check_train_results(results) - print(results) - - check_compute_single_action(algo) - - algo.stop() + # Double-dueling DQN. 
+ print("Double-dueling") + algo = config.build() + for i in range(num_iterations): + results = algo.train() + check_train_results(results) + print(results) + + check_compute_single_action(algo) + algo.stop() + + # Rainbow. + print("Rainbow") + rainbow_config = deepcopy(config).training( + num_atoms=10, noisy=True, double_q=True, dueling=True, n_step=5 + ) + algo = rainbow_config.build() + for i in range(num_iterations): + results = algo.train() + check_train_results(results) + print(results) + + check_compute_single_action(algo) + + algo.stop() def test_dqn_compilation_integer_rewards(self): """Test whether DQN can be built on all frameworks. @@ -70,32 +68,31 @@ def test_dqn_compilation_integer_rewards(self): .training(num_steps_sampled_before_learning_starts=0) ) - for _ in framework_iterator(config): - # Double-dueling DQN. - print("Double-dueling") - algo = config.build() - for i in range(num_iterations): - results = algo.train() - check_train_results(results) - print(results) - - check_compute_single_action(algo) - algo.stop() - - # Rainbow. - print("Rainbow") - rainbow_config = deepcopy(config).training( - num_atoms=10, noisy=True, double_q=True, dueling=True, n_step=5 - ) - algo = rainbow_config.build() - for i in range(num_iterations): - results = algo.train() - check_train_results(results) - print(results) - - check_compute_single_action(algo) - - algo.stop() + # Double-dueling DQN. + print("Double-dueling") + algo = config.build() + for i in range(num_iterations): + results = algo.train() + check_train_results(results) + print(results) + + check_compute_single_action(algo) + algo.stop() + + # Rainbow. + print("Rainbow") + rainbow_config = deepcopy(config).training( + num_atoms=10, noisy=True, double_q=True, dueling=True, n_step=5 + ) + algo = rainbow_config.build() + for i in range(num_iterations): + results = algo.train() + check_train_results(results) + print(results) + + check_compute_single_action(algo) + + algo.stop() def test_dqn_exploration_and_soft_q_config(self): """Tests, whether a DQN Agent outputs exploration/softmaxed actions.""" @@ -108,62 +105,60 @@ def test_dqn_exploration_and_soft_q_config(self): obs = np.array(0) - # Test against all frameworks. - for _ in framework_iterator(config): - # Default EpsilonGreedy setup. - algo = config.build() - # Setting explore=False should always return the same action. - a_ = algo.compute_single_action(obs, explore=False) - for _ in range(50): - a = algo.compute_single_action(obs, explore=False) - check(a, a_) - # explore=None (default: explore) should return different actions. - actions = [] - for _ in range(50): - actions.append(algo.compute_single_action(obs)) - check(np.std(actions), 0.0, false=True) - algo.stop() - - # Low softmax temperature. Behaves like argmax - # (but no epsilon exploration). - config.env_runners( - exploration_config={"type": "SoftQ", "temperature": 0.000001} - ) - algo = config.build() - # Due to the low temp, always expect the same action. - actions = [algo.compute_single_action(obs)] - for _ in range(50): - actions.append(algo.compute_single_action(obs)) - check(np.std(actions), 0.0, decimals=3) - algo.stop() - - # Higher softmax temperature. - config.exploration_config["temperature"] = 1.0 - algo = config.build() - - # Even with the higher temperature, if we set explore=False, we - # should expect the same actions always. 
- a_ = algo.compute_single_action(obs, explore=False) - for _ in range(50): - a = algo.compute_single_action(obs, explore=False) - check(a, a_) - - # Due to the higher temp, expect different actions avg'ing - # around 1.5. - actions = [] - for _ in range(300): - actions.append(algo.compute_single_action(obs)) - check(np.std(actions), 0.0, false=True) - algo.stop() - - # With Random exploration. - config.env_runners(exploration_config={"type": "Random"}, explore=True) - algo = config.build() - actions = [] - for _ in range(300): - actions.append(algo.compute_single_action(obs)) - check(np.std(actions), 0.0, false=True) - algo.stop() + # Default EpsilonGreedy setup. + algo = config.build() + # Setting explore=False should always return the same action. + a_ = algo.compute_single_action(obs, explore=False) + for _ in range(50): + a = algo.compute_single_action(obs, explore=False) + check(a, a_) + # explore=None (default: explore) should return different actions. + actions = [] + for _ in range(50): + actions.append(algo.compute_single_action(obs)) + check(np.std(actions), 0.0, false=True) + algo.stop() + + # Low softmax temperature. Behaves like argmax + # (but no epsilon exploration). + config.env_runners( + exploration_config={"type": "SoftQ", "temperature": 0.000001} + ) + algo = config.build() + # Due to the low temp, always expect the same action. + actions = [algo.compute_single_action(obs)] + for _ in range(50): + actions.append(algo.compute_single_action(obs)) + check(np.std(actions), 0.0, decimals=3) + algo.stop() + + # Higher softmax temperature. + config.exploration_config["temperature"] = 1.0 + algo = config.build() + + # Even with the higher temperature, if we set explore=False, we + # should expect the same actions always. + a_ = algo.compute_single_action(obs, explore=False) + for _ in range(50): + a = algo.compute_single_action(obs, explore=False) + check(a, a_) + + # Due to the higher temp, expect different actions avg'ing + # around 1.5. + actions = [] + for _ in range(300): + actions.append(algo.compute_single_action(obs)) + check(np.std(actions), 0.0, false=True) + algo.stop() + + # With Random exploration. + config.env_runners(exploration_config={"type": "Random"}, explore=True) + algo = config.build() + actions = [] + for _ in range(300): + actions.append(algo.compute_single_action(obs)) + check(np.std(actions), 0.0, false=True) + algo.stop() if __name__ == "__main__": diff --git a/rllib/algorithms/dreamerv3/tests/test_dreamerv3.py b/rllib/algorithms/dreamerv3/tests/test_dreamerv3.py index f9919816ea13..7fbb8fd55c2a 100644 --- a/rllib/algorithms/dreamerv3/tests/test_dreamerv3.py +++ b/rllib/algorithms/dreamerv3/tests/test_dreamerv3.py @@ -21,7 +21,6 @@ from ray.rllib.algorithms.dreamerv3 import dreamerv3 from ray.rllib.core import DEFAULT_MODULE_ID from ray.rllib.utils.numpy import one_hot -from ray.rllib.utils.test_utils import framework_iterator from ray import tune @@ -186,57 +185,54 @@ def test_dreamerv3_dreamer_model_sizes(self): symlog_obs=True, ) - for _ in framework_iterator(config, frameworks="tf2"): - # Check all model_sizes described in the paper ([1]) on matching the number - # of parameters to RLlib's implementation. - for model_size in ["XS", "S", "M", "L", "XL"]: - config.model_size = model_size - - # Atari and CartPole spaces. 
- for obs_space, num_actions, env_name in [ - (gym.spaces.Box(-1.0, 0.0, (4,), np.float32), 2, "cartpole"), - (gym.spaces.Box(-1.0, 0.0, (64, 64, 3), np.float32), 6, "atari"), - ]: - print(f"Testing model_size={model_size} on env-type: {env_name} ..") - config.environment( - observation_space=obs_space, - action_space=gym.spaces.Discrete(num_actions), - ) + # Check all model_sizes described in the paper ([1]) on matching the number + # of parameters to RLlib's implementation. + for model_size in ["XS", "S", "M", "L", "XL"]: + config.model_size = model_size + + # Atari and CartPole spaces. + for obs_space, num_actions, env_name in [ + (gym.spaces.Box(-1.0, 0.0, (4,), np.float32), 2, "cartpole"), + (gym.spaces.Box(-1.0, 0.0, (64, 64, 3), np.float32), 6, "atari"), + ]: + print(f"Testing model_size={model_size} on env-type: {env_name} ..") + config.environment( + observation_space=obs_space, + action_space=gym.spaces.Discrete(num_actions), + ) - # Create our RLModule to compute actions with. - policy_dict, _ = config.get_multi_agent_setup() - module_spec = config.get_multi_rl_module_spec( - policy_dict=policy_dict - ) - rl_module = module_spec.build()[DEFAULT_MODULE_ID] + # Create our RLModule to compute actions with. + policy_dict, _ = config.get_multi_agent_setup() + module_spec = config.get_multi_rl_module_spec(policy_dict=policy_dict) + rl_module = module_spec.build()[DEFAULT_MODULE_ID] - # Count the generated RLModule's parameters and compare to the - # paper's reported numbers ([1] and [3]). - num_params_world_model = sum( - np.prod(v.shape.as_list()) - for v in rl_module.world_model.trainable_variables - ) - self.assertEqual( - num_params_world_model, - expected_num_params_world_model[f"{model_size}_{env_name}"], - ) - num_params_actor = sum( - np.prod(v.shape.as_list()) - for v in rl_module.actor.trainable_variables - ) - self.assertEqual( - num_params_actor, - expected_num_params_actor[f"{model_size}_{env_name}"], - ) - num_params_critic = sum( - np.prod(v.shape.as_list()) - for v in rl_module.critic.trainable_variables - ) - self.assertEqual( - num_params_critic, - expected_num_params_critic[f"{model_size}_{env_name}"], - ) - print("\tok") + # Count the generated RLModule's parameters and compare to the + # paper's reported numbers ([1] and [3]). 
+ num_params_world_model = sum( + np.prod(v.shape.as_list()) + for v in rl_module.world_model.trainable_variables + ) + self.assertEqual( + num_params_world_model, + expected_num_params_world_model[f"{model_size}_{env_name}"], + ) + num_params_actor = sum( + np.prod(v.shape.as_list()) + for v in rl_module.actor.trainable_variables + ) + self.assertEqual( + num_params_actor, + expected_num_params_actor[f"{model_size}_{env_name}"], + ) + num_params_critic = sum( + np.prod(v.shape.as_list()) + for v in rl_module.critic.trainable_variables + ) + self.assertEqual( + num_params_critic, + expected_num_params_critic[f"{model_size}_{env_name}"], + ) + print("\tok") if __name__ == "__main__": diff --git a/rllib/algorithms/impala/tests/test_impala.py b/rllib/algorithms/impala/tests/test_impala.py index 4e30d9805afe..5f39f4bf5fe7 100644 --- a/rllib/algorithms/impala/tests/test_impala.py +++ b/rllib/algorithms/impala/tests/test_impala.py @@ -3,17 +3,13 @@ import ray import ray.rllib.algorithms.impala as impala from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID -from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.metrics.learner_info import LEARNER_INFO, LEARNER_STATS_KEY from ray.rllib.utils.test_utils import ( check, check_compute_single_action, check_train_results, - framework_iterator, ) -tf1, tf, tfv = try_import_tf() - class TestIMPALA(unittest.TestCase): @classmethod @@ -40,29 +36,28 @@ def test_impala_compilation(self): ) num_iterations = 2 - for _ in framework_iterator(config): - for lstm in [False, True]: - config.num_aggregation_workers = 0 if not lstm else 1 - config.model["use_lstm"] = lstm - print( - "lstm={} aggregation-workers={}".format( - lstm, config.num_aggregation_workers - ) + for lstm in [False, True]: + config.num_aggregation_workers = 0 if not lstm else 1 + config.model["use_lstm"] = lstm + print( + "lstm={} aggregation-workers={}".format( + lstm, config.num_aggregation_workers ) - # Test with and w/o aggregation workers (this has nothing - # to do with LSTMs, though). - algo = config.build() - for i in range(num_iterations): - results = algo.train() - print(results) - check_train_results(results) + ) + # Test with and w/o aggregation workers (this has nothing + # to do with LSTMs, though). + algo = config.build() + for i in range(num_iterations): + results = algo.train() + print(results) + check_train_results(results) - check_compute_single_action( - algo, - include_state=lstm, - include_prev_action_reward=lstm, - ) - algo.stop() + check_compute_single_action( + algo, + include_state=lstm, + include_prev_action_reward=lstm, + ) + algo.stop() def test_impala_lr_schedule(self): # Test whether we correctly ignore the "lr" setting. @@ -87,32 +82,28 @@ def get_lr(result): "cur_lr" ] - for fw in framework_iterator(config): - algo = config.build() - policy = algo.get_policy() + algo = config.build() + policy = algo.get_policy() - try: - if fw == "tf": - check(policy.get_session().run(policy.cur_lr), 0.05) - else: - check(policy.cur_lr, 0.05) - for _ in range(1): - r1 = algo.train() - for _ in range(2): - r2 = algo.train() - for _ in range(2): - r3 = algo.train() - # Due to the asynch'ness of IMPALA, learner-stats metrics - # could be delayed by one iteration. Do 3 train() calls here - # and measure guaranteed decrease in lr between 1st and 3rd. 
- lr1 = get_lr(r1) - lr2 = get_lr(r2) - lr3 = get_lr(r3) - assert lr2 <= lr1, (lr1, lr2) - assert lr3 <= lr2, (lr2, lr3) - assert lr3 < lr1, (lr1, lr3) - finally: - algo.stop() + try: + check(policy.cur_lr, 0.05) + for _ in range(1): + r1 = algo.train() + for _ in range(2): + r2 = algo.train() + for _ in range(2): + r3 = algo.train() + # Due to the asynch'ness of IMPALA, learner-stats metrics + # could be delayed by one iteration. Do 3 train() calls here + # and measure guaranteed decrease in lr between 1st and 3rd. + lr1 = get_lr(r1) + lr2 = get_lr(r2) + lr3 = get_lr(r3) + assert lr2 <= lr1, (lr1, lr2) + assert lr3 <= lr2, (lr2, lr3) + assert lr3 < lr1, (lr1, lr3) + finally: + algo.stop() if __name__ == "__main__": diff --git a/rllib/algorithms/impala/tests/test_vtrace.py b/rllib/algorithms/impala/tests/test_vtrace.py index 6c9a9998b711..a767ed61ca10 100644 --- a/rllib/algorithms/impala/tests/test_vtrace.py +++ b/rllib/algorithms/impala/tests/test_vtrace.py @@ -24,13 +24,11 @@ import numpy as np import unittest -from ray.rllib.algorithms.impala import vtrace_tf as vtrace_tf from ray.rllib.algorithms.impala import vtrace_torch as vtrace_torch -from ray.rllib.utils.framework import try_import_tf, try_import_torch +from ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.numpy import softmax -from ray.rllib.utils.test_utils import check, framework_iterator +from ray.rllib.utils.test_utils import check -tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() @@ -124,41 +122,33 @@ def test_log_probs_from_logits_and_actions(self): num_actions = 3 batch_size = 4 - for fw, sess in framework_iterator(frameworks=("torch", "tf"), session=True): - vtrace = vtrace_tf if fw != "torch" else vtrace_torch - policy_logits = Box( - -1.0, 1.0, (seq_len, batch_size, num_actions), np.float32 - ).sample() - actions = np.random.randint( - 0, num_actions - 1, size=(seq_len, batch_size), dtype=np.int32 - ) + vtrace = vtrace_torch + policy_logits = Box( + -1.0, 1.0, (seq_len, batch_size, num_actions), np.float32 + ).sample() + actions = np.random.randint( + 0, num_actions - 1, size=(seq_len, batch_size), dtype=np.int32 + ) - if fw == "torch": - action_log_probs_tensor = vtrace.log_probs_from_logits_and_actions( - torch.from_numpy(policy_logits), torch.from_numpy(actions) - ) - else: - action_log_probs_tensor = vtrace.log_probs_from_logits_and_actions( - policy_logits, actions - ) - - # Ground Truth - # Using broadcasting to create a mask that indexes action logits - action_index_mask = actions[..., None] == np.arange(num_actions) - - def index_with_mask(array, mask): - return array[mask].reshape(*array.shape[:-1]) - - # Note: Normally log(softmax) is not a good idea because it's not - # numerically stable. However, in this test we have well-behaved - # values. - ground_truth_v = index_with_mask( - np.log(softmax(policy_logits)), action_index_mask - ) + action_log_probs_tensor = vtrace.log_probs_from_logits_and_actions( + torch.from_numpy(policy_logits), torch.from_numpy(actions) + ) + + # Ground Truth + # Using broadcasting to create a mask that indexes action logits + action_index_mask = actions[..., None] == np.arange(num_actions) - if sess: - action_log_probs_tensor = sess.run(action_log_probs_tensor) - check(action_log_probs_tensor, ground_truth_v) + def index_with_mask(array, mask): + return array[mask].reshape(*array.shape[:-1]) + + # Note: Normally log(softmax) is not a good idea because it's not + # numerically stable. However, in this test we have well-behaved + # values. 
+ ground_truth_v = index_with_mask( + np.log(softmax(policy_logits)), action_index_mask + ) + + check(action_log_probs_tensor, ground_truth_v) class VtraceTest(unittest.TestCase): @@ -188,15 +178,12 @@ def test_vtrace(self): "clip_pg_rho_threshold": 2.2, } - for fw, sess in framework_iterator(frameworks=("torch", "tf"), session=True): - vtrace = vtrace_tf if fw != "torch" else vtrace_torch - output = vtrace.from_importance_weights(**values) - if sess: - output = sess.run(output) + vtrace = vtrace_torch + output = vtrace.from_importance_weights(**values) - gt_vs, gt_pg_advantags = _ground_truth_vtrace_calculation(**values) - check(output.vs, gt_vs) - check(output.pg_advantages, gt_pg_advantags) + gt_vs, gt_pg_advantags = _ground_truth_vtrace_calculation(**values) + check(output.vs, gt_vs) + check(output.pg_advantages, gt_pg_advantags) def test_vtrace_from_logits(self): """Tests V-trace calculated from logits.""" @@ -225,172 +212,77 @@ def test_vtrace_from_logits(self): ) space_only_batch = Box(-1.0, 1.0, (batch_size,)) - for fw, sess in framework_iterator(frameworks=("torch", "tf"), session=True): - vtrace = vtrace_tf if fw != "torch" else vtrace_torch - - if fw == "tf": - # Intentionally leaving shapes unspecified to test if V-trace - # can deal with that. - inputs_ = { - # T, B, NUM_ACTIONS - "behaviour_policy_logits": tf1.placeholder( - dtype=tf.float32, shape=[None, None, None] - ), - # T, B, NUM_ACTIONS - "target_policy_logits": tf1.placeholder( - dtype=tf.float32, shape=[None, None, None] - ), - "actions": tf1.placeholder(dtype=tf.int32, shape=[None, None]), - "discounts": tf1.placeholder(dtype=tf.float32, shape=[None, None]), - "rewards": tf1.placeholder(dtype=tf.float32, shape=[None, None]), - "values": tf1.placeholder(dtype=tf.float32, shape=[None, None]), - "bootstrap_value": tf1.placeholder(dtype=tf.float32, shape=[None]), - } - else: - inputs_ = { - # T, B, NUM_ACTIONS - "behaviour_policy_logits": space.sample(), - # T, B, NUM_ACTIONS - "target_policy_logits": space.sample(), - "actions": action_space.sample(), - "discounts": space_w_time.sample(), - "rewards": space_w_time.sample(), - "values": space_w_time.sample(), - "bootstrap_value": space_only_batch.sample(), - } - from_logits_output = vtrace.from_logits( - clip_rho_threshold=clip_rho_threshold, - clip_pg_rho_threshold=clip_pg_rho_threshold, - **inputs_ - ) + inputs_ = { + # T, B, NUM_ACTIONS + "behaviour_policy_logits": space.sample(), + # T, B, NUM_ACTIONS + "target_policy_logits": space.sample(), + "actions": action_space.sample(), + "discounts": space_w_time.sample(), + "rewards": space_w_time.sample(), + "values": space_w_time.sample(), + "bootstrap_value": space_only_batch.sample(), + } + from_logits_output = vtrace_torch.from_logits( + clip_rho_threshold=clip_rho_threshold, + clip_pg_rho_threshold=clip_pg_rho_threshold, + **inputs_ + ) - if fw != "torch": - target_log_probs = vtrace.log_probs_from_logits_and_actions( - inputs_["target_policy_logits"], inputs_["actions"] - ) - behaviour_log_probs = vtrace.log_probs_from_logits_and_actions( - inputs_["behaviour_policy_logits"], inputs_["actions"] - ) - else: - target_log_probs = vtrace.log_probs_from_logits_and_actions( - torch.from_numpy(inputs_["target_policy_logits"]), - torch.from_numpy(inputs_["actions"]), - ) - behaviour_log_probs = vtrace.log_probs_from_logits_and_actions( - torch.from_numpy(inputs_["behaviour_policy_logits"]), - torch.from_numpy(inputs_["actions"]), - ) - log_rhos = target_log_probs - behaviour_log_probs - ground_truth = (log_rhos, 
behaviour_log_probs, target_log_probs) - - if sess: - values = { - "behaviour_policy_logits": space.sample(), - "target_policy_logits": space.sample(), - "actions": action_space.sample(), - "discounts": space_w_time.sample(), - "rewards": space_w_time.sample(), - "values": space_w_time.sample() / batch_size, - "bootstrap_value": space_only_batch.sample() + 1.0, - } - feed_dict = {inputs_[k]: v for k, v in values.items()} - from_logits_output = sess.run(from_logits_output, feed_dict=feed_dict) - log_rhos, behaviour_log_probs, target_log_probs = sess.run( - ground_truth, feed_dict=feed_dict - ) - - # Calculate V-trace using the ground truth logits. - from_iw = vtrace.from_importance_weights( - log_rhos=log_rhos, - discounts=values["discounts"], - rewards=values["rewards"], - values=values["values"], - bootstrap_value=values["bootstrap_value"], - clip_rho_threshold=clip_rho_threshold, - clip_pg_rho_threshold=clip_pg_rho_threshold, - ) - from_iw = sess.run(from_iw) - else: - from_iw = vtrace.from_importance_weights( - log_rhos=log_rhos, - discounts=inputs_["discounts"], - rewards=inputs_["rewards"], - values=inputs_["values"], - bootstrap_value=inputs_["bootstrap_value"], - clip_rho_threshold=clip_rho_threshold, - clip_pg_rho_threshold=clip_pg_rho_threshold, - ) - - check(from_iw.vs, from_logits_output.vs) - check(from_iw.pg_advantages, from_logits_output.pg_advantages) - check(behaviour_log_probs, from_logits_output.behaviour_action_log_probs) - check(target_log_probs, from_logits_output.target_action_log_probs) - check(log_rhos, from_logits_output.log_rhos) + target_log_probs = vtrace_torch.log_probs_from_logits_and_actions( + torch.from_numpy(inputs_["target_policy_logits"]), + torch.from_numpy(inputs_["actions"]), + ) + behaviour_log_probs = vtrace_torch.log_probs_from_logits_and_actions( + torch.from_numpy(inputs_["behaviour_policy_logits"]), + torch.from_numpy(inputs_["actions"]), + ) + log_rhos = target_log_probs - behaviour_log_probs + + from_iw = vtrace_torch.from_importance_weights( + log_rhos=log_rhos, + discounts=inputs_["discounts"], + rewards=inputs_["rewards"], + values=inputs_["values"], + bootstrap_value=inputs_["bootstrap_value"], + clip_rho_threshold=clip_rho_threshold, + clip_pg_rho_threshold=clip_pg_rho_threshold, + ) + + check(from_iw.vs, from_logits_output.vs) + check(from_iw.pg_advantages, from_logits_output.pg_advantages) + check(behaviour_log_probs, from_logits_output.behaviour_action_log_probs) + check(target_log_probs, from_logits_output.target_action_log_probs) + check(log_rhos, from_logits_output.log_rhos) def test_higher_rank_inputs_for_importance_weights(self): """Checks support for additional dimensions in inputs.""" - for fw in framework_iterator(frameworks=("torch", "tf"), session=True): - vtrace = vtrace_tf if fw != "torch" else vtrace_torch - if fw == "tf": - inputs_ = { - "log_rhos": tf1.placeholder( - dtype=tf.float32, shape=[None, None, 1] - ), - "discounts": tf1.placeholder( - dtype=tf.float32, shape=[None, None, 1] - ), - "rewards": tf1.placeholder( - dtype=tf.float32, shape=[None, None, 42] - ), - "values": tf1.placeholder(dtype=tf.float32, shape=[None, None, 42]), - "bootstrap_value": tf1.placeholder( - dtype=tf.float32, shape=[None, 42] - ), - } - else: - inputs_ = { - "log_rhos": Box(-1.0, 1.0, (8, 10, 1)).sample(), - "discounts": Box(-1.0, 1.0, (8, 10, 1)).sample(), - "rewards": Box(-1.0, 1.0, (8, 10, 42)).sample(), - "values": Box(-1.0, 1.0, (8, 10, 42)).sample(), - "bootstrap_value": Box(-1.0, 1.0, (10, 42)).sample(), - } - output = 
vtrace.from_importance_weights(**inputs_) - check(int(output.vs.shape[-1]), 42) + inputs_ = { + "log_rhos": Box(-1.0, 1.0, (8, 10, 1)).sample(), + "discounts": Box(-1.0, 1.0, (8, 10, 1)).sample(), + "rewards": Box(-1.0, 1.0, (8, 10, 42)).sample(), + "values": Box(-1.0, 1.0, (8, 10, 42)).sample(), + "bootstrap_value": Box(-1.0, 1.0, (10, 42)).sample(), + } + output = vtrace_torch.from_importance_weights(**inputs_) + check(int(output.vs.shape[-1]), 42) def test_inconsistent_rank_inputs_for_importance_weights(self): """Test one of many possible errors in shape of inputs.""" - for fw in framework_iterator(frameworks=("torch", "tf"), session=True): - vtrace = vtrace_tf if fw != "torch" else vtrace_torch - if fw == "tf": - inputs_ = { - "log_rhos": tf1.placeholder( - dtype=tf.float32, shape=[None, None, 1] - ), - "discounts": tf1.placeholder( - dtype=tf.float32, shape=[None, None, 1] - ), - "rewards": tf1.placeholder( - dtype=tf.float32, shape=[None, None, 42] - ), - "values": tf1.placeholder(dtype=tf.float32, shape=[None, None, 42]), - # Should be [None, 42]. - "bootstrap_value": tf1.placeholder(dtype=tf.float32, shape=[None]), - } - else: - inputs_ = { - "log_rhos": Box(-1.0, 1.0, (7, 15, 1)).sample(), - "discounts": Box(-1.0, 1.0, (7, 15, 1)).sample(), - "rewards": Box(-1.0, 1.0, (7, 15, 42)).sample(), - "values": Box(-1.0, 1.0, (7, 15, 42)).sample(), - # Should be [15, 42]. - "bootstrap_value": Box(-1.0, 1.0, (7,)).sample(), - } - with self.assertRaisesRegex( - (ValueError, AssertionError), "must have rank 2" - ): - vtrace.from_importance_weights(**inputs_) + inputs_ = { + "log_rhos": Box(-1.0, 1.0, (7, 15, 1)).sample(), + "discounts": Box(-1.0, 1.0, (7, 15, 1)).sample(), + "rewards": Box(-1.0, 1.0, (7, 15, 42)).sample(), + "values": Box(-1.0, 1.0, (7, 15, 42)).sample(), + # Should be [15, 42]. + "bootstrap_value": Box(-1.0, 1.0, (7,)).sample(), + } + with self.assertRaisesRegex((ValueError, AssertionError), "must have rank 2"): + vtrace_torch.from_importance_weights(**inputs_) if __name__ == "__main__": - tf.test.main() + import pytest + import sys + + sys.exit(pytest.main(["-v", __file__])) diff --git a/rllib/algorithms/marwil/tests/test_marwil.py b/rllib/algorithms/marwil/tests/test_marwil.py index 4fb3fb1da191..703674d7cd60 100644 --- a/rllib/algorithms/marwil/tests/test_marwil.py +++ b/rllib/algorithms/marwil/tests/test_marwil.py @@ -71,8 +71,6 @@ def test_marwil_compilation_and_learning_from_offline_file(self): num_iterations = 350 min_reward = 100.0 - # Test for all frameworks. - algo = config.build() learnt = False for i in range(num_iterations): diff --git a/rllib/algorithms/marwil/tests/test_marwil_old_api_stack.py b/rllib/algorithms/marwil/tests/test_marwil_old_api_stack.py index b43c5b0abd5f..bffcbe06db5f 100644 --- a/rllib/algorithms/marwil/tests/test_marwil_old_api_stack.py +++ b/rllib/algorithms/marwil/tests/test_marwil_old_api_stack.py @@ -5,7 +5,6 @@ import ray import ray.rllib.algorithms.marwil as marwil -from ray.rllib.algorithms.marwil.marwil_tf_policy import MARWILTF2Policy from ray.rllib.algorithms.marwil.marwil_torch_policy import MARWILTorchPolicy from ray.rllib.evaluation.postprocessing import compute_advantages from ray.rllib.offline import JsonReader @@ -19,7 +18,6 @@ check, check_compute_single_action, check_train_results, - framework_iterator, ) tf1, tf, tfv = try_import_tf() @@ -67,40 +65,35 @@ def test_marwil_compilation_and_learning_from_offline_file(self): num_iterations = 350 min_reward = 100.0 - # Test for all frameworks. 
- for _ in framework_iterator(config, frameworks=("torch", "tf")): - algo = config.build() - learnt = False - for i in range(num_iterations): - results = algo.train() - check_train_results(results) - print(results) - - eval_results = results.get(EVALUATION_RESULTS) - if eval_results: - print( - "iter={} R={} ".format( - i, eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN] - ) + algo = config.build() + learnt = False + for i in range(num_iterations): + results = algo.train() + check_train_results(results) + print(results) + + eval_results = results.get(EVALUATION_RESULTS) + if eval_results: + print( + "iter={} R={} ".format( + i, eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN] ) - # Learn until some reward is reached on an actual live env. - if ( - eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN] - > min_reward - ): - print("learnt!") - learnt = True - break - - if not learnt: - raise ValueError( - "MARWILAlgorithm did not reach {} reward from expert " - "offline data!".format(min_reward) ) + # Learn until some reward is reached on an actual live env. + if eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN] > min_reward: + print("learnt!") + learnt = True + break + + if not learnt: + raise ValueError( + "MARWILAlgorithm did not reach {} reward from expert " + "offline data!".format(min_reward) + ) - check_compute_single_action(algo, include_prev_action_reward=True) + check_compute_single_action(algo, include_prev_action_reward=True) - algo.stop() + algo.stop() def test_marwil_cont_actions_from_offline_file(self): """Test whether MARWIL runs with cont. actions. @@ -136,12 +129,10 @@ def test_marwil_cont_actions_from_offline_file(self): num_iterations = 3 - # Test for all frameworks. - for _ in framework_iterator(config, frameworks=("torch", "tf")): - algo = config.build(env="Pendulum-v1") - for i in range(num_iterations): - print(algo.train()) - algo.stop() + algo = config.build(env="Pendulum-v1") + for i in range(num_iterations): + print(algo.train()) + algo.stop() def test_marwil_loss_function(self): """ @@ -161,84 +152,48 @@ def test_marwil_loss_function(self): .offline_data(input_=[data_file]) ) # Learn from offline data. - for fw, sess in framework_iterator(config, session=True): - reader = JsonReader(inputs=[data_file]) - batch = reader.next() - - algo = config.build(env="CartPole-v1") - policy = algo.get_policy() - model = policy.model - - # Calculate our own expected values (to then compare against the - # agent's loss output). - cummulative_rewards = compute_advantages( - batch, 0.0, config.gamma, 1.0, False, False - )["advantages"] - if fw == "torch": - cummulative_rewards = torch.tensor(cummulative_rewards) - if fw != "tf": - batch = policy._lazy_tensor_dict(batch) - model_out, _ = model(batch) - vf_estimates = model.value_function() - if fw == "tf": - model_out, vf_estimates = policy.get_session().run( - [model_out, vf_estimates] - ) - adv = cummulative_rewards - vf_estimates - if fw == "torch": - adv = adv.detach().cpu().numpy() - adv_squared = np.mean(np.square(adv)) - c_2 = 100.0 + 1e-8 * (adv_squared - 100.0) - c = np.sqrt(c_2) - exp_advs = np.exp(config.beta * (adv / c)) - dist = policy.dist_class(model_out, model) - logp = dist.logp(batch["actions"]) - if fw == "torch": - logp = logp.detach().cpu().numpy() - elif fw == "tf": - logp = sess.run(logp) - # Calculate all expected loss components. 
- expected_vf_loss = 0.5 * adv_squared - expected_pol_loss = -1.0 * np.mean(exp_advs * logp) - expected_loss = expected_pol_loss + config.vf_coeff * expected_vf_loss - - # Calculate the algorithm's loss (to check against our own - # calculation above). - batch.set_get_interceptor(None) - postprocessed_batch = policy.postprocess_trajectory(batch) - loss_func = ( - MARWILTF2Policy.loss if fw != "torch" else MARWILTorchPolicy.loss - ) - if fw != "tf": - policy._lazy_tensor_dict(postprocessed_batch) - loss_out = loss_func( - policy, model, policy.dist_class, postprocessed_batch - ) - else: - loss_out, v_loss, p_loss = policy.get_session().run( - # policy._loss is create by TFPolicy, and is basically the - # loss tensor of the static graph. - [ - policy._loss, - policy._marwil_loss.v_loss, - policy._marwil_loss.p_loss, - ], - feed_dict=policy._get_loss_inputs_dict( - postprocessed_batch, shuffle=False - ), - ) - - # Check all components. - if fw == "torch": - check(policy.v_loss, expected_vf_loss, decimals=4) - check(policy.p_loss, expected_pol_loss, decimals=4) - elif fw == "tf": - check(v_loss, expected_vf_loss, decimals=4) - check(p_loss, expected_pol_loss, decimals=4) - else: - check(policy._marwil_loss.v_loss, expected_vf_loss, decimals=4) - check(policy._marwil_loss.p_loss, expected_pol_loss, decimals=4) - check(loss_out, expected_loss, decimals=3) + reader = JsonReader(inputs=[data_file]) + batch = reader.next() + + algo = config.build(env="CartPole-v1") + policy = algo.get_policy() + model = policy.model + + # Calculate our own expected values (to then compare against the + # agent's loss output). + cummulative_rewards = compute_advantages( + batch, 0.0, config.gamma, 1.0, False, False + )["advantages"] + cummulative_rewards = torch.tensor(cummulative_rewards) + batch = policy._lazy_tensor_dict(batch) + model_out, _ = model(batch) + vf_estimates = model.value_function() + adv = cummulative_rewards - vf_estimates + adv = adv.detach().cpu().numpy() + adv_squared = np.mean(np.square(adv)) + c_2 = 100.0 + 1e-8 * (adv_squared - 100.0) + c = np.sqrt(c_2) + exp_advs = np.exp(config.beta * (adv / c)) + dist = policy.dist_class(model_out, model) + logp = dist.logp(batch["actions"]) + logp = logp.detach().cpu().numpy() + # Calculate all expected loss components. + expected_vf_loss = 0.5 * adv_squared + expected_pol_loss = -1.0 * np.mean(exp_advs * logp) + expected_loss = expected_pol_loss + config.vf_coeff * expected_vf_loss + + # Calculate the algorithm's loss (to check against our own + # calculation above). + batch.set_get_interceptor(None) + postprocessed_batch = policy.postprocess_trajectory(batch) + loss_func = MARWILTorchPolicy.loss + policy._lazy_tensor_dict(postprocessed_batch) + loss_out = loss_func(policy, model, policy.dist_class, postprocessed_batch) + + # Check all components. 
+ check(policy.v_loss, expected_vf_loss, decimals=4) + check(policy.p_loss, expected_pol_loss, decimals=4) + check(loss_out, expected_loss, decimals=3) if __name__ == "__main__": diff --git a/rllib/algorithms/ppo/tests/test_ppo.py b/rllib/algorithms/ppo/tests/test_ppo.py index 61c02521d24f..c99bc9c8feac 100644 --- a/rllib/algorithms/ppo/tests/test_ppo.py +++ b/rllib/algorithms/ppo/tests/test_ppo.py @@ -5,14 +5,12 @@ import ray from ray.rllib.algorithms.callbacks import DefaultCallbacks import ray.rllib.algorithms.ppo as ppo -from ray.rllib.algorithms.ppo.ppo_tf_policy import PPOTF2Policy from ray.rllib.algorithms.ppo.ppo_torch_policy import PPOTorchPolicy from ray.rllib.core.columns import Columns from ray.rllib.evaluation.postprocessing import ( compute_gae_for_sample_batch, Postprocessing, ) -from ray.rllib.models.tf.tf_action_dist import Categorical from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 from ray.rllib.models.torch.torch_action_dist import TorchCategorical from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID, SampleBatch @@ -23,7 +21,6 @@ check_compute_single_action, check_off_policyness, check_train_results, - framework_iterator, check_inference_w_connectors, ) @@ -159,39 +156,34 @@ def test_ppo_compilation_w_connectors(self): num_iterations = 2 - for fw in framework_iterator(config): - for env in ["FrozenLake-v1", "ALE/MsPacman-v5"]: - print("Env={}".format(env)) - for lstm in [False, True]: - print("LSTM={}".format(lstm)) - config.training( - model=dict( - use_lstm=lstm, - lstm_use_prev_action=lstm, - lstm_use_prev_reward=lstm, - ) + for env in ["FrozenLake-v1", "ALE/MsPacman-v5"]: + print("Env={}".format(env)) + for lstm in [False, True]: + print("LSTM={}".format(lstm)) + config.training( + model=dict( + use_lstm=lstm, + lstm_use_prev_action=lstm, + lstm_use_prev_reward=lstm, ) + ) - algo = config.build(env=env) - policy = algo.get_policy() - entropy_coeff = algo.get_policy().entropy_coeff - lr = policy.cur_lr - if fw == "tf": - entropy_coeff, lr = policy.get_session().run( - [entropy_coeff, lr] - ) - check(entropy_coeff, 0.1) - check(lr, config.lr) + algo = config.build(env=env) + policy = algo.get_policy() + entropy_coeff = algo.get_policy().entropy_coeff + lr = policy.cur_lr + check(entropy_coeff, 0.1) + check(lr, config.lr) - for i in range(num_iterations): - results = algo.train() - check_train_results(results) - print(results) + for i in range(num_iterations): + results = algo.train() + check_train_results(results) + print(results) - algo.evaluate() + algo.evaluate() - check_inference_w_connectors(policy, env_name=env) - algo.stop() + check_inference_w_connectors(policy, env_name=env) + algo.stop() def test_ppo_compilation_and_schedule_mixins(self): """Test whether PPO can be built with all frameworks.""" @@ -225,45 +217,40 @@ def test_ppo_compilation_and_schedule_mixins(self): num_iterations = 2 - for fw in framework_iterator(config): - for env in ["FrozenLake-v1", "ALE/MsPacman-v5"]: - print("Env={}".format(env)) - for lstm in [False, True]: - print("LSTM={}".format(lstm)) - config.training( - model=dict( - use_lstm=lstm, - lstm_use_prev_action=lstm, - lstm_use_prev_reward=lstm, - ) + for env in ["FrozenLake-v1", "ALE/MsPacman-v5"]: + print("Env={}".format(env)) + for lstm in [False, True]: + print("LSTM={}".format(lstm)) + config.training( + model=dict( + use_lstm=lstm, + lstm_use_prev_action=lstm, + lstm_use_prev_reward=lstm, ) + ) - algo = config.build(env=env) - policy = algo.get_policy() - entropy_coeff = algo.get_policy().entropy_coeff - 
lr = policy.cur_lr - if fw == "tf": - entropy_coeff, lr = policy.get_session().run( - [entropy_coeff, lr] - ) - check(entropy_coeff, 0.1) - check(lr, config.lr) - - for i in range(num_iterations): - results = algo.train() - print(results) - check_train_results(results) - # 2 sgd iters per update, 2 minibatches per trainbatch -> 4x - # avg(0.0, 1.0, 2.0, 3.0) -> 1.5 - off_policy_ness = check_off_policyness( - results, lower_limit=1.5, upper_limit=1.5 - ) - print(f"off-policy'ness={off_policy_ness}") - - check_compute_single_action( - algo, include_prev_action_reward=True, include_state=lstm + algo = config.build(env=env) + policy = algo.get_policy() + entropy_coeff = algo.get_policy().entropy_coeff + lr = policy.cur_lr + check(entropy_coeff, 0.1) + check(lr, config.lr) + + for i in range(num_iterations): + results = algo.train() + print(results) + check_train_results(results) + # 2 sgd iters per update, 2 minibatches per trainbatch -> 4x + # avg(0.0, 1.0, 2.0, 3.0) -> 1.5 + off_policy_ness = check_off_policyness( + results, lower_limit=1.5, upper_limit=1.5 ) - algo.stop() + print(f"off-policy'ness={off_policy_ness}") + + check_compute_single_action( + algo, include_prev_action_reward=True, include_state=lstm + ) + algo.stop() def test_ppo_exploration_setup(self): """Tests, whether PPO runs with different exploration setups.""" @@ -280,34 +267,32 @@ def test_ppo_exploration_setup(self): ) obs = np.array(0) - # Test against all frameworks. - for fw, sess in framework_iterator(config, session=True): - # Default Agent should be setup with StochasticSampling. - algo = config.build() - # explore=False, always expect the same (deterministic) action. - a_ = algo.compute_single_action( - obs, explore=False, prev_action=np.array(2), prev_reward=np.array(1.0) - ) + # Default Agent should be setup with StochasticSampling. + algo = config.build() + # explore=False, always expect the same (deterministic) action. + a_ = algo.compute_single_action( + obs, explore=False, prev_action=np.array(2), prev_reward=np.array(1.0) + ) - for _ in range(50): - a = algo.compute_single_action( - obs, - explore=False, - prev_action=np.array(2), - prev_reward=np.array(1.0), - ) - check(a, a_) - - # With explore=True (default), expect stochastic actions. - actions = [] - for _ in range(300): - actions.append( - algo.compute_single_action( - obs, prev_action=np.array(2), prev_reward=np.array(1.0) - ) + for _ in range(50): + a = algo.compute_single_action( + obs, + explore=False, + prev_action=np.array(2), + prev_reward=np.array(1.0), + ) + check(a, a_) + + # With explore=True (default), expect stochastic actions. + actions = [] + for _ in range(300): + actions.append( + algo.compute_single_action( + obs, prev_action=np.array(2), prev_reward=np.array(1.0) ) - check(np.mean(actions), 1.5, atol=0.2) - algo.stop() + ) + check(np.mean(actions), 1.5, atol=0.2) + algo.stop() def test_ppo_free_log_std(self): """Tests the free log std option works. @@ -334,45 +319,31 @@ def test_ppo_free_log_std(self): ) ) - for fw, sess in framework_iterator(config, session=True): - algo = config.build() - policy = algo.get_policy() - - # Check the free log std var is created. - if fw == "torch": - matching = [ - v for (n, v) in policy.model.named_parameters() if "log_std" in n - ] - else: - matching = [ - v for v in policy.model.trainable_variables() if "log_std" in str(v) - ] - assert len(matching) == 1, matching - log_std_var = matching[0] - - # linter yells at you if you don't pass in the parameters. 
- # reason: https://docs.python-guide.org/writing/gotchas/ - # #late-binding-closures - def get_value(fw=fw, policy=policy, log_std_var=log_std_var): - if fw == "tf": - return policy.get_session().run(log_std_var)[0] - elif fw == "torch": - return log_std_var.detach().cpu().numpy()[0] - else: - return log_std_var.numpy()[0] - - # Check the variable is initially zero. - init_std = get_value() - assert init_std == 0.0, init_std - batch = compute_gae_for_sample_batch(policy, CARTPOLE_FAKE_BATCH.copy()) - if fw == "torch": - batch = policy._lazy_tensor_dict(batch) - policy.learn_on_batch(batch) - - # Check the variable is updated. - post_std = get_value() - assert post_std != 0.0, post_std - algo.stop() + algo = config.build() + policy = algo.get_policy() + + # Check the free log std var is created. + matching = [v for (n, v) in policy.model.named_parameters() if "log_std" in n] + assert len(matching) == 1, matching + log_std_var = matching[0] + + # linter yells at you if you don't pass in the parameters. + # reason: https://docs.python-guide.org/writing/gotchas/ + # #late-binding-closures + def get_value(fw="torch", policy=policy, log_std_var=log_std_var): + return log_std_var.detach().cpu().numpy()[0] + + # Check the variable is initially zero. + init_std = get_value() + assert init_std == 0.0, init_std + batch = compute_gae_for_sample_batch(policy, CARTPOLE_FAKE_BATCH.copy()) + batch = policy._lazy_tensor_dict(batch) + policy.learn_on_batch(batch) + + # Check the variable is updated. + post_std = get_value() + assert post_std != 0.0, post_std + algo.stop() def test_ppo_loss_function(self): """Tests the PPO loss function math. @@ -397,108 +368,62 @@ def test_ppo_loss_function(self): ) ) - for fw, sess in framework_iterator(config, session=True): - algo = config.build() - policy = algo.get_policy() - - # Check no free log std var by default. - if fw == "torch": - matching = [ - v for (n, v) in policy.model.named_parameters() if "log_std" in n - ] - else: - matching = [ - v for v in policy.model.trainable_variables() if "log_std" in str(v) - ] - assert len(matching) == 0, matching - - # Post-process (calculate simple (non-GAE) advantages) and attach - # to train_batch dict. - # A = [0.99^2 * 0.5 + 0.99 * -1.0 + 1.0, 0.99 * 0.5 - 1.0, 0.5] = - # [0.50005, -0.505, 0.5] - train_batch = compute_gae_for_sample_batch( - policy, CARTPOLE_FAKE_BATCH.copy() - ) - if fw == "torch": - train_batch = policy._lazy_tensor_dict(train_batch) - - # Check Advantage values. - check(train_batch[Postprocessing.VALUE_TARGETS], [0.50005, -0.505, 0.5]) - - # Calculate actual PPO loss. - if fw == "tf2": - PPOTF2Policy.loss(policy, policy.model, Categorical, train_batch) - elif fw == "torch": - PPOTorchPolicy.loss( - policy, policy.model, policy.dist_class, train_batch - ) + algo = config.build() + policy = algo.get_policy() - vars = ( - policy.model.variables() - if fw != "torch" - else list(policy.model.parameters()) - ) - if fw == "tf": - vars = policy.get_session().run(vars) - expected_shared_out = fc( - train_batch[Columns.OBS], - vars[0 if fw != "torch" else 2], - vars[1 if fw != "torch" else 3], - framework=fw, - ) - expected_logits = fc( - expected_shared_out, - vars[2 if fw != "torch" else 0], - vars[3 if fw != "torch" else 1], - framework=fw, - ) - expected_value_outs = fc( - expected_shared_out, vars[4], vars[5], framework=fw - ) + # Check no free log std var by default. 
+        matching = [v for (n, v) in policy.model.named_parameters() if "log_std" in n]
+        assert len(matching) == 0, matching
-
+        # Post-process (calculate simple (non-GAE) advantages) and attach
+        # to train_batch dict.
+        # A = [0.99^2 * 0.5 + 0.99 * -1.0 + 1.0, 0.99 * 0.5 - 1.0, 0.5] =
+        # [0.50005, -0.505, 0.5]
+        train_batch = compute_gae_for_sample_batch(policy, CARTPOLE_FAKE_BATCH.copy())
+        train_batch = policy._lazy_tensor_dict(train_batch)
+
+        # Check Advantage values.
+        check(train_batch[Postprocessing.VALUE_TARGETS], [0.50005, -0.505, 0.5])
+
+        # Calculate actual PPO loss.
+ PPOTorchPolicy.loss(policy, policy.model, policy.dist_class, train_batch) + + vars = list(policy.model.parameters()) + expected_shared_out = fc( + train_batch[Columns.OBS], + vars[2], + vars[3], + framework="torch", + ) + expected_logits = fc( + expected_shared_out, + vars[0], + vars[1], + framework="torch", + ) + expected_value_outs = fc( + expected_shared_out, vars[4], vars[5], framework="torch" + ) + + kl, entropy, pg_loss, vf_loss, overall_loss = self._ppo_loss_helper( + policy, + policy.model, + TorchCategorical, + train_batch, + expected_logits, + expected_value_outs, + sess=None, + ) + check(policy.model.tower_stats["mean_kl_loss"], kl) + check(policy.model.tower_stats["mean_entropy"], entropy) + check(policy.model.tower_stats["mean_policy_loss"], np.mean(-pg_loss)) + check( + policy.model.tower_stats["mean_vf_loss"], + np.mean(vf_loss), + decimals=4, + ) + check(policy.model.tower_stats["total_loss"], overall_loss, decimals=4) + algo.stop() def _ppo_loss_helper( self, policy, model, dist_class, train_batch, logits, vf_outs, sess=None diff --git a/rllib/algorithms/ppo/tests/test_ppo_learner.py b/rllib/algorithms/ppo/tests/test_ppo_learner.py index 28242452b1ed..809875fcfb49 100644 --- a/rllib/algorithms/ppo/tests/test_ppo_learner.py +++ b/rllib/algorithms/ppo/tests/test_ppo_learner.py @@ -3,7 +3,6 @@ import gymnasium as gym import numpy as np -import tensorflow as tf import torch import tree # pip install dm-tree @@ -15,7 +14,7 @@ from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.utils.metrics.learner_info import LEARNER_INFO -from ray.rllib.utils.test_utils import check, framework_iterator +from ray.rllib.utils.test_utils import check from ray.tune.registry import register_env @@ -69,34 +68,28 @@ def test_loss(self): ) ) - for fw in framework_iterator(config, ("tf2", "torch")): - algo = config.build() - policy = algo.get_policy() + algo = config.build() + policy = algo.get_policy() - train_batch = SampleBatch(FAKE_BATCH) - train_batch = compute_gae_for_sample_batch(policy, train_batch) + train_batch = SampleBatch(FAKE_BATCH) + train_batch = compute_gae_for_sample_batch(policy, train_batch) - # convert to proper tensors with tree.map_structure - if fw == "torch": - train_batch = tree.map_structure( - lambda x: torch.as_tensor(x).float(), train_batch - ) - else: - train_batch = tree.map_structure( - lambda x: tf.convert_to_tensor(x), train_batch - ) + # Convert to proper tensors with tree.map_structure. + train_batch = tree.map_structure( + lambda x: torch.as_tensor(x).float(), train_batch + ) - algo_config = config.copy(copy_frozen=False) - algo_config.validate() - algo_config.freeze() + algo_config = config.copy(copy_frozen=False) + algo_config.validate() + algo_config.freeze() - learner_group = algo_config.build_learner_group(env=self.ENV) + learner_group = algo_config.build_learner_group(env=self.ENV) - # Load the algo weights onto the learner_group. - learner_group.set_weights(algo.get_weights()) - learner_group.update_from_batch(batch=train_batch.as_multi_agent()) + # Load the algo weights onto the learner_group. 
+ learner_group.set_weights(algo.get_weights()) + learner_group.update_from_batch(batch=train_batch.as_multi_agent()) - algo.stop() + algo.stop() def test_save_to_path_and_restore_from_path(self): """Tests saving and loading the state of the PPO Learner Group.""" @@ -117,19 +110,18 @@ def test_save_to_path_and_restore_from_path(self): ) ) - for _ in framework_iterator(config, ("tf2", "torch")): - algo_config = config.copy(copy_frozen=False) - algo_config.validate() - algo_config.freeze() - learner_group1 = algo_config.build_learner_group(env=self.ENV) - learner_group2 = algo_config.build_learner_group(env=self.ENV) - with tempfile.TemporaryDirectory() as tmpdir: - learner_group1.save_to_path(tmpdir) - learner_group2.restore_from_path(tmpdir) - # Remove functions from state b/c they are not comparable via `check`. - s1 = learner_group1.get_state() - s2 = learner_group2.get_state() - check(s1, s2) + algo_config = config.copy(copy_frozen=False) + algo_config.validate() + algo_config.freeze() + learner_group1 = algo_config.build_learner_group(env=self.ENV) + learner_group2 = algo_config.build_learner_group(env=self.ENV) + with tempfile.TemporaryDirectory() as tmpdir: + learner_group1.save_to_path(tmpdir) + learner_group2.restore_from_path(tmpdir) + # Remove functions from state b/c they are not comparable via `check`. + s1 = learner_group1.get_state() + s2 = learner_group2.get_state() + check(s1, s2) def test_kl_coeff_changes(self): # Simple environment with 4 independent cartpole entities @@ -165,29 +157,28 @@ def test_kl_coeff_changes(self): ) ) - for _ in framework_iterator(config, ("torch", "tf2")): - algo = config.build() - # Call train while results aren't returned because this is - # a asynchronous Algorithm and results are returned asynchronously. - curr_kl_coeff_1 = None - curr_kl_coeff_2 = None - while not curr_kl_coeff_1 or not curr_kl_coeff_2: - results = algo.train() - - # Attempt to get the current KL coefficient from the learner. - # Iterate until we have found both coefficients at least once. - if results and "info" in results and LEARNER_INFO in results["info"]: - if "p0" in results["info"][LEARNER_INFO]: - curr_kl_coeff_1 = results["info"][LEARNER_INFO]["p0"][ - LEARNER_RESULTS_CURR_KL_COEFF_KEY - ] - if "p1" in results["info"][LEARNER_INFO]: - curr_kl_coeff_2 = results["info"][LEARNER_INFO]["p1"][ - LEARNER_RESULTS_CURR_KL_COEFF_KEY - ] - - self.assertNotEqual(curr_kl_coeff_1, initial_kl_coeff) - self.assertNotEqual(curr_kl_coeff_2, initial_kl_coeff) + algo = config.build() + # Call train while results aren't returned because this is + # a asynchronous Algorithm and results are returned asynchronously. + curr_kl_coeff_1 = None + curr_kl_coeff_2 = None + while not curr_kl_coeff_1 or not curr_kl_coeff_2: + results = algo.train() + + # Attempt to get the current KL coefficient from the learner. + # Iterate until we have found both coefficients at least once. 
+ if results and "info" in results and LEARNER_INFO in results["info"]: + if "p0" in results["info"][LEARNER_INFO]: + curr_kl_coeff_1 = results["info"][LEARNER_INFO]["p0"][ + LEARNER_RESULTS_CURR_KL_COEFF_KEY + ] + if "p1" in results["info"][LEARNER_INFO]: + curr_kl_coeff_2 = results["info"][LEARNER_INFO]["p1"][ + LEARNER_RESULTS_CURR_KL_COEFF_KEY + ] + + self.assertNotEqual(curr_kl_coeff_1, initial_kl_coeff) + self.assertNotEqual(curr_kl_coeff_2, initial_kl_coeff) if __name__ == "__main__": diff --git a/rllib/algorithms/sac/tests/test_rnnsac.py b/rllib/algorithms/sac/tests/test_rnnsac.py index e0fec4b5d7ee..704be42d1773 100644 --- a/rllib/algorithms/sac/tests/test_rnnsac.py +++ b/rllib/algorithms/sac/tests/test_rnnsac.py @@ -2,10 +2,9 @@ import ray from ray.rllib.algorithms import sac -from ray.rllib.utils.framework import try_import_tf, try_import_torch -from ray.rllib.utils.test_utils import check_compute_single_action, framework_iterator +from ray.rllib.utils.framework import try_import_torch +from ray.rllib.utils.test_utils import check_compute_single_action -tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() @@ -53,17 +52,16 @@ def test_rnnsac_compilation(self): num_iterations = 1 # Test building an RNNSAC agent in all frameworks. - for _ in framework_iterator(config, frameworks="torch"): - algo = config.build() - for i in range(num_iterations): - results = algo.train() - print(results) + algo = config.build() + for i in range(num_iterations): + results = algo.train() + print(results) - check_compute_single_action( - algo, - include_state=True, - include_prev_action_reward=True, - ) + check_compute_single_action( + algo, + include_state=True, + include_prev_action_reward=True, + ) if __name__ == "__main__": diff --git a/rllib/algorithms/sac/tests/test_sac.py b/rllib/algorithms/sac/tests/test_sac.py index 1425fb6d93e7..ec9b7a4415a9 100644 --- a/rllib/algorithms/sac/tests/test_sac.py +++ b/rllib/algorithms/sac/tests/test_sac.py @@ -22,7 +22,6 @@ check, check_compute_single_action, check_train_results, - framework_iterator, ) from ray.rllib.utils.torch_utils import convert_to_torch_tensor from ray import tune @@ -119,47 +118,44 @@ def test_sac_compilation(self): ), ) - for fw in framework_iterator(config): - # Test for different env types (discrete w/ and w/o image, + cont). - for env in [ - "random_dict_env", - "random_tuple_env", - "CartPole-v1", - ]: - print("Env={}".format(env)) - config.environment(env) - # Test making the Q-model a custom one for CartPole, otherwise, - # use the default model. - config.q_model_config["custom_model"] = ( - "batch_norm{}".format("_torch" if fw == "torch" else "") - if env == "CartPole-v1" - else None - ) - algo = config.build() - for i in range(num_iterations): - results = algo.train() - check_train_results(results) - print(results) - check_compute_single_action(algo) - - # Test, whether the replay buffer is saved along with - # a checkpoint (no point in doing it for all frameworks since - # this is framework agnostic). - if fw == "tf" and env == "CartPole-v1": - checkpoint = algo.save() - new_algo = config.build() - new_algo.restore(checkpoint) - # Get some data from the buffer and compare. - data = algo.local_replay_buffer.replay_buffers[ - "default_policy" - ]._storage[: 42 + 42] - new_data = new_algo.local_replay_buffer.replay_buffers[ - "default_policy" - ]._storage[: 42 + 42] - check(data, new_data) - new_algo.stop() - - algo.stop() + # Test for different env types (discrete w/ and w/o image, + cont). 
+ for env in [ + "random_dict_env", + "random_tuple_env", + "CartPole-v1", + ]: + print("Env={}".format(env)) + config.environment(env) + # Test making the Q-model a custom one for CartPole, otherwise, + # use the default model. + config.q_model_config["custom_model"] = ( + "batch_norm{}".format("_torch") if env == "CartPole-v1" else None + ) + algo = config.build() + for i in range(num_iterations): + results = algo.train() + check_train_results(results) + print(results) + check_compute_single_action(algo) + + # Test, whether the replay buffer is saved along with + # a checkpoint (no point in doing it for all frameworks since + # this is framework agnostic). + if env == "CartPole-v1": + checkpoint = algo.save() + new_algo = config.build() + new_algo.restore(checkpoint) + # Get some data from the buffer and compare. + data = algo.local_replay_buffer.replay_buffers[ + "default_policy" + ]._storage[: 42 + 42] + new_data = new_algo.local_replay_buffer.replay_buffers[ + "default_policy" + ]._storage[: 42 + 42] + check(data, new_data) + new_algo.stop() + + algo.stop() def test_sac_dict_obs_order(self): dict_space = Dict( @@ -210,13 +206,12 @@ def step(self, action): ) num_iterations = 1 - for _ in framework_iterator(config): - algo = config.build() - for _ in range(num_iterations): - results = algo.train() - check_train_results(results) - print(results) - check_compute_single_action(algo) + algo = config.build() + for _ in range(num_iterations): + results = algo.train() + check_train_results(results) + print(results) + check_compute_single_action(algo) def _get_batch_helper(self, obs_size, actions, batch_size): return SampleBatch( diff --git a/rllib/algorithms/tests/test_algorithm.py b/rllib/algorithms/tests/test_algorithm.py index ffe45ea858b3..97b1cda0c9fe 100644 --- a/rllib/algorithms/tests/test_algorithm.py +++ b/rllib/algorithms/tests/test_algorithm.py @@ -26,7 +26,7 @@ LEARNER_RESULTS, ) from ray.rllib.utils.metrics.learner_info import LEARNER_INFO -from ray.rllib.utils.test_utils import check, framework_iterator +from ray.rllib.utils.test_utils import check from ray.tune import register_env @@ -248,174 +248,166 @@ def test_add_policy_and_remove_policy(self): obs_space = gym.spaces.Box(-2.0, 2.0, (4,)) act_space = gym.spaces.Discrete(2) - for fw in framework_iterator(config): - # Pre-generate a policy instance to test adding these directly to an - # existing algorithm. - if fw == "tf": - policy_obj = ppo.PPOTF1Policy(obs_space, act_space, config.to_dict()) - elif fw == "tf2": - policy_obj = ppo.PPOTF2Policy(obs_space, act_space, config.to_dict()) + # Pre-generate a policy instance to test adding these directly to an + # existing algorithm. + policy_obj = ppo.PPOTorchPolicy(obs_space, act_space, config.to_dict()) + + # Construct the Algorithm with a single policy in it. + algo = config.build() + pol0 = algo.get_policy("p0") + r = algo.train() + self.assertTrue("p0" in r["info"][LEARNER_INFO]) + for i in range(1, 3): + + def new_mapping_fn(agent_id, episode, worker, i=i, **kwargs): + return f"p{choice([i, i - 1])}" + + # Add a new policy either by class (and options) or by instance. + pid = f"p{i}" + print(f"Adding policy {pid} ...") + # By (already instantiated) instance. + if i == 2: + new_pol = algo.add_policy( + pid, + # Pass in an already existing policy instance. + policy=policy_obj, + # Test changing the mapping fn. + policy_mapping_fn=new_mapping_fn, + # Change the list of policies to train. + policies_to_train=[f"p{i}", f"p{i - 1}"], + ) + # By class (and options). 
else: - policy_obj = ppo.PPOTorchPolicy(obs_space, act_space, config.to_dict()) - - # Construct the Algorithm with a single policy in it. - algo = config.build() - pol0 = algo.get_policy("p0") - r = algo.train() - self.assertTrue("p0" in r["info"][LEARNER_INFO]) - for i in range(1, 3): - - def new_mapping_fn(agent_id, episode, worker, i=i, **kwargs): - return f"p{choice([i, i - 1])}" - - # Add a new policy either by class (and options) or by instance. - pid = f"p{i}" - print(f"Adding policy {pid} ...") - # By (already instantiated) instance. - if i == 2: - new_pol = algo.add_policy( - pid, - # Pass in an already existing policy instance. - policy=policy_obj, - # Test changing the mapping fn. - policy_mapping_fn=new_mapping_fn, - # Change the list of policies to train. - policies_to_train=[f"p{i}", f"p{i - 1}"], - ) - # By class (and options). - else: - new_pol = algo.add_policy( - pid, - algo.get_default_policy_class(config), - observation_space=obs_space, - action_space=act_space, - # Test changing the mapping fn. - policy_mapping_fn=new_mapping_fn, - # Change the list of policies to train. - policies_to_train=[f"p{i}", f"p{i-1}"], - ) + new_pol = algo.add_policy( + pid, + algo.get_default_policy_class(config), + observation_space=obs_space, + action_space=act_space, + # Test changing the mapping fn. + policy_mapping_fn=new_mapping_fn, + # Change the list of policies to train. + policies_to_train=[f"p{i}", f"p{i-1}"], + ) - # Make sure new policy is part of remote workers in the - # worker set and the eval worker set. - self.assertTrue( - all( - algo.env_runner_group.foreach_worker( - func=lambda w, pid=pid: pid in w.policy_map - ) + # Make sure new policy is part of remote workers in the + # worker set and the eval worker set. + self.assertTrue( + all( + algo.env_runner_group.foreach_worker( + func=lambda w, pid=pid: pid in w.policy_map ) ) - self.assertTrue( - all( - algo.eval_env_runner_group.foreach_worker( - func=lambda w, pid=pid: pid in w.policy_map - ) + ) + self.assertTrue( + all( + algo.eval_env_runner_group.foreach_worker( + func=lambda w, pid=pid: pid in w.policy_map ) ) + ) - # Assert new policy is part of local worker (eval worker set does NOT - # have a local worker, only the main EnvRunnerGroup does). - pol_map = algo.env_runner.policy_map - self.assertTrue(new_pol is not pol0) - for j in range(i + 1): - self.assertTrue(f"p{j}" in pol_map) - self.assertTrue(len(pol_map) == i + 1) - algo.train() - checkpoint = algo.save().checkpoint - - # Test restoring from the checkpoint (which has more policies - # than what's defined in the config dict). - test = ppo.PPO.from_checkpoint(checkpoint=checkpoint) - - # Make sure evaluation worker also got the restored, added policy. - def _has_policies(w, pid=pid): - return ( - w.get_policy("p0") is not None and w.get_policy(pid) is not None - ) + # Assert new policy is part of local worker (eval worker set does NOT + # have a local worker, only the main EnvRunnerGroup does). + pol_map = algo.env_runner.policy_map + self.assertTrue(new_pol is not pol0) + for j in range(i + 1): + self.assertTrue(f"p{j}" in pol_map) + self.assertTrue(len(pol_map) == i + 1) + algo.train() + checkpoint = algo.save().checkpoint - self.assertTrue( - all(test.eval_env_runner_group.foreach_worker(_has_policies)) - ) + # Test restoring from the checkpoint (which has more policies + # than what's defined in the config dict). + test = ppo.PPO.from_checkpoint(checkpoint=checkpoint) - # Make sure algorithm can continue training the restored policy. 
- pol0 = test.get_policy("p0") - test.train() - # Test creating an action with the added (and restored) policy. - a = test.compute_single_action( - np.zeros_like(pol0.observation_space.sample()), policy_id=pid - ) - self.assertTrue(pol0.action_space.contains(a)) - test.stop() - - # After having added 2 policies, try to restore the Algorithm, - # but only with 1 of the originally added policies (plus the initial - # p0). - if i == 2: - - def new_mapping_fn(agent_id, episode, worker, **kwargs): - return f"p{choice([0, 2])}" - - test2 = ppo.PPO.from_checkpoint( - path=checkpoint, - policy_ids=["p0", "p2"], - policy_mapping_fn=new_mapping_fn, - policies_to_train=["p0"], - ) + # Make sure evaluation worker also got the restored, added policy. + def _has_policies(w, pid=pid): + return w.get_policy("p0") is not None and w.get_policy(pid) is not None - # Make sure evaluation workers have the same policies. - def _has_policies(w): - return ( - w.get_policy("p0") is not None - and w.get_policy("p2") is not None - and w.get_policy("p1") is None - ) + self.assertTrue( + all(test.eval_env_runner_group.foreach_worker(_has_policies)) + ) - self.assertTrue( - all(test2.eval_env_runner_group.foreach_worker(_has_policies)) - ) + # Make sure algorithm can continue training the restored policy. + pol0 = test.get_policy("p0") + test.train() + # Test creating an action with the added (and restored) policy. + a = test.compute_single_action( + np.zeros_like(pol0.observation_space.sample()), policy_id=pid + ) + self.assertTrue(pol0.action_space.contains(a)) + test.stop() - # Make sure algorithm can continue training the restored policy. - pol2 = test2.get_policy("p2") - test2.train() - # Test creating an action with the added (and restored) policy. - a = test2.compute_single_action( - np.zeros_like(pol2.observation_space.sample()), policy_id=pid - ) - self.assertTrue(pol2.action_space.contains(a)) - test2.stop() + # After having added 2 policies, try to restore the Algorithm, + # but only with 1 of the originally added policies (plus the initial + # p0). + if i == 2: - # Delete all added policies again from Algorithm. - for i in range(2, 0, -1): - pid = f"p{i}" - algo.remove_policy( - pid, - # Note that the complete signature of a policy_mapping_fn - # is: `agent_id, episode, worker, **kwargs`. - policy_mapping_fn=( - lambda agent_id, episode, worker, i=i, **kwargs: f"p{i - 1}" - ), - # Update list of policies to train. - policies_to_train=[f"p{i - 1}"], + def new_mapping_fn(agent_id, episode, worker, **kwargs): + return f"p{choice([0, 2])}" + + test2 = ppo.PPO.from_checkpoint( + path=checkpoint, + policy_ids=["p0", "p2"], + policy_mapping_fn=new_mapping_fn, + policies_to_train=["p0"], ) - # Make sure removed policy is no longer part of remote workers in the - # worker set and the eval worker set. + + # Make sure evaluation workers have the same policies. + def _has_policies(w): + return ( + w.get_policy("p0") is not None + and w.get_policy("p2") is not None + and w.get_policy("p1") is None + ) + self.assertTrue( - algo.env_runner_group.foreach_worker( - func=lambda w, pid=pid: pid not in w.policy_map - )[0] + all(test2.eval_env_runner_group.foreach_worker(_has_policies)) ) - self.assertTrue( - algo.eval_env_runner_group.foreach_worker( - func=lambda w, pid=pid: pid not in w.policy_map - )[0] + + # Make sure algorithm can continue training the restored policy. + pol2 = test2.get_policy("p2") + test2.train() + # Test creating an action with the added (and restored) policy. 
+ a = test2.compute_single_action( + np.zeros_like(pol2.observation_space.sample()), policy_id=pid ) - # Assert removed policy is no longer part of local worker - # (eval worker set does NOT have a local worker, only the main - # EnvRunnerGroup does). - pol_map = algo.env_runner.policy_map - self.assertTrue(pid not in pol_map) - self.assertTrue(len(pol_map) == i) + self.assertTrue(pol2.action_space.contains(a)) + test2.stop() - algo.stop() + # Delete all added policies again from Algorithm. + for i in range(2, 0, -1): + pid = f"p{i}" + algo.remove_policy( + pid, + # Note that the complete signature of a policy_mapping_fn + # is: `agent_id, episode, worker, **kwargs`. + policy_mapping_fn=( + lambda agent_id, episode, worker, i=i, **kwargs: f"p{i - 1}" + ), + # Update list of policies to train. + policies_to_train=[f"p{i - 1}"], + ) + # Make sure removed policy is no longer part of remote workers in the + # worker set and the eval worker set. + self.assertTrue( + algo.env_runner_group.foreach_worker( + func=lambda w, pid=pid: pid not in w.policy_map + )[0] + ) + self.assertTrue( + algo.eval_env_runner_group.foreach_worker( + func=lambda w, pid=pid: pid not in w.policy_map + )[0] + ) + # Assert removed policy is no longer part of local worker + # (eval worker set does NOT have a local worker, only the main + # EnvRunnerGroup does). + pol_map = algo.env_runner.policy_map + self.assertTrue(pid not in pol_map) + self.assertTrue(len(pol_map) == i) + + algo.stop() def test_evaluation_option(self): # Use a custom callback that asserts that we are running the @@ -432,29 +424,28 @@ def test_evaluation_option(self): .callbacks(callbacks_class=AssertEvalCallback) ) - for _ in framework_iterator(config, frameworks=("tf", "torch")): - algo = config.build() - # Given evaluation_interval=2, r0, r2, r4 should not contain - # evaluation metrics, while r1, r3 should. - r0 = algo.train() - print(r0) - r1 = algo.train() - print(r1) - r2 = algo.train() - print(r2) - r3 = algo.train() - print(r3) - algo.stop() - - self.assertFalse(EVALUATION_RESULTS in r0) - self.assertTrue(EVALUATION_RESULTS in r1) - self.assertFalse(EVALUATION_RESULTS in r2) - self.assertTrue(EVALUATION_RESULTS in r3) - self.assertTrue(ENV_RUNNER_RESULTS in r1[EVALUATION_RESULTS]) - self.assertTrue( - EPISODE_RETURN_MEAN in r1[EVALUATION_RESULTS][ENV_RUNNER_RESULTS] - ) - self.assertNotEqual(r1[EVALUATION_RESULTS], r3[EVALUATION_RESULTS]) + algo = config.build() + # Given evaluation_interval=2, r0, r2, r4 should not contain + # evaluation metrics, while r1, r3 should. 
+ r0 = algo.train() + print(r0) + r1 = algo.train() + print(r1) + r2 = algo.train() + print(r2) + r3 = algo.train() + print(r3) + algo.stop() + + self.assertFalse(EVALUATION_RESULTS in r0) + self.assertTrue(EVALUATION_RESULTS in r1) + self.assertFalse(EVALUATION_RESULTS in r2) + self.assertTrue(EVALUATION_RESULTS in r3) + self.assertTrue(ENV_RUNNER_RESULTS in r1[EVALUATION_RESULTS]) + self.assertTrue( + EPISODE_RETURN_MEAN in r1[EVALUATION_RESULTS][ENV_RUNNER_RESULTS] + ) + self.assertNotEqual(r1[EVALUATION_RESULTS], r3[EVALUATION_RESULTS]) def test_evaluation_option_always_attach_eval_metrics(self): # Use a custom callback that asserts that we are running the @@ -471,22 +462,21 @@ def test_evaluation_option_always_attach_eval_metrics(self): .reporting(min_sample_timesteps_per_iteration=100) .callbacks(callbacks_class=AssertEvalCallback) ) - for _ in framework_iterator(config, frameworks=("torch", "tf")): - algo = config.build() - # Should only see eval results, when eval actually ran. - r0 = algo.train() - r1 = algo.train() - r2 = algo.train() - r3 = algo.train() - algo.stop() - - # Eval results are not available at step 0. - # But step 3 should still have it, even though no eval was - # run during that step. - self.assertTrue(EVALUATION_RESULTS not in r0) - self.assertTrue(EVALUATION_RESULTS in r1) - self.assertTrue(EVALUATION_RESULTS not in r2) - self.assertTrue(EVALUATION_RESULTS in r3) + algo = config.build() + # Should only see eval results, when eval actually ran. + r0 = algo.train() + r1 = algo.train() + r2 = algo.train() + r3 = algo.train() + algo.stop() + + # Eval results are not available at step 0. + # But step 3 should still have it, even though no eval was + # run during that step. + self.assertTrue(EVALUATION_RESULTS not in r0) + self.assertTrue(EVALUATION_RESULTS in r1) + self.assertTrue(EVALUATION_RESULTS not in r2) + self.assertTrue(EVALUATION_RESULTS in r3) def test_evaluation_wo_evaluation_env_runner_group(self): # Use a custom callback that asserts that we are running the @@ -497,30 +487,29 @@ def test_evaluation_wo_evaluation_env_runner_group(self): .callbacks(callbacks_class=AssertEvalCallback) ) - for _ in framework_iterator(frameworks=("torch", "tf")): - # Setup algorithm w/o evaluation worker set and still call - # evaluate() -> Expect error. - algo_wo_env_on_local_worker = config.build() - self.assertRaisesRegex( - ValueError, - "Can't evaluate on a local worker", - algo_wo_env_on_local_worker.evaluate, - ) - algo_wo_env_on_local_worker.stop() - - # Try again using `create_env_on_driver=True`. - # This force-adds the env on the local-worker, so this Algorithm - # can `evaluate` even though it doesn't have an evaluation-worker - # set. - config.create_env_on_local_worker = True - algo_w_env_on_local_worker = config.build() - results = algo_w_env_on_local_worker.evaluate() - assert ( - ENV_RUNNER_RESULTS in results - and EPISODE_RETURN_MEAN in results[ENV_RUNNER_RESULTS] - ) - algo_w_env_on_local_worker.stop() - config.create_env_on_local_worker = False + # Setup algorithm w/o evaluation worker set and still call + # evaluate() -> Expect error. + algo_wo_env_on_local_worker = config.build() + self.assertRaisesRegex( + ValueError, + "Can't evaluate on a local worker", + algo_wo_env_on_local_worker.evaluate, + ) + algo_wo_env_on_local_worker.stop() + + # Try again using `create_env_on_driver=True`. + # This force-adds the env on the local-worker, so this Algorithm + # can `evaluate` even though it doesn't have an evaluation-worker + # set. 
+ config.create_env_on_local_worker = True + algo_w_env_on_local_worker = config.build() + results = algo_w_env_on_local_worker.evaluate() + assert ( + ENV_RUNNER_RESULTS in results + and EPISODE_RETURN_MEAN in results[ENV_RUNNER_RESULTS] + ) + algo_w_env_on_local_worker.stop() + config.create_env_on_local_worker = False def test_space_inference_from_remote_workers(self): # Expect to not do space inference if the learner has an env. diff --git a/rllib/algorithms/tests/test_algorithm_export_checkpoint.py b/rllib/algorithms/tests/test_algorithm_export_checkpoint.py index d5ddec5c79f7..9c64f160a5d8 100644 --- a/rllib/algorithms/tests/test_algorithm_export_checkpoint.py +++ b/rllib/algorithms/tests/test_algorithm_export_checkpoint.py @@ -6,11 +6,9 @@ import ray from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID -from ray.rllib.utils.framework import try_import_tf, try_import_torch -from ray.rllib.utils.test_utils import framework_iterator +from ray.rllib.utils.framework import try_import_torch from ray.tune.registry import get_trainable_cls -tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() # Keep a set of all RLlib algos that support the RLModule API. @@ -66,27 +64,18 @@ def save_test(alg_name, framework="tf", multi_agent=False): ) # Test loading exported model and perform forward pass. - if framework == "torch": - filename = os.path.join(model_dir, "model.pt") - model = torch.load(filename) - assert model - results = model( - input_dict={"obs": torch.from_numpy(test_obs)}, - # TODO (sven): Make non-RNN models NOT expect these args at all. - state=[torch.tensor(0)], # dummy value - seq_lens=torch.tensor(0), # dummy value - ) - assert len(results) == 2 - assert results[0].shape == (1, 2) - assert results[1] == [torch.tensor(0)] # dummy - else: - model = tf.saved_model.load(model_dir) - assert model - results = model(tf.convert_to_tensor(test_obs, dtype=tf.float32)) - assert len(results) == 2 - assert results[0].shape == (1, 2) - # TODO (sven): Make non-RNN models NOT return states (empty list). - assert results[1].shape == (1, 1) # dummy state-out + filename = os.path.join(model_dir, "model.pt") + model = torch.load(filename) + assert model + results = model( + input_dict={"obs": torch.from_numpy(test_obs)}, + # TODO (sven): Make non-RNN models NOT expect these args at all. 
+            state=[torch.tensor(0)],  # dummy value
+            seq_lens=torch.tensor(0),  # dummy value
+        )
+        assert len(results) == 2
+        assert results[0].shape == (1, 2)
+        assert results[1] == [torch.tensor(0)]  # dummy

     shutil.rmtree(export_dir)


@@ -101,12 +90,10 @@ def tearDownClass(cls) -> None:
         ray.shutdown()

     def test_save_appo_multi_agent(self):
-        for fw in framework_iterator():
-            save_test("APPO", fw, multi_agent=True)
+        save_test("APPO", "torch", multi_agent=True)

     def test_save_ppo(self):
-        for fw in framework_iterator():
-            save_test("PPO", fw)
+        save_test("PPO", "torch")


 if __name__ == "__main__":
diff --git a/rllib/algorithms/tests/test_algorithm_rl_module_restore.py b/rllib/algorithms/tests/test_algorithm_rl_module_restore.py
index d13caa90766c..1dd50fb84035 100644
--- a/rllib/algorithms/tests/test_algorithm_rl_module_restore.py
+++ b/rllib/algorithms/tests/test_algorithm_rl_module_restore.py
@@ -17,7 +17,7 @@
     MultiRLModule,
 )
 from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole
-from ray.rllib.utils.test_utils import check, framework_iterator
+from ray.rllib.utils.test_utils import check
 from ray.rllib.utils.numpy import convert_to_numpy


@@ -63,123 +63,119 @@ def test_e2e_load_simple_multi_rl_module(self):
         """Test if we can train a PPO algo with a checkpointed MultiRLModule e2e."""
         config = self.get_ppo_config()
         env = MultiAgentCartPole({"num_agents": NUM_AGENTS})
-        for fw in framework_iterator(config, frameworks=["tf2", "torch"]):
-            # create a multi_rl_module to load and save it to a checkpoint directory
-            module_specs = {}
-            module_class = PPO_MODULES[fw]
-            for i in range(NUM_AGENTS):
-                module_specs[f"policy_{i}"] = RLModuleSpec(
-                    module_class=module_class,
-                    observation_space=env.observation_space[0],
-                    action_space=env.action_space[0],
-                    # If we want to use this externally created module in the algorithm,
-                    # we need to provide the same config as the algorithm.
-                    model_config_dict=config.model_config
-                    | {"fcnet_hiddens": [32 * (i + 1)]},
-                    catalog_class=PPOCatalog,
-                )
-            multi_rl_module_spec = MultiRLModuleSpec(module_specs=module_specs)
-            multi_rl_module = multi_rl_module_spec.build()
-            multi_rl_module_weights = convert_to_numpy(multi_rl_module.get_state())
-            marl_checkpoint_path = tempfile.mkdtemp()
-            multi_rl_module.save_to_path(marl_checkpoint_path)
-
-            # create a new MARL_spec with the checkpoint from the previous one
-            multi_rl_module_spec_from_checkpoint = MultiRLModuleSpec(
-                module_specs=module_specs,
-                load_state_path=marl_checkpoint_path,
-            )
-            config = config.api_stack(enable_rl_module_and_learner=True).rl_module(
-                rl_module_spec=multi_rl_module_spec_from_checkpoint,
+        # create a multi_rl_module to load and save it to a checkpoint directory
+        module_specs = {}
+        module_class = PPO_MODULES["torch"]
+        for i in range(NUM_AGENTS):
+            module_specs[f"policy_{i}"] = RLModuleSpec(
+                module_class=module_class,
+                observation_space=env.observation_space[0],
+                action_space=env.action_space[0],
+                # If we want to use this externally created module in the algorithm,
+                # we need to provide the same config as the algorithm.
+ model_config_dict=config.model_config + | {"fcnet_hiddens": [32 * (i + 1)]}, + catalog_class=PPOCatalog, ) + multi_rl_module_spec = MultiRLModuleSpec(module_specs=module_specs) + multi_rl_module = multi_rl_module_spec.build() + multi_rl_module_weights = convert_to_numpy(multi_rl_module.get_state()) + marl_checkpoint_path = tempfile.mkdtemp() + multi_rl_module.save_to_path(marl_checkpoint_path) + + # create a new MARL_spec with the checkpoint from the previous one + multi_rl_module_spec_from_checkpoint = MultiRLModuleSpec( + module_specs=module_specs, + load_state_path=marl_checkpoint_path, + ) + config = config.api_stack(enable_rl_module_and_learner=True).rl_module( + rl_module_spec=multi_rl_module_spec_from_checkpoint, + ) - # create the algorithm with multiple nodes and check if the weights - # are the same as the original MultiRLModule - algo = config.build() - algo_module_weights = algo.learner_group.get_weights() - check(algo_module_weights, multi_rl_module_weights) - algo.train() - algo.stop() - del algo - shutil.rmtree(marl_checkpoint_path) + # create the algorithm with multiple nodes and check if the weights + # are the same as the original MultiRLModule + algo = config.build() + algo_module_weights = algo.learner_group.get_weights() + check(algo_module_weights, multi_rl_module_weights) + algo.train() + algo.stop() + del algo + shutil.rmtree(marl_checkpoint_path) def test_e2e_load_complex_multi_rl_module(self): """Test if we can train a PPO algorithm with a cpkt MARL and RL module e2e.""" config = self.get_ppo_config() env = MultiAgentCartPole({"num_agents": NUM_AGENTS}) - for fw in framework_iterator(config, frameworks=["tf2", "torch"]): - # create a multi_rl_module to load and save it to a checkpoint directory - module_specs = {} - module_class = PPO_MODULES[fw] - for i in range(NUM_AGENTS): - module_specs[f"policy_{i}"] = RLModuleSpec( - module_class=module_class, - observation_space=env.observation_space[0], - action_space=env.action_space[0], - # If we want to use this externally created module in the algorithm, - # we need to provide the same config as the algorithm. - model_config_dict=config.model_config - | {"fcnet_hiddens": [32 * (i + 1)]}, - catalog_class=PPOCatalog, - ) - multi_rl_module_spec = MultiRLModuleSpec(module_specs=module_specs) - multi_rl_module = multi_rl_module_spec.build() - marl_checkpoint_path = tempfile.mkdtemp() - multi_rl_module.save_to_path(marl_checkpoint_path) - - # create a RLModule to load and override the "policy_1" module with - module_to_swap_in = RLModuleSpec( - module_class=module_class, - observation_space=env.observation_space[0], - action_space=env.action_space[0], - # Note, we need to pass in the default model config for the algorithm - # to be able to use this module later. 
- model_config_dict=config.model_config | {"fcnet_hiddens": [64]}, - catalog_class=PPOCatalog, - ).build() - - module_to_swap_in_path = tempfile.mkdtemp() - module_to_swap_in.save_to_path(module_to_swap_in_path) - - # create a new MARL_spec with the checkpoint from the marl_checkpoint - # and the module_to_swap_in_checkpoint - module_specs["policy_1"] = RLModuleSpec( + # create a multi_rl_module to load and save it to a checkpoint directory + module_specs = {} + module_class = PPO_MODULES["torch"] + for i in range(NUM_AGENTS): + module_specs[f"policy_{i}"] = RLModuleSpec( module_class=module_class, observation_space=env.observation_space[0], action_space=env.action_space[0], - model_config_dict={"fcnet_hiddens": [64]}, + # If we want to use this externally created module in the algorithm, + # we need to provide the same config as the algorithm. + model_config_dict=config.model_config + | {"fcnet_hiddens": [32 * (i + 1)]}, catalog_class=PPOCatalog, - load_state_path=module_to_swap_in_path, - ) - multi_rl_module_spec_from_checkpoint = MultiRLModuleSpec( - module_specs=module_specs, - load_state_path=marl_checkpoint_path, - ) - config = config.api_stack(enable_rl_module_and_learner=True).rl_module( - rl_module_spec=multi_rl_module_spec_from_checkpoint, ) + multi_rl_module_spec = MultiRLModuleSpec(module_specs=module_specs) + multi_rl_module = multi_rl_module_spec.build() + marl_checkpoint_path = tempfile.mkdtemp() + multi_rl_module.save_to_path(marl_checkpoint_path) + + # create a RLModule to load and override the "policy_1" module with + module_to_swap_in = RLModuleSpec( + module_class=module_class, + observation_space=env.observation_space[0], + action_space=env.action_space[0], + # Note, we need to pass in the default model config for the algorithm + # to be able to use this module later. 
+ model_config_dict=config.model_config | {"fcnet_hiddens": [64]}, + catalog_class=PPOCatalog, + ).build() + + module_to_swap_in_path = tempfile.mkdtemp() + module_to_swap_in.save_to_path(module_to_swap_in_path) + + # create a new MARL_spec with the checkpoint from the marl_checkpoint + # and the module_to_swap_in_checkpoint + module_specs["policy_1"] = RLModuleSpec( + module_class=module_class, + observation_space=env.observation_space[0], + action_space=env.action_space[0], + model_config_dict={"fcnet_hiddens": [64]}, + catalog_class=PPOCatalog, + load_state_path=module_to_swap_in_path, + ) + multi_rl_module_spec_from_checkpoint = MultiRLModuleSpec( + module_specs=module_specs, + load_state_path=marl_checkpoint_path, + ) + config = config.api_stack(enable_rl_module_and_learner=True).rl_module( + rl_module_spec=multi_rl_module_spec_from_checkpoint, + ) - # create the algorithm with multiple nodes and check if the weights - # are the same as the original MultiRLModule - algo = config.build() - algo_module_weights = algo.learner_group.get_weights() + # create the algorithm with multiple nodes and check if the weights + # are the same as the original MultiRLModule + algo = config.build() + algo_module_weights = algo.learner_group.get_weights() - multi_rl_module_with_swapped_in_module = MultiRLModule() - multi_rl_module_with_swapped_in_module.add_module( - "policy_0", multi_rl_module["policy_0"] - ) - multi_rl_module_with_swapped_in_module.add_module( - "policy_1", module_to_swap_in - ) + multi_rl_module_with_swapped_in_module = MultiRLModule() + multi_rl_module_with_swapped_in_module.add_module( + "policy_0", multi_rl_module["policy_0"] + ) + multi_rl_module_with_swapped_in_module.add_module("policy_1", module_to_swap_in) - check( - algo_module_weights, - convert_to_numpy(multi_rl_module_with_swapped_in_module.get_state()), - ) - algo.train() - algo.stop() - del algo - shutil.rmtree(marl_checkpoint_path) + check( + algo_module_weights, + convert_to_numpy(multi_rl_module_with_swapped_in_module.get_state()), + ) + algo.train() + algo.stop() + del algo + shutil.rmtree(marl_checkpoint_path) def test_e2e_load_rl_module(self): """Test if we can train a PPO algorithm with a cpkt RL module e2e.""" @@ -197,49 +193,48 @@ def test_e2e_load_rl_module(self): .training(num_sgd_iter=1, train_batch_size=8, sgd_minibatch_size=8) ) env = gym.make("CartPole-v1") - for fw in framework_iterator(config, frameworks=["tf2", "torch"]): - # create a multi_rl_module to load and save it to a checkpoint directory - module_class = PPO_MODULES[fw] - module_spec = RLModuleSpec( - module_class=module_class, - observation_space=env.observation_space, - action_space=env.action_space, - # If we want to use this externally created module in the algorithm, - # we need to provide the same config as the algorithm. 
- model_config_dict=config.model_config | {"fcnet_hiddens": [32]}, - catalog_class=PPOCatalog, - ) - module = module_spec.build() - - module_ckpt_path = tempfile.mkdtemp() - module.save_to_path(module_ckpt_path) - - module_to_load_spec = RLModuleSpec( - module_class=module_class, - observation_space=env.observation_space, - action_space=env.action_space, - model_config_dict={"fcnet_hiddens": [32]}, - catalog_class=PPOCatalog, - load_state_path=module_ckpt_path, - ) + # create a multi_rl_module to load and save it to a checkpoint directory + module_class = PPO_MODULES["torch"] + module_spec = RLModuleSpec( + module_class=module_class, + observation_space=env.observation_space, + action_space=env.action_space, + # If we want to use this externally created module in the algorithm, + # we need to provide the same config as the algorithm. + model_config_dict=config.model_config | {"fcnet_hiddens": [32]}, + catalog_class=PPOCatalog, + ) + module = module_spec.build() + + module_ckpt_path = tempfile.mkdtemp() + module.save_to_path(module_ckpt_path) + + module_to_load_spec = RLModuleSpec( + module_class=module_class, + observation_space=env.observation_space, + action_space=env.action_space, + model_config_dict={"fcnet_hiddens": [32]}, + catalog_class=PPOCatalog, + load_state_path=module_ckpt_path, + ) - config = config.api_stack(enable_rl_module_and_learner=True).rl_module( - rl_module_spec=module_to_load_spec, - ) + config = config.api_stack(enable_rl_module_and_learner=True).rl_module( + rl_module_spec=module_to_load_spec, + ) - # create the algorithm with multiple nodes and check if the weights - # are the same as the original MultiRLModule - algo = config.build() - algo_module_weights = algo.learner_group.get_weights() + # create the algorithm with multiple nodes and check if the weights + # are the same as the original MultiRLModule + algo = config.build() + algo_module_weights = algo.learner_group.get_weights() - check( - algo_module_weights[DEFAULT_MODULE_ID], - convert_to_numpy(module.get_state()), - ) - algo.train() - algo.stop() - del algo - shutil.rmtree(module_ckpt_path) + check( + algo_module_weights[DEFAULT_MODULE_ID], + convert_to_numpy(module.get_state()), + ) + algo.train() + algo.stop() + del algo + shutil.rmtree(module_ckpt_path) def test_e2e_load_complex_multi_rl_module_with_modules_to_load(self): """Test if we can train a PPO algorithm with a cpkt MARL and RL module e2e. @@ -251,107 +246,104 @@ def test_e2e_load_complex_multi_rl_module_with_modules_to_load(self): num_agents = 3 config = self.get_ppo_config(num_agents=num_agents) env = MultiAgentCartPole({"num_agents": num_agents}) - for fw in framework_iterator(config, frameworks=["tf2", "torch"]): - # create a multi_rl_module to load and save it to a checkpoint directory - module_specs = {} - module_class = PPO_MODULES[fw] - for i in range(num_agents): - module_specs[f"policy_{i}"] = RLModuleSpec( - module_class=module_class, - observation_space=env.observation_space[0], - action_space=env.action_space[0], - # Note, we need to pass in the default model config for the - # algorithm to be able to use this module later. 
- model_config_dict=config.model_config - | {"fcnet_hiddens": [32 * (i + 1)]}, - catalog_class=PPOCatalog, - ) - multi_rl_module_spec = MultiRLModuleSpec(module_specs=module_specs) - multi_rl_module = multi_rl_module_spec.build() - marl_checkpoint_path = tempfile.mkdtemp() - multi_rl_module.save_to_path(marl_checkpoint_path) - - # create a RLModule to load and override the "policy_1" module with - module_to_swap_in = RLModuleSpec( + # create a multi_rl_module to load and save it to a checkpoint directory + module_specs = {} + module_class = PPO_MODULES["torch"] + for i in range(num_agents): + module_specs[f"policy_{i}"] = RLModuleSpec( module_class=module_class, observation_space=env.observation_space[0], action_space=env.action_space[0], - # Note, we need to pass in the default model config for the algorithm - # to be able to use this module later. - model_config_dict=config.model_config | {"fcnet_hiddens": [64]}, + # Note, we need to pass in the default model config for the + # algorithm to be able to use this module later. + model_config_dict=config.model_config + | {"fcnet_hiddens": [32 * (i + 1)]}, catalog_class=PPOCatalog, - ).build() - - module_to_swap_in_path = tempfile.mkdtemp() - module_to_swap_in.save_to_path(module_to_swap_in_path) - - # create a new MARL_spec with the checkpoint from the marl_checkpoint - # and the module_to_swap_in_checkpoint - module_specs["policy_1"] = RLModuleSpec( - module_class=module_class, - observation_space=env.observation_space[0], - action_space=env.action_space[0], - model_config_dict={"fcnet_hiddens": [64]}, - catalog_class=PPOCatalog, - load_state_path=module_to_swap_in_path, - ) - multi_rl_module_spec_from_checkpoint = MultiRLModuleSpec( - module_specs=module_specs, - load_state_path=marl_checkpoint_path, - modules_to_load={ - "policy_0", - }, - ) - config = config.api_stack(enable_rl_module_and_learner=True).rl_module( - rl_module_spec=multi_rl_module_spec_from_checkpoint, ) + multi_rl_module_spec = MultiRLModuleSpec(module_specs=module_specs) + multi_rl_module = multi_rl_module_spec.build() + marl_checkpoint_path = tempfile.mkdtemp() + multi_rl_module.save_to_path(marl_checkpoint_path) + + # create a RLModule to load and override the "policy_1" module with + module_to_swap_in = RLModuleSpec( + module_class=module_class, + observation_space=env.observation_space[0], + action_space=env.action_space[0], + # Note, we need to pass in the default model config for the algorithm + # to be able to use this module later. 
+ model_config_dict=config.model_config | {"fcnet_hiddens": [64]}, + catalog_class=PPOCatalog, + ).build() + + module_to_swap_in_path = tempfile.mkdtemp() + module_to_swap_in.save_to_path(module_to_swap_in_path) + + # create a new MARL_spec with the checkpoint from the marl_checkpoint + # and the module_to_swap_in_checkpoint + module_specs["policy_1"] = RLModuleSpec( + module_class=module_class, + observation_space=env.observation_space[0], + action_space=env.action_space[0], + model_config_dict={"fcnet_hiddens": [64]}, + catalog_class=PPOCatalog, + load_state_path=module_to_swap_in_path, + ) + multi_rl_module_spec_from_checkpoint = MultiRLModuleSpec( + module_specs=module_specs, + load_state_path=marl_checkpoint_path, + modules_to_load={ + "policy_0", + }, + ) + config = config.api_stack(enable_rl_module_and_learner=True).rl_module( + rl_module_spec=multi_rl_module_spec_from_checkpoint, + ) - # create the algorithm with multiple nodes and check if the weights - # are the same as the original MultiRLModule - algo = config.build() - algo_module_weights = algo.learner_group.get_weights() + # create the algorithm with multiple nodes and check if the weights + # are the same as the original MultiRLModule + algo = config.build() + algo_module_weights = algo.learner_group.get_weights() - # weights of "policy_0" should be the same as in the loaded MultiRLModule - # since we specified it as being apart of the modules_to_load - check( - algo_module_weights["policy_0"], - convert_to_numpy(multi_rl_module["policy_0"].get_state()), - ) - # weights of "policy_1" should be the same as in the module_to_swap_in since - # we specified its load path separately in an rl_module_spec inside of the - # multi_rl_module_spec_from_checkpoint - check( - algo_module_weights["policy_1"], - convert_to_numpy(module_to_swap_in.get_state()), - ) - # weights of "policy_2" should be different from the loaded MultiRLModule - # since we didn't specify it as being apart of the modules_to_load - policy_2_algo_module_weight_sum = np.sum( - [ - np.sum(s) - for s in tree.flatten( - convert_to_numpy(algo_module_weights["policy_2"]) - ) - ] - ) - policy_2_multi_rl_module_weight_sum = np.sum( - [ - np.sum(s) - for s in tree.flatten( - convert_to_numpy(multi_rl_module["policy_2"].get_state()) - ) - ] - ) - check( - policy_2_algo_module_weight_sum, - policy_2_multi_rl_module_weight_sum, - false=True, - ) + # weights of "policy_0" should be the same as in the loaded MultiRLModule + # since we specified it as being apart of the modules_to_load + check( + algo_module_weights["policy_0"], + convert_to_numpy(multi_rl_module["policy_0"].get_state()), + ) + # weights of "policy_1" should be the same as in the module_to_swap_in since + # we specified its load path separately in an rl_module_spec inside of the + # multi_rl_module_spec_from_checkpoint + check( + algo_module_weights["policy_1"], + convert_to_numpy(module_to_swap_in.get_state()), + ) + # weights of "policy_2" should be different from the loaded MultiRLModule + # since we didn't specify it as being apart of the modules_to_load + policy_2_algo_module_weight_sum = np.sum( + [ + np.sum(s) + for s in tree.flatten(convert_to_numpy(algo_module_weights["policy_2"])) + ] + ) + policy_2_multi_rl_module_weight_sum = np.sum( + [ + np.sum(s) + for s in tree.flatten( + convert_to_numpy(multi_rl_module["policy_2"].get_state()) + ) + ] + ) + check( + policy_2_algo_module_weight_sum, + policy_2_multi_rl_module_weight_sum, + false=True, + ) - algo.train() - algo.stop() - del algo - 
shutil.rmtree(marl_checkpoint_path) + algo.train() + algo.stop() + del algo + shutil.rmtree(marl_checkpoint_path) if __name__ == "__main__": diff --git a/rllib/algorithms/tests/test_algorithm_save_load_checkpoint_learner.py b/rllib/algorithms/tests/test_algorithm_save_load_checkpoint_learner.py index 3e8e9ef13079..3b71c09528bf 100644 --- a/rllib/algorithms/tests/test_algorithm_save_load_checkpoint_learner.py +++ b/rllib/algorithms/tests/test_algorithm_save_load_checkpoint_learner.py @@ -6,7 +6,7 @@ from ray.rllib.algorithms.ppo import PPOConfig from ray.rllib.core import DEFAULT_MODULE_ID from ray.rllib.utils.metrics.learner_info import LEARNER_INFO -from ray.rllib.utils.test_utils import check, framework_iterator +from ray.rllib.utils.test_utils import check algorithms_and_configs = { @@ -96,27 +96,26 @@ def tearDowClass(cls) -> None: def test_save_and_restore(self): for algo_name in algorithms_and_configs: config = algorithms_and_configs[algo_name] - for _ in framework_iterator(config, frameworks=["torch", "tf2"]): - with tempfile.TemporaryDirectory() as tmpdir: - # create an algorithm, checkpoint it, then train for 2 iterations - ray.get(save_and_train.remote(config, "CartPole-v1", tmpdir)) - # load that checkpoint into a new algorithm and train for 2 - # iterations - results_algo_2 = ray.get( - load_and_train.remote(config, "CartPole-v1", tmpdir) - ) - - # load that checkpoint into another new algorithm and train for 2 - # iterations - results_algo_3 = ray.get( - load_and_train.remote(config, "CartPole-v1", tmpdir) - ) - - # check that the results are the same across loaded algorithms - # they won't be the same as the first algorithm since the random - # state that is used for each algorithm is not preserved across - # checkpoints. - check(results_algo_3, results_algo_2) + with tempfile.TemporaryDirectory() as tmpdir: + # create an algorithm, checkpoint it, then train for 2 iterations + ray.get(save_and_train.remote(config, "CartPole-v1", tmpdir)) + # load that checkpoint into a new algorithm and train for 2 + # iterations + results_algo_2 = ray.get( + load_and_train.remote(config, "CartPole-v1", tmpdir) + ) + + # load that checkpoint into another new algorithm and train for 2 + # iterations + results_algo_3 = ray.get( + load_and_train.remote(config, "CartPole-v1", tmpdir) + ) + + # check that the results are the same across loaded algorithms + # they won't be the same as the first algorithm since the random + # state that is used for each algorithm is not preserved across + # checkpoints. 
+ check(results_algo_3, results_algo_2) if __name__ == "__main__": diff --git a/rllib/algorithms/tests/test_callbacks_old_stack.py b/rllib/algorithms/tests/test_callbacks_old_stack.py index f9045a18c694..dcbe2e516733 100644 --- a/rllib/algorithms/tests/test_callbacks_old_stack.py +++ b/rllib/algorithms/tests/test_callbacks_old_stack.py @@ -7,7 +7,6 @@ from ray.rllib.algorithms.ppo import PPOConfig from ray.rllib.evaluation.episode import Episode from ray.rllib.examples.envs.classes.random_env import RandomEnv -from ray.rllib.utils.test_utils import framework_iterator class EpisodeAndSampleCallbacks(DefaultCallbacks): @@ -82,16 +81,15 @@ def test_episode_and_sample_callbacks(self): .callbacks(EpisodeAndSampleCallbacks) .training(train_batch_size=50, sgd_minibatch_size=50, num_sgd_iter=1) ) - for _ in framework_iterator(config, frameworks=("tf", "torch")): - algo = config.build() - algo.train() - algo.train() - callback_obj = algo.env_runner.callbacks - self.assertGreater(callback_obj.counts["sample"], 0) - self.assertGreater(callback_obj.counts["start"], 0) - self.assertGreater(callback_obj.counts["end"], 0) - self.assertGreater(callback_obj.counts["step"], 0) - algo.stop() + algo = config.build() + algo.train() + algo.train() + callback_obj = algo.env_runner.callbacks + self.assertGreater(callback_obj.counts["sample"], 0) + self.assertGreater(callback_obj.counts["start"], 0) + self.assertGreater(callback_obj.counts["end"], 0) + self.assertGreater(callback_obj.counts["step"], 0) + algo.stop() def test_on_sub_environment_created(self): @@ -108,24 +106,23 @@ def test_on_sub_environment_created(self): ): config.callbacks(callbacks) - for _ in framework_iterator(config, frameworks=("tf", "torch")): - algo = config.build() - # Fake the counter on the local worker (doesn't have an env) and - # set it to -1 so the below `foreach_worker()` won't fail. - algo.env_runner.sum_sub_env_vector_indices = -1 - - # Get sub-env vector index sums from the 2 remote workers: - sum_sub_env_vector_indices = algo.env_runner_group.foreach_worker( - lambda w: w.sum_sub_env_vector_indices - ) - # Local worker has no environments -> Expect the -1 special - # value returned by the above lambda. - self.assertTrue(sum_sub_env_vector_indices[0] == -1) - # Both remote workers (index 1 and 2) have a vector index counter - # of 6 (sum of vector indices: 0 + 1 + 2 + 3). - self.assertTrue(sum_sub_env_vector_indices[1] == 6) - self.assertTrue(sum_sub_env_vector_indices[2] == 6) - algo.stop() + algo = config.build() + # Fake the counter on the local worker (doesn't have an env) and + # set it to -1 so the below `foreach_worker()` won't fail. + algo.env_runner.sum_sub_env_vector_indices = -1 + + # Get sub-env vector index sums from the 2 remote workers: + sum_sub_env_vector_indices = algo.env_runner_group.foreach_worker( + lambda w: w.sum_sub_env_vector_indices + ) + # Local worker has no environments -> Expect the -1 special + # value returned by the above lambda. + self.assertTrue(sum_sub_env_vector_indices[0] == -1) + # Both remote workers (index 1 and 2) have a vector index counter + # of 6 (sum of vector indices: 0 + 1 + 2 + 3). 
+ self.assertTrue(sum_sub_env_vector_indices[1] == 6) + self.assertTrue(sum_sub_env_vector_indices[2] == 6) + algo.stop() def test_on_sub_environment_created_with_remote_envs(self): config = ( @@ -148,24 +145,23 @@ def test_on_sub_environment_created_with_remote_envs(self): ): config.callbacks(callbacks) - for _ in framework_iterator(config, frameworks=("tf", "torch")): - algo = config.build() - # Fake the counter on the local worker (doesn't have an env) and - # set it to -1 so the below `foreach_worker()` won't fail. - algo.env_runner.sum_sub_env_vector_indices = -1 - - # Get sub-env vector index sums from the 2 remote workers: - sum_sub_env_vector_indices = algo.env_runner_group.foreach_worker( - lambda w: w.sum_sub_env_vector_indices - ) - # Local worker has no environments -> Expect the -1 special - # value returned by the above lambda. - self.assertTrue(sum_sub_env_vector_indices[0] == -1) - # Both remote workers (index 1 and 2) have a vector index counter - # of 6 (sum of vector indices: 0 + 1 + 2 + 3). - self.assertTrue(sum_sub_env_vector_indices[1] == 6) - self.assertTrue(sum_sub_env_vector_indices[2] == 6) - algo.stop() + algo = config.build() + # Fake the counter on the local worker (doesn't have an env) and + # set it to -1 so the below `foreach_worker()` won't fail. + algo.env_runner.sum_sub_env_vector_indices = -1 + + # Get sub-env vector index sums from the 2 remote workers: + sum_sub_env_vector_indices = algo.env_runner_group.foreach_worker( + lambda w: w.sum_sub_env_vector_indices + ) + # Local worker has no environments -> Expect the -1 special + # value returned by the above lambda. + self.assertTrue(sum_sub_env_vector_indices[0] == -1) + # Both remote workers (index 1 and 2) have a vector index counter + # of 6 (sum of vector indices: 0 + 1 + 2 + 3). + self.assertTrue(sum_sub_env_vector_indices[1] == 6) + self.assertTrue(sum_sub_env_vector_indices[2] == 6) + algo.stop() def test_on_episode_created(self): # 1000 steps sampled (2.5 episodes on each sub-environment) before training diff --git a/rllib/algorithms/tests/test_callbacks_on_algorithm.py b/rllib/algorithms/tests/test_callbacks_on_algorithm.py index 05021456cb91..241d9ad31afa 100644 --- a/rllib/algorithms/tests/test_callbacks_on_algorithm.py +++ b/rllib/algorithms/tests/test_callbacks_on_algorithm.py @@ -7,7 +7,6 @@ from ray.rllib.algorithms.callbacks import DefaultCallbacks from ray.rllib.algorithms.ppo import PPOConfig from ray.rllib.examples.envs.classes.cartpole_crashing import CartPoleCrashing -from ray.rllib.utils.test_utils import framework_iterator from ray import tune @@ -98,22 +97,19 @@ def test_on_init_and_checkpoint_loaded(self): .environment("CartPole-v1") .callbacks(InitAndCheckpointRestoredCallbacks) ) - for _ in framework_iterator(config, frameworks=("torch", "tf2")): - algo = config.build() - self.assertTrue(algo.callbacks._on_init_was_called) + algo = config.build() + self.assertTrue(algo.callbacks._on_init_was_called) + self.assertTrue(not hasattr(algo.callbacks, "_on_checkpoint_loaded_was_called")) + algo.train() + # Save algo and restore. + with tempfile.TemporaryDirectory() as tmpdir: + algo.save(checkpoint_dir=tmpdir) self.assertTrue( not hasattr(algo.callbacks, "_on_checkpoint_loaded_was_called") ) - algo.train() - # Save algo and restore. 
- with tempfile.TemporaryDirectory() as tmpdir: - algo.save(checkpoint_dir=tmpdir) - self.assertTrue( - not hasattr(algo.callbacks, "_on_checkpoint_loaded_was_called") - ) - algo.load_checkpoint(checkpoint_dir=tmpdir) - self.assertTrue(algo.callbacks._on_checkpoint_loaded_was_called) - algo.stop() + algo.load_checkpoint(checkpoint_dir=tmpdir) + self.assertTrue(algo.callbacks._on_checkpoint_loaded_was_called) + algo.stop() if __name__ == "__main__": diff --git a/rllib/core/learner/tests/test_learner.py b/rllib/core/learner/tests/test_learner.py index c1d60b71e201..815b9b54a2d4 100644 --- a/rllib/core/learner/tests/test_learner.py +++ b/rllib/core/learner/tests/test_learner.py @@ -9,15 +9,10 @@ from ray.rllib.core.testing.testing_learner import BaseTestingAlgorithmConfig from ray.rllib.utils.numpy import convert_to_numpy -from ray.rllib.utils.framework import try_import_tf, try_import_torch -from ray.rllib.utils.test_utils import ( - check, - framework_iterator, - get_cartpole_dataset_reader, -) +from ray.rllib.utils.framework import try_import_torch +from ray.rllib.utils.test_utils import check, get_cartpole_dataset_reader from ray.rllib.utils.metrics import ALL_MODULES -_, tf, _ = try_import_tf() torch, _ = try_import_torch() @@ -37,22 +32,17 @@ def test_end_to_end_update(self): config = BaseTestingAlgorithmConfig() - for _ in framework_iterator(config, frameworks=("torch", "tf2")): - learner = config.build_learner(env=self.ENV) - reader = get_cartpole_dataset_reader(batch_size=512) - - min_loss = float("inf") - for iter_i in range(1000): - batch = reader.next() - results = learner.update_from_batch(batch=batch.as_multi_agent()) - - loss = results[ALL_MODULES][Learner.TOTAL_LOSS_KEY] - min_loss = min(loss, min_loss) - print(f"[iter = {iter_i}] Loss: {loss:.3f}, Min Loss: {min_loss:.3f}") - # The loss is initially around 0.69 (ln2). When it gets to around - # 0.58 the return of the policy gets to around 100. - if min_loss < 0.58: - break + learner = config.build_learner(env=self.ENV) + reader = get_cartpole_dataset_reader(batch_size=512) + + min_loss = float("inf") + for iter_i in range(1000): + batch = reader.next() + results = learner.update_from_batch(batch=batch.as_multi_agent()) + + loss = results[ALL_MODULES][Learner.TOTAL_LOSS_KEY] + min_loss = min(loss, min_loss) + print(f"[iter = {iter_i}] Loss: {loss:.3f}, Min Loss: {min_loss:.3f}") self.assertLess(min_loss, 0.58) def test_compute_gradients(self): @@ -63,27 +53,20 @@ def test_compute_gradients(self): """ config = BaseTestingAlgorithmConfig() - for fw in framework_iterator(config, frameworks=("torch", "tf2")): - learner = config.build_learner(env=self.ENV) + learner = config.build_learner(env=self.ENV) - params = learner.get_parameters(learner.module[DEFAULT_MODULE_ID]) + params = learner.get_parameters(learner.module[DEFAULT_MODULE_ID]) - tape = None - if fw == "torch": - loss_per_module = {ALL_MODULES: sum(param.sum() for param in params)} - else: - with tf.GradientTape() as tape: - loss_per_module = { - ALL_MODULES: sum(tf.reduce_sum(param) for param in params) - } + tape = None + loss_per_module = {ALL_MODULES: sum(param.sum() for param in params)} - gradients = learner.compute_gradients(loss_per_module, gradient_tape=tape) + gradients = learner.compute_gradients(loss_per_module, gradient_tape=tape) - # Type should be a mapping from ParamRefs to gradients. - self.assertIsInstance(gradients, dict) + # Type should be a mapping from ParamRefs to gradients. 
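
The `check(grad, np.ones(grad.shape))` assertion that follows relies on the loss being a plain sum over all parameters, whose gradient with respect to every element is exactly 1. A standalone torch sketch of that fact (illustrative only, not part of the patch):

    import torch

    w = torch.randn(3, 2, requires_grad=True)
    loss = w.sum()  # d(loss)/d(w[i, j]) == 1 for every element
    loss.backward()
    assert torch.equal(w.grad, torch.ones_like(w))
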
+        self.assertIsInstance(gradients, dict)

-            for grad in gradients.values():
-                check(grad, np.ones(grad.shape))
+        for grad in gradients.values():
+            check(grad, np.ones(grad.shape))

     def test_postprocess_gradients(self):
         """Tests the base grad clipping logic in `postprocess_gradients()`."""
@@ -93,69 +76,66 @@
             lr=0.0003, grad_clip=0.75, grad_clip_by="value"
         )

-        for fw in framework_iterator(config, frameworks=("torch", "tf2")):
-            learner = config.build_learner(env=self.ENV)
-            # Pretend our computed gradients are our weights + 1.0.
-            grads = {
-                learner.get_param_ref(v): v + 1.0
-                for v in learner.get_parameters(learner.module[DEFAULT_MODULE_ID])
-            }
-            # Call the learner's postprocessing method.
-            processed_grads = list(learner.postprocess_gradients(grads).values())
-            # Check clipped gradients.
-            # No single gradient must be larger than 0.1 or smaller than -0.1:
-            self.assertTrue(
-                all(
-                    np.max(grad) <= config.grad_clip
-                    and np.min(grad) >= -config.grad_clip
-                    for grad in convert_to_numpy(processed_grads)
-                )
+        learner = config.build_learner(env=self.ENV)
+        # Pretend our computed gradients are our weights + 1.0.
+        grads = {
+            learner.get_param_ref(v): v + 1.0
+            for v in learner.get_parameters(learner.module[DEFAULT_MODULE_ID])
+        }
+        # Call the learner's postprocessing method.
+        processed_grads = list(learner.postprocess_gradients(grads).values())
+        # Check clipped gradients.
+        # No single gradient element may exceed grad_clip (0.75) or fall below -0.75:
+        self.assertTrue(
+            all(
+                np.max(grad) <= config.grad_clip and np.min(grad) >= -config.grad_clip
                 for grad in convert_to_numpy(processed_grads)
             )
+        )

-            # Clip by norm.
-            config.grad_clip = 1.0
-            config.grad_clip_by = "norm"
-            learner = config.build_learner(env=self.ENV)
-            # Pretend our computed gradients are our weights + 1.0.
-            grads = {
-                learner.get_param_ref(v): v + 1.0
-                for v in learner.get_parameters(learner.module[DEFAULT_MODULE_ID])
-            }
-            # Call the learner's postprocessing method.
-            processed_grads = list(learner.postprocess_gradients(grads).values())
-            # Check clipped gradients.
+        # Clip by norm.
+        config.grad_clip = 1.0
+        config.grad_clip_by = "norm"
+        learner = config.build_learner(env=self.ENV)
+        # Pretend our computed gradients are our weights + 1.0.
+        grads = {
+            learner.get_param_ref(v): v + 1.0
+            for v in learner.get_parameters(learner.module[DEFAULT_MODULE_ID])
+        }
+        # Call the learner's postprocessing method.
+        processed_grads = list(learner.postprocess_gradients(grads).values())
+        # Check clipped gradients.
+        for proc_grad, grad in zip(
+            convert_to_numpy(processed_grads),
+            convert_to_numpy(list(grads.values())),
+        ):
+            l2_norm = np.sqrt(np.sum(grad**2.0))
+            if l2_norm > config.grad_clip:
+                check(proc_grad, grad * (config.grad_clip / l2_norm))
+
+        # Clip by global norm.
+        config.grad_clip = 5.0
+        config.grad_clip_by = "global_norm"
+        learner = config.build_learner(env=self.ENV)
+        # Pretend our computed gradients are our weights + 1.0.
+        grads = {
+            learner.get_param_ref(v): v + 1.0
+            for v in learner.get_parameters(learner.module[DEFAULT_MODULE_ID])
+        }
+        # Call the learner's postprocessing method.
+        processed_grads = list(learner.postprocess_gradients(grads).values())
+        # Check clipped gradients.
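
The expected values for the three clipping modes exercised in this test (by value above, by norm and by global norm continuing below) reduce to a few numpy formulas. A standalone sketch with toy gradient tensors (hypothetical values, mirroring the arithmetic the test checks against):

    import numpy as np

    grads = [np.array([0.5, -2.0]), np.array([1.5, 0.25])]

    # grad_clip_by="value": every element is clamped into [-clip, clip].
    clip = 0.75
    by_value = [np.clip(g, -clip, clip) for g in grads]

    # grad_clip_by="norm": rescale each tensor whose own L2 norm exceeds clip.
    clip = 1.0
    by_norm = [
        g * (clip / np.sqrt(np.sum(g**2))) if np.sqrt(np.sum(g**2)) > clip else g
        for g in grads
    ]

    # grad_clip_by="global_norm": one scaling factor computed across all tensors.
    clip = 5.0
    global_norm = np.sqrt(sum(np.sum(g**2) for g in grads))
    if global_norm > clip:
        by_global_norm = [g * (clip / global_norm) for g in grads]
    else:
        by_global_norm = grads
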
+ global_norm = np.sqrt( + np.sum( + np.sum(grad**2.0) for grad in convert_to_numpy(list(grads.values())) + ) + ) + if global_norm > config.grad_clip: for proc_grad, grad in zip( convert_to_numpy(processed_grads), - convert_to_numpy(list(grads.values())), + grads.values(), ): - l2_norm = np.sqrt(np.sum(grad**2.0)) - if l2_norm > config.grad_clip: - check(proc_grad, grad * (config.grad_clip / l2_norm)) - - # Clip by global norm. - config.grad_clip = 5.0 - config.grad_clip_by = "global_norm" - learner = config.build_learner(env=self.ENV) - # Pretend our computed gradients are our weights + 1.0. - grads = { - learner.get_param_ref(v): v + 1.0 - for v in learner.get_parameters(learner.module[DEFAULT_MODULE_ID]) - } - # Call the learner's postprocessing method. - processed_grads = list(learner.postprocess_gradients(grads).values()) - # Check clipped gradients. - global_norm = np.sqrt( - np.sum( - np.sum(grad**2.0) - for grad in convert_to_numpy(list(grads.values())) - ) - ) - if global_norm > config.grad_clip: - for proc_grad, grad in zip( - convert_to_numpy(processed_grads), - grads.values(), - ): - check(proc_grad, grad * (config.grad_clip / global_norm)) + check(proc_grad, grad * (config.grad_clip / global_norm)) def test_apply_gradients(self): """Tests the apply_gradients correctness. @@ -165,31 +145,23 @@ def test_apply_gradients(self): """ config = BaseTestingAlgorithmConfig().training(lr=0.0003) - for fw in framework_iterator(config, frameworks=("torch", "tf2")): - learner = config.build_learner(env=self.ENV) - - # calculated the expected new params based on gradients of all ones. - params = learner.get_parameters(learner.module[DEFAULT_MODULE_ID]) - n_steps = 100 - expected = [ - ( - convert_to_numpy(param) - - n_steps * learner.config.lr * np.ones(param.shape) - ) - for param in params - ] - for _ in range(n_steps): - if fw == "torch": - gradients = { - learner.get_param_ref(p): torch.ones_like(p) for p in params - } - else: - gradients = { - learner.get_param_ref(p): tf.ones_like(p) for p in params - } - learner.apply_gradients(gradients) - - check(params, expected) + learner = config.build_learner(env=self.ENV) + + # calculated the expected new params based on gradients of all ones. + params = learner.get_parameters(learner.module[DEFAULT_MODULE_ID]) + n_steps = 100 + expected = [ + ( + convert_to_numpy(param) + - n_steps * learner.config.lr * np.ones(param.shape) + ) + for param in params + ] + for _ in range(n_steps): + gradients = {learner.get_param_ref(p): torch.ones_like(p) for p in params} + learner.apply_gradients(gradients) + + check(params, expected) def test_add_remove_module(self): """Tests the compute/apply_gradients with add/remove modules. 
@@ -200,79 +172,66 @@ def test_add_remove_module(self): """ config = BaseTestingAlgorithmConfig().training(lr=0.0003) - for fw in framework_iterator(config, frameworks=("torch", "tf2")): - learner = config.build_learner(env=self.ENV) - rl_module_spec = config.get_default_rl_module_spec() - rl_module_spec.observation_space = self.ENV.observation_space - rl_module_spec.action_space = self.ENV.action_space - learner.add_module( - module_id="test", - module_spec=rl_module_spec, - ) - learner.remove_module(DEFAULT_MODULE_ID) - - # only test module should be left - self.assertEqual(set(learner.module.keys()), {"test"}) + learner = config.build_learner(env=self.ENV) + rl_module_spec = config.get_default_rl_module_spec() + rl_module_spec.observation_space = self.ENV.observation_space + rl_module_spec.action_space = self.ENV.action_space + learner.add_module( + module_id="test", + module_spec=rl_module_spec, + ) + learner.remove_module(DEFAULT_MODULE_ID) + + # only test module should be left + self.assertEqual(set(learner.module.keys()), {"test"}) + + # calculated the expected new params based on gradients of all ones. + params = learner.get_parameters(learner.module["test"]) + n_steps = 100 + expected = [ + convert_to_numpy(param) - n_steps * learner.config.lr * np.ones(param.shape) + for param in params + ] + for _ in range(n_steps): + tape = None + loss_per_module = {ALL_MODULES: sum(param.sum() for param in params)} + gradients = learner.compute_gradients(loss_per_module, gradient_tape=tape) + learner.apply_gradients(gradients) - # calculated the expected new params based on gradients of all ones. - params = learner.get_parameters(learner.module["test"]) - n_steps = 100 - expected = [ - convert_to_numpy(param) - - n_steps * learner.config.lr * np.ones(param.shape) - for param in params - ] - for _ in range(n_steps): - tape = None - if fw == "torch": - loss_per_module = { - ALL_MODULES: sum(param.sum() for param in params) - } - else: - with tf.GradientTape() as tape: - loss_per_module = { - ALL_MODULES: sum(tf.reduce_sum(param) for param in params) - } - gradients = learner.compute_gradients( - loss_per_module, gradient_tape=tape - ) - learner.apply_gradients(gradients) - - check(params, expected) + check(params, expected) def test_save_to_path_and_restore_from_path(self): """Tests, whether a Learner's state is properly saved and restored.""" config = BaseTestingAlgorithmConfig() - for fw in framework_iterator(config, frameworks=("torch", "tf2")): - # Get a Learner instance for the framework and env. - learner1 = config.build_learner(env=self.ENV) - with tempfile.TemporaryDirectory() as tmpdir: - learner1.save_to_path(tmpdir) - - learner2 = config.build_learner(env=self.ENV) - learner2.restore_from_path(tmpdir) - self._check_learner_states(fw, learner1, learner2) - - # Add a module then save/load and check states. - with tempfile.TemporaryDirectory() as tmpdir: - rl_module_spec = config.get_default_rl_module_spec() - rl_module_spec.observation_space = self.ENV.observation_space - rl_module_spec.action_space = self.ENV.action_space - learner1.add_module( - module_id="test", - module_spec=rl_module_spec, - ) - learner1.save_to_path(tmpdir) - learner2 = Learner.from_checkpoint(tmpdir) - self._check_learner_states(fw, learner1, learner2) - - # Remove a module then save/load and check states. 
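
The `expected` arrays in `test_apply_gradients` and `test_add_remove_module` above encode plain SGD arithmetic: with a constant all-ones gradient, every update subtracts `lr`, so after `n_steps` updates each element has moved by exactly `n_steps * lr`. A numeric sketch with hypothetical parameter values:

    import numpy as np

    lr, n_steps = 0.0003, 100
    param = np.array([0.2, -0.1, 0.05])
    expected = param - n_steps * lr * np.ones_like(param)

    updated = param.copy()
    for _ in range(n_steps):
        updated -= lr * np.ones_like(param)  # one SGD step with an all-ones gradient
    assert np.allclose(updated, expected)
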
- with tempfile.TemporaryDirectory() as tmpdir: - learner1.remove_module(module_id=DEFAULT_MODULE_ID) - learner1.save_to_path(tmpdir) - learner2 = Learner.from_checkpoint(tmpdir) - self._check_learner_states(fw, learner1, learner2) + # Get a Learner instance for the framework and env. + learner1 = config.build_learner(env=self.ENV) + with tempfile.TemporaryDirectory() as tmpdir: + learner1.save_to_path(tmpdir) + + learner2 = config.build_learner(env=self.ENV) + learner2.restore_from_path(tmpdir) + self._check_learner_states("torch", learner1, learner2) + + # Add a module then save/load and check states. + with tempfile.TemporaryDirectory() as tmpdir: + rl_module_spec = config.get_default_rl_module_spec() + rl_module_spec.observation_space = self.ENV.observation_space + rl_module_spec.action_space = self.ENV.action_space + learner1.add_module( + module_id="test", + module_spec=rl_module_spec, + ) + learner1.save_to_path(tmpdir) + learner2 = Learner.from_checkpoint(tmpdir) + self._check_learner_states("torch", learner1, learner2) + + # Remove a module then save/load and check states. + with tempfile.TemporaryDirectory() as tmpdir: + learner1.remove_module(module_id=DEFAULT_MODULE_ID) + learner1.save_to_path(tmpdir) + learner2 = Learner.from_checkpoint(tmpdir) + self._check_learner_states("torch", learner1, learner2) def _check_learner_states(self, framework, learner1, learner2): check(learner1.module.get_state(), learner2.module.get_state()) diff --git a/rllib/core/models/tests/test_catalog.py b/rllib/core/models/tests/test_catalog.py index 86d561a3f752..7189f02dceaa 100644 --- a/rllib/core/models/tests/test_catalog.py +++ b/rllib/core/models/tests/test_catalog.py @@ -44,7 +44,6 @@ from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.utils.numpy import convert_to_numpy from ray.rllib.utils.spaces.space_utils import get_dummy_batch_for_space -from ray.rllib.utils.test_utils import framework_iterator from ray.rllib.utils.torch_utils import convert_to_torch_tensor _, tf, _ = try_import_tf() @@ -320,63 +319,54 @@ def test_get_dist_cls_from_action_space(self): model_config_dict=MODEL_DEFAULTS.copy(), ) - for framework in framework_iterator(frameworks=["tf2", "torch"]): - - if framework == "tf2": - framework = "tf2" + dist_cls = catalog._get_dist_cls_from_action_space( + action_space=action_space, + framework="torch", + ) - dist_cls = catalog._get_dist_cls_from_action_space( + # Check if we can query the required input dimensions + expected_cls = expected_cls_dict["torch"] + if ( + expected_cls is TorchMultiDistribution + or expected_cls is TfMultiDistribution + ): + # For these special cases, we need to create partials of the + # expected classes so that we can calculate the required inputs + expected_cls = _multi_action_dist_partial_helper( + catalog_cls=catalog, action_space=action_space, - framework=framework, + framework="torch", ) - - # Check if we can query the required input dimensions - expected_cls = expected_cls_dict[framework] - if ( - expected_cls is TorchMultiDistribution - or expected_cls is TfMultiDistribution - ): - # For these special cases, we need to create partials of the - # expected classes so that we can calculate the required inputs - expected_cls = _multi_action_dist_partial_helper( - catalog_cls=catalog, - action_space=action_space, - framework=framework, - ) - elif ( - expected_cls is TorchMultiCategorical - or expected_cls is TfMultiCategorical - ): - # For these special cases, we need to create partials of the - # expected classes so 
that we can calculate the required inputs - expected_cls = _multi_categorical_dist_partial_helper( - action_space=action_space, framework=framework - ) - - # Now that we have sorted out special cases, we can finally get the - # input_dim - input_dim = expected_cls.required_input_dim(action_space) - logits = np.ones((32, input_dim), dtype=np.float32) - if framework == "torch": - logits = torch.from_numpy(logits) - else: - logits = tf.convert_to_tensor(logits) - # We don't need a model if we input tensors - dist = dist_cls.from_logits(logits=logits) - self.assertTrue( - isinstance(dist, expected_cls_dict[framework]), - msg=f"Expected {expected_cls_dict[framework]}, " - f"got {type(dist)}", + elif ( + expected_cls is TorchMultiCategorical + or expected_cls is TfMultiCategorical + ): + # For these special cases, we need to create partials of the + # expected classes so that we can calculate the required inputs + expected_cls = _multi_categorical_dist_partial_helper( + action_space=action_space, framework="torch" ) - # Test if sampling works - actions = dist.sample() - # Test is logp works - dist.logp(actions) - - # For any array of actions in a possibly nested space, convert to - # numpy and pick the first one to check if it is in the action space. - action = tree.map_structure(lambda a: convert_to_numpy(a)[0], actions) - self.assertTrue(action_space.contains(action)) + + # Now that we have sorted out special cases, we can finally get the + # input_dim + input_dim = expected_cls.required_input_dim(action_space) + logits = np.ones((32, input_dim), dtype=np.float32) + logits = torch.from_numpy(logits) + # We don't need a model if we input tensors + dist = dist_cls.from_logits(logits=logits) + self.assertTrue( + isinstance(dist, expected_cls_dict["torch"]), + msg=f"Expected {expected_cls_dict['torch']}, " f"got {type(dist)}", + ) + # Test if sampling works + actions = dist.sample() + # Test is logp works + dist.logp(actions) + + # For any array of actions in a possibly nested space, convert to + # numpy and pick the first one to check if it is in the action space. + action = tree.map_structure(lambda a: convert_to_numpy(a)[0], actions) + self.assertTrue(action_space.contains(action)) def test_customize_catalog_from_algorithm_config(self): """Test if we can pass catalog to algorithm config and it ends up inside diff --git a/rllib/core/models/tests/test_cnn_encoders.py b/rllib/core/models/tests/test_cnn_encoders.py index c0536ca2bc6d..d7b344aba375 100644 --- a/rllib/core/models/tests/test_cnn_encoders.py +++ b/rllib/core/models/tests/test_cnn_encoders.py @@ -4,10 +4,9 @@ from ray.rllib.core.models.base import ENCODER_OUT from ray.rllib.core.models.configs import CNNEncoderConfig from ray.rllib.models.utils import get_filter_config -from ray.rllib.utils.framework import try_import_tf, try_import_torch -from ray.rllib.utils.test_utils import framework_iterator, ModelChecker +from ray.rllib.utils.framework import try_import_torch +from ray.rllib.utils.test_utils import ModelChecker -_, tf, _ = try_import_tf() torch, _ = try_import_torch() @@ -67,11 +66,10 @@ def test_cnn_encoders(self): # with each other. model_checker = ModelChecker(config) - for fw in framework_iterator(frameworks=("tf2", "torch")): - # Add this framework version of the model to our checker. - outputs = model_checker.add(framework=fw) - # Confirm that the config conputed the correct (actual) output dims. 
-            self.assertEqual(outputs[ENCODER_OUT].shape, (1, config.output_dims[0]))
+        # Add this framework version of the model to our checker.
+        outputs = model_checker.add(framework="torch")
+        # Confirm that the config computed the correct (actual) output dims.
+        self.assertEqual(outputs[ENCODER_OUT].shape, (1, config.output_dims[0]))

         # Check all added models against each other.
         model_checker.check()
@@ -97,11 +95,10 @@ def test_cnn_encoders_valid_padding(self):
         # with each other.
         model_checker = ModelChecker(config)

-        for fw in framework_iterator(frameworks=("tf2", "torch")):
-            # Add this framework version of the model to our checker.
-            outputs = model_checker.add(framework=fw)
-            # Confirm that the config conputed the correct (actual) output dims.
-            self.assertEqual(outputs[ENCODER_OUT].shape, (1, config.output_dims[0]))
+        # Add this framework version of the model to our checker.
+        outputs = model_checker.add(framework="torch")
+        # Confirm that the config computed the correct (actual) output dims.
+        self.assertEqual(outputs[ENCODER_OUT].shape, (1, config.output_dims[0]))

         # Check all added models against each other.
         model_checker.check()
diff --git a/rllib/core/models/tests/test_cnn_transpose_heads.py b/rllib/core/models/tests/test_cnn_transpose_heads.py
index 2b7b38c00bbc..2c5a0d13c037 100644
--- a/rllib/core/models/tests/test_cnn_transpose_heads.py
+++ b/rllib/core/models/tests/test_cnn_transpose_heads.py
@@ -2,10 +2,9 @@ import unittest

 from ray.rllib.core.models.configs import CNNTransposeHeadConfig
-from ray.rllib.utils.framework import try_import_tf, try_import_torch
-from ray.rllib.utils.test_utils import framework_iterator, ModelChecker
+from ray.rllib.utils.framework import try_import_torch
+from ray.rllib.utils.test_utils import ModelChecker

-_, tf, _ = try_import_tf()
 torch, _ = try_import_torch()
@@ -92,10 +91,9 @@ def test_cnn_transpose_heads(self):
         # with each other.
         model_checker = ModelChecker(config)

-        for fw in framework_iterator(frameworks=("tf2", "torch")):
-            # Add this framework version of the model to our checker.
-            outputs = model_checker.add(framework=fw)
-            self.assertEqual(outputs.shape, (1,) + tuple(expected_output_dims))
+        # Add this framework version of the model to our checker.
+        outputs = model_checker.add(framework="torch")
+        self.assertEqual(outputs.shape, (1,) + tuple(expected_output_dims))

         # Check all added models against each other.
         model_checker.check()
diff --git a/rllib/core/models/tests/test_mlp_encoders.py b/rllib/core/models/tests/test_mlp_encoders.py
index 25fecf3f5235..96b5fc45dbe3 100644
--- a/rllib/core/models/tests/test_mlp_encoders.py
+++ b/rllib/core/models/tests/test_mlp_encoders.py
@@ -3,10 +2,9 @@
 from ray.rllib.core.models.configs import MLPEncoderConfig
 from ray.rllib.core.models.base import ENCODER_OUT
-from ray.rllib.utils.framework import try_import_tf, try_import_torch
-from ray.rllib.utils.test_utils import framework_iterator, ModelChecker
+from ray.rllib.utils.framework import try_import_torch
+from ray.rllib.utils.test_utils import ModelChecker

-_, tf, _ = try_import_tf()
 torch, _ = try_import_torch()
@@ -72,10 +71,9 @@ def test_mlp_encoders(self):
         # with each other.
         model_checker = ModelChecker(config)

-        for fw in framework_iterator(frameworks=("tf2", "torch")):
-            # Add this framework version of the model to our checker.
-            outputs = model_checker.add(framework=fw)
-            self.assertEqual(outputs[ENCODER_OUT].shape, (1, output_dim))
+        # Add this framework version of the model to our checker.
+ outputs = model_checker.add(framework="torch") + self.assertEqual(outputs[ENCODER_OUT].shape, (1, output_dim)) # Check all added models against each other. model_checker.check() diff --git a/rllib/core/models/tests/test_mlp_heads.py b/rllib/core/models/tests/test_mlp_heads.py index 366814b5d584..fcdcf0ac9695 100644 --- a/rllib/core/models/tests/test_mlp_heads.py +++ b/rllib/core/models/tests/test_mlp_heads.py @@ -2,10 +2,9 @@ import unittest from ray.rllib.core.models.configs import MLPHeadConfig, FreeLogStdMLPHeadConfig -from ray.rllib.utils.framework import try_import_tf, try_import_torch -from ray.rllib.utils.test_utils import framework_iterator, ModelChecker +from ray.rllib.utils.framework import try_import_torch +from ray.rllib.utils.test_utils import ModelChecker -_, tf, _ = try_import_tf() torch, nn = try_import_torch() @@ -77,10 +76,9 @@ def test_mlp_heads(self): # with each other. model_checker = ModelChecker(config) - for fw in framework_iterator(frameworks=("tf2", "torch")): - # Add this framework version of the model to our checker. - outputs = model_checker.add(framework=fw) - self.assertEqual(outputs.shape, (1, output_dim)) + # Add this framework version of the model to our checker. + outputs = model_checker.add(framework="torch") + self.assertEqual(outputs.shape, (1, output_dim)) # Check all added models against each other. model_checker.check() diff --git a/rllib/core/models/tests/test_recurrent_encoders.py b/rllib/core/models/tests/test_recurrent_encoders.py index f3fe42f34c8d..e2ba68be01b7 100644 --- a/rllib/core/models/tests/test_recurrent_encoders.py +++ b/rllib/core/models/tests/test_recurrent_encoders.py @@ -4,10 +4,9 @@ from ray.rllib.core.columns import Columns from ray.rllib.core.models.base import ENCODER_OUT from ray.rllib.core.models.configs import RecurrentEncoderConfig -from ray.rllib.utils.framework import try_import_tf, try_import_torch -from ray.rllib.utils.test_utils import framework_iterator, ModelChecker +from ray.rllib.utils.framework import try_import_torch +from ray.rllib.utils.test_utils import ModelChecker -_, tf, _ = try_import_tf() torch, _ = try_import_torch() @@ -54,19 +53,18 @@ def test_gru_encoders(self): # with each other. model_checker = ModelChecker(config) - for fw in framework_iterator(frameworks=("tf2", "torch")): - # Add this framework version of the model to our checker. - outputs = model_checker.add(framework=fw) - # Output shape: [1=B, 1=T, [output_dim]] - self.assertEqual( - outputs[ENCODER_OUT].shape, - (1, 1, config.output_dims[0]), - ) - # State shapes: [1=B, 1=num_layers, [hidden_dim]] - self.assertEqual( - outputs[Columns.STATE_OUT]["h"].shape, - (1, num_layers, hidden_dim), - ) + # Add this framework version of the model to our checker. + outputs = model_checker.add(framework="torch") + # Output shape: [1=B, 1=T, [output_dim]] + self.assertEqual( + outputs[ENCODER_OUT].shape, + (1, 1, config.output_dims[0]), + ) + # State shapes: [1=B, 1=num_layers, [hidden_dim]] + self.assertEqual( + outputs[Columns.STATE_OUT]["h"].shape, + (1, num_layers, hidden_dim), + ) # Check all added models against each other. model_checker.check() @@ -112,23 +110,22 @@ def test_lstm_encoders(self): # with each other. model_checker = ModelChecker(config) - for fw in framework_iterator(frameworks=("tf2", "torch")): - # Add this framework version of the model to our checker. 
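
The shape assertions in the GRU hunk above use a batch-major layout: outputs as (B, T, output_dim) and state as (B, num_layers, hidden_dim). Plain torch reports the recurrent state layers-first, so a transpose is presumably involved inside the encoder; a small reference sketch with hypothetical sizes:

    import torch

    num_layers, hidden_dim = 2, 16  # hypothetical values
    gru = torch.nn.GRU(
        input_size=8, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True
    )
    out, h_n = gru(torch.zeros(1, 1, 8))  # B=1, T=1
    assert out.shape == (1, 1, hidden_dim)  # (B, T, hidden)
    assert h_n.shape == (num_layers, 1, hidden_dim)  # torch reports layers-first
    assert h_n.transpose(0, 1).shape == (1, num_layers, hidden_dim)  # batch-major, as asserted above
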
- outputs = model_checker.add(framework=fw) - # Output shape: [1=B, 1=T, [output_dim]] - self.assertEqual( - outputs[ENCODER_OUT].shape, - (1, 1, config.output_dims[0]), - ) - # State shapes: [1=B, 1=num_layers, [hidden_dim]] - self.assertEqual( - outputs[Columns.STATE_OUT]["h"].shape, - (1, num_layers, hidden_dim), - ) - self.assertEqual( - outputs[Columns.STATE_OUT]["c"].shape, - (1, num_layers, hidden_dim), - ) + # Add this framework version of the model to our checker. + outputs = model_checker.add(framework="torch") + # Output shape: [1=B, 1=T, [output_dim]] + self.assertEqual( + outputs[ENCODER_OUT].shape, + (1, 1, config.output_dims[0]), + ) + # State shapes: [1=B, 1=num_layers, [hidden_dim]] + self.assertEqual( + outputs[Columns.STATE_OUT]["h"].shape, + (1, num_layers, hidden_dim), + ) + self.assertEqual( + outputs[Columns.STATE_OUT]["c"].shape, + (1, num_layers, hidden_dim), + ) # Check all added models against each other (only if bias=False). # See here on why pytorch uses two bias vectors per layer and tf only uses diff --git a/rllib/core/testing/tests/test_bc_algorithm.py b/rllib/core/testing/tests/test_bc_algorithm.py index 9403e183eda3..c40e09400301 100644 --- a/rllib/core/testing/tests/test_bc_algorithm.py +++ b/rllib/core/testing/tests/test_bc_algorithm.py @@ -8,14 +8,12 @@ BCTorchMultiAgentModuleWithSharedEncoder, ) from ray.rllib.core.testing.tf.bc_module import ( - DiscreteBCTFModule, BCTfRLModuleWithSharedGlobalEncoder, BCTfMultiAgentModuleWithSharedEncoder, ) from ray.rllib.core.rl_module.rl_module import RLModuleSpec from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec from ray.rllib.core.testing.bc_algorithm import BCConfigTest -from ray.rllib.utils.test_utils import framework_iterator from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole @@ -37,16 +35,11 @@ def test_bc_algorithm(self): .training(model={"fcnet_hiddens": [32, 32]}) ) - # TODO (Kourosh): Add tf2 support - for fw in framework_iterator(config, frameworks=("torch")): - algo = config.build(env="CartPole-v1") - policy = algo.get_policy() - rl_module = policy.model + algo = config.build(env="CartPole-v1") + policy = algo.get_policy() + rl_module = policy.model - if fw == "torch": - assert isinstance(rl_module, DiscreteBCTorchModule) - elif fw == "tf2": - assert isinstance(rl_module, DiscreteBCTFModule) + assert isinstance(rl_module, DiscreteBCTorchModule) def test_bc_algorithm_marl(self): """Tests simple extension of single-agent to independent multi-agent case.""" @@ -63,17 +56,12 @@ def test_bc_algorithm_marl(self): .environment(MultiAgentCartPole, env_config={"num_agents": 2}) ) - # TODO (Kourosh): Add tf2 support - for fw in framework_iterator(config, frameworks=("torch")): - algo = config.build() - for policy_id in policies: - policy = algo.get_policy(policy_id=policy_id) - rl_module = policy.model + algo = config.build() + for policy_id in policies: + policy = algo.get_policy(policy_id=policy_id) + rl_module = policy.model - if fw == "torch": - assert isinstance(rl_module, DiscreteBCTorchModule) - elif fw == "tf2": - assert isinstance(rl_module, DiscreteBCTFModule) + assert isinstance(rl_module, DiscreteBCTorchModule) def test_bc_algorithm_w_custom_multi_rl_module(self): """Tests the independent multi-agent case with shared encoders.""" diff --git a/rllib/evaluation/tests/test_rollout_worker.py b/rllib/evaluation/tests/test_rollout_worker.py index 2b98f3aaf726..4f8ed097170c 100644 --- a/rllib/evaluation/tests/test_rollout_worker.py +++ 
b/rllib/evaluation/tests/test_rollout_worker.py @@ -40,7 +40,7 @@ NUM_AGENT_STEPS_TRAINED, EPISODE_RETURN_MEAN, ) -from ray.rllib.utils.test_utils import check, framework_iterator +from ray.rllib.utils.test_utils import check from ray.tune.registry import register_env @@ -177,33 +177,26 @@ def test_global_vars_update(self): # lr = 0.1 - [(0.1 - 0.000001) / 100000] * ts .training(lr_schedule=[[0, 0.1], [100000, 0.000001]]) ) - for fw in framework_iterator(config, frameworks=("tf2", "tf")): - algo = config.build() - policy = algo.get_policy() - for i in range(3): - result = algo.train() - print( - "{}={}".format( - NUM_AGENT_STEPS_TRAINED, result["info"][NUM_AGENT_STEPS_TRAINED] - ) - ) - print( - "{}={}".format( - NUM_AGENT_STEPS_SAMPLED, result["info"][NUM_AGENT_STEPS_SAMPLED] - ) + algo = config.build() + policy = algo.get_policy() + for i in range(3): + result = algo.train() + print( + "{}={}".format( + NUM_AGENT_STEPS_TRAINED, result["info"][NUM_AGENT_STEPS_TRAINED] ) - global_timesteps = ( - policy.global_timestep - if fw == "tf" - else policy.global_timestep.numpy() + ) + print( + "{}={}".format( + NUM_AGENT_STEPS_SAMPLED, result["info"][NUM_AGENT_STEPS_SAMPLED] ) - print("global_timesteps={}".format(global_timesteps)) - expected_lr = 0.1 - ((0.1 - 0.000001) / 100000) * global_timesteps - lr = policy.cur_lr - if fw == "tf": - lr = policy.get_session().run(lr) - check(lr, expected_lr, rtol=0.05) - algo.stop() + ) + global_timesteps = policy.global_timestep + print("global_timesteps={}".format(global_timesteps)) + expected_lr = 0.1 - ((0.1 - 0.000001) / 100000) * global_timesteps + lr = policy.cur_lr + check(lr, expected_lr, rtol=0.05) + algo.stop() def test_query_evaluators(self): register_env("test", lambda _: gym.make("CartPole-v1")) @@ -217,21 +210,20 @@ def test_query_evaluators(self): ) .training(train_batch_size=20, sgd_minibatch_size=5, num_sgd_iter=1) ) - for _ in framework_iterator(config, frameworks=("torch", "tf")): - algo = config.build() - results = algo.env_runner_group.foreach_worker( - lambda w: w.total_rollout_fragment_length - ) - results2 = algo.env_runner_group.foreach_worker_with_id( - lambda i, w: (i, w.total_rollout_fragment_length) - ) - results3 = algo.env_runner_group.foreach_worker( - lambda w: w.foreach_env(lambda env: 1) - ) - self.assertEqual(results, [10, 10, 10]) - self.assertEqual(results2, [(0, 10), (1, 10), (2, 10)]) - self.assertEqual(results3, [[1, 1], [1, 1], [1, 1]]) - algo.stop() + algo = config.build() + results = algo.env_runner_group.foreach_worker( + lambda w: w.total_rollout_fragment_length + ) + results2 = algo.env_runner_group.foreach_worker_with_id( + lambda i, w: (i, w.total_rollout_fragment_length) + ) + results3 = algo.env_runner_group.foreach_worker( + lambda w: w.foreach_env(lambda env: 1) + ) + self.assertEqual(results, [10, 10, 10]) + self.assertEqual(results2, [(0, 10), (1, 10), (2, 10)]) + self.assertEqual(results3, [[1, 1], [1, 1], [1, 1]]) + algo.stop() def test_action_clipping(self): action_space = gym.spaces.Box(-2.0, 1.0, (3,)) diff --git a/rllib/evaluation/tests/test_trajectory_view_api.py b/rllib/evaluation/tests/test_trajectory_view_api.py index 69dbce3212f0..457abba37f63 100644 --- a/rllib/evaluation/tests/test_trajectory_view_api.py +++ b/rllib/evaluation/tests/test_trajectory_view_api.py @@ -13,7 +13,6 @@ from ray.rllib.examples._old_api_stack.policy.episode_env_aware_policy import ( EpisodeEnvAwareAttentionPolicy, ) -from ray.rllib.models.tf.attention_net import GTrXLNet from ray.rllib.policy.rnn_sequencing import 
pad_batch_to_sequences_of_same_size from ray.rllib.policy.sample_batch import ( DEFAULT_POLICY_ID, @@ -23,7 +22,7 @@ from ray.rllib.policy.view_requirement import ViewRequirement from ray.rllib.utils.annotations import override from ray.rllib.utils.metrics import NUM_ENV_STEPS_SAMPLED_LIFETIME -from ray.rllib.utils.test_utils import framework_iterator, check +from ray.rllib.utils.test_utils import check class MyCallbacks(DefaultCallbacks): @@ -64,42 +63,39 @@ def test_traj_view_normal_case(self): ) ) - for _ in framework_iterator(config): - algo = config.build() - policy = algo.get_policy() - view_req_model = policy.model.view_requirements - view_req_policy = policy.view_requirements - assert len(view_req_model) == 1, view_req_model - assert len(view_req_policy) == 12, view_req_policy - for key in [ - SampleBatch.OBS, - SampleBatch.ACTIONS, - SampleBatch.REWARDS, - SampleBatch.TERMINATEDS, - SampleBatch.TRUNCATEDS, - SampleBatch.NEXT_OBS, - SampleBatch.EPS_ID, - SampleBatch.AGENT_INDEX, - "weights", - ]: - assert key in view_req_policy - # None of the view cols has a special underlying data_col, - # except next-obs. - if key != SampleBatch.NEXT_OBS: - assert view_req_policy[key].data_col is None - else: - assert view_req_policy[key].data_col == SampleBatch.OBS - assert view_req_policy[key].shift == 1 - rollout_worker = algo.env_runner - sample_batch = rollout_worker.sample() - sample_batch = convert_ma_batch_to_sample_batch(sample_batch) - expected_count = ( - config.num_envs_per_env_runner * config.rollout_fragment_length - ) - assert sample_batch.count == expected_count - for v in sample_batch.values(): - assert len(v) == expected_count - algo.stop() + algo = config.build() + policy = algo.get_policy() + view_req_model = policy.model.view_requirements + view_req_policy = policy.view_requirements + assert len(view_req_model) == 1, view_req_model + assert len(view_req_policy) == 12, view_req_policy + for key in [ + SampleBatch.OBS, + SampleBatch.ACTIONS, + SampleBatch.REWARDS, + SampleBatch.TERMINATEDS, + SampleBatch.TRUNCATEDS, + SampleBatch.NEXT_OBS, + SampleBatch.EPS_ID, + SampleBatch.AGENT_INDEX, + "weights", + ]: + assert key in view_req_policy + # None of the view cols has a special underlying data_col, + # except next-obs. + if key != SampleBatch.NEXT_OBS: + assert view_req_policy[key].data_col is None + else: + assert view_req_policy[key].data_col == SampleBatch.OBS + assert view_req_policy[key].shift == 1 + rollout_worker = algo.env_runner + sample_batch = rollout_worker.sample() + sample_batch = convert_ma_batch_to_sample_batch(sample_batch) + expected_count = config.num_envs_per_env_runner * config.rollout_fragment_length + assert sample_batch.count == expected_count + for v in sample_batch.values(): + assert len(v) == expected_count + algo.stop() def test_traj_view_lstm_prev_actions_and_rewards(self): """Tests, whether Policy/Model return correct LSTM ViewRequirements.""" @@ -121,113 +117,67 @@ def test_traj_view_lstm_prev_actions_and_rewards(self): .env_runners(create_env_on_local_worker=True) ) - for _ in framework_iterator(config): - algo = config.build() - policy = algo.get_policy() - view_req_model = policy.model.view_requirements - view_req_policy = policy.view_requirements - # 7=obs, prev-a + r, 2x state-in, 2x state-out. 
- assert len(view_req_model) == 7, view_req_model - assert len(view_req_policy) == 23, (len(view_req_policy), view_req_policy) - for key in [ - SampleBatch.OBS, - SampleBatch.ACTIONS, - SampleBatch.REWARDS, - SampleBatch.TERMINATEDS, - SampleBatch.TRUNCATEDS, + algo = config.build() + policy = algo.get_policy() + view_req_model = policy.model.view_requirements + view_req_policy = policy.view_requirements + # 7=obs, prev-a + r, 2x state-in, 2x state-out. + assert len(view_req_model) == 7, view_req_model + assert len(view_req_policy) == 23, (len(view_req_policy), view_req_policy) + for key in [ + SampleBatch.OBS, + SampleBatch.ACTIONS, + SampleBatch.REWARDS, + SampleBatch.TERMINATEDS, + SampleBatch.TRUNCATEDS, + SampleBatch.NEXT_OBS, + SampleBatch.VF_PREDS, + SampleBatch.PREV_ACTIONS, + SampleBatch.PREV_REWARDS, + "advantages", + "value_targets", + SampleBatch.ACTION_DIST_INPUTS, + SampleBatch.ACTION_LOGP, + ]: + assert key in view_req_policy + + if key == SampleBatch.PREV_ACTIONS: + assert view_req_policy[key].data_col == SampleBatch.ACTIONS + assert view_req_policy[key].shift == -1 + elif key == SampleBatch.PREV_REWARDS: + assert view_req_policy[key].data_col == SampleBatch.REWARDS + assert view_req_policy[key].shift == -1 + elif key not in [ SampleBatch.NEXT_OBS, - SampleBatch.VF_PREDS, SampleBatch.PREV_ACTIONS, SampleBatch.PREV_REWARDS, - "advantages", - "value_targets", - SampleBatch.ACTION_DIST_INPUTS, - SampleBatch.ACTION_LOGP, ]: - assert key in view_req_policy - - if key == SampleBatch.PREV_ACTIONS: - assert view_req_policy[key].data_col == SampleBatch.ACTIONS - assert view_req_policy[key].shift == -1 - elif key == SampleBatch.PREV_REWARDS: - assert view_req_policy[key].data_col == SampleBatch.REWARDS - assert view_req_policy[key].shift == -1 - elif key not in [ - SampleBatch.NEXT_OBS, - SampleBatch.PREV_ACTIONS, - SampleBatch.PREV_REWARDS, - ]: - assert view_req_policy[key].data_col is None - else: - assert view_req_policy[key].data_col == SampleBatch.OBS - assert view_req_policy[key].shift == 1 - - rollout_worker = algo.env_runner - sample_batch = rollout_worker.sample() - sample_batch = convert_ma_batch_to_sample_batch(sample_batch) - - # Rollout fragment length should be auto-computed to 2000: - # 2 workers, 1 env per worker, train batch size=4000 -> 2000 per worker. - self.assertEqual(sample_batch.count, 2000, "ppo rollout count != 2000") - self.assertEqual(sum(sample_batch["seq_lens"]), sample_batch.count) - self.assertEqual( - len(sample_batch["seq_lens"]), sample_batch["state_in_0"].shape[0] - ) + assert view_req_policy[key].data_col is None + else: + assert view_req_policy[key].data_col == SampleBatch.OBS + assert view_req_policy[key].shift == 1 - # check if non-zero state_ins are pointing to the correct state_outs - seq_counters = np.cumsum(sample_batch["seq_lens"]) - for i in range(sample_batch["state_in_0"].shape[0]): - state_in = sample_batch["state_in_0"][i] - if np.any(state_in != 0): - # non-zero state-in should be one of th state_outs. - state_out_ind = seq_counters[i - 1] - 1 - check(sample_batch["state_out_0"][state_out_ind], state_in) - algo.stop() - - def test_traj_view_attention_net(self): - config = ( - ppo.PPOConfig() - # Batch-norm models have not been migrated to the RL Module API yet. 
- .api_stack(enable_rl_module_and_learner=False) - .environment( - "ray.rllib.examples.envs.classes.debug_counter_env.DebugCounterEnv", - env_config={"config": {"start_at_t": 1}}, # first obs is [1.0] - ) - .env_runners(num_env_runners=0) - .callbacks(MyCallbacks) - # Setup attention net. - .training( - model={ - "custom_model": GTrXLNet, - "custom_model_config": { - "num_transformer_units": 1, - "attention_dim": 64, - "num_heads": 2, - "memory_inference": 50, - "memory_training": 50, - "head_dim": 32, - "ff_hidden_dim": 32, - }, - "max_seq_len": 50, - }, - # Test with odd batch numbers. - train_batch_size=1031, - sgd_minibatch_size=201, - num_sgd_iter=5, - ) + rollout_worker = algo.env_runner + sample_batch = rollout_worker.sample() + sample_batch = convert_ma_batch_to_sample_batch(sample_batch) + + # Rollout fragment length should be auto-computed to 2000: + # 2 workers, 1 env per worker, train batch size=4000 -> 2000 per worker. + self.assertEqual(sample_batch.count, 2000, "ppo rollout count != 2000") + self.assertEqual(sum(sample_batch["seq_lens"]), sample_batch.count) + self.assertEqual( + len(sample_batch["seq_lens"]), sample_batch["state_in_0"].shape[0] ) - for _ in framework_iterator(config, frameworks="tf2"): - algo = config.build() - rw = algo.env_runner - sample = rw.sample() - assert sample.count == algo.config.get_rollout_fragment_length() - results = algo.train() - assert ( - results[f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}"] - == config["train_batch_size"] - ) - algo.stop() + # check if non-zero state_ins are pointing to the correct state_outs + seq_counters = np.cumsum(sample_batch["seq_lens"]) + for i in range(sample_batch["state_in_0"].shape[0]): + state_in = sample_batch["state_in_0"][i] + if np.any(state_in != 0): + # non-zero state-in should be one of th state_outs. + state_out_ind = seq_counters[i - 1] - 1 + check(sample_batch["state_out_0"][state_out_ind], state_in) + algo.stop() def test_traj_view_next_action(self): action_space = Discrete(2) diff --git a/rllib/examples/offline_rl/offline_rl.py b/rllib/examples/offline_rl/offline_rl.py index 3442d2b130b5..6d19252bca27 100644 --- a/rllib/examples/offline_rl/offline_rl.py +++ b/rllib/examples/offline_rl/offline_rl.py @@ -107,7 +107,6 @@ num_iterations = 5 min_reward = -300 - # Test for torch framework (tf not implemented yet). 
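
The state_in/state_out consistency check in the trajectory-view hunk above walks the cumulative sum of `seq_lens`: the state fed into sequence `i` must equal the last state written out by sequence `i - 1`. A toy illustration of the indexing with made-up sequence lengths:

    import numpy as np

    seq_lens = np.array([5, 3, 4])
    seq_counters = np.cumsum(seq_lens)       # [5, 8, 12]
    i = 1                                    # second sequence in the batch
    state_out_ind = seq_counters[i - 1] - 1  # 4 == index of the last step of sequence 0
    assert state_out_ind == 4
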
cql_algorithm = cql.CQL(config=config) learnt = False for i in range(num_iterations): diff --git a/rllib/models/tests/test_action_distributions.py b/rllib/models/tests/test_action_distributions.py index 3ba9b540af11..6de0c1aa62a0 100644 --- a/rllib/models/tests/test_action_distributions.py +++ b/rllib/models/tests/test_action_distributions.py @@ -5,15 +5,6 @@ import tree # pip install dm_tree import unittest -from ray.rllib.models.tf.tf_action_dist import ( - Beta, - Categorical, - DiagGaussian, - GumbelSoftmax, - MultiActionDistribution, - MultiCategorical, - SquashedGaussian, -) from ray.rllib.models.torch.torch_action_dist import ( TorchBeta, TorchCategorical, @@ -30,7 +21,7 @@ SMALL_NUMBER, LARGE_INTEGER, ) -from ray.rllib.utils.test_utils import check, framework_iterator +from ray.rllib.utils.test_utils import check tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() @@ -81,10 +72,7 @@ def _stability_test( dist = distribution_cls(inputs, {}, **(extra_kwargs or {})) for _ in range(100): sample = dist.sample() - if fw != "tf": - sample_check = sample.numpy() - else: - sample_check = sess.run(sample) + sample_check = sample.numpy() assert not np.any(np.isnan(sample_check)) assert np.all(np.isfinite(sample_check)) if bounds: @@ -97,10 +85,7 @@ def _stability_test( assert bounds[0] in sample_check assert bounds[1] in sample_check logp = dist.logp(sample) - if fw != "tf": - logp_check = logp.numpy() - else: - logp_check = sess.run(logp) + logp_check = logp.numpy() assert not np.any(np.isnan(logp_check)) assert np.all(np.isfinite(logp_check)) @@ -117,50 +102,45 @@ def test_categorical(self): inputs = inputs_space.sample() - for fw, sess in framework_iterator(session=True): - # Create the correct distribution object. - cls = Categorical if fw != "torch" else TorchCategorical - categorical = cls(inputs, {}) - - # Do a stability test using extreme NN outputs to see whether - # sampling and logp'ing result in NaN or +/-inf values. - self._stability_test( - cls, - inputs_space.shape, - fw=fw, - sess=sess, - bounds=(0, num_categories - 1), - ) + # Create the correct distribution object. + cls = TorchCategorical + categorical = cls(inputs, {}) + + # Do a stability test using extreme NN outputs to see whether + # sampling and logp'ing result in NaN or +/-inf values. + self._stability_test( + cls, + inputs_space.shape, + fw="torch", + sess=None, + bounds=(0, num_categories - 1), + ) - # Batch of size=3 and deterministic (True). - expected = np.transpose(np.argmax(inputs, axis=-1)) - # Sample, expect always max value - # (max likelihood for deterministic draw). - out = categorical.deterministic_sample() - check(out, expected) - - # Batch of size=3 and non-deterministic -> expect roughly the mean. - out = categorical.sample() - check( - tf.reduce_mean(out) if fw != "torch" else torch.mean(out.float()), - 1.0, - decimals=0, - ) + # Batch of size=3 and deterministic (True). + expected = np.transpose(np.argmax(inputs, axis=-1)) + # Sample, expect always max value + # (max likelihood for deterministic draw). + out = categorical.deterministic_sample() + check(out, expected) - # Test log-likelihood outputs. - probs = softmax(inputs) - values = values_space.sample() + # Batch of size=3 and non-deterministic -> expect roughly the mean. 
+ out = categorical.sample() + check(torch.mean(out.float()), 1.0, decimals=0) - out = categorical.logp(values if fw != "torch" else torch.Tensor(values)) - expected = [] - for i in range(batch_size): - expected.append(np.sum(np.log(np.array(probs[i][values[i]])))) - check(out, expected, decimals=4) + # Test log-likelihood outputs. + probs = softmax(inputs) + values = values_space.sample() - # Test entropy outputs. - out = categorical.entropy() - expected_entropy = -np.sum(probs * np.log(probs), -1) - check(out, expected_entropy) + out = categorical.logp(torch.Tensor(values)) + expected = [] + for i in range(batch_size): + expected.append(np.sum(np.log(np.array(probs[i][values[i]])))) + check(out, expected, decimals=4) + + # Test entropy outputs. + out = categorical.entropy() + expected_entropy = -np.sum(probs * np.log(probs), -1) + check(out, expected_entropy) def test_multi_categorical(self): batch_size = 100 @@ -183,66 +163,59 @@ def test_multi_categorical(self): input_lengths = [num_categories] * num_sub_distributions inputs_split = np.split(inputs, num_sub_distributions, axis=1) - for fw, sess in framework_iterator(session=True): - # Create the correct distribution object. - cls = MultiCategorical if fw != "torch" else TorchMultiCategorical - multi_categorical = cls(inputs, None, input_lengths) - - # Do a stability test using extreme NN outputs to see whether - # sampling and logp'ing result in NaN or +/-inf values. - self._stability_test( - cls, - inputs_space.shape, - fw=fw, - sess=sess, - bounds=(0, num_categories - 1), - extra_kwargs={"input_lens": input_lengths}, - ) + # Create the correct distribution object. + cls = TorchMultiCategorical + multi_categorical = cls(inputs, None, input_lengths) + + # Do a stability test using extreme NN outputs to see whether + # sampling and logp'ing result in NaN or +/-inf values. + self._stability_test( + cls, + inputs_space.shape, + fw="torch", + sess=None, + bounds=(0, num_categories - 1), + extra_kwargs={"input_lens": input_lengths}, + ) - # Batch of size=3 and deterministic (True). - expected = np.transpose(np.argmax(inputs_split, axis=-1)) - # Sample, expect always max value - # (max likelihood for deterministic draw). - out = multi_categorical.deterministic_sample() - check(out, expected) - - # Batch of size=3 and non-deterministic -> expect roughly the mean. - out = multi_categorical.sample() - check( - tf.reduce_mean(out) if fw != "torch" else torch.mean(out.float()), - 1.0, - decimals=0, - ) + # Batch of size=3 and deterministic (True). + expected = np.transpose(np.argmax(inputs_split, axis=-1)) + # Sample, expect always max value + # (max likelihood for deterministic draw). + out = multi_categorical.deterministic_sample() + check(out, expected) + + # Batch of size=3 and non-deterministic -> expect roughly the mean. + out = multi_categorical.sample() + check(torch.mean(out.float()), 1.0, decimals=0) - # Test log-likelihood outputs. - probs = softmax(inputs_split) - values = values_space.sample() - - out = multi_categorical.logp( - values - if fw != "torch" - else [torch.Tensor(values[i]) for i in range(num_sub_distributions)] - ) # v in np.stack(values, 1)]) - expected = [] - for i in range(batch_size): - expected.append( - np.sum( - np.log( - np.array( - [ - probs[j][i][values[j][i]] - for j in range(num_sub_distributions) - ] - ) + # Test log-likelihood outputs. 
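
The reference values in these categorical hunks are computed straight from softmax probabilities: the logp of a sampled class is the log of its probability, and the entropy is -sum(p * log p). A standalone cross-check against torch.distributions (scipy's softmax stands in here for the test utility of the same name):

    import numpy as np
    import torch
    from scipy.special import softmax

    logits = np.random.randn(3, 4).astype(np.float32)
    probs = softmax(logits, axis=-1)
    expected_entropy = -np.sum(probs * np.log(probs), -1)

    dist = torch.distributions.Categorical(logits=torch.from_numpy(logits))
    assert np.allclose(dist.entropy().numpy(), expected_entropy, atol=1e-5)
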
+ probs = softmax(inputs_split) + values = values_space.sample() + + out = multi_categorical.logp( + [torch.Tensor(values[i]) for i in range(num_sub_distributions)] + ) + expected = [] + for i in range(batch_size): + expected.append( + np.sum( + np.log( + np.array( + [ + probs[j][i][values[j][i]] + for j in range(num_sub_distributions) + ] ) ) ) - check(out, expected, decimals=4) + ) + check(out, expected, decimals=4) - # Test entropy outputs. - out = multi_categorical.entropy() - expected_entropy = -np.sum(np.sum(probs * np.log(probs), 0), -1) - check(out, expected_entropy) + # Test entropy outputs. + out = multi_categorical.entropy() + expected_entropy = -np.sum(np.sum(probs * np.log(probs), 0), -1) + check(out, expected_entropy) def test_squashed_gaussian(self): """Tests the SquashedGaussian ActionDistribution for all frameworks.""" @@ -251,172 +224,138 @@ def test_squashed_gaussian(self): low, high = -2.0, 1.0 - for fw, sess in framework_iterator(session=True): - cls = SquashedGaussian if fw != "torch" else TorchSquashedGaussian - - # Do a stability test using extreme NN outputs to see whether - # sampling and logp'ing result in NaN or +/-inf values. - self._stability_test( - cls, input_space.shape, fw=fw, sess=sess, bounds=(low, high) - ) - - # Batch of size=n and deterministic. - inputs = input_space.sample() - means, _ = np.split(inputs, 2, axis=-1) - squashed_distribution = cls(inputs, {}, low=low, high=high) - expected = ((np.tanh(means) + 1.0) / 2.0) * (high - low) + low - # Sample n times, expect always mean value (deterministic draw). - out = squashed_distribution.deterministic_sample() - check(out, expected) - - # Batch of size=n and non-deterministic -> expect roughly the mean. - inputs = input_space.sample() - means, log_stds = np.split(inputs, 2, axis=-1) - squashed_distribution = cls(inputs, {}, low=low, high=high) - expected = ((np.tanh(means) + 1.0) / 2.0) * (high - low) + low - values = squashed_distribution.sample() - if sess: - values = sess.run(values) - else: - values = values.numpy() - self.assertTrue(np.max(values) <= high) - self.assertTrue(np.min(values) >= low) - - check(np.mean(values), expected.mean(), decimals=1) + cls = TorchSquashedGaussian - # Test log-likelihood outputs. - sampled_action_logp = squashed_distribution.logp( - values if fw != "torch" else torch.Tensor(values) - ) - if sess: - sampled_action_logp = sess.run(sampled_action_logp) - else: - sampled_action_logp = sampled_action_logp.numpy() - # Convert to parameters for distr. - stds = np.exp(np.clip(log_stds, MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT)) - # Unsquash values, then get log-llh from regular gaussian. - # atanh_in = np.clip((values - low) / (high - low) * 2.0 - 1.0, - # -1.0 + SMALL_NUMBER, 1.0 - SMALL_NUMBER) - normed_values = (values - low) / (high - low) * 2.0 - 1.0 - save_normed_values = np.clip( - normed_values, -1.0 + SMALL_NUMBER, 1.0 - SMALL_NUMBER - ) - unsquashed_values = np.arctanh(save_normed_values) - log_prob_unsquashed = np.sum( - np.log(norm.pdf(unsquashed_values, means, stds)), -1 - ) - log_prob = log_prob_unsquashed - np.sum( - np.log(1 - np.tanh(unsquashed_values) ** 2), axis=-1 - ) - check(np.sum(sampled_action_logp), np.sum(log_prob), rtol=0.05) + # Do a stability test using extreme NN outputs to see whether + # sampling and logp'ing result in NaN or +/-inf values. + self._stability_test( + cls, input_space.shape, fw="torch", sess=None, bounds=(low, high) + ) - # NN output. 
- means = np.array( - [[0.1, 0.2, 0.3, 0.4, 50.0], [-0.1, -0.2, -0.3, -0.4, -1.0]] - ) - log_stds = np.array( - [[0.8, -0.2, 0.3, -1.0, 2.0], [0.7, -0.3, 0.4, -0.9, 2.0]] - ) - squashed_distribution = cls( - inputs=np.concatenate([means, log_stds], axis=-1), - model={}, - low=low, - high=high, - ) - # Convert to parameters for distr. - stds = np.exp(log_stds) - # Values to get log-likelihoods for. - values = np.array( - [[0.9, 0.2, 0.4, -0.1, -1.05], [-0.9, -0.2, 0.4, -0.1, -1.05]] - ) + # Batch of size=n and deterministic. + inputs = input_space.sample() + means, _ = np.split(inputs, 2, axis=-1) + squashed_distribution = cls(inputs, {}, low=low, high=high) + expected = ((np.tanh(means) + 1.0) / 2.0) * (high - low) + low + # Sample n times, expect always mean value (deterministic draw). + out = squashed_distribution.deterministic_sample() + check(out, expected) + + # Batch of size=n and non-deterministic -> expect roughly the mean. + inputs = input_space.sample() + means, log_stds = np.split(inputs, 2, axis=-1) + squashed_distribution = cls(inputs, {}, low=low, high=high) + expected = ((np.tanh(means) + 1.0) / 2.0) * (high - low) + low + values = squashed_distribution.sample() + values = values.numpy() + self.assertTrue(np.max(values) <= high) + self.assertTrue(np.min(values) >= low) + + check(np.mean(values), expected.mean(), decimals=1) + + # Test log-likelihood outputs. + sampled_action_logp = squashed_distribution.logp(torch.Tensor(values)) + sampled_action_logp = sampled_action_logp.numpy() + # Convert to parameters for distr. + stds = np.exp(np.clip(log_stds, MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT)) + # Unsquash values, then get log-llh from regular gaussian. + # atanh_in = np.clip((values - low) / (high - low) * 2.0 - 1.0, + # -1.0 + SMALL_NUMBER, 1.0 - SMALL_NUMBER) + normed_values = (values - low) / (high - low) * 2.0 - 1.0 + save_normed_values = np.clip( + normed_values, -1.0 + SMALL_NUMBER, 1.0 - SMALL_NUMBER + ) + unsquashed_values = np.arctanh(save_normed_values) + log_prob_unsquashed = np.sum( + np.log(norm.pdf(unsquashed_values, means, stds)), -1 + ) + log_prob = log_prob_unsquashed - np.sum( + np.log(1 - np.tanh(unsquashed_values) ** 2), axis=-1 + ) + check(np.sum(sampled_action_logp), np.sum(log_prob), rtol=0.05) + + # NN output. + means = np.array([[0.1, 0.2, 0.3, 0.4, 50.0], [-0.1, -0.2, -0.3, -0.4, -1.0]]) + log_stds = np.array([[0.8, -0.2, 0.3, -1.0, 2.0], [0.7, -0.3, 0.4, -0.9, 2.0]]) + squashed_distribution = cls( + inputs=np.concatenate([means, log_stds], axis=-1), + model={}, + low=low, + high=high, + ) + # Convert to parameters for distr. + stds = np.exp(log_stds) + # Values to get log-likelihoods for. + values = np.array( + [[0.9, 0.2, 0.4, -0.1, -1.05], [-0.9, -0.2, 0.4, -0.1, -1.05]] + ) - # Unsquash values, then get log-llh from regular gaussian. - unsquashed_values = np.arctanh((values - low) / (high - low) * 2.0 - 1.0) - log_prob_unsquashed = np.sum( - np.log(norm.pdf(unsquashed_values, means, stds)), -1 - ) - log_prob = log_prob_unsquashed - np.sum( - np.log(1 - np.tanh(unsquashed_values) ** 2), axis=-1 - ) + # Unsquash values, then get log-llh from regular gaussian. 
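
The reference log-likelihood in this test is the standard tanh-squashing change of variables: map the action back into (-1, 1), invert the tanh, take the diagonal-Gaussian log-density, and subtract sum(log(1 - tanh(u)^2)). A compact restatement of that computation as a helper (names are illustrative; the test additionally clips near the bounds before atanh for numerical safety):

    import numpy as np
    from scipy.stats import norm

    def reference_squashed_logp(values, means, log_stds, low, high):
        stds = np.exp(log_stds)
        # Map the squashed action back into (-1, 1), then invert the tanh.
        unsquashed = np.arctanh((values - low) / (high - low) * 2.0 - 1.0)
        logp_gaussian = np.sum(np.log(norm.pdf(unsquashed, means, stds)), -1)
        # Change-of-variables correction for the tanh squashing.
        return logp_gaussian - np.sum(np.log(1 - np.tanh(unsquashed) ** 2), axis=-1)
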
+ unsquashed_values = np.arctanh((values - low) / (high - low) * 2.0 - 1.0) + log_prob_unsquashed = np.sum( + np.log(norm.pdf(unsquashed_values, means, stds)), -1 + ) + log_prob = log_prob_unsquashed - np.sum( + np.log(1 - np.tanh(unsquashed_values) ** 2), axis=-1 + ) - outs = squashed_distribution.logp( - values if fw != "torch" else torch.Tensor(values) - ) - if sess: - outs = sess.run(outs) - check(outs, log_prob, decimals=4) + outs = squashed_distribution.logp(torch.Tensor(values)) + check(outs, log_prob, decimals=4) def test_diag_gaussian(self): """Tests the DiagGaussian ActionDistribution for all frameworks.""" input_space = Box(-2.0, 1.0, shape=(2000, 10)) input_space.seed(42) - for fw, sess in framework_iterator(session=True): - cls = DiagGaussian if fw != "torch" else TorchDiagGaussian - - # Do a stability test using extreme NN outputs to see whether - # sampling and logp'ing result in NaN or +/-inf values. - self._stability_test(cls, input_space.shape, fw=fw, sess=sess) - - # Batch of size=n and deterministic. - inputs = input_space.sample() - means, _ = np.split(inputs, 2, axis=-1) - diag_distribution = cls(inputs, {}) - expected = means - # Sample n times, expect always mean value (deterministic draw). - out = diag_distribution.deterministic_sample() - check(out, expected) - - # Batch of size=n and non-deterministic -> expect roughly the mean. - inputs = input_space.sample() - means, log_stds = np.split(inputs, 2, axis=-1) - diag_distribution = cls(inputs, {}) - expected = means - values = diag_distribution.sample() - if sess: - values = sess.run(values) - else: - values = values.numpy() - check(np.mean(values), expected.mean(), decimals=1) - - # Test log-likelihood outputs. - sampled_action_logp = diag_distribution.logp( - values if fw != "torch" else torch.Tensor(values) - ) - if sess: - sampled_action_logp = sess.run(sampled_action_logp) - else: - sampled_action_logp = sampled_action_logp.numpy() - - # NN output. - means = np.array( - [[0.1, 0.2, 0.3, 0.4, 50.0], [-0.1, -0.2, -0.3, -0.4, -1.0]], - dtype=np.float32, - ) - log_stds = np.array( - [[0.8, -0.2, 0.3, -1.0, 2.0], [0.7, -0.3, 0.4, -0.9, 2.0]], - dtype=np.float32, - ) + cls = TorchDiagGaussian + + # Do a stability test using extreme NN outputs to see whether + # sampling and logp'ing result in NaN or +/-inf values. + self._stability_test(cls, input_space.shape, fw="torch") + + # Batch of size=n and deterministic. + inputs = input_space.sample() + means, _ = np.split(inputs, 2, axis=-1) + diag_distribution = cls(inputs, {}) + expected = means + # Sample n times, expect always mean value (deterministic draw). + out = diag_distribution.deterministic_sample() + check(out, expected) + + # Batch of size=n and non-deterministic -> expect roughly the mean. + inputs = input_space.sample() + means, log_stds = np.split(inputs, 2, axis=-1) + diag_distribution = cls(inputs, {}) + expected = means + values = diag_distribution.sample() + values = values.numpy() + check(np.mean(values), expected.mean(), decimals=1) + + # NN output. + means = np.array( + [[0.1, 0.2, 0.3, 0.4, 50.0], [-0.1, -0.2, -0.3, -0.4, -1.0]], + dtype=np.float32, + ) + log_stds = np.array( + [[0.8, -0.2, 0.3, -1.0, 2.0], [0.7, -0.3, 0.4, -0.9, 2.0]], + dtype=np.float32, + ) - diag_distribution = cls( - inputs=np.concatenate([means, log_stds], axis=-1), model={} - ) - # Convert to parameters for distr. - stds = np.exp(log_stds) - # Values to get log-likelihoods for. 
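
The scipy-based references in these hunks sum independent per-dimension Gaussian log-densities; for a single dimension the closed form is -log(sigma) - 0.5*log(2*pi) - (x - mu)^2 / (2*sigma^2). A one-line sanity check with hypothetical numbers:

    import numpy as np
    from scipy.stats import norm

    x, mu, sigma = 0.4, 0.1, np.exp(0.8)
    closed_form = -np.log(sigma) - 0.5 * np.log(2 * np.pi) - (x - mu) ** 2 / (2 * sigma**2)
    assert np.isclose(np.log(norm.pdf(x, mu, sigma)), closed_form)
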
- values = np.array( - [[0.9, 0.2, 0.4, -0.1, -1.05], [-0.9, -0.2, 0.4, -0.1, -1.05]] - ) + diag_distribution = cls( + inputs=np.concatenate([means, log_stds], axis=-1), model={} + ) + # Convert to parameters for distr. + stds = np.exp(log_stds) + # Values to get log-likelihoods for. + values = np.array( + [[0.9, 0.2, 0.4, -0.1, -1.05], [-0.9, -0.2, 0.4, -0.1, -1.05]] + ) - # get log-llh from regular gaussian. - log_prob = np.sum(np.log(norm.pdf(values, means, stds)), -1) + # get log-llh from regular gaussian. + log_prob = np.sum(np.log(norm.pdf(values, means, stds)), -1) - outs = diag_distribution.logp( - values if fw != "torch" else torch.Tensor(values) - ) - if sess: - outs = sess.run(outs) - check(outs, log_prob, decimals=4) + outs = diag_distribution.logp(torch.Tensor(values)) + check(outs, log_prob, decimals=4) def test_beta(self): input_space = Box(-2.0, 1.0, shape=(2000, 10)) @@ -425,81 +364,41 @@ def test_beta(self): plain_beta_value_space = Box(0.0, 1.0, shape=(2000, 5)) plain_beta_value_space.seed(42) - for fw, sess in framework_iterator(session=True): - cls = TorchBeta if fw == "torch" else Beta - inputs = input_space.sample() - beta_distribution = cls(inputs, {}, low=low, high=high) - - inputs = beta_distribution.inputs - if sess: - inputs = sess.run(inputs) - else: - inputs = inputs.numpy() - alpha, beta_ = np.split(inputs, 2, axis=-1) - - # Mean for a Beta distribution: 1 / [1 + (beta/alpha)] - expected = (1.0 / (1.0 + beta_ / alpha)) * (high - low) + low - # Sample n times, expect always mean value (deterministic draw). - out = beta_distribution.deterministic_sample() - check(out, expected, rtol=0.01) - - # Batch of size=n and non-deterministic -> expect roughly the mean. - values = beta_distribution.sample() - if sess: - values = sess.run(values) - else: - values = values.numpy() - self.assertTrue(np.max(values) <= high) - self.assertTrue(np.min(values) >= low) - - check(np.mean(values), expected.mean(), decimals=1) - - # Test log-likelihood outputs (against scipy). - inputs = input_space.sample() - beta_distribution = cls(inputs, {}, low=low, high=high) - inputs = beta_distribution.inputs - if sess: - inputs = sess.run(inputs) - else: - inputs = inputs.numpy() - alpha, beta_ = np.split(inputs, 2, axis=-1) - - values = plain_beta_value_space.sample() - values_scaled = values * (high - low) + low - if fw == "torch": - values_scaled = torch.Tensor(values_scaled) - print(values_scaled) - out = beta_distribution.logp(values_scaled) - check(out, np.sum(np.log(beta.pdf(values, alpha, beta_)), -1), rtol=0.01) - - # TODO(sven): Test entropy outputs (against scipy). - - def test_gumbel_softmax(self): - """Tests the GumbelSoftmax ActionDistribution (tf + eager only).""" - for fw, sess in framework_iterator(frameworks=("tf2", "tf"), session=True): - batch_size = 1000 - num_categories = 5 - input_space = Box(-1.0, 1.0, shape=(batch_size, num_categories)) - input_space.seed(42) - - # Batch of size=n and deterministic. - inputs = input_space.sample() - gumbel_softmax = GumbelSoftmax(inputs, {}, temperature=1.0) - - expected = softmax(inputs) - # Sample n times, expect always mean value (deterministic draw). - out = gumbel_softmax.deterministic_sample() - check(out, expected) - - # Batch of size=n and non-deterministic -> expect roughly that - # the max-likelihood (argmax) ints are output (most of the time). 
- inputs = input_space.sample() - gumbel_softmax = GumbelSoftmax(inputs, {}, temperature=1.0) - expected_mean = np.mean(np.argmax(inputs, -1)).astype(np.float32) - outs = gumbel_softmax.sample() - if sess: - outs = sess.run(outs) - check(np.mean(np.argmax(outs, -1)), expected_mean, rtol=0.08) + cls = TorchBeta + inputs = input_space.sample() + beta_distribution = cls(inputs, {}, low=low, high=high) + + inputs = beta_distribution.inputs + inputs = inputs.numpy() + alpha, beta_ = np.split(inputs, 2, axis=-1) + + # Mean for a Beta distribution: 1 / [1 + (beta/alpha)] + expected = (1.0 / (1.0 + beta_ / alpha)) * (high - low) + low + # Sample n times, expect always mean value (deterministic draw). + out = beta_distribution.deterministic_sample() + check(out, expected, rtol=0.01) + + # Batch of size=n and non-deterministic -> expect roughly the mean. + values = beta_distribution.sample() + values = values.numpy() + self.assertTrue(np.max(values) <= high) + self.assertTrue(np.min(values) >= low) + + check(np.mean(values), expected.mean(), decimals=1) + + # Test log-likelihood outputs (against scipy). + inputs = input_space.sample() + beta_distribution = cls(inputs, {}, low=low, high=high) + inputs = beta_distribution.inputs + inputs = inputs.numpy() + alpha, beta_ = np.split(inputs, 2, axis=-1) + + values = plain_beta_value_space.sample() + values_scaled = values * (high - low) + low + values_scaled = torch.Tensor(values_scaled) + print(values_scaled) + out = beta_distribution.logp(values_scaled) + check(out, np.sum(np.log(beta.pdf(values, alpha, beta_)), -1), rtol=0.01) def test_multi_action_distribution(self): """Tests the MultiActionDistribution (across all frameworks).""" @@ -539,152 +438,130 @@ def test_multi_action_distribution(self): ) value_space.seed(42) - for fw, sess in framework_iterator(session=True): - if fw == "torch": - cls = TorchMultiActionDistribution - child_distr_cls = [ - TorchCategorical, - TorchDiagGaussian, - partial(TorchBeta, low=low, high=high), - ] - else: - cls = MultiActionDistribution - child_distr_cls = [ - Categorical, - DiagGaussian, - partial(Beta, low=low, high=high), - ] - - inputs = list(input_space.sample()) - distr = cls( - np.concatenate([inputs[0], inputs[1], inputs[2]["a"]], axis=1), - model={}, - action_space=value_space, - child_distributions=child_distr_cls, - input_lens=[4, 6, 4], - ) + cls = TorchMultiActionDistribution + child_distr_cls = [ + TorchCategorical, + TorchDiagGaussian, + partial(TorchBeta, low=low, high=high), + ] - # Adjust inputs for the Beta distr just as Beta itself does. - inputs[2]["a"] = np.clip( - inputs[2]["a"], np.log(SMALL_NUMBER), -np.log(SMALL_NUMBER) - ) - inputs[2]["a"] = np.log(np.exp(inputs[2]["a"]) + 1.0) + 1.0 - # Sample deterministically. - expected_det = [ - np.argmax(inputs[0], axis=-1), - inputs[1][:, :3], # [:3]=Mean values. - # Mean for a Beta distribution: - # 1 / [1 + (beta/alpha)] * range + low - (1.0 / (1.0 + inputs[2]["a"][:, 2:] / inputs[2]["a"][:, 0:2])) - * (high - low) - + low, - ] - out = distr.deterministic_sample() - if sess: - out = sess.run(out) - check(out[0], expected_det[0]) - check(out[1], expected_det[1]) - check(out[2]["a"], expected_det[2]) - - # Stochastic sampling -> expect roughly the mean. - inputs = list(input_space.sample()) - # Fix categorical inputs (not needed for distribution itself, but - # for our expectation calculations). - inputs[0] = softmax(inputs[0], -1) - # Fix std inputs (shouldn't be too large for this test). 
- inputs[1][:, 3:] = std_space.sample() - # Adjust inputs for the Beta distr just as Beta itself does. - inputs[2]["a"] = np.clip( - inputs[2]["a"], np.log(SMALL_NUMBER), -np.log(SMALL_NUMBER) - ) - inputs[2]["a"] = np.log(np.exp(inputs[2]["a"]) + 1.0) + 1.0 - distr = cls( - np.concatenate([inputs[0], inputs[1], inputs[2]["a"]], axis=1), - model={}, - action_space=value_space, - child_distributions=child_distr_cls, - input_lens=[4, 6, 4], - ) - expected_mean = [ - np.mean(np.sum(inputs[0] * np.array([0, 1, 2, 3]), -1)), - inputs[1][:, :3], # [:3]=Mean values. - # Mean for a Beta distribution: - # 1 / [1 + (beta/alpha)] * range + low - (1.0 / (1.0 + inputs[2]["a"][:, 2:] / inputs[2]["a"][:, :2])) - * (high - low) - + low, - ] - out = distr.sample() - if sess: - out = sess.run(out) - out = list(out) - if fw == "torch": - out[0] = out[0].numpy() - out[1] = out[1].numpy() - out[2]["a"] = out[2]["a"].numpy() - check(np.mean(out[0]), expected_mean[0], decimals=1) - check(np.mean(out[1], 0), np.mean(expected_mean[1], 0), decimals=1) - check(np.mean(out[2]["a"], 0), np.mean(expected_mean[2], 0), decimals=1) - - # Test log-likelihood outputs. - # Make sure beta-values are within 0.0 and 1.0 for the numpy - # calculation (which doesn't have scaling). - inputs = list(input_space.sample()) - # Adjust inputs for the Beta distr just as Beta itself does. - inputs[2]["a"] = np.clip( - inputs[2]["a"], np.log(SMALL_NUMBER), -np.log(SMALL_NUMBER) - ) - inputs[2]["a"] = np.log(np.exp(inputs[2]["a"]) + 1.0) + 1.0 - distr = cls( - np.concatenate([inputs[0], inputs[1], inputs[2]["a"]], axis=1), - model={}, - action_space=value_space, - child_distributions=child_distr_cls, - input_lens=[4, 6, 4], - ) - inputs[0] = softmax(inputs[0], -1) - values = list(value_space.sample()) - log_prob_beta = np.log( - beta.pdf(values[2]["a"], inputs[2]["a"][:, :2], inputs[2]["a"][:, 2:]) - ) - # Now do the up-scaling for [2] (beta values) to be between - # low/high. - values[2]["a"] = values[2]["a"] * (high - low) + low - inputs[1][:, 3:] = np.exp(inputs[1][:, 3:]) - expected_log_llh = np.sum( - np.concatenate( - [ - np.expand_dims( - np.log([i[values[0][j]] for j, i in enumerate(inputs[0])]), - -1, - ), - np.log(norm.pdf(values[1], inputs[1][:, :3], inputs[1][:, 3:])), - log_prob_beta, - ], - -1, - ), + inputs = list(input_space.sample()) + distr = cls( + np.concatenate([inputs[0], inputs[1], inputs[2]["a"]], axis=1), + model={}, + action_space=value_space, + child_distributions=child_distr_cls, + input_lens=[4, 6, 4], + ) + + # Adjust inputs for the Beta distr just as Beta itself does. + inputs[2]["a"] = np.clip( + inputs[2]["a"], np.log(SMALL_NUMBER), -np.log(SMALL_NUMBER) + ) + inputs[2]["a"] = np.log(np.exp(inputs[2]["a"]) + 1.0) + 1.0 + # Sample deterministically. + expected_det = [ + np.argmax(inputs[0], axis=-1), + inputs[1][:, :3], # [:3]=Mean values. + # Mean for a Beta distribution: + # 1 / [1 + (beta/alpha)] * range + low + (1.0 / (1.0 + inputs[2]["a"][:, 2:] / inputs[2]["a"][:, 0:2])) + * (high - low) + + low, + ] + out = distr.deterministic_sample() + check(out[0], expected_det[0]) + check(out[1], expected_det[1]) + check(out[2]["a"], expected_det[2]) + + # Stochastic sampling -> expect roughly the mean. + inputs = list(input_space.sample()) + # Fix categorical inputs (not needed for distribution itself, but + # for our expectation calculations). + inputs[0] = softmax(inputs[0], -1) + # Fix std inputs (shouldn't be too large for this test). 
+ inputs[1][:, 3:] = std_space.sample() + # Adjust inputs for the Beta distr just as Beta itself does. + inputs[2]["a"] = np.clip( + inputs[2]["a"], np.log(SMALL_NUMBER), -np.log(SMALL_NUMBER) + ) + inputs[2]["a"] = np.log(np.exp(inputs[2]["a"]) + 1.0) + 1.0 + distr = cls( + np.concatenate([inputs[0], inputs[1], inputs[2]["a"]], axis=1), + model={}, + action_space=value_space, + child_distributions=child_distr_cls, + input_lens=[4, 6, 4], + ) + expected_mean = [ + np.mean(np.sum(inputs[0] * np.array([0, 1, 2, 3]), -1)), + inputs[1][:, :3], # [:3]=Mean values. + # Mean for a Beta distribution: + # 1 / [1 + (beta/alpha)] * range + low + (1.0 / (1.0 + inputs[2]["a"][:, 2:] / inputs[2]["a"][:, :2])) * (high - low) + + low, + ] + out = distr.sample() + out = list(out) + out[0] = out[0].numpy() + out[1] = out[1].numpy() + out[2]["a"] = out[2]["a"].numpy() + check(np.mean(out[0]), expected_mean[0], decimals=1) + check(np.mean(out[1], 0), np.mean(expected_mean[1], 0), decimals=1) + check(np.mean(out[2]["a"], 0), np.mean(expected_mean[2], 0), decimals=1) + + # Test log-likelihood outputs. + # Make sure beta-values are within 0.0 and 1.0 for the numpy + # calculation (which doesn't have scaling). + inputs = list(input_space.sample()) + # Adjust inputs for the Beta distr just as Beta itself does. + inputs[2]["a"] = np.clip( + inputs[2]["a"], np.log(SMALL_NUMBER), -np.log(SMALL_NUMBER) + ) + inputs[2]["a"] = np.log(np.exp(inputs[2]["a"]) + 1.0) + 1.0 + distr = cls( + np.concatenate([inputs[0], inputs[1], inputs[2]["a"]], axis=1), + model={}, + action_space=value_space, + child_distributions=child_distr_cls, + input_lens=[4, 6, 4], + ) + inputs[0] = softmax(inputs[0], -1) + values = list(value_space.sample()) + log_prob_beta = np.log( + beta.pdf(values[2]["a"], inputs[2]["a"][:, :2], inputs[2]["a"][:, 2:]) + ) + # Now do the up-scaling for [2] (beta values) to be between + # low/high. + values[2]["a"] = values[2]["a"] * (high - low) + low + inputs[1][:, 3:] = np.exp(inputs[1][:, 3:]) + expected_log_llh = np.sum( + np.concatenate( + [ + np.expand_dims( + np.log([i[values[0][j]] for j, i in enumerate(inputs[0])]), + -1, + ), + np.log(norm.pdf(values[1], inputs[1][:, :3], inputs[1][:, 3:])), + log_prob_beta, + ], -1, - ) + ), + -1, + ) - values[0] = np.expand_dims(values[0], -1) - if fw == "torch": - values = tree.map_structure(lambda s: torch.Tensor(s), values) - # Test all flattened input. - concat = np.concatenate(tree.flatten(values), -1).astype(np.float32) - out = distr.logp(concat) - if sess: - out = sess.run(out) - check(out, expected_log_llh, atol=15) - # Test structured input. - out = distr.logp(values) - if sess: - out = sess.run(out) - check(out, expected_log_llh, atol=15) - # Test flattened input. - out = distr.logp(tree.flatten(values)) - if sess: - out = sess.run(out) - check(out, expected_log_llh, atol=15) + values[0] = np.expand_dims(values[0], -1) + values = tree.map_structure(lambda s: torch.Tensor(s), values) + # Test all flattened input. + concat = np.concatenate(tree.flatten(values), -1).astype(np.float32) + out = distr.logp(concat) + check(out, expected_log_llh, atol=15) + # Test structured input. + out = distr.logp(values) + check(out, expected_log_llh, atol=15) + # Test flattened input. 
+ out = distr.logp(tree.flatten(values)) + check(out, expected_log_llh, atol=15) if __name__ == "__main__": diff --git a/rllib/models/tests/test_attention_nets.py b/rllib/models/tests/test_attention_nets.py index a12ef8bf05fa..1ccc216aec3c 100644 --- a/rllib/models/tests/test_attention_nets.py +++ b/rllib/models/tests/test_attention_nets.py @@ -13,7 +13,6 @@ EPISODE_RETURN_MEAN, NUM_ENV_STEPS_SAMPLED_LIFETIME, ) -from ray.rllib.utils.test_utils import framework_iterator class TestAttentionNets(unittest.TestCase): @@ -75,12 +74,11 @@ def test_attention_nets_w_prev_actions_and_prev_rewards(self): "rollout_fragment_length": 100, "num_env_runners": 1, } - for _ in framework_iterator(config): - tune.Tuner( - "PPO", - param_space=config, - run_config=air.RunConfig(stop={TRAINING_ITERATION: 1}, verbose=1), - ).fit() + tune.Tuner( + "PPO", + param_space=config, + run_config=air.RunConfig(stop={TRAINING_ITERATION: 1}, verbose=1), + ).fit() def test_ppo_attention_net_learning(self): ModelCatalog.register_custom_model("attention_net", GTrXLNet) diff --git a/rllib/models/tests/test_conv2d_default_stacks.py b/rllib/models/tests/test_conv2d_default_stacks.py index 890ec45c6c0f..4cbafb7adbd5 100644 --- a/rllib/models/tests/test_conv2d_default_stacks.py +++ b/rllib/models/tests/test_conv2d_default_stacks.py @@ -4,11 +4,9 @@ from ray.rllib.models.catalog import ModelCatalog, MODEL_DEFAULTS from ray.rllib.models.tf.visionnet import VisionNetwork from ray.rllib.models.torch.visionnet import VisionNetwork as TorchVision -from ray.rllib.utils.framework import try_import_torch, try_import_tf -from ray.rllib.utils.test_utils import framework_iterator +from ray.rllib.utils.framework import try_import_torch torch, nn = try_import_torch() -tf1, tf, tfv = try_import_tf() class TestConv2DDefaultStacks(unittest.TestCase): @@ -27,20 +25,14 @@ def test_conv2d_default_stacks(self): for shape in shapes: print(f"shape={shape}") obs_space = gym.spaces.Box(-1.0, 1.0, shape=shape) - for fw in framework_iterator(): - model = ModelCatalog.get_model_v2( - obs_space, action_space, 2, MODEL_DEFAULTS.copy(), framework=fw - ) - self.assertTrue(isinstance(model, (VisionNetwork, TorchVision))) - if fw == "torch": - output, _ = model( - {"obs": torch.from_numpy(obs_space.sample()[None])} - ) - else: - output, _ = model({"obs": obs_space.sample()[None]}) - # B x [action logits] - self.assertTrue(output.shape == (1, 2)) - print("ok") + model = ModelCatalog.get_model_v2( + obs_space, action_space, 2, MODEL_DEFAULTS.copy(), framework="torch" + ) + self.assertTrue(isinstance(model, (VisionNetwork, TorchVision))) + output, _ = model({"obs": torch.from_numpy(obs_space.sample()[None])}) + # B x [action logits] + self.assertTrue(output.shape == (1, 2)) + print("ok") if __name__ == "__main__": diff --git a/rllib/models/tests/test_lstms.py b/rllib/models/tests/test_lstms.py index 53351f4ab506..c8d204b395e5 100644 --- a/rllib/models/tests/test_lstms.py +++ b/rllib/models/tests/test_lstms.py @@ -6,7 +6,6 @@ from ray.air.constants import TRAINING_ITERATION from ray.rllib.algorithms import ppo from ray.rllib.examples.envs.classes.random_env import RandomEnv -from ray.rllib.utils.test_utils import framework_iterator class TestLSTMs(unittest.TestCase): @@ -63,12 +62,11 @@ def test_lstm_w_prev_action_and_prev_reward(self): ) ) - for _ in framework_iterator(config): - tune.Tuner( - "PPO", - param_space=config.to_dict(), - run_config=air.RunConfig(stop={TRAINING_ITERATION: 1}, verbose=1), - ).fit() + tune.Tuner( + "PPO", + param_space=config.to_dict(), + 
run_config=air.RunConfig(stop={TRAINING_ITERATION: 1}, verbose=1), + ).fit() if __name__ == "__main__": diff --git a/rllib/models/tests/test_preprocessors.py b/rllib/models/tests/test_preprocessors.py index 05d736945ed7..64b0836caec6 100644 --- a/rllib/models/tests/test_preprocessors.py +++ b/rllib/models/tests/test_preprocessors.py @@ -20,7 +20,6 @@ check, check_compute_single_action, check_train_results, - framework_iterator, ) from ray.rllib.utils.framework import try_import_tf @@ -60,12 +59,11 @@ def test_rlms_and_preprocessing(self): .experimental(_disable_preprocessor_api=True) ) - for _ in framework_iterator(config, frameworks=("torch", "tf2")): - algo = config.build() - results = algo.train() - check_train_results(results) - check_compute_single_action(algo) - algo.stop() + algo = config.build() + results = algo.train() + check_train_results(results) + check_compute_single_action(algo) + algo.stop() def test_preprocessing_disabled_modelv2(self): config = ( @@ -107,15 +105,13 @@ def test_preprocessing_disabled_modelv2(self): # input space. num_iterations = 1 - # Only supported for tf so far. - for _ in framework_iterator(config): - algo = config.build() - for i in range(num_iterations): - results = algo.train() - check_train_results(results) - print(results) - check_compute_single_action(algo) - algo.stop() + algo = config.build() + for i in range(num_iterations): + results = algo.train() + check_train_results(results) + print(results) + check_compute_single_action(algo) + algo.stop() def test_gym_preprocessors(self): p1 = ModelCatalog.get_preprocessor(gym.make("CartPole-v1")) diff --git a/rllib/policy/tests/test_compute_log_likelihoods.py b/rllib/policy/tests/test_compute_log_likelihoods.py index c13d0bbfd561..9a60bab41603 100644 --- a/rllib/policy/tests/test_compute_log_likelihoods.py +++ b/rllib/policy/tests/test_compute_log_likelihoods.py @@ -7,18 +7,14 @@ import ray.rllib.algorithms.dqn as dqn import ray.rllib.algorithms.ppo as ppo import ray.rllib.algorithms.sac as sac -from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.numpy import MAX_LOG_NN_OUTPUT, MIN_LOG_NN_OUTPUT, fc, one_hot -from ray.rllib.utils.test_utils import check, framework_iterator +from ray.rllib.utils.test_utils import check -tf1, tf, tfv = try_import_tf() - -def _get_expected_logp(fw, vars, obs_batch, a, layer_key, logp_func=None): +def _get_expected_logp(vars, obs_batch, a, layer_key, logp_func=None): """Get the expected logp for the given obs_batch and action. Args: - fw: Framework ("tf" or "torch"). vars: The ModelV2 weights. obs_batch: The observation batch. a: The action batch. @@ -28,29 +24,15 @@ def _get_expected_logp(fw, vars, obs_batch, a, layer_key, logp_func=None): Returns: The expected logp. 
""" - if fw != "torch": - if isinstance(vars, list): - expected_mean_logstd = fc( - fc(obs_batch, vars[layer_key[1][0]]), vars[layer_key[1][1]] - ) - else: - expected_mean_logstd = fc( - fc( - obs_batch, - vars["default_policy/{}_1/kernel".format(layer_key[0])], - ), - vars["default_policy/{}_out/kernel".format(layer_key[0])], - ) - else: - expected_mean_logstd = fc( - fc( - obs_batch, - vars["{}_model.0.weight".format(layer_key[2][0])], - framework=fw, - ), - vars["{}_model.0.weight".format(layer_key[2][1])], - framework=fw, - ) + expected_mean_logstd = fc( + fc( + obs_batch, + vars["{}_model.0.weight".format(layer_key[2][0])], + framework="torch", + ), + vars["{}_model.0.weight".format(layer_key[2][1])], + framework="torch", + ) mean, log_std = np.split(expected_mean_logstd, 2, axis=-1) if logp_func is None: expected_logp = np.log(norm.pdf(a, mean, np.exp(log_std))) @@ -84,66 +66,64 @@ def do_test_log_likelihood( prev_r = None if prev_a is None else np.array(0.0) - # Test against all frameworks. - for fw in framework_iterator(config): - algo = config.build() - - policy = algo.get_policy() - vars = policy.get_weights() - # Sample n actions, then roughly check their logp against their - # counts. - num_actions = 1000 if not continuous else 50 - actions = [] - for _ in range(num_actions): - # Single action from single obs. - actions.append( - algo.compute_single_action( - obs_batch[0], - prev_action=prev_a, - prev_reward=prev_r, - explore=True, - # Do not unsquash actions - # (remain in normalized [-1.0; 1.0] space). - unsquash_action=False, - ) + algo = config.build() + + policy = algo.get_policy() + vars = policy.get_weights() + # Sample n actions, then roughly check their logp against their + # counts. + num_actions = 1000 if not continuous else 50 + actions = [] + for _ in range(num_actions): + # Single action from single obs. + actions.append( + algo.compute_single_action( + obs_batch[0], + prev_action=prev_a, + prev_reward=prev_r, + explore=True, + # Do not unsquash actions + # (remain in normalized [-1.0; 1.0] space). + unsquash_action=False, ) + ) - # Test all taken actions for their log-likelihoods vs expected values. - if continuous: - for idx in range(num_actions): - a = actions[idx] - - logp = policy.compute_log_likelihoods( - np.array([a]), - preprocessed_obs_batch, - prev_action_batch=np.array([prev_a]) if prev_a else None, - prev_reward_batch=np.array([prev_r]) if prev_r else None, - actions_normalized=True, - in_training=False, - ) + # Test all taken actions for their log-likelihoods vs expected values. + if continuous: + for idx in range(num_actions): + a = actions[idx] + + logp = policy.compute_log_likelihoods( + np.array([a]), + preprocessed_obs_batch, + prev_action_batch=np.array([prev_a]) if prev_a else None, + prev_reward_batch=np.array([prev_r]) if prev_r else None, + actions_normalized=True, + in_training=False, + ) - # The expected logp computation logic is overfitted to the ModelV2 - # stack and does not generalize to RLModule API. - if not config.enable_rl_module_and_learner: - expected_logp = _get_expected_logp( - fw, vars, obs_batch, a, layer_key, logp_func - ) - check(logp, expected_logp[0], rtol=0.2) - # Test all available actions for their logp values. 
- else: - for a in [0, 1, 2, 3]: - count = actions.count(a) - expected_prob = count / num_actions - logp = policy.compute_log_likelihoods( - np.array([a]), - preprocessed_obs_batch, - prev_action_batch=np.array([prev_a]) if prev_a else None, - prev_reward_batch=np.array([prev_r]) if prev_r else None, - in_training=False, + # The expected logp computation logic is overfitted to the ModelV2 + # stack and does not generalize to RLModule API. + if not config.enable_rl_module_and_learner: + expected_logp = _get_expected_logp( + vars, obs_batch, a, layer_key, logp_func ) + check(logp, expected_logp[0], rtol=0.2) + # Test all available actions for their logp values. + else: + for a in [0, 1, 2, 3]: + count = actions.count(a) + expected_prob = count / num_actions + logp = policy.compute_log_likelihoods( + np.array([a]), + preprocessed_obs_batch, + prev_action_batch=np.array([prev_a]) if prev_a else None, + prev_reward_batch=np.array([prev_r]) if prev_r else None, + in_training=False, + ) - if not config.enable_rl_module_and_learner: - check(np.exp(logp), expected_prob, atol=0.2) + if not config.enable_rl_module_and_learner: + check(np.exp(logp), expected_prob, atol=0.2) class TestComputeLogLikelihood(unittest.TestCase): diff --git a/rllib/policy/tests/test_export_checkpoint_and_model.py b/rllib/policy/tests/test_export_checkpoint_and_model.py index 32eaa654e00f..67f31b37e58c 100644 --- a/rllib/policy/tests/test_export_checkpoint_and_model.py +++ b/rllib/policy/tests/test_export_checkpoint_and_model.py @@ -8,11 +8,9 @@ import ray from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID -from ray.rllib.utils.framework import try_import_tf, try_import_torch -from ray.rllib.utils.test_utils import framework_iterator +from ray.rllib.utils.framework import try_import_torch from ray.tune.registry import get_trainable_cls -tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() # Keep a set of all RLlib algos that support the RLModule API. @@ -25,7 +23,6 @@ def export_test( alg_name, framework="tf", multi_agent=False, - tf_expected_to_work=True, ): cls = get_trainable_cls(alg_name) config = cls.get_default_config() @@ -83,20 +80,6 @@ def export_test( assert results[0].shape in [(1, 2), (1, 3), (1, 256)], results[0].shape assert results[1] == [torch.tensor(0)] # dummy - # Only if keras model gets properly saved by the Policy's export_model() method. - # NOTE: This is not the case (yet) for TF Policies like SAC, which use ModelV2s - # that have more than one keras "base_model" properties in them. For example, - # SACTfModel contains `q_net` and `action_model`, both of which have their own - # `base_model`. - elif tf_expected_to_work: - model = tf.saved_model.load(os.path.join(export_dir, "model")) - assert model - results = model(tf.convert_to_tensor(test_obs, dtype=tf.float32)) - assert len(results) == 2 - assert results[0].shape in [(1, 2), (1, 3), (1, 256)], results[0].shape - # TODO (sven): Make non-RNN models NOT return states (empty list). - assert results[1].shape == (1, 1), results[1].shape # dummy state-out - shutil.rmtree(export_dir) print("Exporting policy (`default_policy`) model ", alg_name, export_dir) @@ -124,20 +107,6 @@ def export_test( assert results[0].shape in [(1, 2), (1, 3), (1, 256)], results[0].shape assert results[1] == [torch.tensor(0)] # dummy - # Only if keras model gets properly saved by the Policy's export_model() method. 
- # NOTE: This is not the case (yet) for TF Policies like SAC, which use ModelV2s - # that have more than one keras "base_model" properties in them. For example, - # SACTfModel contains `q_net` and `action_model`, both of which have their own - # `base_model`. - elif tf_expected_to_work: - model = tf.saved_model.load(export_dir) - assert model - results = model(tf.convert_to_tensor(test_obs, dtype=tf.float32)) - assert len(results) == 2 - assert results[0].shape in [(1, 2), (1, 3), (1, 256)], results[0].shape - # TODO (sven): Make non-RNN models NOT return states (empty list). - assert results[1].shape == (1, 1), results[1].shape # dummy state-out - if os.path.exists(export_dir): shutil.rmtree(export_dir) if multi_agent: @@ -156,20 +125,16 @@ def tearDownClass(cls) -> None: ray.shutdown() def test_export_appo(self): - for fw in framework_iterator(): - export_test("APPO", fw) + export_test("APPO", "torch") def test_export_ppo(self): - for fw in framework_iterator(): - export_test("PPO", fw) + export_test("PPO", "torch") def test_export_ppo_multi_agent(self): - for fw in framework_iterator(): - export_test("PPO", fw, multi_agent=True) + export_test("PPO", "torch", multi_agent=True) def test_export_sac(self): - for fw in framework_iterator(): - export_test("SAC", fw, tf_expected_to_work=False) + export_test("SAC", "torch") if __name__ == "__main__": diff --git a/rllib/policy/tests/test_policy.py b/rllib/policy/tests/test_policy.py index 6bd09c6e8ff3..751fc1c3ab03 100644 --- a/rllib/policy/tests/test_policy.py +++ b/rllib/policy/tests/test_policy.py @@ -6,7 +6,7 @@ from ray.rllib.policy.eager_tf_policy_v2 import EagerTFPolicyV2 from ray.rllib.policy.policy import Policy from ray.rllib.policy.torch_policy_v2 import TorchPolicyV2 -from ray.rllib.utils.test_utils import check, framework_iterator +from ray.rllib.utils.test_utils import check class TestPolicy(unittest.TestCase): @@ -20,40 +20,38 @@ def tearDownClass(cls) -> None: def test_policy_get_and_set_state(self): config = PPOConfig().environment("CartPole-v1") - for fw in framework_iterator(config): - algo = config.build() - policy = algo.get_policy() - state1 = policy.get_state() - algo.train() - state2 = policy.get_state() - check(state1["global_timestep"], state2["global_timestep"], false=True) - - # Reset policy to its original state and compare. - policy.set_state(state1) - state3 = policy.get_state() - # Make sure everything is the same. + algo = config.build() + policy = algo.get_policy() + state1 = policy.get_state() + algo.train() + state2 = policy.get_state() + check(state1["global_timestep"], state2["global_timestep"], false=True) + + # Reset policy to its original state and compare. + policy.set_state(state1) + state3 = policy.get_state() + # Make sure everything is the same. + # This is only supported without RLModule API. See AlgorithmConfig for + # more info. + if not config.enable_rl_module_and_learner: + check(state1["_exploration_state"], state3["_exploration_state"]) + check(state1["global_timestep"], state3["global_timestep"]) + check(state1["weights"], state3["weights"]) + + # Create a new Policy only from state (which could be part of an algorithm's + # checkpoint). This would allow users to restore a policy w/o having access + # to the original code (e.g. the config, policy class used, etc..). 
+ if isinstance(policy, (EagerTFPolicyV2, DynamicTFPolicyV2, TorchPolicyV2)): + policy_restored_from_scratch = Policy.from_state(state3) + state4 = policy_restored_from_scratch.get_state() # This is only supported without RLModule API. See AlgorithmConfig for # more info. if not config.enable_rl_module_and_learner: - check(state1["_exploration_state"], state3["_exploration_state"]) - check(state1["global_timestep"], state3["global_timestep"]) - check(state1["weights"], state3["weights"]) - - # Create a new Policy only from state (which could be part of an algorithm's - # checkpoint). This would allow users to restore a policy w/o having access - # to the original code (e.g. the config, policy class used, etc..). - if isinstance(policy, (EagerTFPolicyV2, DynamicTFPolicyV2, TorchPolicyV2)): - policy_restored_from_scratch = Policy.from_state(state3) - state4 = policy_restored_from_scratch.get_state() - # This is only supported without RLModule API. See AlgorithmConfig for - # more info. - if not config.enable_rl_module_and_learner: - check(state3["_exploration_state"], state4["_exploration_state"]) - check(state3["global_timestep"], state4["global_timestep"]) - # For tf static graph, the new model has different layer names - # (as it gets written into the same graph as the old one). - if fw != "tf": - check(state3["weights"], state4["weights"]) + check(state3["_exploration_state"], state4["_exploration_state"]) + check(state3["global_timestep"], state4["global_timestep"]) + # For tf static graph, the new model has different layer names + # (as it gets written into the same graph as the old one). + check(state3["weights"], state4["weights"]) if __name__ == "__main__": diff --git a/rllib/policy/tests/test_policy_checkpoint_restore.py b/rllib/policy/tests/test_policy_checkpoint_restore.py index cc7598dc7710..93449c550fd4 100644 --- a/rllib/policy/tests/test_policy_checkpoint_restore.py +++ b/rllib/policy/tests/test_policy_checkpoint_restore.py @@ -10,7 +10,6 @@ from ray.rllib.algorithms.ppo import PPOConfig from ray.rllib.policy import Policy -from ray.rllib.utils.test_utils import framework_iterator def _do_checkpoint_twice_test(framework): @@ -20,25 +19,24 @@ def _do_checkpoint_twice_test(framework): .env_runners(num_env_runners=0) .evaluation(evaluation_num_env_runners=0) ) - for fw in framework_iterator(config, frameworks=[framework]): - algo1 = config.build(env="CartPole-v1") - algo2 = config.build(env="Pendulum-v1") + algo1 = config.build(env="CartPole-v1") + algo2 = config.build(env="Pendulum-v1") - algo1.train() - algo2.train() + algo1.train() + algo2.train() - policy1 = algo1.get_policy() - policy1.export_checkpoint("/tmp/test_policy_from_checkpoint_twice_p_1") + policy1 = algo1.get_policy() + policy1.export_checkpoint("/tmp/test_policy_from_checkpoint_twice_p_1") - policy2 = algo2.get_policy() - policy2.export_checkpoint("/tmp/test_policy_from_checkpoint_twice_p_2") + policy2 = algo2.get_policy() + policy2.export_checkpoint("/tmp/test_policy_from_checkpoint_twice_p_2") - algo1.stop() - algo2.stop() + algo1.stop() + algo2.stop() - # Create two policies from different checkpoints - Policy.from_checkpoint("/tmp/test_policy_from_checkpoint_twice_p_1") - Policy.from_checkpoint("/tmp/test_policy_from_checkpoint_twice_p_2") + # Create two policies from different checkpoints + Policy.from_checkpoint("/tmp/test_policy_from_checkpoint_twice_p_1") + Policy.from_checkpoint("/tmp/test_policy_from_checkpoint_twice_p_2") class TestPolicyFromCheckpoint(unittest.TestCase): @@ -50,12 +48,6 @@ def 
setUpClass(cls) -> None: def tearDownClass(cls) -> None: ray.shutdown() - def test_policy_from_checkpoint_twice_tf(self): - return _do_checkpoint_twice_test("tf") - - def test_policy_from_checkpoint_twice_tf2(self): - return _do_checkpoint_twice_test("tf2") - def test_policy_from_checkpoint_twice_torch(self): return _do_checkpoint_twice_test("torch") diff --git a/rllib/policy/tests/test_policy_state_swapping.py b/rllib/policy/tests/test_policy_state_swapping.py index ed1328d6c1b1..ca60bb0a58fd 100644 --- a/rllib/policy/tests/test_policy_state_swapping.py +++ b/rllib/policy/tests/test_policy_state_swapping.py @@ -4,19 +4,11 @@ import unittest import ray -from ray.rllib.algorithms.appo import ( - APPOConfig, - APPOTF1Policy, - APPOTF2Policy, - APPOTorchPolicy, -) +from ray.rllib.algorithms.appo import APPOConfig, APPOTorchPolicy from ray.rllib.policy.policy_map import PolicyMap -from ray.rllib.utils.framework import try_import_tf -from ray.rllib.utils.test_utils import check, framework_iterator +from ray.rllib.utils.test_utils import check from ray.rllib.utils.tf_utils import get_tf_eager_cls_if_necessary -tf1, tf, tfv = try_import_tf() - class TestPolicyStateSwapping(unittest.TestCase): """Tests, whether Policies' states can be swapped out via their state on a GPU.""" @@ -34,9 +26,6 @@ def test_policy_swap_gpu(self): APPOConfig() # Use a single GPU for this test. .resources(num_gpus=1) - # Set eager tracing to True here, such that the framework_iterator loop - # below skips tf2 w/o tracing (loops through tf, tf2+tracing, and torch). - .framework("tf2") ) obs_space = gym.spaces.Box(-1.0, 1.0, (4,), dtype=np.float32) dummy_obs = obs_space.sample() @@ -44,99 +33,88 @@ def test_policy_swap_gpu(self): num_policies = 2 capacity = 1 - for fw in framework_iterator(config): - cls = get_tf_eager_cls_if_necessary( - APPOTF2Policy - if fw == "tf2" - else APPOTF1Policy - if fw == "tf" - else APPOTorchPolicy, - config, - ) + cls = get_tf_eager_cls_if_necessary(APPOTorchPolicy, config) - # Create empty, swappable-policies PolicyMap. - policy_map = PolicyMap(capacity=capacity, policy_states_are_swappable=True) - - # Create and add some TF2 policies. - for i in range(num_policies): - config.training(lr=(i + 1) * 0.01) - with tf1.variable_scope(f"Policy{i}"): - policy = cls( - observation_space=obs_space, - action_space=act_space, - config=config.to_dict(), - ) - policy_map[f"pol{i}"] = policy - - # Create a dummy batch with all 1.0s in it (instead of zeros), so we have a - # better chance of changing our weights during an update. - dummy_batch_ones = tree.map_structure( - lambda s: np.ones_like(s), - policy_map["pol0"]._dummy_batch, + # Create empty, swappable-policies PolicyMap. + policy_map = PolicyMap(capacity=capacity, policy_states_are_swappable=True) + + # Create and add some TF2 policies. + for i in range(num_policies): + config.training(lr=(i + 1) * 0.01) + policy = cls( + observation_space=obs_space, + action_space=act_space, + config=config.to_dict(), ) - dummy_batch_twos = tree.map_structure( - lambda s: np.full_like(s, 2.0), - policy_map["pol0"]._dummy_batch, + policy_map[f"pol{i}"] = policy + + # Create a dummy batch with all 1.0s in it (instead of zeros), so we have a + # better chance of changing our weights during an update. 
+ dummy_batch_ones = tree.map_structure( + lambda s: np.ones_like(s), + policy_map["pol0"]._dummy_batch, + ) + dummy_batch_twos = tree.map_structure( + lambda s: np.full_like(s, 2.0), + policy_map["pol0"]._dummy_batch, + ) + + logits = { + pid: p.compute_single_action(dummy_obs)[2]["action_dist_inputs"] + for pid, p in policy_map.items() + } + # Make sure policies output different deterministic actions. Otherwise, + # this test would not work. + check(logits["pol0"], logits["pol1"], atol=0.0000001, false=True) + + # Test proper policy state swapping. + for i in range(50): + pid = f"pol{i % num_policies}" + print(i) + pol = policy_map[pid] + # Make sure config has been changed properly. + self.assertTrue(pol.config["lr"] == ((i % num_policies) + 1) * 0.01) + # After accessing `pid`, assume it's the most recently accessed + # item now. + self.assertTrue(policy_map._deque[-1] == pid) + self.assertTrue(len(policy_map._deque) == capacity) + self.assertTrue(len(policy_map.cache) == capacity) + self.assertTrue(pid in policy_map.cache) + # Actually compute one action to trigger tracing operations of + # the graph. These may be performed lazily by the DL framework. + check( + pol.compute_single_action(dummy_obs)[2]["action_dist_inputs"], + logits[pid], ) - logits = { - pid: p.compute_single_action(dummy_obs)[2]["action_dist_inputs"] - for pid, p in policy_map.items() - } - # Make sure policies output different deterministic actions. Otherwise, - # this test would not work. - check(logits["pol0"], logits["pol1"], atol=0.0000001, false=True) - - # Test proper policy state swapping. - for i in range(50): - pid = f"pol{i % num_policies}" - print(i) - pol = policy_map[pid] - # Make sure config has been changed properly. - self.assertTrue(pol.config["lr"] == ((i % num_policies) + 1) * 0.01) - # After accessing `pid`, assume it's the most recently accessed - # item now. - self.assertTrue(policy_map._deque[-1] == pid) - self.assertTrue(len(policy_map._deque) == capacity) - self.assertTrue(len(policy_map.cache) == capacity) - self.assertTrue(pid in policy_map.cache) - # Actually compute one action to trigger tracing operations of - # the graph. These may be performed lazily by the DL framework. - check( - pol.compute_single_action(dummy_obs)[2]["action_dist_inputs"], - logits[pid], - ) - - # Test, whether training (on the GPU) will affect the state swapping. - for i in range(num_policies): - pid = f"pol{i % num_policies}" - pol = policy_map[pid] - if i == 0: - pol.learn_on_batch(dummy_batch_ones) - else: - assert i == 1 - pol.learn_on_batch(dummy_batch_twos) - - # Make sure, we really changed the NN during training and update our - # actions dict. - old_logits = logits[pid] - logits[pid] = pol.compute_single_action(dummy_obs)[2][ - "action_dist_inputs" - ] - check(logits[pid], old_logits, atol=0.0000001, false=True) - - # Make sure policies output different deterministic actions. Otherwise, - # this test would not work. - check(logits["pol0"], logits["pol1"], atol=0.0000001, false=True) - - # Once more, test proper policy state swapping. - for i in range(50): - pid = f"pol{i % num_policies}" - pol = policy_map[pid] - check( - pol.compute_single_action(dummy_obs)[2]["action_dist_inputs"], - logits[pid], - ) + # Test, whether training (on the GPU) will affect the state swapping. 
+ for i in range(num_policies): + pid = f"pol{i % num_policies}" + pol = policy_map[pid] + if i == 0: + pol.learn_on_batch(dummy_batch_ones) + else: + assert i == 1 + pol.learn_on_batch(dummy_batch_twos) + + # Make sure, we really changed the NN during training and update our + # actions dict. + old_logits = logits[pid] + logits[pid] = pol.compute_single_action(dummy_obs)[2]["action_dist_inputs"] + check(logits[pid], old_logits, atol=0.0000001, false=True) + + # Make sure policies output different deterministic actions. Otherwise, + # this test would not work. + check(logits["pol0"], logits["pol1"], atol=0.0000001, false=True) + + # Once more, test proper policy state swapping. + for i in range(50): + pid = f"pol{i % num_policies}" + pol = policy_map[pid] + check( + pol.compute_single_action(dummy_obs)[2]["action_dist_inputs"], + logits[pid], + ) if __name__ == "__main__": diff --git a/rllib/tests/backward_compat/checkpoints/create_checkpoints.py b/rllib/tests/backward_compat/checkpoints/create_checkpoints.py index 6a8e9ade5fb3..952d299d385f 100644 --- a/rllib/tests/backward_compat/checkpoints/create_checkpoints.py +++ b/rllib/tests/backward_compat/checkpoints/create_checkpoints.py @@ -3,7 +3,6 @@ # Checkpoints will be located in ~/ray_results/... from ray.rllib.algorithms.ppo import PPOConfig -from ray.rllib.utils.test_utils import framework_iterator # Build a PPOConfig object. config = ( @@ -17,8 +16,7 @@ ) ) -for fw in framework_iterator(config): - algo = config.build() - results = algo.train() - algo.save() - algo.stop() +algo = config.build() +results = algo.train() +algo.save() +algo.stop() diff --git a/rllib/tests/backward_compat/test_backward_compat.py b/rllib/tests/backward_compat/test_backward_compat.py index e29daf2a29b1..1368ab85aa30 100644 --- a/rllib/tests/backward_compat/test_backward_compat.py +++ b/rllib/tests/backward_compat/test_backward_compat.py @@ -1,19 +1,11 @@ -import os -from pathlib import Path -from packaging import version import sys import unittest import ray -import ray.cloudpickle as pickle -from ray.rllib.algorithms.algorithm import Algorithm from ray.rllib.algorithms.algorithm_config import AlgorithmConfig from ray.rllib.algorithms.dqn import DQN -from ray.rllib.algorithms.ppo import PPO from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole -from ray.rllib.policy.policy import Policy, PolicySpec -from ray.rllib.utils.checkpoints import get_checkpoint_info -from ray.rllib.utils.test_utils import framework_iterator +from ray.rllib.policy.policy import PolicySpec from ray.tune.registry import register_env @@ -26,58 +18,6 @@ def setUpClass(cls): def tearDownClass(cls): ray.shutdown() - def test_old_checkpoint_formats(self): - """Tests, whether we remain backward compatible (>=2.0.0) wrt checkpoints.""" - - rllib_dir = Path(__file__).parent.parent.parent - print(f"rllib dir={rllib_dir} exists={os.path.isdir(rllib_dir)}") - - # TODO: Once checkpoints are python version independent (once we stop using - # pickle), add 1.0 here as well. - # Broken due to gymnasium move (old gym envs not recoverable via pickle due to - # gym version conflict (gym==0.23.x not compatible with gym==0.26.x)). 
- for v in []: # "0.1" - v = version.Version(v) - for fw in framework_iterator(): - path_to_checkpoint = os.path.join( - rllib_dir, - "tests", - "backward_compat", - "checkpoints", - "v" + str(v), - "ppo_frozenlake_" + fw, - ) - - print( - f"path_to_checkpoint={path_to_checkpoint} " - f"exists={os.path.isdir(path_to_checkpoint)}" - ) - - checkpoint_info = get_checkpoint_info(path_to_checkpoint) - # v0.1: Need to create algo first, then restore. - if checkpoint_info["checkpoint_version"] == version.Version("0.1"): - # For checkpoints <= v0.1, we need to magically know the original - # config used as well as the algo class. - with open(checkpoint_info["state_file"], "rb") as f: - state = pickle.load(f) - worker_state = pickle.loads(state["worker"]) - algo = PPO(config=worker_state["policy_config"]) - # Note, we can not use restore() here because the testing - # checkpoints are created with Algorithm.save() by - # checkpoints/create_checkpoints.py. I.e, they are missing - # all the Tune checkpoint metadata. - algo.load_checkpoint(path_to_checkpoint) - # > v0.1: Simply use new `Algorithm.from_checkpoint()` staticmethod. - else: - algo = Algorithm.from_checkpoint(path_to_checkpoint) - - # Also test restoring a Policy from an algo checkpoint. - policies = Policy.from_checkpoint(path_to_checkpoint) - self.assertTrue("default_policy" in policies) - - print(algo.train()) - algo.stop() - def test_old_algorithm_config_dicts(self): """Tests, whether we can build Algorithm objects with old config dicts.""" diff --git a/rllib/tests/test_gpus.py b/rllib/tests/test_gpus.py index 4bbd769c5b8f..54ef39821f23 100644 --- a/rllib/tests/test_gpus.py +++ b/rllib/tests/test_gpus.py @@ -5,7 +5,6 @@ from ray.air.constants import TRAINING_ITERATION from ray.rllib.algorithms.ppo import PPOConfig from ray.rllib.utils.framework import try_import_torch -from ray.rllib.utils.test_utils import framework_iterator from ray import tune torch, _ = try_import_torch() @@ -43,44 +42,38 @@ def test_gpus_in_non_local_mode(self): f"_fake_gpus={fake_gpus}" ) - frameworks = ( - ("tf", "torch") if num_gpus > 1 else ("tf2", "tf", "torch") - ) - for _ in framework_iterator(config, frameworks=frameworks): - # Expect that Algorithm creation causes a num_gpu error. - if ( - actual_gpus < num_gpus + 2 * num_gpus_per_env_runner - and not fake_gpus - ): - # "Direct" RLlib (create Algorithm on the driver). - # Cannot run through ray.tune.Tuner().fit() as it would - # simply wait infinitely for the resources to - # become available. - print("direct RLlib") - self.assertRaisesRegex( - RuntimeError, - "Found 0 GPUs on your machine", - lambda: config.build(), - ) - # If actual_gpus >= num_gpus or faked, - # expect no error. - else: - print("direct RLlib") - algo = config.build() - algo.stop() - # Cannot run through ray.tune.Tuner().fit() w/ fake GPUs - # as it would simply wait infinitely for the - # resources to become available (even though, we - # wouldn't really need them). - if num_gpus == 0: - print("via ray.tune.Tuner().fit()") - tune.Tuner( - "PPO", - param_space=config, - run_config=air.RunConfig( - stop={TRAINING_ITERATION: 0} - ), - ).fit() + # Expect that Algorithm creation causes a num_gpu error. + if ( + actual_gpus < num_gpus + 2 * num_gpus_per_env_runner + and not fake_gpus + ): + # "Direct" RLlib (create Algorithm on the driver). + # Cannot run through ray.tune.Tuner().fit() as it would + # simply wait infinitely for the resources to + # become available. 
+ print("direct RLlib") + self.assertRaisesRegex( + RuntimeError, + "Found 0 GPUs on your machine", + lambda: config.build(), + ) + # If actual_gpus >= num_gpus or faked, + # expect no error. + else: + print("direct RLlib") + algo = config.build() + algo.stop() + # Cannot run through ray.tune.Tuner().fit() w/ fake GPUs + # as it would simply wait infinitely for the + # resources to become available (even though, we + # wouldn't really need them). + if num_gpus == 0: + print("via ray.tune.Tuner().fit()") + tune.Tuner( + "PPO", + param_space=config, + run_config=air.RunConfig(stop={TRAINING_ITERATION: 0}), + ).fit() ray.shutdown() def test_gpus_in_local_mode(self): @@ -97,17 +90,15 @@ def test_gpus_in_local_mode(self): for fake_gpus in [False, True]: print(f"_fake_gpus={fake_gpus}") config.resources(num_gpus=num_gpus, _fake_gpus=fake_gpus) - frameworks = ("tf", "torch") if num_gpus > 1 else ("tf2", "tf", "torch") - for _ in framework_iterator(config, frameworks=frameworks): - print("direct RLlib") - algo = config.build() - algo.stop() - print("via ray.tune.Tuner().fit()") - tune.Tuner( - "PPO", - param_space=config, - run_config=air.RunConfig(stop={TRAINING_ITERATION: 0}), - ).fit() + print("direct RLlib") + algo = config.build() + algo.stop() + print("via ray.tune.Tuner().fit()") + tune.Tuner( + "PPO", + param_space=config, + run_config=air.RunConfig(stop={TRAINING_ITERATION: 0}), + ).fit() ray.shutdown() diff --git a/rllib/tests/test_io.py b/rllib/tests/test_io.py index 027e1382cc26..0fe968a2ae61 100644 --- a/rllib/tests/test_io.py +++ b/rllib/tests/test_io.py @@ -31,7 +31,6 @@ EVALUATION_RESULTS, NUM_ENV_STEPS_SAMPLED_LIFETIME, ) -from ray.rllib.utils.test_utils import framework_iterator SAMPLES = SampleBatch( { @@ -71,33 +70,30 @@ def write_outputs(self, output, fw, output_config=None): return algo def test_agent_output_ok(self): - for fw in framework_iterator(frameworks=("torch", "tf")): - self.write_outputs(self.test_dir, fw) - # PPO has two workers, so we expect 2 output files. - self.assertEqual(len(os.listdir(self.test_dir + fw)), 2) - reader = JsonReader(self.test_dir + fw + "/*.json") - reader.next() + self.write_outputs(self.test_dir, "torch") + # PPO has two workers, so we expect 2 output files. + self.assertEqual(len(os.listdir(self.test_dir + "torch")), 2) + reader = JsonReader(self.test_dir + "torch" + "/*.json") + reader.next() def test_agent_output_logdir(self): """Test special value 'logdir' as Agent's output.""" - for fw in framework_iterator(): - agent = self.write_outputs("logdir", fw) - # PPO has two workers, so we expect 2 output files. - self.assertEqual(len(glob.glob(agent.logdir + "/output-*.json")), 2) + agent = self.write_outputs("logdir", "torch") + # PPO has two workers, so we expect 2 output files. + self.assertEqual(len(glob.glob(agent.logdir + "/output-*.json")), 2) def test_agent_output_infos(self): """Verify that the infos dictionary is written to the output files. Note, with torch this is always the case.""" output_config = {"store_infos": True} - for fw in framework_iterator(frameworks=("torch", "tf")): - self.write_outputs(self.test_dir, fw, output_config=output_config) - # PPO has two workers, so we expect 2 output files. 
- self.assertEqual(len(os.listdir(self.test_dir + fw)), 2) - reader = JsonReader(self.test_dir + fw + "/*.json") - data = reader.next() - data = convert_ma_batch_to_sample_batch(data) - self.assertTrue("infos" in data) + self.write_outputs(self.test_dir, "torch", output_config=output_config) + # PPO has two workers, so we expect 2 output files. + self.assertEqual(len(os.listdir(self.test_dir + "torch")), 2) + reader = JsonReader(self.test_dir + "torch" + "/*.json") + data = reader.next() + data = convert_ma_batch_to_sample_batch(data) + self.assertTrue("infos" in data) def test_agent_input_dir(self): config = ( @@ -107,18 +103,17 @@ def test_agent_input_dir(self): .training(train_batch_size=250) ) - for fw in framework_iterator(config, frameworks=("torch", "tf")): - self.write_outputs(self.test_dir, fw) - config.offline_data( - input_=self.test_dir + fw, - ) - print("WROTE TO: ", self.test_dir) - algo = config.build() - result = algo.train() - self.assertEqual( - result[f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}"], 250 - ) # read from input - self.assertTrue(np.isnan(result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN])) + self.write_outputs(self.test_dir, "torch") + config.offline_data( + input_=self.test_dir + "torch", + ) + print("WROTE TO: ", self.test_dir) + algo = config.build() + result = algo.train() + self.assertEqual( + result[f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}"], 250 + ) # read from input + self.assertTrue(np.isnan(result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN])) def test_split_by_episode(self): splits = SAMPLES.split_by_episode() @@ -138,39 +133,38 @@ def test_agent_input_postprocessing_enabled(self): .evaluation(off_policy_estimation_methods={}) ) - for fw in framework_iterator(config, frameworks=("tf", "torch")): - self.write_outputs(self.test_dir, fw) - config.offline_data(input_=self.test_dir + fw) - - # Rewrite the files to drop advantages and value_targets for - # testing - for path in glob.glob(self.test_dir + fw + "/*.json"): - out = [] - with open(path) as f: - for line in f.readlines(): - data_string = json.loads(line) - data = from_json_data(data_string, None) - data = convert_ma_batch_to_sample_batch(data) - # Data won't contain rewards as these are not included - # in the write_outputs run (not needed in the - # SampleBatch). Flip out "rewards" for "advantages" - # just for testing. - data["rewards"] = data["advantages"] - del data["advantages"] - if "value_targets" in data: - del data["value_targets"] - out.append(_to_json_dict(data, [])) - with open(path, "w") as f: - for data in out: - f.write(json.dumps(data)) + self.write_outputs(self.test_dir, "torch") + config.offline_data(input_=self.test_dir + "torch") + + # Rewrite the files to drop advantages and value_targets for + # testing + for path in glob.glob(self.test_dir + "torch" + "/*.json"): + out = [] + with open(path) as f: + for line in f.readlines(): + data_string = json.loads(line) + data = from_json_data(data_string, None) + data = convert_ma_batch_to_sample_batch(data) + # Data won't contain rewards as these are not included + # in the write_outputs run (not needed in the + # SampleBatch). Flip out "rewards" for "advantages" + # just for testing. 
+ data["rewards"] = data["advantages"] + del data["advantages"] + if "value_targets" in data: + del data["value_targets"] + out.append(_to_json_dict(data, [])) + with open(path, "w") as f: + for data in out: + f.write(json.dumps(data)) - algo = config.build() - result = algo.train() - self.assertEqual( - result[f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}"], 250 - ) # read from input - self.assertTrue(np.isnan(result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN])) - algo.stop() + algo = config.build() + result = algo.train() + self.assertEqual( + result[f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}"], 250 + ) # read from input + self.assertTrue(np.isnan(result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN])) + algo.stop() def test_agent_input_eval_sampler(self): config = ( @@ -185,18 +179,17 @@ def test_agent_input_eval_sampler(self): ) ) - for fw in framework_iterator(config, frameworks=["tf", "torch"]): - self.write_outputs(self.test_dir, fw) - config.offline_data(input_=self.test_dir + fw) - algo = config.build() - result = algo.train() - assert np.isnan( - result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN] - ), "episode reward should not be computed for offline data" - assert not np.isnan( - result[EVALUATION_RESULTS][ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN] - ), "Did not see simulation results during evaluation" - algo.stop() + self.write_outputs(self.test_dir, "torch") + config.offline_data(input_=self.test_dir + "torch") + algo = config.build() + result = algo.train() + assert np.isnan( + result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN] + ), "episode reward should not be computed for offline data" + assert not np.isnan( + result[EVALUATION_RESULTS][ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN] + ), "Did not see simulation results during evaluation" + algo.stop() def test_agent_input_list(self): config = ( @@ -206,33 +199,29 @@ def test_agent_input_list(self): .evaluation(off_policy_estimation_methods={}) ) - for fw in framework_iterator(config, frameworks=("torch", "tf")): - self.write_outputs(self.test_dir, fw) - config.offline_data(input_=glob.glob(self.test_dir + fw + "/*.json")) - algo = config.build() - result = algo.train() - self.assertEqual( - result[f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}"], 250 - ) # read from input - self.assertTrue(np.isnan(result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN])) - algo.stop() + self.write_outputs(self.test_dir, "torch") + config.offline_data(input_=glob.glob(self.test_dir + "torch" + "/*.json")) + algo = config.build() + result = algo.train() + self.assertEqual( + result[f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}"], 250 + ) # read from input + self.assertTrue(np.isnan(result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN])) + algo.stop() def test_agent_input_dict(self): config = PPOConfig().environment("CartPole-v1").training(train_batch_size=2000) - for fw in framework_iterator(config): - self.write_outputs(self.test_dir, fw) - config.offline_data( - input_={ - self.test_dir + fw: 0.1, - "sampler": 0.9, - } - ) - algo = config.build() - result = algo.train() - self.assertTrue( - not np.isnan(result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN]) - ) - algo.stop() + self.write_outputs(self.test_dir, "torch") + config.offline_data( + input_={ + self.test_dir + "torch": 0.1, + "sampler": 0.9, + } + ) + algo = config.build() + result = algo.train() + self.assertTrue(not np.isnan(result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN])) + algo.stop() def test_custom_input_procedure(self): class CustomJsonReader(JsonReader): @@ -258,16 +247,13 @@ def input_creator(ioctx: IOContext) -> InputReader: 
.evaluation(off_policy_estimation_methods={}) ) - for fw in framework_iterator(config, frameworks=("torch", "tf")): - self.write_outputs(self.test_dir, fw) - config.offline_data(input_config={"input_files": self.test_dir + fw}) - algo = config.build() - result = algo.train() - self.assertEqual(result[NUM_ENV_STEPS_SAMPLED_LIFETIME], 4000) - self.assertTrue( - np.isnan(result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN]) - ) - algo.stop() + self.write_outputs(self.test_dir, "torch") + config.offline_data(input_config={"input_files": self.test_dir + "torch"}) + algo = config.build() + result = algo.train() + self.assertEqual(result[NUM_ENV_STEPS_SAMPLED_LIFETIME], 4000) + self.assertTrue(np.isnan(result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN])) + algo.stop() def test_multiple_output_workers(self): ray.shutdown() @@ -281,14 +267,13 @@ def test_multiple_output_workers(self): .evaluation(off_policy_estimation_methods={}) ) - for fw in framework_iterator(config, frameworks=["tf", "torch"]): - config.offline_data(output=self.test_dir + fw) - algo = config.build() - algo.train() - self.assertEqual(len(os.listdir(self.test_dir + fw)), 2) - reader = JsonReader(self.test_dir + fw + "/*.json") - reader.next() - algo.stop() + config.offline_data(output=self.test_dir + "torch") + algo = config.build() + algo.train() + self.assertEqual(len(os.listdir(self.test_dir + "torch")), 2) + reader = JsonReader(self.test_dir + "torch" + "/*.json") + reader.next() + algo.stop() class JsonIOTest(unittest.TestCase): diff --git a/rllib/tests/test_local.py b/rllib/tests/test_local.py index 7664a8158cff..38f87ff099f4 100644 --- a/rllib/tests/test_local.py +++ b/rllib/tests/test_local.py @@ -2,7 +2,6 @@ import ray from ray.rllib.algorithms.ppo import PPOConfig -from ray.rllib.utils.test_utils import framework_iterator class LocalModeTest(unittest.TestCase): @@ -20,10 +19,9 @@ def test_local(self): .training(model={"fcnet_hiddens": [10]}) ) - for _ in framework_iterator(config): - algo = config.build() - print(algo.train()) - algo.stop() + algo = config.build() + print(algo.train()) + algo.stop() if __name__ == "__main__": diff --git a/rllib/tests/test_nn_framework_import_errors.py b/rllib/tests/test_nn_framework_import_errors.py index 0ab0f5fb8f8e..61c06816d09d 100644 --- a/rllib/tests/test_nn_framework_import_errors.py +++ b/rllib/tests/test_nn_framework_import_errors.py @@ -3,18 +3,6 @@ import pytest import ray.rllib.algorithms.ppo as ppo -from ray.rllib.utils.test_utils import framework_iterator - - -def test_dont_import_tf_error(): - """Check error being thrown, if tf not installed but configured.""" - # Do not import tf for testing purposes. 
- os.environ["RLLIB_TEST_NO_TF_IMPORT"] = "1" - - config = ppo.PPOConfig().environment("CartPole-v1") - for _ in framework_iterator(config, frameworks=("tf", "tf2")): - with pytest.raises(ImportError, match="However, no installation was found"): - config.build() def test_dont_import_torch_error(): @@ -27,5 +15,4 @@ def test_dont_import_torch_error(): if __name__ == "__main__": - test_dont_import_tf_error() test_dont_import_torch_error() diff --git a/rllib/tests/test_reproducibility.py b/rllib/tests/test_reproducibility.py index 1cd89d5ecc1f..682fd1984ef5 100644 --- a/rllib/tests/test_reproducibility.py +++ b/rllib/tests/test_reproducibility.py @@ -9,7 +9,6 @@ EPISODE_RETURN_MIN, ENV_RUNNER_RESULTS, ) -from ray.rllib.utils.test_utils import framework_iterator from ray.tune.registry import register_env @@ -33,48 +32,46 @@ def step(self, action): def env_creator(env_config): return PickLargest() - for fw in framework_iterator(frameworks=("tf", "torch")): - trajs = list() - for trial in range(3): - ray.init() - register_env("PickLargest", env_creator) - config = ( - DQNConfig() - .environment("PickLargest") - .debugging(seed=666 if trial in [0, 1] else 999) - .reporting( - min_time_s_per_iteration=0, - min_sample_timesteps_per_iteration=100, - ) - .framework(fw) + trajs = [] + for trial in range(3): + ray.init() + register_env("PickLargest", env_creator) + config = ( + DQNConfig() + .environment("PickLargest") + .debugging(seed=666 if trial in [0, 1] else 999) + .reporting( + min_time_s_per_iteration=0, + min_sample_timesteps_per_iteration=100, ) - algo = config.build() + ) + algo = config.build() - trajectory = list() - for _ in range(8): - r = algo.train() - trajectory.append(r[ENV_RUNNER_RESULTS][EPISODE_RETURN_MAX]) - trajectory.append(r[ENV_RUNNER_RESULTS][EPISODE_RETURN_MIN]) - trajs.append(trajectory) + trajectory = list() + for _ in range(8): + r = algo.train() + trajectory.append(r[ENV_RUNNER_RESULTS][EPISODE_RETURN_MAX]) + trajectory.append(r[ENV_RUNNER_RESULTS][EPISODE_RETURN_MIN]) + trajs.append(trajectory) - algo.stop() - ray.shutdown() + algo.stop() + ray.shutdown() - # trial0 and trial1 use same seed and thus - # expect identical trajectories. - all_same = True - for v0, v1 in zip(trajs[0], trajs[1]): - if v0 != v1: - all_same = False - self.assertTrue(all_same) + # trial0 and trial1 use same seed and thus + # expect identical trajectories. + all_same = True + for v0, v1 in zip(trajs[0], trajs[1]): + if v0 != v1: + all_same = False + self.assertTrue(all_same) - # trial1 and trial2 use different seeds and thus - # most rewards tend to be different. - diff_cnt = 0 - for v1, v2 in zip(trajs[1], trajs[2]): - if v1 != v2: - diff_cnt += 1 - self.assertTrue(diff_cnt > 8) + # trial1 and trial2 use different seeds and thus + # most rewards tend to be different. 
+ diff_cnt = 0 + for v1, v2 in zip(trajs[1], trajs[2]): + if v1 != v2: + diff_cnt += 1 + self.assertTrue(diff_cnt > 8) if __name__ == "__main__": diff --git a/rllib/tests/test_supported_multi_agent.py b/rllib/tests/test_supported_multi_agent.py index edd26e5443bf..469dba2ea790 100644 --- a/rllib/tests/test_supported_multi_agent.py +++ b/rllib/tests/test_supported_multi_agent.py @@ -11,7 +11,7 @@ MultiAgentMountainCar, ) from ray.rllib.policy.policy import PolicySpec -from ray.rllib.utils.test_utils import check_train_results, framework_iterator +from ray.rllib.utils.test_utils import check_train_results from ray.tune.registry import register_env @@ -36,18 +36,15 @@ def policy_mapping_fn(agent_id, episode, worker, **kwargs): config.multi_agent(policies=policies, policy_mapping_fn=policy_mapping_fn) - for fw in framework_iterator(config): - if fw == "tf2" and alg == "IMPALA": - continue - if alg == "SAC": - a = config.build(env="multi_agent_mountaincar") - else: - a = config.build(env="multi_agent_cartpole") - - results = a.train() - check_train_results(results) - print(results) - a.stop() + if alg == "SAC": + a = config.build(env="multi_agent_mountaincar") + else: + a = config.build(env="multi_agent_cartpole") + + results = a.train() + check_train_results(results) + print(results) + a.stop() class TestSupportedMultiAgentPolicyGradient(unittest.TestCase): diff --git a/rllib/tests/test_timesteps.py b/rllib/tests/test_timesteps.py index 9725bb22ebdb..6b95864d26aa 100644 --- a/rllib/tests/test_timesteps.py +++ b/rllib/tests/test_timesteps.py @@ -4,7 +4,7 @@ import ray import ray.rllib.algorithms.ppo as ppo from ray.rllib.examples.envs.classes.random_env import RandomEnv -from ray.rllib.utils.test_utils import check, framework_iterator +from ray.rllib.utils.test_utils import check class TestTimeSteps(unittest.TestCase): @@ -34,27 +34,26 @@ def test_timesteps(self): obs = np.array(1) obs_batch = np.array([1]) - for _ in framework_iterator(config): - algo = config.build() - policy = algo.get_policy() - - for i in range(1, 21): - algo.compute_single_action(obs) - check(int(policy.global_timestep), i) - for i in range(1, 21): - policy.compute_actions(obs_batch) - check(int(policy.global_timestep), i + 20) - - # Artificially set ts to 100Bio, then keep computing actions and - # train. - crazy_timesteps = int(1e11) - policy.on_global_var_update({"timestep": crazy_timesteps}) - # Run for 10 more ts. - for i in range(1, 11): - policy.compute_actions(obs_batch) - check(int(policy.global_timestep), i + crazy_timesteps) - algo.train() - algo.stop() + algo = config.build() + policy = algo.get_policy() + + for i in range(1, 21): + algo.compute_single_action(obs) + check(int(policy.global_timestep), i) + for i in range(1, 21): + policy.compute_actions(obs_batch) + check(int(policy.global_timestep), i + 20) + + # Artificially set ts to 100Bio, then keep computing actions and + # train. + crazy_timesteps = int(1e11) + policy.on_global_var_update({"timestep": crazy_timesteps}) + # Run for 10 more ts. 
+ for i in range(1, 11): + policy.compute_actions(obs_batch) + check(int(policy.global_timestep), i + crazy_timesteps) + algo.train() + algo.stop() if __name__ == "__main__": diff --git a/rllib/utils/__init__.py b/rllib/utils/__init__.py index 479438daa533..01f8404da2f0 100644 --- a/rllib/utils/__init__.py +++ b/rllib/utils/__init__.py @@ -34,7 +34,6 @@ check, check_compute_single_action, check_train_results, - framework_iterator, ) from ray.tune.utils import merge_dicts, deep_update @@ -115,7 +114,6 @@ def __exit__(self, *args): "fc", "force_list", "force_tuple", - "framework_iterator", "lstm", "merge_dicts", "one_hot", diff --git a/rllib/utils/exploration/tests/test_curiosity.py b/rllib/utils/exploration/tests/test_curiosity.py index a0f91ce0c7cf..ddc5939c5df5 100644 --- a/rllib/utils/exploration/tests/test_curiosity.py +++ b/rllib/utils/exploration/tests/test_curiosity.py @@ -10,7 +10,7 @@ from ray.air.constants import TRAINING_ITERATION from ray.rllib.algorithms.callbacks import DefaultCallbacks import ray.rllib.algorithms.ppo as ppo -from ray.rllib.utils.test_utils import check_learning_achieved, framework_iterator +from ray.rllib.utils.test_utils import check_learning_achieved from ray.rllib.utils.metrics import ( ENV_RUNNER_RESULTS, EPISODE_RETURN_MAX, @@ -194,36 +194,35 @@ def test_curiosity_on_frozen_lake(self): ) num_iterations = 10 - for _ in framework_iterator(config, frameworks=("tf", "torch")): - # W/ Curiosity. Expect to learn something. - algo = config.build() - learnt = False - for i in range(num_iterations): - result = algo.train() - print(result) - if result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MAX] > 0.0: - print("Reached goal after {} iters!".format(i)) - learnt = True - break - algo.stop() - self.assertTrue(learnt) + # W/ Curiosity. Expect to learn something. + algo = config.build() + learnt = False + for i in range(num_iterations): + result = algo.train() + print(result) + if result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MAX] > 0.0: + print("Reached goal after {} iters!".format(i)) + learnt = True + break + algo.stop() + self.assertTrue(learnt) - # Disable this check for now. Add too much flakyness to test. - # if fw == "tf": - # # W/o Curiosity. Expect to learn nothing. - # print("Trying w/o curiosity (not expected to learn).") - # config["exploration_config"] = { - # "type": "StochasticSampling", - # } - # algo = ppo.PPO(config=config) - # rewards_wo = 0.0 - # for _ in range(num_iterations): - # result = algo.train() - # rewards_wo += result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN] - # print(result) - # algo.stop() - # self.assertTrue(rewards_wo == 0.0) - # print("Did not reach goal w/o curiosity!") + # Disable this check for now. Add too much flakyness to test. + # if fw == "tf": + # # W/o Curiosity. Expect to learn nothing. 
+ # print("Trying w/o curiosity (not expected to learn).") + # config["exploration_config"] = { + # "type": "StochasticSampling", + # } + # algo = ppo.PPO(config=config) + # rewards_wo = 0.0 + # for _ in range(num_iterations): + # result = algo.train() + # rewards_wo += result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN] + # print(result) + # algo.stop() + # self.assertTrue(rewards_wo == 0.0) + # print("Did not reach goal w/o curiosity!") def test_curiosity_on_partially_observable_domain(self): config = ( @@ -273,41 +272,40 @@ def test_curiosity_on_partially_observable_domain(self): TRAINING_ITERATION: 25, f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": min_reward, } - for _ in framework_iterator(config, frameworks="torch"): - # To replay: - # algo = ppo.PPO(config=config) - # algo.restore("[checkpoint file]") - # env = env_maker(config["env_config"]) - # obs, info = env.reset() - # for _ in range(10000): - # obs, reward, done, truncated, info = env.step( - # algo.compute_single_action(s) - # ) - # if done: - # obs, info = env.reset() - # env.render() + # To replay: + # algo = ppo.PPO(config=config) + # algo.restore("[checkpoint file]") + # env = env_maker(config["env_config"]) + # obs, info = env.reset() + # for _ in range(10000): + # obs, reward, done, truncated, info = env.step( + # algo.compute_single_action(s) + # ) + # if done: + # obs, info = env.reset() + # env.render() - results = tune.Tuner( - "PPO", - param_space=config, - run_config=air.RunConfig(stop=stop, verbose=1), - ).fit() - check_learning_achieved(results, min_reward) - iters = results.get_best_result().metrics[TRAINING_ITERATION] - print("Reached in {} iterations.".format(iters)) + results = tune.Tuner( + "PPO", + param_space=config, + run_config=air.RunConfig(stop=stop, verbose=1), + ).fit() + check_learning_achieved(results, min_reward) + iters = results.get_best_result().metrics[TRAINING_ITERATION] + print("Reached in {} iterations.".format(iters)) - # config_wo = config.copy() - # config_wo["exploration_config"] = {"type": "StochasticSampling"} - # stop_wo = stop.copy() - # stop_wo[TRAINING_ITERATION] = iters - # results = tune.Tuner( - # "PPO", param_space=config_wo, stop=stop_wo, verbose=1).fit() - # try: - # check_learning_achieved(results, min_reward) - # except ValueError: - # print("Did not learn w/o curiosity (expected).") - # else: - # raise ValueError("Learnt w/o curiosity (not expected)!") + # config_wo = config.copy() + # config_wo["exploration_config"] = {"type": "StochasticSampling"} + # stop_wo = stop.copy() + # stop_wo[TRAINING_ITERATION] = iters + # results = tune.Tuner( + # "PPO", param_space=config_wo, stop=stop_wo, verbose=1).fit() + # try: + # check_learning_achieved(results, min_reward) + # except ValueError: + # print("Did not learn w/o curiosity (expected).") + # else: + # raise ValueError("Learnt w/o curiosity (not expected)!") if __name__ == "__main__": diff --git a/rllib/utils/exploration/tests/test_explorations.py b/rllib/utils/exploration/tests/test_explorations.py index d63e879e7feb..0254664446ab 100644 --- a/rllib/utils/exploration/tests/test_explorations.py +++ b/rllib/utils/exploration/tests/test_explorations.py @@ -7,62 +7,60 @@ import ray.rllib.algorithms.impala as impala import ray.rllib.algorithms.ppo as ppo import ray.rllib.algorithms.sac as sac -from ray.rllib.utils import check, framework_iterator +from ray.rllib.utils import check def do_test_explorations(config, dummy_obs, prev_a=None, expected_mean_action=None): """Calls an Agent's `compute_actions` with different `explore` 
options.""" - # Test all frameworks. - for _ in framework_iterator(config): - print(f"Algorithm={config.algo_class}") - - # Test for both the default Agent's exploration AND the `Random` - # exploration class. - for exploration in [None, "Random"]: - local_config = config.copy() - if exploration == "Random": - if local_config.enable_rl_module_and_learner: - # TODO(Artur): Support Random exploration with RL Modules. - continue - local_config.env_runners(exploration_config={"type": "Random"}) - print("exploration={}".format(exploration or "default")) - - algo = local_config.build() - - # Make sure all actions drawn are the same, given same - # observations. - actions = [] - for _ in range(25): - actions.append( - algo.compute_single_action( - observation=dummy_obs, - explore=False, - prev_action=prev_a, - prev_reward=1.0 if prev_a is not None else None, - ) + print(f"Algorithm={config.algo_class}") + + # Test for both the default Agent's exploration AND the `Random` + # exploration class. + for exploration in [None, "Random"]: + local_config = config.copy() + if exploration == "Random": + if local_config.enable_rl_module_and_learner: + # TODO(Artur): Support Random exploration with RL Modules. + continue + local_config.env_runners(exploration_config={"type": "Random"}) + print("exploration={}".format(exploration or "default")) + + algo = local_config.build() + + # Make sure all actions drawn are the same, given same + # observations. + actions = [] + for _ in range(25): + actions.append( + algo.compute_single_action( + observation=dummy_obs, + explore=False, + prev_action=prev_a, + prev_reward=1.0 if prev_a is not None else None, ) - check(actions[-1], actions[0]) - - # Make sure actions drawn are different - # (around some mean value), given constant observations. - actions = [] - for _ in range(500): - actions.append( - algo.compute_single_action( - observation=dummy_obs, - explore=True, - prev_action=prev_a, - prev_reward=1.0 if prev_a is not None else None, - ) + ) + check(actions[-1], actions[0]) + + # Make sure actions drawn are different + # (around some mean value), given constant observations. + actions = [] + for _ in range(500): + actions.append( + algo.compute_single_action( + observation=dummy_obs, + explore=True, + prev_action=prev_a, + prev_reward=1.0 if prev_a is not None else None, ) - check( - np.mean(actions), - expected_mean_action if expected_mean_action is not None else 0.5, - atol=0.4, ) - # Check that the stddev is not 0.0 (values differ). - check(np.std(actions), 0.0, false=True) + check( + np.mean(actions), + expected_mean_action if expected_mean_action is not None else 0.5, + atol=0.4, + ) + # Check that the stddev is not 0.0 (values differ). 
+ check(np.std(actions), 0.0, false=True) class TestExplorations(unittest.TestCase): diff --git a/rllib/utils/exploration/tests/test_random_encoder.py b/rllib/utils/exploration/tests/test_random_encoder.py deleted file mode 100644 index aa8a021e28b9..000000000000 --- a/rllib/utils/exploration/tests/test_random_encoder.py +++ /dev/null @@ -1,84 +0,0 @@ -import sys -import unittest - -import pytest -import ray -from ray.rllib.utils.test_utils import framework_iterator -import ray.rllib.algorithms.ppo as ppo -import ray.rllib.algorithms.sac as sac -from ray.rllib.algorithms.callbacks import RE3UpdateCallbacks -from ray.rllib.utils.metrics import ( - ENV_RUNNER_RESULTS, - EPISODE_RETURN_MAX, -) - - -class TestRE3(unittest.TestCase): - """Tests for RE3 exploration algorithm.""" - - @classmethod - def setUpClass(cls): - ray.init() - - @classmethod - def tearDownClass(cls): - ray.shutdown() - - def run_re3(self, rl_algorithm): - """Tests RE3 for PPO and SAC. - - Both the on-policy and off-policy setups are validated. - """ - if rl_algorithm == "PPO": - # We need to disable the RLModule / Learner API here, since this test is - # overfitted to the ModelV2 API stack. The random encoder is based on - # ModelV2 stack. - config = ppo.PPOConfig() - algo_cls = ppo.PPO - beta_schedule = "constant" - elif rl_algorithm == "SAC": - config = sac.SACConfig() - algo_cls = sac.SAC - beta_schedule = "linear_decay" - - config = config.to_dict() - - class RE3Callbacks(RE3UpdateCallbacks, config["callbacks"]): - pass - - config["env"] = "Pendulum-v1" - config["callbacks"] = RE3Callbacks - config["exploration_config"] = { - "type": "RE3", - "embeds_dim": 128, - "beta_schedule": beta_schedule, - "sub_exploration": { - "type": "StochasticSampling", - }, - } - - num_iterations = 60 - for _ in framework_iterator(config, frameworks=("tf", "tf2"), session=True): - algo = algo_cls(config=config) - learnt = False - for i in range(num_iterations): - result = algo.train() - print(result) - if result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MAX] > -900.0: - print("Reached goal after {} iters!".format(i)) - learnt = True - break - algo.stop() - self.assertTrue(learnt) - - def test_re3_ppo(self): - """Tests RE3 with PPO.""" - self.run_re3("PPO") - - def test_re3_sac(self): - """Tests RE3 with SAC.""" - self.run_re3("SAC") - - -if __name__ == "__main__": - sys.exit(pytest.main(["-v", __file__])) diff --git a/rllib/utils/schedules/tests/test_schedules.py b/rllib/utils/schedules/tests/test_schedules.py index b3f256d63540..ded2e926cf22 100644 --- a/rllib/utils/schedules/tests/test_schedules.py +++ b/rllib/utils/schedules/tests/test_schedules.py @@ -6,10 +6,9 @@ ExponentialSchedule, PiecewiseSchedule, ) -from ray.rllib.utils import check, framework_iterator, try_import_tf, try_import_torch +from ray.rllib.utils import check, try_import_torch from ray.rllib.utils.from_config import from_config -tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() @@ -22,34 +21,30 @@ def test_constant_schedule(self): config = {"value": value} - for fw in framework_iterator(frameworks=["tf2", "tf", "torch", None]): - constant = from_config(ConstantSchedule, config, framework=fw) - for t in ts: - out = constant(t) - check(out, value) + constant = from_config(ConstantSchedule, config, framework=None) + for t in ts: + out = constant(t) + check(out, value) - ts_as_tensors = self._get_framework_tensors(ts, fw) - for t in ts_as_tensors: - out = constant(t) - assert fw != "tf" or isinstance(out, tf.Tensor) - check(out, value, decimals=4) + ts_as_tensors = 
self._get_framework_tensors(ts, None) + for t in ts_as_tensors: + out = constant(t) + check(out, value, decimals=4) def test_linear_schedule(self): ts = [0, 50, 10, 100, 90, 2, 1, 99, 23, 1000] expected = [2.1 - (min(t, 100) / 100) * (2.1 - 0.6) for t in ts] config = {"schedule_timesteps": 100, "initial_p": 2.1, "final_p": 0.6} - for fw in framework_iterator(frameworks=["tf2", "tf", "torch", None]): - linear = from_config(LinearSchedule, config, framework=fw) - for t, e in zip(ts, expected): - out = linear(t) - check(out, e, decimals=4) + linear = from_config(LinearSchedule, config, framework=None) + for t, e in zip(ts, expected): + out = linear(t) + check(out, e, decimals=4) - ts_as_tensors = self._get_framework_tensors(ts, fw) - for t, e in zip(ts_as_tensors, expected): - out = linear(t) - assert fw != "tf" or isinstance(out, tf.Tensor) - check(out, e, decimals=4) + ts_as_tensors = self._get_framework_tensors(ts, None) + for t, e in zip(ts_as_tensors, expected): + out = linear(t) + check(out, e, decimals=4) def test_polynomial_schedule(self): ts = [0, 5, 10, 100, 90, 2, 1, 99, 23, 1000] @@ -62,17 +57,15 @@ def test_polynomial_schedule(self): power=2.0, ) - for fw in framework_iterator(frameworks=["tf2", "tf", "torch", None]): - polynomial = from_config(config, framework=fw) - for t, e in zip(ts, expected): - out = polynomial(t) - check(out, e, decimals=4) + polynomial = from_config(config, framework=None) + for t, e in zip(ts, expected): + out = polynomial(t) + check(out, e, decimals=4) - ts_as_tensors = self._get_framework_tensors(ts, fw) - for t, e in zip(ts_as_tensors, expected): - out = polynomial(t) - assert fw != "tf" or isinstance(out, tf.Tensor) - check(out, e, decimals=4) + ts_as_tensors = self._get_framework_tensors(ts, None) + for t, e in zip(ts_as_tensors, expected): + out = polynomial(t) + check(out, e, decimals=4) def test_exponential_schedule(self): decay_rate = 0.2 @@ -80,17 +73,15 @@ def test_exponential_schedule(self): expected = [2.0 * decay_rate ** (t / 100) for t in ts] config = dict(initial_p=2.0, decay_rate=decay_rate, schedule_timesteps=100) - for fw in framework_iterator(frameworks=["tf2", "tf", "torch", None]): - exponential = from_config(ExponentialSchedule, config, framework=fw) - for t, e in zip(ts, expected): - out = exponential(t) - check(out, e, decimals=4) + exponential = from_config(ExponentialSchedule, config, framework=None) + for t, e in zip(ts, expected): + out = exponential(t) + check(out, e, decimals=4) - ts_as_tensors = self._get_framework_tensors(ts, fw) - for t, e in zip(ts_as_tensors, expected): - out = exponential(t) - assert fw != "tf" or isinstance(out, tf.Tensor) - check(out, e, decimals=4) + ts_as_tensors = self._get_framework_tensors(ts, None) + for t, e in zip(ts_as_tensors, expected): + out = exponential(t) + check(out, e, decimals=4) def test_piecewise_schedule(self): ts = [0, 5, 10, 100, 90, 2, 1, 99, 27] @@ -99,24 +90,20 @@ def test_piecewise_schedule(self): endpoints=[(0, 50.0), (25, 100.0), (30, 200.0)], outside_value=14.5 ) - for fw in framework_iterator(frameworks=["tf2", "tf", "torch", None]): - piecewise = from_config(PiecewiseSchedule, config, framework=fw) - for t, e in zip(ts, expected): - out = piecewise(t) - check(out, e, decimals=4) + piecewise = from_config(PiecewiseSchedule, config, framework=None) + for t, e in zip(ts, expected): + out = piecewise(t) + check(out, e, decimals=4) - ts_as_tensors = self._get_framework_tensors(ts, fw) - for t, e in zip(ts_as_tensors, expected): - out = piecewise(t) - assert fw != "tf" or 
isinstance(out, tf.Tensor) - check(out, e, decimals=4) + ts_as_tensors = self._get_framework_tensors(ts, None) + for t, e in zip(ts_as_tensors, expected): + out = piecewise(t) + check(out, e, decimals=4) @staticmethod def _get_framework_tensors(ts, fw): if fw == "torch": ts = [torch.tensor(t, dtype=torch.int32) for t in ts] - elif fw is not None and "tf" in fw: - ts = [tf.constant(t, dtype=tf.int32) for t in ts] return ts diff --git a/rllib/utils/test_utils.py b/rllib/utils/test_utils.py index cea7c31543e2..26f83ff2a2bc 100644 --- a/rllib/utils/test_utils.py +++ b/rllib/utils/test_utils.py @@ -13,7 +13,6 @@ import pprint import random import re -import sys import time import tree # pip install dm_tree from typing import ( @@ -22,7 +21,6 @@ Dict, List, Optional, - Sequence, Tuple, Type, Union, @@ -62,13 +60,6 @@ jax, _ = try_import_jax() tf1, tf, tfv = try_import_tf() -if tf1: - eager_mode = None - try: - from tensorflow.python.eager.context import eager_mode - except (ImportError, ModuleNotFoundError): - pass - torch, _ = try_import_torch() logger = logging.getLogger(__name__) @@ -947,105 +938,6 @@ def check_train_results(train_results: ResultDict): return train_results -def framework_iterator( - config: Optional["AlgorithmConfig"] = None, - frameworks: Sequence[str] = ("tf2", "tf", "torch"), - session: bool = False, - time_iterations: Optional[dict] = None, -) -> Union[str, Tuple[str, Optional["tf1.Session"]]]: - """An generator that allows for looping through n frameworks for testing. - - Provides the correct config entries ("framework") as well - as the correct eager/non-eager contexts for tf/tf2. - - Args: - config: An optional config dict or AlgorithmConfig object. This will be modified - (value for "framework" changed) depending on the iteration. - frameworks: A list/tuple of the frameworks to be tested. - Allowed are: "tf2", "tf", "torch", and None. - session: If True and only in the tf-case: Enter a tf.Session() - and yield that as second return value (otherwise yield (fw, None)). - Also sets a seed (42) on the session to make the test - deterministic. - time_iterations: If provided, will write to the given dict (by - framework key) the times in seconds that each (framework's) - iteration takes. - - Yields: - If `session` is False: The current framework [tf2|tf|torch] used. - If `session` is True: A tuple consisting of the current framework - string and the tf1.Session (if fw="tf", otherwise None). - """ - config = config or {} - frameworks = [frameworks] if isinstance(frameworks, str) else list(frameworks) - - for fw in frameworks: - # Skip tf if on new API stack. - if fw == "tf" and config.get("enable_rl_module_and_learner", False): - logger.warning("Skipping `framework=tf` (new API stack configured)!") - continue - # Skip if tf/tf2 and py >= 3.11. - elif fw in ["tf", "tf2"] and ( - sys.version_info.major == 3 and sys.version_info.minor >= 9 - ): - logger.warning("Skipping `framework=tf/tf2` (python >= 3.9)!") - continue - - # Skip non-installed frameworks. 
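All of the schedule tests above now build their schedule via `from_config(..., framework=None)` and evaluate it at plain Python ints. A small worked sketch for the piecewise case, reusing the endpoint values from the test config:

from ray.rllib.utils.from_config import from_config
from ray.rllib.utils.schedules import PiecewiseSchedule

config = dict(
    endpoints=[(0, 50.0), (25, 100.0), (30, 200.0)],
    outside_value=14.5,
)
piecewise = from_config(PiecewiseSchedule, config, framework=None)

# Linear interpolation between neighboring endpoints ...
assert abs(piecewise(10) - 70.0) < 1e-6   # 50 + (10 / 25) * (100 - 50)
assert abs(piecewise(27) - 140.0) < 1e-6  # 100 + (2 / 5) * (200 - 100)
# ... and the constant outside_value beyond the last endpoint.
assert piecewise(100) == 14.5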
- if fw == "torch" and not torch: - logger.warning("framework_iterator skipping torch (not installed)!") - continue - if fw != "torch" and not tf: - logger.warning( - "framework_iterator skipping {} (tf not installed)!".format(fw) - ) - continue - elif fw == "tf2" and tfv != 2: - logger.warning("framework_iterator skipping tf2.x (tf version is < 2.0)!") - continue - elif fw == "jax" and not jax: - logger.warning("framework_iterator skipping JAX (not installed)!") - continue - assert fw in ["tf2", "tf", "torch", "jax", None] - - # Do we need a test session? - sess = None - if fw == "tf" and session is True: - sess = tf1.Session() - sess.__enter__() - tf1.set_random_seed(42) - - if isinstance(config, dict): - config["framework"] = fw - else: - config.framework(fw) - - eager_ctx = None - # Enable eager mode for tf2. - if fw == "tf2": - eager_ctx = eager_mode() - eager_ctx.__enter__() - assert tf1.executing_eagerly() - # Make sure, eager mode is off. - elif fw == "tf": - assert not tf1.executing_eagerly() - - # Yield current framework + tf-session (if necessary). - print(f"framework={fw}") - time_started = time.time() - yield fw if session is False else (fw, sess) - if time_iterations is not None: - time_total = time.time() - time_started - time_iterations[fw] = time_total - print(f".. took {time_total}sec") - - # Exit any context we may have entered. - if eager_ctx: - eager_ctx.__exit__(None, None, None) - elif sess: - sess.__exit__(None, None, None) - - @Deprecated(new="run_learning_tests_from_yaml_or_py(config_files=...)", error=False) def run_learning_tests_from_yaml( yaml_files: List[str], @@ -1809,47 +1701,46 @@ def check_reproducibilty( ) ) - for fw in framework_iterator(algo_config, **fw_kwargs): - print( - f"Testing reproducibility of {algo_class.__name__}" - f" with {num_workers} workers on fw = {fw}" + print( + f"Testing reproducibility of {algo_class.__name__}" + f" with {num_workers} workers" + ) + print("/// config") + pprint.pprint(algo_config.to_dict()) + # test tune.Tuner().fit() reproducibility + results1 = tune.Tuner( + algo_class, + param_space=algo_config.to_dict(), + run_config=air.RunConfig(stop=stop_dict, verbose=1), + ).fit() + results1 = results1.get_best_result().metrics + + results2 = tune.Tuner( + algo_class, + param_space=algo_config.to_dict(), + run_config=air.RunConfig(stop=stop_dict, verbose=1), + ).fit() + results2 = results2.get_best_result().metrics + + # Test rollout behavior. + check( + results1[ENV_RUNNER_RESULTS]["hist_stats"], + results2[ENV_RUNNER_RESULTS]["hist_stats"], + ) + # As well as training behavior (minibatch sequence during SGD + # iterations). + # As well as training behavior (minibatch sequence during SGD + # iterations). + if algo_config.enable_rl_module_and_learner: + check( + results1["info"][LEARNER_INFO][DEFAULT_POLICY_ID], + results2["info"][LEARNER_INFO][DEFAULT_POLICY_ID], ) - print("/// config") - pprint.pprint(algo_config.to_dict()) - # test tune.Tuner().fit() reproducibility - results1 = tune.Tuner( - algo_class, - param_space=algo_config.to_dict(), - run_config=air.RunConfig(stop=stop_dict, verbose=1), - ).fit() - results1 = results1.get_best_result().metrics - - results2 = tune.Tuner( - algo_class, - param_space=algo_config.to_dict(), - run_config=air.RunConfig(stop=stop_dict, verbose=1), - ).fit() - results2 = results2.get_best_result().metrics - - # Test rollout behavior. 
+ else: check( - results1[ENV_RUNNER_RESULTS]["hist_stats"], - results2[ENV_RUNNER_RESULTS]["hist_stats"], + results1["info"][LEARNER_INFO][DEFAULT_POLICY_ID]["learner_stats"], + results2["info"][LEARNER_INFO][DEFAULT_POLICY_ID]["learner_stats"], ) - # As well as training behavior (minibatch sequence during SGD - # iterations). - # As well as training behavior (minibatch sequence during SGD - # iterations). - if algo_config.enable_rl_module_and_learner: - check( - results1["info"][LEARNER_INFO][DEFAULT_POLICY_ID], - results2["info"][LEARNER_INFO][DEFAULT_POLICY_ID], - ) - else: - check( - results1["info"][LEARNER_INFO][DEFAULT_POLICY_ID]["learner_stats"], - results2["info"][LEARNER_INFO][DEFAULT_POLICY_ID]["learner_stats"], - ) def get_cartpole_dataset_reader(batch_size: int = 1) -> "DatasetReader": @@ -2030,85 +1921,80 @@ def test_ckpt_restore( if replay_buffer: config["store_buffer_in_checkpoints"] = True - frameworks = (["tf2"] if tf2 else []) + ["torch", "tf"] - for fw in framework_iterator(config, frameworks=frameworks): - env = gym.make(env_name) - alg1 = config.environment(env_name).framework(fw).build() - alg2 = config.environment(env_name).build() - - policy1 = alg1.get_policy() - - res = alg1.train() - print("current status: " + str(res)) - - # Check optimizer state as well. - optim_state = policy1.get_state().get("_optimizer_variables") - - checkpoint = alg1.save() - - # Test if we can restore multiple times (at least twice, assuming failure - # would mainly stem from improperly reused variables) - for num_restores in range(2): - # Sync the models - alg2.restore(checkpoint) - - # Compare optimizer state with re-loaded one. - if optim_state: - s2 = alg2.get_policy().get_state().get("_optimizer_variables") - # Tf -> Compare states 1:1. - if fw in ["tf2", "tf"]: - check(s2, optim_state) - # For torch, optimizers have state_dicts with keys=params, - # which are different for the two models (ignore these - # different keys, but compare all values nevertheless). - else: - for i, s2_ in enumerate(s2): - check( - list(s2_["state"].values()), - list(optim_state[i]["state"].values()), - ) + env = gym.make(env_name) + alg1 = config.environment(env_name).framework("torch").build() + alg2 = config.environment(env_name).build() - # Compare buffer content with restored one. - if replay_buffer: - data = alg1.local_replay_buffer.replay_buffers["default_policy"]._storage[ - 42 : 42 + 42 - ] - new_data = alg2.local_replay_buffer.replay_buffers[ - "default_policy" - ]._storage[42 : 42 + 42] - check(data, new_data) - - # Check, whether the eval EnvRunnerGroup has the same policies and - # `policy_mapping_fn`. - if eval_env_runner_group: - eval_mapping_src = inspect.getsource(alg1.eval_env_runner.policy_mapping_fn) - check( - eval_mapping_src, - inspect.getsource(alg2.eval_env_runner.policy_mapping_fn), - ) + policy1 = alg1.get_policy() + + res = alg1.train() + print("current status: " + str(res)) + + # Check optimizer state as well. + optim_state = policy1.get_state().get("_optimizer_variables") + + checkpoint = alg1.save() + + # Test if we can restore multiple times (at least twice, assuming failure + # would mainly stem from improperly reused variables) + for num_restores in range(2): + # Sync the models + alg2.restore(checkpoint) + + # Compare optimizer state with re-loaded one. + if optim_state: + s2 = alg2.get_policy().get_state().get("_optimizer_variables") + # Tf -> Compare states 1:1. 
+ # For torch, optimizers have state_dicts with keys=params, + # which are different for the two models (ignore these + # different keys, but compare all values nevertheless). + for i, s2_ in enumerate(s2): check( - eval_mapping_src, - inspect.getsource(alg2.env_runner.policy_mapping_fn), - false=True, + list(s2_["state"].values()), + list(optim_state[i]["state"].values()), ) - for _ in range(1): - obs = env.observation_space.sample() - a1 = _get_mean_action_from_algorithm(alg1, obs) - a2 = _get_mean_action_from_algorithm(alg2, obs) - print("Checking computed actions", alg1, obs, a1, a2) - if abs(a1 - a2) > 0.1: - raise AssertionError( - "algo={} [a1={} a2={}]".format(str(alg1.__class__), a1, a2) - ) - # Stop algo 1. - alg1.stop() + # Compare buffer content with restored one. + if replay_buffer: + data = alg1.local_replay_buffer.replay_buffers["default_policy"]._storage[ + 42 : 42 + 42 + ] + new_data = alg2.local_replay_buffer.replay_buffers["default_policy"]._storage[ + 42 : 42 + 42 + ] + check(data, new_data) + + # Check, whether the eval EnvRunnerGroup has the same policies and + # `policy_mapping_fn`. + if eval_env_runner_group: + eval_mapping_src = inspect.getsource(alg1.eval_env_runner.policy_mapping_fn) + check( + eval_mapping_src, + inspect.getsource(alg2.eval_env_runner.policy_mapping_fn), + ) + check( + eval_mapping_src, + inspect.getsource(alg2.env_runner.policy_mapping_fn), + false=True, + ) + + for _ in range(1): + obs = env.observation_space.sample() + a1 = _get_mean_action_from_algorithm(alg1, obs) + a2 = _get_mean_action_from_algorithm(alg2, obs) + print("Checking computed actions", alg1, obs, a1, a2) + if abs(a1 - a2) > 0.1: + raise AssertionError( + "algo={} [a1={} a2={}]".format(str(alg1.__class__), a1, a2) + ) + # Stop algo 1. + alg1.stop() - if run_restored_algorithm: - # Check that algo 2 can still run. - print("Starting second run on Algo 2...") - alg2.train() - alg2.stop() + if run_restored_algorithm: + # Check that algo 2 can still run. + print("Starting second run on Algo 2...") + alg2.train() + alg2.stop() def check_supported_spaces( @@ -2137,11 +2023,8 @@ def check_supported_spaces( """ - # do these imports here because otherwise we have circular imports + # Do these imports here because otherwise we have circular imports. from ray.rllib.examples.envs.classes.random_env import RandomEnv - from ray.rllib.models.tf.complex_input_net import ComplexInputNetwork as ComplexNet - from ray.rllib.models.tf.fcnet import FullyConnectedNetwork as FCNet - from ray.rllib.models.tf.visionnet import VisionNetwork as VisionNet from ray.rllib.models.torch.complex_input_net import ( ComplexInputNetwork as TorchComplexNet, ) @@ -2193,8 +2076,6 @@ def check_supported_spaces( "dict", ] - rlmodule_supported_frameworks = ("torch", "tf2") - # The action spaces that we test RLModules with rlmodule_supported_action_spaces = ["discrete", "continuous"] @@ -2261,25 +2142,16 @@ def _do_check(alg, config, a_name, o_name): if alg not in ["SAC", "PPO"]: # 2D (image) input: Expect VisionNet. if o_name in ["atari", "image"]: - if fw == "torch": - assert isinstance(algo.get_policy().model, TorchVisionNet) - else: - assert isinstance(algo.get_policy().model, VisionNet) + assert isinstance(algo.get_policy().model, TorchVisionNet) # 1D input: Expect FCNet. 
elif o_name == "continuous": - if fw == "torch": - assert isinstance(algo.get_policy().model, TorchFCNet) - else: - assert isinstance(algo.get_policy().model, FCNet) + assert isinstance(algo.get_policy().model, TorchFCNet) # Could be either one: ComplexNet (if disabled Preprocessor) # or FCNet (w/ Preprocessor). elif o_name == "vector2d": - if fw == "torch": - assert isinstance( - algo.get_policy().model, (TorchComplexNet, TorchFCNet) - ) - else: - assert isinstance(algo.get_policy().model, (ComplexNet, FCNet)) + assert isinstance( + algo.get_policy().model, (TorchComplexNet, TorchFCNet) + ) if train: algo.train() algo.stop() @@ -2288,21 +2160,14 @@ def _do_check(alg, config, a_name, o_name): if not frameworks: frameworks = ("tf2", "tf", "torch") - if config.enable_rl_module_and_learner: - # Only test the frameworks that are supported by RLModules. - frameworks = tuple( - fw for fw in frameworks if fw in rlmodule_supported_frameworks - ) - _do_check_remote = ray.remote(_do_check) _do_check_remote = _do_check_remote.options(num_gpus=1 if use_gpu else 0) - for _ in framework_iterator(config, frameworks=frameworks): - # Test all action spaces first. - for a_name in action_spaces_to_test.keys(): - o_name = default_observation_space - ray.get(_do_check_remote.remote(alg, config, a_name, o_name)) - - # Now test all observation spaces. - for o_name in observation_spaces_to_test.keys(): - a_name = default_action_space - ray.get(_do_check_remote.remote(alg, config, a_name, o_name)) + # Test all action spaces first. + for a_name in action_spaces_to_test.keys(): + o_name = default_observation_space + ray.get(_do_check_remote.remote(alg, config, a_name, o_name)) + + # Now test all observation spaces. + for o_name in observation_spaces_to_test.keys(): + a_name = default_action_space + ray.get(_do_check_remote.remote(alg, config, a_name, o_name)) diff --git a/rllib/utils/tests/test_errors.py b/rllib/utils/tests/test_errors.py index 2c7cca9a19f9..49410cede5bf 100644 --- a/rllib/utils/tests/test_errors.py +++ b/rllib/utils/tests/test_errors.py @@ -4,7 +4,6 @@ import ray.rllib.algorithms.impala as impala import ray.rllib.algorithms.ppo as ppo from ray.rllib.utils.error import EnvError -from ray.rllib.utils.test_utils import framework_iterator class TestErrors(unittest.TestCase): @@ -26,13 +25,12 @@ def test_no_gpus_error(self): config = impala.IMPALAConfig().environment("CartPole-v1") - for _ in framework_iterator(config): - self.assertRaisesRegex( - RuntimeError, - # (?s): "dot matches all" (also newlines). - "(?s)Found 0 GPUs on your machine.+To change the config", - lambda: config.build(), - ) + self.assertRaisesRegex( + RuntimeError, + # (?s): "dot matches all" (also newlines). + "(?s)Found 0 GPUs on your machine.+To change the config", + lambda: config.build(), + ) def test_bad_envs(self): """Tests different "bad env" errors.""" @@ -42,41 +40,37 @@ def test_bad_envs(self): .environment("Alien-Attack-v42") ) - for _ in framework_iterator(config): - self.assertRaisesRegex( - EnvError, - f"The env string you provided \\('{config.env}'\\) is", - lambda: config.build(), - ) + self.assertRaisesRegex( + EnvError, + f"The env string you provided \\('{config.env}'\\) is", + lambda: config.build(), + ) # Malformed gym env string (must have v\d at end). 
config.environment("Alien-Attack-part-42") - for _ in framework_iterator(config): - self.assertRaisesRegex( - EnvError, - f"The env string you provided \\('{config.env}'\\) is", - lambda: config.build(), - ) + self.assertRaisesRegex( + EnvError, + f"The env string you provided \\('{config.env}'\\) is", + lambda: config.build(), + ) # Non-existing class in a full-class-path. config.environment( "ray.rllib.examples.envs.classes.random_env.RandomEnvThatDoesntExist" ) - for _ in framework_iterator(config): - self.assertRaisesRegex( - EnvError, - f"The env string you provided \\('{config.env}'\\) is", - lambda: config.build(), - ) + self.assertRaisesRegex( + EnvError, + f"The env string you provided \\('{config.env}'\\) is", + lambda: config.build(), + ) # Non-existing module inside a full-class-path. config.environment("ray.rllib.examples.envs.module_that_doesnt_exist.SomeEnv") - for _ in framework_iterator(config): - self.assertRaisesRegex( - EnvError, - f"The env string you provided \\('{config.env}'\\) is", - lambda: config.build(), - ) + self.assertRaisesRegex( + EnvError, + f"The env string you provided \\('{config.env}'\\) is", + lambda: config.build(), + ) if __name__ == "__main__": diff --git a/rllib/utils/tests/test_framework_agnostic_components.py b/rllib/utils/tests/test_framework_agnostic_components.py index 9a50262d79c9..743f30c5c790 100644 --- a/rllib/utils/tests/test_framework_agnostic_components.py +++ b/rllib/utils/tests/test_framework_agnostic_components.py @@ -7,7 +7,7 @@ from ray.rllib.utils.exploration.exploration import Exploration from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.utils.from_config import from_config -from ray.rllib.utils.test_utils import check, framework_iterator +from ray.rllib.utils.test_utils import check tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() @@ -63,102 +63,88 @@ def test_dummy_components(self): script_dir = Path(__file__).parent abs_path = script_dir.absolute() - for fw, sess in framework_iterator(session=True): - # Try to create from an abstract class w/o default constructor. - # Expect None. - test = from_config({"type": AbstractDummyComponent, "framework": fw}) - check(test, None) - - # Create a Component via python API (config dict). - component = from_config( - dict( - type=DummyComponent, prop_a=1.0, prop_d="non_default", framework=fw - ) - ) - check(component.prop_d, "non_default") - - # Create a tf Component from json file. - config_file = str(abs_path.joinpath("dummy_config.json")) - component = from_config(config_file, framework=fw) - check(component.prop_c, "default") - check(component.prop_d, 4) # default - value = component.add(3.3) - if sess: - value = sess.run(value) - check(value, 5.3) # prop_b == 2.0 - - # Create a torch Component from yaml file. - config_file = str(abs_path.joinpath("dummy_config.yml")) - component = from_config(config_file, framework=fw) - check(component.prop_a, "something else") - check(component.prop_d, 3) - value = component.add(1.2) - if sess: - value = sess.run(value) - check(value, np.array([2.2])) # prop_b == 1.0 - - # Create tf Component from json-string (e.g. on command line). - component = from_config( - '{"type": "ray.rllib.utils.tests.' 
- 'test_framework_agnostic_components.DummyComponent", ' - '"prop_a": "A", "prop_b": -1.0, "prop_c": "non-default", ' - '"framework": "' + fw + '"}' - ) - check(component.prop_a, "A") - check(component.prop_d, 4) # default - value = component.add(-1.1) - if sess: - value = sess.run(value) - check(value, -2.1) # prop_b == -1.0 - - # Test recognizing default module path. - component = from_config( - DummyComponent, - '{"type": "NonAbstractChildOfDummyComponent", ' - '"prop_a": "A", "prop_b": -1.0, "prop_c": "non-default",' - '"framework": "' + fw + '"}', - ) - check(component.prop_a, "A") - check(component.prop_d, 4) # default - value = component.add(-1.1) - if sess: - value = sess.run(value) - check(value, -2.1) # prop_b == -1.0 - - # Test recognizing default package path. - scope = None - if sess: - scope = tf1.variable_scope("exploration_object") - scope.__enter__() - component = from_config( - Exploration, - { - "type": "EpsilonGreedy", - "action_space": Discrete(2), - "framework": fw, - "num_workers": 0, - "worker_index": 0, - "policy_config": {}, - "model": None, - }, - ) - if scope: - scope.__exit__(None, None, None) - check(component.epsilon_schedule.outside_value, 0.05) # default - - # Create torch Component from yaml-string. - component = from_config( - "type: ray.rllib.utils.tests." - "test_framework_agnostic_components.DummyComponent\n" - "prop_a: B\nprop_b: -1.5\nprop_c: non-default\nframework: " - "{}".format(fw) + # Try to create from an abstract class w/o default constructor. + # Expect None. + test = from_config({"type": AbstractDummyComponent, "framework": "torch"}) + check(test, None) + + # Create a Component via python API (config dict). + component = from_config( + dict( + type=DummyComponent, prop_a=1.0, prop_d="non_default", framework="torch" ) - check(component.prop_a, "B") - check(component.prop_d, 4) # default - value = component.add(-5.1) - if sess: - value = sess.run(value) - check(value, np.array([-6.6])) # prop_b == -1.5 + ) + check(component.prop_d, "non_default") + + # Create a tf Component from json file. + config_file = str(abs_path.joinpath("dummy_config.json")) + component = from_config(config_file, framework="torch") + check(component.prop_c, "default") + check(component.prop_d, 4) # default + value = component.add(3.3) + check(value, 5.3) # prop_b == 2.0 + + # Create a torch Component from yaml file. + config_file = str(abs_path.joinpath("dummy_config.yml")) + component = from_config(config_file, framework="torch") + check(component.prop_a, "something else") + check(component.prop_d, 3) + value = component.add(1.2) + check(value, np.array([2.2])) # prop_b == 1.0 + + # Create tf Component from json-string (e.g. on command line). + component = from_config( + '{"type": "ray.rllib.utils.tests.' + 'test_framework_agnostic_components.DummyComponent", ' + '"prop_a": "A", "prop_b": -1.0, "prop_c": "non-default", ' + '"framework": "' + "torch" + '"}' + ) + check(component.prop_a, "A") + check(component.prop_d, 4) # default + value = component.add(-1.1) + check(value, -2.1) # prop_b == -1.0 + + # Test recognizing default module path. + component = from_config( + DummyComponent, + '{"type": "NonAbstractChildOfDummyComponent", ' + '"prop_a": "A", "prop_b": -1.0, "prop_c": "non-default",' + '"framework": "torch"}', + ) + check(component.prop_a, "A") + check(component.prop_d, 4) # default + value = component.add(-1.1) + check(value, -2.1) # prop_b == -1.0 + + # Test recognizing default package path. 
+ scope = None + component = from_config( + Exploration, + { + "type": "EpsilonGreedy", + "action_space": Discrete(2), + "framework": "torch", + "num_workers": 0, + "worker_index": 0, + "policy_config": {}, + "model": None, + }, + ) + if scope: + scope.__exit__(None, None, None) + check(component.epsilon_schedule.outside_value, 0.05) # default + + # Create torch Component from yaml-string. + component = from_config( + "type: ray.rllib.utils.tests." + "test_framework_agnostic_components.DummyComponent\n" + "prop_a: B\nprop_b: -1.5\nprop_c: non-default\nframework: " + "torch" + ) + check(component.prop_a, "B") + check(component.prop_d, 4) # default + value = component.add(-5.1) + check(value, np.array([-6.6])) # prop_b == -1.5 def test_unregistered_envs(self): """Tests, whether an Env can be specified simply by its absolute class."""