
Commit
fix
Signed-off-by: sven1977 <svenmika1977@gmail.com>
sven1977 committed Sep 26, 2024
1 parent e7d95ca commit 760cb13
Showing 3 changed files with 19 additions and 6 deletions.
22 changes: 17 additions & 5 deletions rllib/algorithms/cql/cql.py
@@ -85,24 +85,36 @@ def __init__(self, algo_class=None):
self.lagrangian_thresh = 5.0
self.min_q_weight = 5.0
self.deterministic_backup = True
self.lr = 3e-4
# Note: the new stack defines learning rates for each component.
# The base learning rate `lr` has to be set to `None` if using
# the new stack.
self.actor_lr = 1e-4
self.critic_lr = 1e-3
self.alpha_lr = 1e-3
self.lr = None

# Changes to Algorithm's/SACConfig's default:
self.replay_buffer_config = {
"_enable_replay_buffer_api": True,
"type": "MultiAgentPrioritizedReplayBuffer",
"capacity": int(1e6),
# If True, a prioritized replay buffer will be used.
"prioritized_replay": False,
"prioritized_replay_alpha": 0.6,
"prioritized_replay_beta": 0.4,
"prioritized_replay_eps": 1e-6,
# Whether to compute priorities already on the remote worker side.
"worker_side_prioritization": False,
}

# Changes to Algorithm's/SACConfig's default:
# .reporting()
self.min_sample_timesteps_per_iteration = 0
self.min_train_timesteps_per_iteration = 100
# `.api_stack()`
self.api_stack(
enable_rl_module_and_learner=False,
enable_env_runner_and_connector_v2=False,
)
# .reporting()
self.min_sample_timesteps_per_iteration = 0
self.min_train_timesteps_per_iteration = 100
# fmt: on
# __sphinx_doc_end__
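
For reference, a minimal sketch of how these new per-component learning rates would be set from user code, assuming the `CQLConfig.training()` keyword set matches the attributes above (environment and offline-data setup omitted):

from ray.rllib.algorithms.cql import CQLConfig

config = (
    CQLConfig()
    # Opt into the new API stack (the defaults above leave it disabled).
    .api_stack(
        enable_rl_module_and_learner=True,
        enable_env_runner_and_connector_v2=True,
    )
    .training(
        # Per-component learning rates; the base `lr` must stay None.
        actor_lr=1e-4,
        critic_lr=1e-3,
        alpha_lr=1e-3,
        lr=None,
        # Override the replay-buffer defaults above, e.g. to turn on
        # prioritized replay.
        replay_buffer_config={
            "type": "MultiAgentPrioritizedReplayBuffer",
            "capacity": int(1e6),
            "prioritized_replay": True,
            "prioritized_replay_alpha": 0.6,
            "prioritized_replay_beta": 0.4,
            "prioritized_replay_eps": 1e-6,
        },
    )
)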

2 changes: 1 addition & 1 deletion rllib/algorithms/dqn/dqn.py
@@ -211,7 +211,7 @@ def __init__(self, algo_class=None):
# fmt: on
# __sphinx_doc_end__

# Deprecated.
# Deprecated
self.buffer_size = DEPRECATED_VALUE
self.prioritized_replay = DEPRECATED_VALUE
self.learning_starts = DEPRECATED_VALUE
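
The deprecated flat keys above live on in nested form; a sketch of the modern equivalents, assuming the usual `DQNConfig.training()` keywords (the exact mapping is an inference, not part of this commit):

from ray.rllib.algorithms.dqn import DQNConfig

config = DQNConfig().training(
    # `buffer_size` and `prioritized_replay` moved into the nested
    # replay-buffer config.
    replay_buffer_config={
        "type": "MultiAgentPrioritizedReplayBuffer",
        "capacity": 50_000,
    },
    # `learning_starts` became this explicit setting.
    num_steps_sampled_before_learning_starts=1_000,
)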
1 change: 1 addition & 0 deletions rllib/algorithms/sac/sac.py
@@ -113,6 +113,7 @@ def __init__(self, algo_class=None):
}
self.train_batch_size_per_learner = 256
self.train_batch_size = 256 # @OldAPIstack

# Number of timesteps to collect from rollout workers before we start
# sampling from replay buffers for learning. Whether we count this in agent
# steps or environment steps depends on config.multi_agent(count_steps_by=..).
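
A short sketch of how the two batch-size settings split across the API stacks (my reading of the `@OldAPIstack` marker above, not something this commit documents):

from ray.rllib.algorithms.sac import SACConfig

# New API stack: the per-Learner batch size applies.
new_stack = SACConfig().training(train_batch_size_per_learner=256)

# Old API stack: the legacy `train_batch_size` applies instead.
old_stack = SACConfig().training(train_batch_size=256)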
