
Commit
fix
Signed-off-by: sven1977 <svenmika1977@gmail.com>
sven1977 committed Sep 26, 2024
1 parent e7d95ca commit 760cb13
Showing 3 changed files with 19 additions and 6 deletions.
22 changes: 17 additions & 5 deletions rllib/algorithms/cql/cql.py
@@ -85,24 +85,36 @@ def __init__(self, algo_class=None):
self.lagrangian_thresh = 5.0
self.min_q_weight = 5.0
self.deterministic_backup = True
self.lr = 3e-4
# Note: the new stack defines learning rates for each component.
# The base learning rate `lr` has to be set to `None` if using
# the new stack.
self.actor_lr = 1e-4
self.critic_lr = 1e-3
self.alpha_lr = 1e-3
self.lr = None

# Changes to Algorithm's/SACConfig's default:
self.replay_buffer_config = {
"_enable_replay_buffer_api": True,
"type": "MultiAgentPrioritizedReplayBuffer",
"capacity": int(1e6),
# If True, a prioritized replay buffer will be used.
"prioritized_replay": False,
"prioritized_replay_alpha": 0.6,
"prioritized_replay_beta": 0.4,
"prioritized_replay_eps": 1e-6,
# Whether to compute priorities already on the remote worker side.
"worker_side_prioritization": False,
}

# Changes to Algorithm's/SACConfig's default:
# .reporting()
self.min_sample_timesteps_per_iteration = 0
self.min_train_timesteps_per_iteration = 100
# `.api_stack()`
self.api_stack(
enable_rl_module_and_learner=False,
enable_env_runner_and_connector_v2=False,
)
# .reporting()
self.min_sample_timesteps_per_iteration = 0
self.min_train_timesteps_per_iteration = 100
# fmt: on
# __sphinx_doc_end__
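
For reference, a minimal sketch of how these new per-component learning rates would be set from user code, assuming the `CQLConfig.training()` keyword set matches the attributes above (environment and offline-data setup omitted):

from ray.rllib.algorithms.cql import CQLConfig

config = (
    CQLConfig()
    # Opt into the new API stack (the defaults above leave it disabled).
    .api_stack(
        enable_rl_module_and_learner=True,
        enable_env_runner_and_connector_v2=True,
    )
    .training(
        # Per-component learning rates; the base `lr` must stay None.
        actor_lr=1e-4,
        critic_lr=1e-3,
        alpha_lr=1e-3,
        lr=None,
        # Override the replay-buffer defaults above, e.g. to turn on
        # prioritized replay.
        replay_buffer_config={
            "type": "MultiAgentPrioritizedReplayBuffer",
            "capacity": int(1e6),
            "prioritized_replay": True,
            "prioritized_replay_alpha": 0.6,
            "prioritized_replay_beta": 0.4,
            "prioritized_replay_eps": 1e-6,
        },
    )
)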

2 changes: 1 addition & 1 deletion rllib/algorithms/dqn/dqn.py
@@ -211,7 +211,7 @@ def __init__(self, algo_class=None):
# fmt: on
# __sphinx_doc_end__

# Deprecated.
# Deprecated
self.buffer_size = DEPRECATED_VALUE
self.prioritized_replay = DEPRECATED_VALUE
self.learning_starts = DEPRECATED_VALUE
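
The deprecated flat keys above live on in nested form; a sketch of the modern equivalents, assuming the usual `DQNConfig.training()` keywords (the exact mapping is an inference, not part of this commit):

from ray.rllib.algorithms.dqn import DQNConfig

config = DQNConfig().training(
    # `buffer_size` and `prioritized_replay` moved into the nested
    # replay-buffer config.
    replay_buffer_config={
        "type": "MultiAgentPrioritizedReplayBuffer",
        "capacity": 50_000,
    },
    # `learning_starts` became this explicit setting.
    num_steps_sampled_before_learning_starts=1_000,
)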
1 change: 1 addition & 0 deletions rllib/algorithms/sac/sac.py
@@ -113,6 +113,7 @@ def __init__(self, algo_class=None):
}
self.train_batch_size_per_learner = 256
self.train_batch_size = 256 # @OldAPIstack

# Number of timesteps to collect from rollout workers before we start
# sampling from replay buffers for learning. Whether we count this in agent
# steps or environment steps depends on config.multi_agent(count_steps_by=..).
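
A short sketch of how the two batch-size settings split across the API stacks (my reading of the `@OldAPIstack` marker above, not something this commit documents):

from ray.rllib.algorithms.sac import SACConfig

# New API stack: the per-Learner batch size applies.
new_stack = SACConfig().training(train_batch_size_per_learner=256)

# Old API stack: the legacy `train_batch_size` applies instead.
old_stack = SACConfig().training(train_batch_size=256)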
