
Commit dd18dd0
[RLlib; Offline RL] - Replace GAE in MARWILOfflinePreLearner with `GeneralAdvantageEstimation` connector in learner pipeline. (ray-project#47532)

Signed-off-by: ujjawal-khare <ujjawal.khare@dream11.com>
simonsays1980 authored and ujjawal-khare committed Oct 15, 2024
1 parent d28b712 commit dd18dd0
Showing 3 changed files with 17 additions and 37 deletions.
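
The commit title describes moving GAE out of the MARWILOfflinePreLearner and into a `GeneralAdvantageEstimation` connector that runs in the learner connector pipeline, which, per the title, is where MARWIL now computes advantages. As rough orientation only (not code from this commit), appending such a connector on the new API stack typically looks like the sketch below; the subclass name, the `_learner_connector` hook, and the gamma/lambda_ values are assumptions.

from ray.rllib.algorithms.marwil.torch.marwil_torch_learner import MARWILTorchLearner
from ray.rllib.connectors.learner.general_advantage_estimation import (
    GeneralAdvantageEstimation,
)


class GAEAppendingMARWILLearner(MARWILTorchLearner):  # hypothetical subclass
    def build(self):
        super().build()
        # Compute advantages and value targets once per training batch on the
        # learner side, instead of inside the OfflinePreLearner.
        self._learner_connector.append(
            GeneralAdvantageEstimation(
                gamma=self.config.gamma, lambda_=self.config.lambda_
            )
        )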
rllib/algorithms/marwil/tests/test_marwil.py (12 changes: 5 additions & 7 deletions)

@@ -167,14 +167,12 @@ def possibly_masked_mean(data_):

         # Calculate our own expected values (to then compare against the
         # agent's loss output).
-        module = algo.learner_group._learner.module[DEFAULT_MODULE_ID].unwrapped()
-        fwd_out = module.forward_train(
-            {k: v for k, v in batch[DEFAULT_MODULE_ID].items()}
-        )
-        advantages = (
-            batch[DEFAULT_MODULE_ID][Columns.VALUE_TARGETS].detach().cpu().numpy()
-            - module.compute_values(batch[DEFAULT_MODULE_ID]).detach().cpu().numpy()
+        fwd_out = (
+            algo.learner_group._learner.module[DEFAULT_MODULE_ID]
+            .unwrapped()
+            .forward_train({k: v for k, v in batch[DEFAULT_MODULE_ID].items()})
         )
+        advantages = batch[DEFAULT_MODULE_ID][Columns.ADVANTAGES].detach().cpu().numpy()
         advantages_squared = possibly_masked_mean(np.square(advantages))
         c_2 = 100.0 + 1e-8 * (advantages_squared - 100.0)
         c = np.sqrt(c_2)
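
The three unchanged lines at the end of the hunk recompute MARWIL's moving-average advantage scale c from the (now connector-provided) advantages. As a standalone illustration of that arithmetic, ignoring the optional masking and using only the constants visible above (function and argument names are made up):

import numpy as np


def expected_advantage_scale(advantages, prev_c2=100.0, update_rate=1e-8):
    # Moving-average update of the squared-advantage estimate, then its square
    # root, mirroring: c_2 = 100.0 + 1e-8 * (mean(adv^2) - 100.0); c = sqrt(c_2).
    mean_sq_adv = float(np.mean(np.square(advantages)))
    c2 = prev_c2 + update_rate * (mean_sq_adv - prev_c2)
    return float(np.sqrt(c2))


print(expected_advantage_scale(np.array([0.5, -1.2, 2.0])))  # ~10.0 for small batches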
rllib/tuned_examples/cql/pendulum_cql.py (23 changes: 8 additions & 15 deletions)

@@ -39,21 +39,14 @@
     )
     .offline_data(
         input_=[data_path.as_posix()],
-        # The `kwargs` for the `input_read_method`. We override the
-        # the number of blocks to pull at once b/c our dataset is
-        # small.
-        input_read_method_kwargs={"override_num_blocks": max(args.num_gpus * 2, 2)},
-        # The `kwargs` for the `map_batches` method in which our
-        # `OfflinePreLearner` is run. 2 data workers should be run
-        # concurrently.
-        map_batches_kwargs={"concurrency": 2, "num_cpus": 2},
-        # The `kwargs` for the `iter_batches` method. Due to the small
-        # dataset we choose only a single batch to prefetch.
-        iter_batches_kwargs={"prefetch_batches": 1},
-        # The number of iterations to be run per learner when in multi-learner
-        # mode in a single RLlib training iteration. Leave this to `None` to
-        # run an entire epoch on the dataset during a single RLlib training
-        # iteration. For single-learner mode 1 is the only option.
+        # Define the number of reading blocks, these should be larger than 1
+        # and aligned with the data size.
+        input_read_method_kwargs={"override_num_blocks": max(args.num_gpus, 2)},
+        # Concurrency defines the number of processes that run the
+        # `map_batches` transformations. This should be aligned with the
+        # 'prefetch_batches' argument in 'iter_batches_kwargs'.
+        map_batches_kwargs={"concurrency": max(2, args.num_gpus * 2)},
+        actions_in_input_normalized=True,
         dataset_num_iters_per_learner=1 if args.num_gpus == 0 else None,
         # TODO (sven): Has this any influence in the connectors?
         actions_in_input_normalized=True,
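
Assembled outside of the diff view, the CQL example's new offline-data block amounts to roughly the following standalone sketch; the environment name, data path, and GPU count are placeholders rather than values taken from this commit.

from ray.rllib.algorithms.cql import CQLConfig

num_gpus = 0  # stands in for args.num_gpus in the example script
config = (
    CQLConfig()
    .environment("Pendulum-v1")
    .offline_data(
        input_=["/tmp/pendulum-offline-data"],  # placeholder path
        # Use at least 2 read blocks so `map_batches` concurrency has blocks to split.
        input_read_method_kwargs={"override_num_blocks": max(num_gpus, 2)},
        # Number of concurrent processes running the `OfflinePreLearner` transformations.
        map_batches_kwargs={"concurrency": max(2, num_gpus * 2)},
        actions_in_input_normalized=True,
        # One pass per learner update in single-learner mode; a full epoch otherwise.
        dataset_num_iters_per_learner=1 if num_gpus == 0 else None,
    )
)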
rllib/tuned_examples/marwil/cartpole_marwil.py (19 changes: 4 additions & 15 deletions)

@@ -49,21 +49,10 @@
     # as remote learners.
     .offline_data(
         input_=[data_path.as_posix()],
-        # The `kwargs` for the `input_read_method`. We override the
-        # the number of blocks to pull at once b/c our dataset is
-        # small.
-        input_read_method_kwargs={"override_num_blocks": max(args.num_gpus * 2, 2)},
-        # The `kwargs` for the `map_batches` method in which our
-        # `OfflinePreLearner` is run. 2 data workers should be run
-        # concurrently.
-        map_batches_kwargs={"concurrency": 2, "num_cpus": 2},
-        # The `kwargs` for the `iter_batches` method. Due to the small
-        # dataset we choose only a single batch to prefetch.
-        iter_batches_kwargs={"prefetch_batches": 1},
-        # The number of iterations to be run per learner when in multi-learner
-        # mode in a single RLlib training iteration. Leave this to `None` to
-        # run an entire epoch on the dataset during a single RLlib training
-        # iteration. For single-learner mode 1 is the only option.
+        # Note, we want to have at leat 2 data blocks to read from such that
+        # concurrency in `map_batches` works.
+        input_read_method_kwargs={"override_num_blocks": max(args.num_gpus, 2)},
+        prelearner_module_synch_period=20,
         dataset_num_iters_per_learner=1 if args.num_gpus == 0 else None,
     )
     .training(
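
The MARWIL CartPole example follows the same pattern, adding a module-weight synch period for the OfflinePreLearner. Condensed into a standalone sketch; the environment name, data path, GPU count, and beta value are placeholders, not values from this commit.

from ray.rllib.algorithms.marwil import MARWILConfig

num_gpus = 0  # stands in for args.num_gpus in the example script
config = (
    MARWILConfig()
    .environment("CartPole-v1")
    .offline_data(
        input_=["/tmp/cartpole-offline-data"],  # placeholder path
        # Keep at least 2 data blocks so `map_batches` concurrency can be used.
        input_read_method_kwargs={"override_num_blocks": max(num_gpus, 2)},
        # Period after which the OfflinePreLearner re-syncs its RLModule weights
        # from the learner.
        prelearner_module_synch_period=20,
        dataset_num_iters_per_learner=1 if num_gpus == 0 else None,
    )
    # beta > 0.0 weights the behavior-cloning loss by exponentiated advantages (MARWIL).
    .training(beta=1.0)
)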
