From dd18dd02fab3e241233933970a26518c54294d81 Mon Sep 17 00:00:00 2001
From: simonsays1980
Date: Mon, 9 Sep 2024 20:06:58 +0200
Subject: [PATCH] [RLlib; Offline RL] - Replace GAE in `MARWILOfflinePreLearner`
 with `GeneralAdvantageEstimation` connector in learner pipeline. (#47532)

Signed-off-by: ujjawal-khare
---
 rllib/algorithms/marwil/tests/test_marwil.py | 12 ++++------
 rllib/tuned_examples/cql/pendulum_cql.py     | 23 +++++++------------
 .../tuned_examples/marwil/cartpole_marwil.py | 19 ++++-----------
 3 files changed, 17 insertions(+), 37 deletions(-)

diff --git a/rllib/algorithms/marwil/tests/test_marwil.py b/rllib/algorithms/marwil/tests/test_marwil.py
index 5c2584d2ed821..f31241c709125 100644
--- a/rllib/algorithms/marwil/tests/test_marwil.py
+++ b/rllib/algorithms/marwil/tests/test_marwil.py
@@ -167,14 +167,12 @@ def possibly_masked_mean(data_):
 
         # Calculate our own expected values (to then compare against the
         # agent's loss output).
-        module = algo.learner_group._learner.module[DEFAULT_MODULE_ID].unwrapped()
-        fwd_out = module.forward_train(
-            {k: v for k, v in batch[DEFAULT_MODULE_ID].items()}
-        )
-        advantages = (
-            batch[DEFAULT_MODULE_ID][Columns.VALUE_TARGETS].detach().cpu().numpy()
-            - module.compute_values(batch[DEFAULT_MODULE_ID]).detach().cpu().numpy()
+        fwd_out = (
+            algo.learner_group._learner.module[DEFAULT_MODULE_ID]
+            .unwrapped()
+            .forward_train({k: v for k, v in batch[DEFAULT_MODULE_ID].items()})
         )
+        advantages = batch[DEFAULT_MODULE_ID][Columns.ADVANTAGES].detach().cpu().numpy()
         advantages_squared = possibly_masked_mean(np.square(advantages))
         c_2 = 100.0 + 1e-8 * (advantages_squared - 100.0)
         c = np.sqrt(c_2)
diff --git a/rllib/tuned_examples/cql/pendulum_cql.py b/rllib/tuned_examples/cql/pendulum_cql.py
index 24e74f0781a7b..e2727aba5febe 100644
--- a/rllib/tuned_examples/cql/pendulum_cql.py
+++ b/rllib/tuned_examples/cql/pendulum_cql.py
@@ -39,21 +39,14 @@
     )
     .offline_data(
         input_=[data_path.as_posix()],
-        # The `kwargs` for the `input_read_method`. We override the
-        # the number of blocks to pull at once b/c our dataset is
-        # small.
-        input_read_method_kwargs={"override_num_blocks": max(args.num_gpus * 2, 2)},
-        # The `kwargs` for the `map_batches` method in which our
-        # `OfflinePreLearner` is run. 2 data workers should be run
-        # concurrently.
-        map_batches_kwargs={"concurrency": 2, "num_cpus": 2},
-        # The `kwargs` for the `iter_batches` method. Due to the small
-        # dataset we choose only a single batch to prefetch.
-        iter_batches_kwargs={"prefetch_batches": 1},
-        # The number of iterations to be run per learner when in multi-learner
-        # mode in a single RLlib training iteration. Leave this to `None` to
-        # run an entire epoch on the dataset during a single RLlib training
-        # iteration. For single-learner mode 1 is the only option.
+        # Define the number of read blocks; this should be larger than 1
+        # and aligned with the size of the dataset.
+        input_read_method_kwargs={"override_num_blocks": max(args.num_gpus, 2)},
+        # Concurrency defines the number of processes that run the
+        # `map_batches` transformations. This should be aligned with the
+        # `prefetch_batches` argument in `iter_batches_kwargs`.
+        map_batches_kwargs={"concurrency": max(2, args.num_gpus * 2)},
+        actions_in_input_normalized=True,
         dataset_num_iters_per_learner=1 if args.num_gpus == 0 else None,
         # TODO (sven): Has this any influence in the connectors?
         actions_in_input_normalized=True,
diff --git a/rllib/tuned_examples/marwil/cartpole_marwil.py b/rllib/tuned_examples/marwil/cartpole_marwil.py
index e33a23d62c69a..d40d389de39e2 100644
--- a/rllib/tuned_examples/marwil/cartpole_marwil.py
+++ b/rllib/tuned_examples/marwil/cartpole_marwil.py
@@ -49,21 +49,10 @@
     # as remote learners.
     .offline_data(
         input_=[data_path.as_posix()],
-        # The `kwargs` for the `input_read_method`. We override the
-        # the number of blocks to pull at once b/c our dataset is
-        # small.
-        input_read_method_kwargs={"override_num_blocks": max(args.num_gpus * 2, 2)},
-        # The `kwargs` for the `map_batches` method in which our
-        # `OfflinePreLearner` is run. 2 data workers should be run
-        # concurrently.
-        map_batches_kwargs={"concurrency": 2, "num_cpus": 2},
-        # The `kwargs` for the `iter_batches` method. Due to the small
-        # dataset we choose only a single batch to prefetch.
-        iter_batches_kwargs={"prefetch_batches": 1},
-        # The number of iterations to be run per learner when in multi-learner
-        # mode in a single RLlib training iteration. Leave this to `None` to
-        # run an entire epoch on the dataset during a single RLlib training
-        # iteration. For single-learner mode 1 is the only option.
+        # Note, we need at least 2 data blocks to read from, such that
+        # concurrency in `map_batches` works.
+        input_read_method_kwargs={"override_num_blocks": max(args.num_gpus, 2)},
+        prelearner_module_synch_period=20,
         dataset_num_iters_per_learner=1 if args.num_gpus == 0 else None,
     )
     .training(
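
Note on the mechanism (illustration only, not part of the patch): after this change,
advantages are no longer computed by a GAE pass inside the `MARWILOfflinePreLearner`;
instead, a `GeneralAdvantageEstimation` connector in the learner pipeline writes
`Columns.ADVANTAGES` onto the train batch, which is why the test above reads the
advantages from the batch directly. A minimal sketch of wiring such a connector into a
config by hand follows. It assumes Ray's new API stack; the `gamma`/`lambda_` values
are placeholders, and MARWIL's default learner pipeline already appends this connector
after the patch, so user code does not need to do this itself.

    # Sketch: attach a GAE learner connector to an algorithm config.
    from ray.rllib.algorithms.marwil import MARWILConfig
    from ray.rllib.connectors.learner import GeneralAdvantageEstimation

    config = (
        MARWILConfig()
        .environment("CartPole-v1")
        .training(
            # The connector runs in the learner pipeline and writes
            # Columns.ADVANTAGES into the train batch before the loss
            # is computed. Placeholder gamma/lambda_ values.
            learner_connector=lambda obs_space, act_space: (
                GeneralAdvantageEstimation(gamma=0.99, lambda_=1.0)
            ),
        )
    )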