From 4f7ee529c0141b40f54d4561a51560d4ffb41f07 Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Fri, 1 Dec 2023 15:02:35 +0100 Subject: [PATCH 1/4] Adds optional linear LR schedule to the example --- examples/stable_baselines3_example.py | 46 +++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py index ce035c5b..dbea0099 100644 --- a/examples/stable_baselines3_example.py +++ b/examples/stable_baselines3_example.py @@ -1,6 +1,7 @@ import argparse import os import pathlib +from typing import Callable from stable_baselines3.common.callbacks import CheckpointCallback from godot_rl.core.utils import can_import @@ -13,7 +14,7 @@ # 1. gdrl.env_from_hub -r edbeeching/godot_rl_BallChase # 2. chmod +x examples/godot_rl_BallChase/bin/BallChase.x86_64 if can_import("ray"): - print("WARNING, stable baselines and ray[rllib] are not compatable") + print("WARNING, stable baselines and ray[rllib] are not compatible") parser = argparse.ArgumentParser(allow_abbrev=False) parser.add_argument( @@ -85,11 +86,19 @@ help="Instead of training, it will run inference on a loaded model for --timesteps steps. " "Requires --resume_model_path to be set." ) +parser.add_argument( + "--linear_lr_schedule", + default=False, + action="store_true", + help="Use a linear LR schedule for training. If set, learning rate will decrease until it reaches 0 at " + "--timesteps " + "value. Note: On resuming training, the schedule will reset. If disabled, constant LR will be used." +) parser.add_argument( "--viz", action="store_true", help="If set, the simulation will be displayed in a window during training. Otherwise " - "training will run without rendering the simualtion. This setting does not apply to in-editor training.", + "training will run without rendering the simulation. 
This setting does not apply to in-editor training.", default=False ) parser.add_argument("--speedup", default=1, type=int, help="Whether to speed up the physics in the env") @@ -117,8 +126,39 @@ speedup=args.speedup) env = VecMonitor(env) + +# LR schedule code snippet from: +# https://stable-baselines3.readthedocs.io/en/master/guide/examples.html#learning-rate-schedule +def linear_schedule(initial_value: float) -> Callable[[float], float]: + """ + Linear learning rate schedule. + + :param initial_value: Initial learning rate. + :return: schedule that computes + current learning rate depending on remaining progress + """ + + def func(progress_remaining: float) -> float: + """ + Progress will decrease from 1 (beginning) to 0. + + :param progress_remaining: Fraction of training remaining, from 1 (beginning) to 0. + :return: current learning rate + """ + return progress_remaining * initial_value + + return func + + if args.resume_model_path is None: - model = PPO("MultiInputPolicy", env, ent_coef=0.0001, verbose=2, n_steps=32, tensorboard_log=args.experiment_dir) + learning_rate = 0.0003 if args.inference else linear_schedule(0.0003) + model: PPO = PPO("MultiInputPolicy", + env, + ent_coef=0.0001, + verbose=2, + n_steps=32, + tensorboard_log=args.experiment_dir, + learning_rate=learning_rate) else: path_zip = pathlib.Path(args.resume_model_path) print("Loading model: " + os.path.abspath(path_zip)) From 1bc9a0b4772ad40dd6d5830247bb0139ffd25151 Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Fri, 1 Dec 2023 15:12:33 +0100 Subject: [PATCH 2/4] Adds `linear_lr_schedule` flag to docs. 
--- docs/ADV_STABLE_BASELINES_3.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/ADV_STABLE_BASELINES_3.md b/docs/ADV_STABLE_BASELINES_3.md index 9ba19a3a..8b865151 100644 --- a/docs/ADV_STABLE_BASELINES_3.md +++ b/docs/ADV_STABLE_BASELINES_3.md @@ -48,6 +48,7 @@ We recommend taking the [sb3 example](https://github.com/edbeeching/godot_rl_age The example exposes more parameters for the user to configure, such as `--speedup` to run the environment faster than realtime and the `--n_parallel` to launch several instances of the game executable in order to accelerate training (not available for in-editor training). +## SB3 Example script usage: To use the example script, first move to the location where the downloaded script is in the console/terminal, and then try some of the example use cases below: ### Train a model in editor: @@ -109,3 +110,11 @@ You can run inference on a model that was previously saved using either `--save_ ```bash python stable_baselines3_example.py --timesteps=100_000 --resume_model_path=model.zip --inference ``` + +### Use a linear learning rate schedule +By default, the learning rate will be constant throughout training. +If you add `--linear_lr_schedule`, learning rate will decrease with the progress, +and reach 0 at `--timesteps` value. +```bash +python stable_baselines3_example.py --timesteps=1_000_000 --linear_lr_schedule +``` From bebf23cf99dc80176823c2e5a0e930ba975522e9 Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Fri, 1 Dec 2023 15:14:21 +0100 Subject: [PATCH 3/4] More consistent titles. --- docs/ADV_STABLE_BASELINES_3.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/ADV_STABLE_BASELINES_3.md b/docs/ADV_STABLE_BASELINES_3.md index 8b865151..8c2092a2 100644 --- a/docs/ADV_STABLE_BASELINES_3.md +++ b/docs/ADV_STABLE_BASELINES_3.md @@ -79,20 +79,20 @@ You can optionally set an experiment directory and name to override the default. 
python stable_baselines3_example.py --experiment_dir="experiments" --experiment_name="experiment1" ``` -### Train a model for 100_000 steps then save and export the model +### Train a model for 100_000 steps then save and export the model: The exported .onnx model can be used by the Godot sync node to run inference from Godot directly, while the saved .zip model can be used to resume training later or run inference from the example script by adding `--inference`. ```bash python stable_baselines3_example.py --timesteps=100_000 --onnx_export_path=model.onnx --save_model_path=model.zip ``` -### Resume training from a saved .zip model +### Resume training from a saved .zip model: This will load the previously saved model.zip, and resume training for another 100 000 steps, so the saved model will have been trained for 200 000 steps in total. Note that the console log will display the `total_timesteps` for the last training session only, so it will show `100000` instead of `200000`. ```bash python stable_baselines3_example.py --timesteps=100_000 --save_model_path=model_200_000_total_steps.zip --resume_model_path=model.zip ``` -### Save periodic checkpoints +### Save periodic checkpoints: You can save periodic checkpoints and later resume training from any checkpoint using the same CL argument as above, or run inference on any checkpoint just like with the saved model. Note that you need to use a unique `experiment_name` or `experiment_dir` for each run so that checkpoints from one run won't overwrite checkpoints from another run. Alternatively, you can remove the folder containing checkpoints from a previous run if you don't need them anymore. @@ -105,13 +105,13 @@ python stable_baselines3_example.py --experiment_name=experiment1 --timesteps=2_ Checkpoints will be saved to `logs\sb3\experiment1_checkpoints` in the above case, the location is affected by `--experiment_dir` and `--experiment_name`. 
-### Run inference on a saved model for 100_000 steps +### Run inference on a saved model for 100_000 steps: You can run inference on a model that was previously saved using either `--save_model_path` or `--save_checkpoint_frequency`. ```bash python stable_baselines3_example.py --timesteps=100_000 --resume_model_path=model.zip --inference ``` -### Use a linear learning rate schedule +### Use a linear learning rate schedule: By default, the learning rate will be constant throughout training. If you add `--linear_lr_schedule`, learning rate will decrease with the progress, and reach 0 at `--timesteps` value. From 835a2e88002492ece703af7d4d04e65143ee816b Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Fri, 1 Dec 2023 17:34:46 +0100 Subject: [PATCH 4/4] Fix conditional statement --- examples/stable_baselines3_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py index dbea0099..cf208b77 100644 --- a/examples/stable_baselines3_example.py +++ b/examples/stable_baselines3_example.py @@ -151,7 +151,7 @@ def func(progress_remaining: float) -> float: if args.resume_model_path is None: - learning_rate = 0.0003 if args.inference else linear_schedule(0.0003) + learning_rate = 0.0003 if not args.linear_lr_schedule else linear_schedule(0.0003) model: PPO = PPO("MultiInputPolicy", env, ent_coef=0.0001,