Remove tyro #1176

Merged: 42 commits, merged Jan 26, 2024

Commits (the diff below reflects changes from 12 of the 42 commits):
c88e5f6  refactor (vwxyzjn, Dec 8, 2023)
0ebeb38  Remove tyro in `ppo.py` (vwxyzjn, Dec 11, 2023)
527d96f  quick update (vwxyzjn, Dec 11, 2023)
789108a  update default args (vwxyzjn, Dec 11, 2023)
e7f9580  Merge branch 'main' into refactor3 (vwxyzjn, Jan 4, 2024)
d7e66c3  quick push (vwxyzjn, Jan 4, 2024)
fa2a4c8  precommit (vwxyzjn, Jan 4, 2024)
67a8f53  refactor (vwxyzjn, Jan 4, 2024)
fe3f766  quick change (vwxyzjn, Jan 9, 2024)
34ea7d7  remove tyro (vwxyzjn, Jan 9, 2024)
af724f4  quick change (vwxyzjn, Jan 9, 2024)
800b9fd  precommit (vwxyzjn, Jan 9, 2024)
c257d85  quick change (vwxyzjn, Jan 9, 2024)
7960804  fix hello_world (vwxyzjn, Jan 9, 2024)
782fdd3  remove docstring diffences (vwxyzjn, Jan 9, 2024)
3c1f3a0  add `module load cuda/12.1` (vwxyzjn, Jan 9, 2024)
da745c8  push changes (vwxyzjn, Jan 10, 2024)
61506ff  precommit (vwxyzjn, Jan 10, 2024)
04f5a7b  make dpo runnable (vwxyzjn, Jan 10, 2024)
83ad452  fix circular import (vwxyzjn, Jan 10, 2024)
56bea91  quick fix (vwxyzjn, Jan 10, 2024)
5936fa9  refactor (vwxyzjn, Jan 11, 2024)
c636cfd  quick update (vwxyzjn, Jan 11, 2024)
0022af6  path change (vwxyzjn, Jan 11, 2024)
cca6940  update plots (vwxyzjn, Jan 11, 2024)
7b5e7b2  Merge branch 'main' into refactor3 (vwxyzjn, Jan 11, 2024)
bfde066  fix docs (vwxyzjn, Jan 11, 2024)
a6addfd  quick change (vwxyzjn, Jan 11, 2024)
040e32d  Update trl/trainer/model_config.py (vwxyzjn, Jan 12, 2024)
348faf0  Update trl/trainer/model_config.py (vwxyzjn, Jan 12, 2024)
e646352  Update trl/trainer/utils.py (vwxyzjn, Jan 12, 2024)
02e3f3c  Update examples/scripts/dpo.py (vwxyzjn, Jan 12, 2024)
f4d0840  Merge branch 'main' into refactor3 (vwxyzjn, Jan 24, 2024)
d355e38  address comments. use attn_implementation (vwxyzjn, Jan 24, 2024)
87fe921  precommit (vwxyzjn, Jan 24, 2024)
bc1a7aa  remove duplicate code (vwxyzjn, Jan 24, 2024)
161a102  update peft.py (vwxyzjn, Jan 24, 2024)
884aac1  fix test no op dep (vwxyzjn, Jan 24, 2024)
5a496f7  Update trl/trainer/utils.py (vwxyzjn, Jan 26, 2024)
bd5ab8c  Apply suggestions from code review (vwxyzjn, Jan 26, 2024)
455157e  precommit (vwxyzjn, Jan 26, 2024)
74834d2  add docs (vwxyzjn, Jan 26, 2024)
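
Taken together, these commits swap tyro-based CLI parsing for transformers' `HfArgumentParser` across the example and benchmark scripts, which also flattens nested flags such as `--ppo_config.log_with` into plain `--log_with`. A minimal sketch of that before/after pattern (a hypothetical script; only the parsing calls are meant to mirror the PR) might look like:

```python
from dataclasses import dataclass, field

from transformers import HfArgumentParser

from trl import PPOConfig


@dataclass
class ScriptArguments:
    use_peft: bool = field(default=False, metadata={"help": "whether to use PEFT"})


if __name__ == "__main__":
    # Before (tyro): ScriptArguments carried a nested `ppo_config: PPOConfig` field,
    # exposed on the CLI as prefixed flags, e.g. `--ppo_config.log_with wandb`:
    #     args = tyro.cli(ScriptArguments)
    #
    # After (HfArgumentParser): each dataclass is parsed into its own flat namespace,
    # so the same option becomes `--log_with wandb`:
    parser = HfArgumentParser((ScriptArguments, PPOConfig))
    script_args, ppo_config = parser.parse_args_into_dataclasses()
```

Invocation then changes from `python ppo.py --ppo_config.log_with wandb` to `python ppo.py --log_with wandb`, which is the substitution visible throughout the benchmark scripts and docs in the file changes below.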
16 changes: 0 additions & 16 deletions benchmark/benchmark_and_report.sh
@@ -1,19 +1,3 @@
#### Step 1: create a work directory:
# this is necessary because another github action job will remove
# the entire directory, which slurm depends on.
# https://stackoverflow.com/questions/4632028/how-to-create-a-temporary-directory
MY_SLURM_TMP_DIR=/fsx/costa/slurm_tmpdir
mkdir -p $MY_SLURM_TMP_DIR
WORK_DIR=`mktemp -d -p "$MY_SLURM_TMP_DIR"`
cp -r "$PWD" "$WORK_DIR"
cd "$WORK_DIR/$(basename "$PWD")"
echo WORK_DIR: $WORK_DIR

#### Step 2: actual work starts:
echo PATH is $PATH
echo PYTHONPATH is $PYTHONPATH
echo whcih python is $(which python)

export WANDB_ENTITY=huggingface
bash $BENCHMARK_SCRIPT > output.txt

2 changes: 1 addition & 1 deletion benchmark/benchmark_level1.sh
@@ -1,6 +1,6 @@
# hello world experiment
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --log_with wandb" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
4 changes: 2 additions & 2 deletions benchmark/benchmark_level2.sh
@@ -1,6 +1,6 @@
# compound experiments: gpt2xl + grad_accu
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name ppo_gpt2xl_grad_accu --ppo_config.model_name gpt2-xl --ppo_config.mini_batch_size 16 --ppo_config.gradient_accumulation_steps 8 --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name ppo_gpt2xl_grad_accu --model_name gpt2-xl --mini_batch_size 16 --gradient_accumulation_steps 8 --log_with wandb" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
@@ -12,7 +12,7 @@ python benchmark/benchmark.py \

# compound experiments: Cerebras-GPT-6.7B + deepspeed zero2 + grad_accu
python benchmark/benchmark.py \
--command "accelerate launch --config_file examples/accelerate_configs/deepspeed_zero2.yaml examples/scripts/ppo.py --ppo_config.exp_name ppo_Cerebras-GPT-6.7B_grad_accu_deepspeed_stage2 --ppo_config.batch_size 32 --ppo_config.mini_batch_size 32 --ppo_config.log_with wandb --ppo_config.model_name cerebras/Cerebras-GPT-6.7B --ppo_config.reward_model sentiment-analysis:cerebras/Cerebras-GPT-6.7B" \
--command "accelerate launch --config_file examples/accelerate_configs/deepspeed_zero2.yaml examples/scripts/ppo.py --exp_name ppo_Cerebras-GPT-6.7B_grad_accu_deepspeed_stage2 --batch_size 32 --mini_batch_size 32 --log_with wandb --model_name cerebras/Cerebras-GPT-6.7B --reward_model sentiment-analysis:cerebras/Cerebras-GPT-6.7B" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
8 changes: 4 additions & 4 deletions benchmark/benchmark_level3.sh
@@ -1,6 +1,6 @@
## w/ and w/o gradient accumulation
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name ppo_step_grad_accu --ppo_config.mini_batch_size 1 --ppo_config.gradient_accumulation_steps 128 --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name ppo_step_grad_accu --mini_batch_size 1 --gradient_accumulation_steps 128 --log_with wandb" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
@@ -12,7 +12,7 @@ python benchmark/benchmark.py \

## w/ different models (gpt2, gpt2-xl, falcon, llama2)
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name ppo_gpt2 --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name ppo_gpt2 --log_with wandb" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
@@ -22,7 +22,7 @@ python benchmark/benchmark.py \
--slurm-total-cpus 12 \
--slurm-template-path benchmark/trl.slurm_template
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name ppo_falcon_rw_1b --ppo_config.model_name tiiuae/falcon-rw-1b --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name ppo_falcon_rw_1b --model_name tiiuae/falcon-rw-1b --log_with wandb" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
@@ -35,7 +35,7 @@ python benchmark/benchmark.py \

## w/ and w/o PEFT
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name ppo_peft --use_peft --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name ppo_peft --use_peft --log_with wandb" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
2 changes: 1 addition & 1 deletion benchmark/post_github_comment.sbatch
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH --job-name=trl
#SBATCH --partition=production-cluster
#SBATCH --partition=hopper-cpu
#SBATCH --ntasks=1
#SBATCH --output=slurm/logs/%x_%j.out

3 changes: 3 additions & 0 deletions benchmark/regression_test.sh
@@ -0,0 +1,3 @@
BENCHMARK_SCRIPT="benchmark/benchmark_level1.sh" \
BENCHMARK_PLOT_SCRIPT="benchmark/benchmark_level1_plot.sh" \
bash benchmark/benchmark_and_report.sh
6 changes: 3 additions & 3 deletions benchmark/trl.slurm_template
@@ -1,16 +1,16 @@
#!/bin/bash
#SBATCH --job-name=trl
#SBATCH --partition=production-cluster
#SBATCH --partition=hopper-prod
#SBATCH --gpus-per-task={{gpus_per_task}}
#SBATCH --cpus-per-gpu={{cpus_per_gpu}}
#SBATCH --ntasks={{ntasks}}
#SBATCH --output=slurm/logs/%x_%j.out
#SBATCH --array={{array}}
#SBATCH --exclude=ip-26-0-156-239,ip-26-0-148-151,ip-26-0-146-212,ip-26-0-145-137,ip-26-0-146-249,ip-26-0-146-149,ip-26-0-147-233,ip-26-0-145-154,ip-26-0-144-35,ip-26-0-144-189,ip-26-0-146-183,ip-26-0-147-120,ip-26-0-144-95,ip-26-0-145-193
##SBATCH --exclude=ip-26-0-149-199
{{nodes}}

seeds={{seeds}}
seed=${seeds[$SLURM_ARRAY_TASK_ID % {{len_seeds}}]}

echo "Running task $SLURM_ARRAY_TASK_ID with seed: $seed"
srun {{command}} --ppo_config.seed $seed
srun {{command}} --seed $seed
14 changes: 7 additions & 7 deletions docs/source/sentiment_tuning.mdx
@@ -25,7 +25,7 @@ accelerate launch examples/scripts/ppo.py # launches training
# 3. get help text and documentation
python examples/scripts/ppo.py --help
# 4. configure logging with wandb and, say, mini_batch_size=1 and gradient_accumulation_steps=16
python examples/scripts/ppo.py --ppo_config.log_with wandb --ppo_config.mini_batch_size 1 --ppo_config.gradient_accumulation_steps 16
python examples/scripts/ppo.py --log_with wandb --mini_batch_size 1 --gradient_accumulation_steps 16
```

Note: if you don't want to log with `wandb` remove `log_with="wandb"` in the scripts/notebooks. You can also replace it with your favourite experiment tracker that's [supported by `accelerate`](https://huggingface.co/docs/accelerate/usage_guides/tracking).
@@ -42,7 +42,7 @@ Below are some benchmark results for `examples/scripts/ppo.py`. To reproduce loc

```bash
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --log_with wandb" \
--num-seeds 5 \
--start-seed 1 \
--workers 10 \
@@ -61,7 +61,7 @@ python benchmark/benchmark.py \

```bash
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name sentiment_tuning_step_grad_accu --ppo_config.mini_batch_size 1 --ppo_config.gradient_accumulation_steps 128 --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name sentiment_tuning_step_grad_accu --mini_batch_size 1 --gradient_accumulation_steps 128 --log_with wandb" \
--num-seeds 5 \
--start-seed 1 \
--workers 10 \
@@ -79,7 +79,7 @@ python benchmark/benchmark.py \

```bash
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name sentiment_tuning_gpt2 --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name sentiment_tuning_gpt2 --log_with wandb" \
--num-seeds 5 \
--start-seed 1 \
--workers 10 \
@@ -89,7 +89,7 @@ python benchmark/benchmark.py \
--slurm-total-cpus 12 \
--slurm-template-path benchmark/trl.slurm_template
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name sentiment_tuning_gpt2xl_grad_accu --ppo_config.model_name gpt2-xl --ppo_config.mini_batch_size 16 --ppo_config.gradient_accumulation_steps 8 --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name sentiment_tuning_gpt2xl_grad_accu --model_name gpt2-xl --mini_batch_size 16 --gradient_accumulation_steps 8 --log_with wandb" \
--num-seeds 5 \
--start-seed 1 \
--workers 10 \
@@ -99,7 +99,7 @@ python benchmark/benchmark.py \
--slurm-total-cpus 12 \
--slurm-template-path benchmark/trl.slurm_template
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name sentiment_tuning_falcon_rw_1b --ppo_config.model_name tiiuae/falcon-rw-1b --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name sentiment_tuning_falcon_rw_1b --model_name tiiuae/falcon-rw-1b --log_with wandb" \
--num-seeds 5 \
--start-seed 1 \
--workers 10 \
@@ -116,7 +116,7 @@ python benchmark/benchmark.py \

```
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name sentiment_tuning_peft --use_peft --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name sentiment_tuning_peft --use_peft --log_with wandb" \
--num-seeds 5 \
--start-seed 1 \
--workers 10 \
79 changes: 39 additions & 40 deletions examples/scripts/ddpo.py
@@ -11,59 +11,51 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""
python examples/scripts/ddpo.py \
--num_epochs=200 \
--train_gradient_accumulation_steps=1 \
--sample_num_steps=50 \
--sample_batch_size=6 \
--train_batch_size=3 \
--sample_num_batches_per_epoch=4 \
--per_prompt_stat_tracking=True \
--per_prompt_stat_tracking_buffer_size=32 \
--tracker_project_name="stable_diffusion_training" \
--log_with="wandb"
"""
import os
from dataclasses import dataclass, field

import numpy as np
import torch
import torch.nn as nn
import tyro
from huggingface_hub import hf_hub_download
from huggingface_hub.utils import EntryNotFoundError
from transformers import CLIPModel, CLIPProcessor
from transformers import CLIPModel, CLIPProcessor, HfArgumentParser

from trl import DDPOConfig, DDPOTrainer, DefaultDDPOStableDiffusionPipeline
from trl.import_utils import is_npu_available, is_xpu_available


@dataclass
class ScriptArguments:
hf_user_access_token: str
pretrained_model: str = "runwayml/stable-diffusion-v1-5"
"""the pretrained model to use"""
pretrained_revision: str = "main"
"""the pretrained model revision to use"""
hf_hub_model_id: str = "ddpo-finetuned-stable-diffusion"
"""HuggingFace repo to save model weights to"""
hf_hub_aesthetic_model_id: str = "trl-lib/ddpo-aesthetic-predictor"
"""HuggingFace model ID for aesthetic scorer model weights"""
hf_hub_aesthetic_model_filename: str = "aesthetic-model.pth"
"""HuggingFace model filename for aesthetic scorer model weights"""
use_lora: bool = True
"""Whether to use LoRA."""

ddpo_config: DDPOConfig = field(
default_factory=lambda: DDPOConfig(
num_epochs=200,
train_gradient_accumulation_steps=1,
sample_num_steps=50,
sample_batch_size=6,
train_batch_size=3,
sample_num_batches_per_epoch=4,
per_prompt_stat_tracking=True,
per_prompt_stat_tracking_buffer_size=32,
tracker_project_name="stable_diffusion_training",
log_with="wandb",
project_kwargs={
"logging_dir": "./logs",
"automatic_checkpoint_naming": True,
"total_limit": 5,
"project_dir": "./save",
},
)
pretrained_model: str = field(
default="runwayml/stable-diffusion-v1-5", metadata={"help": "the pretrained model to use"}
)
pretrained_revision: str = field(default="main", metadata={"help": "the pretrained model revision to use"})
hf_hub_model_id: str = field(
default="ddpo-finetuned-stable-diffusion", metadata={"help": "HuggingFace repo to save model weights to"}
)
hf_hub_aesthetic_model_id: str = field(
default="trl-lib/ddpo-aesthetic-predictor",
metadata={"help": "HuggingFace model ID for aesthetic scorer model weights"},
)
hf_hub_aesthetic_model_filename: str = field(
default="aesthetic-model.pth",
metadata={"help": "HuggingFace model filename for aesthetic scorer model weights"},
)
use_lora: bool = field(default=True, metadata={"help": "Whether to use LoRA."})


class MLP(nn.Module):
@@ -192,14 +184,21 @@ def image_outputs_logger(image_data, global_step, accelerate_logger):


if __name__ == "__main__":
args = tyro.cli(ScriptArguments)
parser = HfArgumentParser((ScriptArguments, DDPOConfig))
args, ddpo_config = parser.parse_args_into_dataclasses()
ddpo_config.project_kwargs = {
"logging_dir": "./logs",
"automatic_checkpoint_naming": True,
"total_limit": 5,
"project_dir": "./save",
}
Comment on lines +189 to +194 (Contributor Author): required. Otherwise errors out.

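The post-parse assignment the comment refers to exists because `project_kwargs` is a dict-valued setting, which is awkward to supply as flat command-line flags; per the review note, omitting it makes the run error out. A small sketch of the same parse-then-override pattern, with illustrative values rather than the script's exact ones:

```python
from transformers import HfArgumentParser

from trl import DDPOConfig

# Parse the flat, scalar fields from the CLI, then fill in structured values
# (dicts, nested objects) in code, where argparse-style flags are a poor fit.
parser = HfArgumentParser(DDPOConfig)
(ddpo_config,) = parser.parse_args_into_dataclasses(args=["--num_epochs", "200"])
ddpo_config.project_kwargs = {"logging_dir": "./logs", "project_dir": "./save"}
```
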
pipeline = DefaultDDPOStableDiffusionPipeline(
args.pretrained_model, pretrained_model_revision=args.pretrained_revision, use_lora=args.use_lora
)

trainer = DDPOTrainer(
args.ddpo_config,
ddpo_config,
aesthetic_scorer(args.hf_hub_aesthetic_model_id, args.hf_hub_aesthetic_model_filename),
prompt_fn,
pipeline,
@@ -208,4 +207,4 @@ def image_outputs_logger(image_data, global_step, accelerate_logger):

trainer.train()

trainer.push_to_hub(args.hf_hub_model_id, token=args.hf_user_access_token)
trainer.push_to_hub(args.hf_hub_model_id)
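
With the explicit `hf_user_access_token` argument gone, `push_to_hub` relies on credentials the Hugging Face Hub client can resolve on its own (a cached login or environment token). One way to provide them ahead of time, shown here as an assumption about the intended workflow rather than something this PR documents:

```python
from huggingface_hub import login

# Store a token locally so that trainer.push_to_hub() can authenticate
# without the script taking the token as a CLI argument.
# Equivalent to running `huggingface-cli login` once in the shell.
login(token="hf_xxx")  # replace with your own token
```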