RLHF with PPO #1005
Just out of curiosity, does bf16 have different implications for training stability in a PPO training loop as compared to SFT?
I think so. I'm not sure I have the expertise to answer this from experience, but intuitively there are several factors in PPO optimisation that help stabilise training and reduce variance in gradient updates, so the impact of reduced precision may be smaller than in SFT scenarios.
The PPO loss also isn't a "distance" like in SFT, so you may not see the same loss landscapes, because gradient updates point in the direction of maximising the reward. This means the smoothness of the loss landscape depends more on the amount of variance in your trajectories, e.g. if your generations are significantly different from each other (due to generation args), your reward model isn't well-calibrated, or something as simple as your batch size being too small.
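For what it's worth, here's a rough sketch of the distinction I mean (variable names are illustrative only, not from this PR): SFT minimises a cross-entropy "distance" to fixed targets, while the PPO policy loss maximises expected advantage with a clipped ratio that bounds the size of each update.

```python
import torch
import torch.nn.functional as F

def sft_loss(logits, targets):
    # SFT: cross-entropy against fixed targets, i.e. a "distance".
    # logits: (batch, seq_len, vocab), targets: (batch, seq_len)
    return F.cross_entropy(logits.flatten(0, 1), targets.flatten())

def ppo_policy_loss(logprobs, old_logprobs, advantages, epsilon=0.2):
    # PPO: maximise expected advantage; the clipped ratio bounds each
    # update, which can damp the effect of noisy (e.g. low-precision) gradients.
    ratio = torch.exp(logprobs - old_logprobs)
    clipped = torch.clamp(ratio, 1 - epsilon, 1 + epsilon)
    return -torch.min(ratio * advantages, clipped * advantages).mean()
```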
Empirically, the reference TRL results I compared against below used fp32, and my results were in bf16.