Skip to content

Commit

Permalink
Update param
Browse files Browse the repository at this point in the history
  • Loading branch information
fduwjj committed Jul 29, 2024
1 parent a6ed602 commit bbcf7be
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions train_configs/llama3_405b.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# torchtitan Config.toml
# NOTE: this toml config is a preset for 64 A100 GPUs.
# NOTE: this toml config is a preset for 128 H100 GPUs.
# This is still WIP so please use with caution.

[job]
dump_folder = "./outputs"
Expand All @@ -23,14 +24,14 @@ tokenizer_path = "./torchtitan/datasets/tokenizer/original/tokenizer.model"

[optimizer]
name = "AdamW"
lr = 1.5e-4
lr = 0.8e-4 # WIP right now.

[training]
batch_size = 16
batch_size = 1
seq_len = 8192
warmup_steps = 200 # lr scheduler warm up, normally 20% of the train steps
warmup_steps = 600 # lr scheduler warm up, normally 20% of the train steps
max_norm = 1.0 # grad norm clipping
steps = 1000
steps = 3000
data_parallel_degree = -1
tensor_parallel_degree = 8 # 8-way TP
enable_float8_linear = false
Expand All @@ -50,4 +51,4 @@ export_dtype = "float32"
async_mode = "disabled" # ["disabled", "async", "async_with_pinned_mem"]

[activation_checkpoint]
mode = 'full'
mode = 'full' # ['none', 'selective', 'full']

0 comments on commit bbcf7be

Please sign in to comment.