
Commit

doc strings
Signed-off-by: Malay Nagda <malayn@nvidia.com>
malay-nagda committed Dec 23, 2024
1 parent e005e8f commit cd8dbb2
Showing 4 changed files with 26 additions and 3 deletions.
5 changes: 5 additions & 0 deletions scripts/llm/performance/llama3_405b.py
@@ -46,6 +46,11 @@ def llama3_405b_performance_recipe(
vp_size: Optional[int],
max_steps: int,
):
"""
llama3 405b pre-train recipe aimed at achieving best possible performance.
NOTE: Use fp8 precision training with caution. It might not give desirable results.
"""
recipe = pretrain_recipe(dir=log_dir, performance_mode=True)

# data module configs
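For orientation, the three recipe scripts in this commit share the same shape. The sketch below is illustrative only: the parameters hidden behind the collapsed diff, the import path of `pretrain_recipe`, and the override attribute names are assumptions, not the exact contents of `scripts/llm/performance/llama3_405b.py`.

```python
# Hedged sketch of the pattern the performance recipe scripts follow.
from typing import Optional

# Assumption: the base recipe comes from the matching module under
# nemo.collections.llm.recipes; the diff only shows the pretrain_recipe(...) call.
from nemo.collections.llm.recipes.llama3_405b import pretrain_recipe


def llama3_405b_performance_recipe_sketch(log_dir: str, vp_size: Optional[int], max_steps: int):
    """Llama3 405B pre-train recipe aimed at the best possible performance (sketch)."""
    # Start from the standard pre-train recipe with performance mode enabled ...
    recipe = pretrain_recipe(dir=log_dir, performance_mode=True)
    # ... then override trainer/data-module knobs; these attribute names are
    # assumptions for illustration, not shown in this diff.
    recipe.trainer.max_steps = max_steps
    recipe.trainer.strategy.virtual_pipeline_model_parallel_size = vp_size
    # NOTE from the added docstrings: use fp8 precision training with caution.
    return recipe
```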
5 changes: 5 additions & 0 deletions scripts/llm/performance/llama3_70b.py
@@ -46,6 +46,11 @@ def llama3_70b_performance_recipe(
vp_size: Optional[int],
max_steps: int,
):
"""
llama3 70b pre-train recipe aimed at achieving best possible performance.
NOTE: Use fp8 precision training with caution. It might not give desirable results.
"""
recipe = pretrain_recipe(dir=log_dir, performance_mode=True)

# data module configs
5 changes: 5 additions & 0 deletions scripts/llm/performance/llama3_8b.py
@@ -46,6 +46,11 @@ def llama3_8b_performance_recipe(
vp_size: Optional[int],
max_steps: int,
):
"""
llama3 8b pre-train recipe aimed at achieving best possible performance.
NOTE: Use fp8 precision training with caution. It might not give desirable results.
"""
recipe = pretrain_recipe(dir=log_dir, performance_mode=True)

# data module configs
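The fp8 caution in these docstrings refers to how mixed precision is selected for a recipe. A hedged sketch of opting into fp8 is below; the `bf16_with_fp8_mixed` import path and the `recipe.trainer.plugins` assignment are assumptions based on the NeMo 2.0 recipe layout, not part of this commit.

```python
# Hedged sketch: choosing between bf16 and fp8 mixed precision on a recipe.
# Per the docstrings above, fp8 may not give desirable results, so bf16 stays
# the safer default here.
from nemo.collections.llm.recipes.llama3_8b import pretrain_recipe
from nemo.collections.llm.recipes.precision.mixed_precision import bf16_with_fp8_mixed  # assumed path

recipe = pretrain_recipe(dir="/results/llama3_8b", performance_mode=True)

use_fp8 = False  # flip with caution, as the docstrings warn
if use_fp8:
    # Assumption: the trainer's precision plugin can be swapped on the recipe config.
    recipe.trainer.plugins = bf16_with_fp8_mixed()
```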
14 changes: 11 additions & 3 deletions scripts/llm/performance/utils.py
@@ -14,15 +14,14 @@

import argparse
import os
- from typing import Any, List, Optional
+ from typing import List, Optional

import nemo_run as run
from lightning.pytorch.callbacks.callback import Callback

from nemo.collections.common.tokenizers.huggingface import AutoTokenizer
from nemo.collections.llm.recipes.llama3_8b import MegatronCommOverlapCallback


def slurm_executor(
account: str,
partition: str,
@@ -35,6 +34,10 @@ def slurm_executor(
custom_env_vars: Optional[dict[str, str]] = None,
retries: int = 0,
) -> run.SlurmExecutor:
"""
Slurm cluster definition with appropriate cluster params and NeMo container params needed for pre-training
and fine-tuning experiments
"""
if not (log_dir and account and partition and nodes and num_gpus_per_node):
raise RuntimeError(
"Please set user, host, remote_job_dir, account, partition, nodes and devices args for using this ",
@@ -85,7 +88,7 @@ def hf_tokenizer(model_name: str) -> run.Config[AutoTokenizer]:
AutoTokenizer first searches for tokenizer files locally in env var 'NEMO_HOME'.
If tokenizer files are not present locally, AutoTokenizer will try downloading from HuggingFace.
In the case tokenizer needs downloading, make sure env vars- 'TRANSFORMERS_OFFLINE=0' and
- 'HF_TOKEN:<token_value>' are set inside NeMo container.
+ 'HF_TOKEN:<token_value>' are defined in SlurmExecutor.env_vars.
"""
return run.Config(
AutoTokenizer,
@@ -95,6 +98,11 @@ def hf_tokenizer(model_name: str) -> run.Config[AutoTokenizer]:


def get_comm_overlap_callback_idx(callbacks: List[Callback]):
"""
nemo.lightning.Trainer has a list of callbacks defined. This method identifies index of MegatronCommOverlapCallback
from the list defined in recipes in nemo.collections.llm.recipes. The index is needed to override ddp communication
params
"""
if callbacks: # default is None in lightning
for idx, callback in enumerate(callbacks):
if isinstance(callback, MegatronCommOverlapCallback):
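Finally, a hedged sketch of how these helpers could be wired together in a performance script. The model choice, cluster values, the tp_comm_overlap override, and the assumption that `scripts/llm/performance/utils.py` is importable as a module are all illustrative, not contents of this commit.

```python
# Illustrative wiring of the utils.py helpers (placeholder values throughout).
import nemo_run as run

from nemo.collections.llm.recipes.llama3_8b import pretrain_recipe
from scripts.llm.performance.utils import (  # assumes the scripts dir is on the import path
    get_comm_overlap_callback_idx,
    hf_tokenizer,
    slurm_executor,
)

recipe = pretrain_recipe(dir="/results/llama3_8b", performance_mode=True)

# Tokenizer config; if it has to be downloaded, TRANSFORMERS_OFFLINE=0 and HF_TOKEN
# must be present in the executor env vars (see the hf_tokenizer docstring).
recipe.data.tokenizer = hf_tokenizer("meta-llama/Meta-Llama-3-8B")  # hypothetical model id

# Locate MegatronCommOverlapCallback so its DDP/communication params can be overridden.
idx = get_comm_overlap_callback_idx(recipe.trainer.callbacks)
if idx is not None:  # assumption: the helper returns None when the callback is absent
    recipe.trainer.callbacks[idx].tp_comm_overlap = True  # assumed override for illustration

executor = slurm_executor(
    account="my_account",          # cluster-specific placeholder
    partition="batch",             # cluster-specific placeholder
    log_dir="/results/llama3_8b",
    nodes=1,
    num_gpus_per_node=8,
    custom_env_vars={"TRANSFORMERS_OFFLINE": "0", "HF_TOKEN": "<token_value>"},
    # assumption: remaining executor params keep their defaults
)

run.run(recipe, executor=executor)
```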
