Skip to content

Commit

Permalink
add main changes
Browse files Browse the repository at this point in the history
Signed-off-by: dimapihtar <dpihtar@gmail.com>
  • Loading branch information
dimapihtar committed Sep 27, 2024
1 parent e29e2d2 commit b47b28f
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 5 deletions.
3 changes: 2 additions & 1 deletion nemo/collections/llm/recipes/log/default.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+ from datetime import timedelta
from typing import Optional

from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
Expand Down Expand Up @@ -34,7 +35,7 @@ def default_log(
nl.ModelCheckpoint,
save_last=True,
save_top_k=10,
- every_n_train_steps=200,
+ train_time_interval=Config(timedelta, minutes=15),
filename="{model_name}--{val_loss:.2f}-{step}-{consumed_samples}",
)

Expand Down
2 changes: 1 addition & 1 deletion nemo/lightning/pytorch/callbacks/model_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def __init__(
train_time_interval: Optional[timedelta] = None,
save_on_train_epoch_end: Optional[bool] = False, # Save after training, not after validation
save_optim_on_train_end: Optional[bool] = False,
- always_save_context: bool = False,
+ always_save_context: bool = True,
save_context_on_train_end: bool = True,
**kwargs,
):
Expand Down
4 changes: 2 additions & 2 deletions nemo/lightning/pytorch/strategies/megatron_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,12 @@ def __init__(
lazy_init: bool = False,
pipeline_dtype: Optional[torch.dtype] = None,
save_ckpt_format: str = "torch_dist",
- ckpt_async_save: bool = False,
+ ckpt_async_save: bool = True,
ckpt_torch_dist_multiproc: int = None, ## TODO(ashors): put elsewhere?
ckpt_assume_constant_structure: bool = False,
ckpt_parallel_save: bool = True,
ckpt_parallel_save_within_dp: bool = False,
- ckpt_parallel_load: bool = False,
+ ckpt_parallel_load: bool = True,
ckpt_parallel_save_optim: bool = True,
ckpt_load_directly_on_device: bool = True,
setup_optimizers: bool = True,
Expand Down
2 changes: 1 addition & 1 deletion nemo/lightning/run/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class PreemptionPlugin(run.Plugin):
By default, the list includes NeMo's preemption callback.
"""

- preempt_time: int = 300
+ preempt_time: int = 60
callbacks: list[run.Config[Callback]] = field(default_factory=lambda: [run.Config(PreemptionCallback)])

def setup(self, task: run.Partial | run.Script, executor: run.Executor):
Expand Down

0 comments on commit b47b28f

Please sign in to comment.