Skip to content

Commit

Permalink
make 'load_directly_on_device' configurable (#9657) (#9674)
Browse files (browse the repository at this point in the history)
Signed-off-by: ashors1 <ashors@nvidia.com>
Co-authored-by: Anna Shors <71393111+ashors1@users.noreply.github.com>
Co-authored-by: Pablo Garay <palenq@gmail.com>
Co-authored-by: ashors1 <ashors@nvidia.com>
Signed-off-by: Tugrul Konuk <ertkonuk@gmail.com>
  • Loading branch information
4 people authored and ertkonuk committed Jul 19, 2024
1 parent 8d1b19a commit 872554b
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions nemo/lightning/pytorch/strategies.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -111,6 +111,7 @@ def __init__(
ckpt_parallel_save_within_dp=False,
ckpt_parallel_load=False,
ckpt_parallel_save_optim=True,
ckpt_load_directly_on_device=True,
setup_optimizers: bool = True,
init_model_parallel: bool = True,
**kwargs,
Expand Down Expand Up @@ -147,6 +148,7 @@ def __init__(
self.parallel_save_within_dp = ckpt_parallel_save_within_dp
self.parallel_load = ckpt_parallel_load
self.parallel_save_optim = ckpt_parallel_save_optim
self.load_directly_on_device = ckpt_load_directly_on_device

self._ddp = ddp
if ddp == "megatron":
Expand Down Expand Up @@ -582,6 +584,7 @@ def checkpoint_io(self) -> CheckpointIO:
parallel_save=self.parallel_save,
parallel_save_within_dp=self.parallel_save_within_dp,
parallel_load=self.parallel_load,
load_directly_on_device=self.load_directly_on_device,
)
if async_save:
self._checkpoint_io = AsyncFinalizableCheckpointIO(self._checkpoint_io)
Expand Down

0 comments on commit 872554b

Please sign in to comment.