Skip to content

Commit

Permalink
support offload/reload optimizer's states for custom device (#9467)
Browse files (browse the repository at this point in the history)
  • Loading branch information
tianhaodongbd authored Dec 2, 2024
1 parent 0b6284e commit eae8d9f
Showing 1 changed file with 13 additions and 7 deletions.
20 changes: 13 additions & 7 deletions paddlenlp/trainer/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@
)
from ..utils.import_utils import is_datasets_available, is_paddle_cuda_available
from ..utils.log import logger
from ..utils.tools import get_env_device
from .argparser import strtobool
from .integrations import get_reporting_integration_callbacks
from .plugins.timer import RuntimeTimer, get_timers, set_timers
Expand Down Expand Up @@ -1773,10 +1774,6 @@ def apply_decay_param_fun(x):
return self.optimizer

def _apply_to_optimizer(self, action):
if "gpu" not in paddle.device.get_device():
logger.warning("offload/reload optimizer's states is only supported on GPU devices.")
return

attributes = [
("_accumulators", "_moment1_acc_str"),
("_accumulators", "_moment2_acc_str"),
Expand All @@ -1791,13 +1788,22 @@ def _apply_to_optimizer(self, action):
target_attr = target_attr[getattr(self.optimizer, attr[1])]

for key, value in target_attr.items():
target_attr[key] = getattr(value, action)()
if get_env_device() == "gpu":
target_attr[key] = getattr(value, action)()
else:
target_attr[key] = getattr(value, "to")(action)

def _offload_optimizer(self):
self._apply_to_optimizer("pin_memory")
if get_env_device() == "gpu":
self._apply_to_optimizer("pin_memory")
else:
self._apply_to_optimizer("cpu")

def _reload_optimizer(self):
self._apply_to_optimizer("cuda")
if get_env_device() == "gpu":
self._apply_to_optimizer("cuda")
else:
self._apply_to_optimizer(get_env_device())

def _load_rng_state(self, checkpoint):
# Load RNG states from `checkpoint`
Expand Down

0 comments on commit eae8d9f

Please sign in to comment.