remove the optimizer base and learning rate base #56099

Merged: 4 commits on Aug 11, 2023
python/paddle/amp/auto_cast.py (3 changes: 1 addition & 2 deletions)

@@ -250,7 +250,6 @@ def _is_valid_optimizer(optimizer):
         optimizer,
         (
             paddle.optimizer.Optimizer,
-            paddle.fluid.optimizer.Optimizer,
             DygraphShardingOptimizer,
         ),
     )
@@ -260,7 +259,7 @@ def check_optimizers(optimizers):
     for optimizer in optimizers:
         if not _is_valid_optimizer(optimizer):
             raise RuntimeError(
-                "Current train mode is pure fp16, optimizers should be paddle.optimizer.Optimizer or paddle.fluid.optimizer.Optimizer or DygraphShardingOptimizer, but receive {}.".format(
+                "Current train mode is pure fp16, optimizers should be paddle.optimizer.Optimizer or DygraphShardingOptimizer, but receive {}.".format(
                     type(optimizer)
                 )
             )
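A minimal usage sketch (not part of this diff; the toy model and optimizer are illustrative) of what the tightened check now expects when decorating for pure fp16:

import paddle

model = paddle.nn.Linear(4, 4)
# Only paddle.optimizer.Optimizer subclasses (or DygraphShardingOptimizer)
# pass _is_valid_optimizer after this change; fluid optimizers are rejected.
opt = paddle.optimizer.AdamW(learning_rate=1e-3, parameters=model.parameters())
model, opt = paddle.amp.decorate(models=model, optimizers=opt, level='O2')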
python/paddle/distributed/auto_parallel/static/engine.py (3 changes: 1 addition & 2 deletions)

@@ -146,11 +146,10 @@ def __init__(
 
         if optimizer and not isinstance(
             optimizer,
-            (paddle.optimizer.Optimizer, paddle.static.Optimizer),
+            (paddle.optimizer.Optimizer),
         ):
             raise TypeError(
                 "'optimizer' must be object of class `paddle.optimizer.Optimizer`"
-                " or `paddle.static.Optimizer`."
             )
         self._optimizer = auto_utils.validate_opt(optimizer)
 
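For reference, a sketch of the only optimizer type the Engine now accepts (this assumes the public auto-parallel entry point paddle.distributed.fleet.auto, which is outside this diff; the model and loss are illustrative):

import paddle
from paddle.distributed.fleet import auto

model = paddle.nn.Linear(8, 2)
loss = paddle.nn.CrossEntropyLoss()
# Must be a paddle.optimizer.Optimizer; passing a paddle.static.Optimizer
# now raises the TypeError above.
opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
engine = auto.Engine(model, loss, opt)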
python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py (59 changes: 54 additions & 5 deletions)

@@ -20,11 +20,11 @@
 
 import paddle
 from paddle.common_ops_import import LayerHelper
+from paddle.fluid import framework
 from paddle.fluid.dygraph import base as imperative_base
-from paddle.fluid.optimizer import Optimizer
 from paddle.framework import core, in_dynamic_mode
 from paddle.nn.clip import ClipGradByNorm, append_gradient_clip_ops
-from paddle.optimizer import Momentum
+from paddle.optimizer import Momentum, Optimizer
 from paddle.regularizer import L1Decay, L2Decay
 from paddle.static import create_global_var
 
@@ -58,8 +58,8 @@ def __init__(
         assert momentum is not None
         super().__init__(
             learning_rate=learning_rate,
-            parameter_list=parameter_list,
-            regularization=regularization,
+            parameters=parameter_list,
+            weight_decay=regularization,
             grad_clip=grad_clip,
             name=name,
         )
@@ -396,6 +396,55 @@ def _dgc_op(
             op_maker.kOpRoleVarAttrName(), [param_var.name, grad_var.name]
         )
 
+    def _process_distribute_lookuptable(self, param_grads):
+        """
+        Because distribute lookup table only support SGD optimizer for now, not support
+        other optimizer and regularization, so we should find the table parameter out,
+        and avoid to add regularization and other op for it, and add sgd optimize op
+        for it independently.
+        :param param_grads(list((Var, Var))): list of (param, grad) pair.
+        :param loss: the loss variable.
+        :param startup_program: the startup program
+        """
+        from paddle.distributed.distribute_lookup_table import (
+            find_distributed_lookup_table,
+        )
+
+        program = framework.default_main_program()
+        global_block = framework.default_main_program().global_block()
+        table_name = find_distributed_lookup_table(program)
+        table_param = None
+        table_grad = None
+        new_param_grads = []
+        for p, g in param_grads:
+            if p.name == table_name:
+                if table_param is not None:
+                    raise RuntimeError(
+                        "multi dist table var found, only support one now!"
+                    )
+                table_param = p
+                table_grad = g
+            else:
+                new_param_grads.append((p, g))
+        sgd_op = None
+        if table_param is not None:
+            param_and_grad = [table_param, table_grad]
+            with table_param.block.program._optimized_guard(
+                param_and_grad
+            ), framework.name_scope("optimizer"):
+                self._create_global_learning_rate()
+                # create the optimize op
+                sgd_op = global_block.append_op(
+                    type='sgd',
+                    inputs={
+                        "Param": table_param,
+                        "Grad": table_grad,
+                        "LearningRate": self._create_param_lr(param_and_grad),
+                    },
+                    outputs={"ParamOut": param_and_grad[0]},
+                )
+        return new_param_grads, (table_param, table_grad), sgd_op
+
     @imperative_base.no_grad()
     def apply_gradients(self, params_grads):
         # Note: since we can't use all_reduce_op now,
@@ -532,7 +581,7 @@ def apply_gradients(self, params_grads):
 
     def apply_optimize(self, loss, startup_program, params_grads):
         self._init_dgc_opt()
-        return self.dgc_opt.apply_optimize(
+        return self.dgc_opt._apply_optimize(
            loss, startup_program=startup_program, params_grads=params_grads
         )
 
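The constructor change above follows the paddle.optimizer.Optimizer signature. A sketch of the renamed keyword arguments (the model and values are illustrative, not from this diff):

import paddle

model = paddle.nn.Linear(16, 16)
# paddle.optimizer.Optimizer subclasses take `parameters` and `weight_decay`,
# replacing fluid's `parameter_list` and `regularization`.
opt = paddle.optimizer.Momentum(
    learning_rate=0.1,
    momentum=0.9,
    parameters=model.parameters(),
    weight_decay=paddle.regularizer.L2Decay(1e-4),
)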
python/paddle/fluid/__init__.py (2 changes: 0 additions & 2 deletions)

@@ -53,7 +53,6 @@
 from .initializer import set_global_initializer
 from . import layers
 from . import dygraph
-from . import optimizer
 from . import backward
 from .backward import gradients
 from . import incubate
@@ -109,7 +108,6 @@
     'disable_dygraph',
     'enable_imperative',
     'disable_imperative',
-    'optimizer',
     'backward',
     'LoDTensor',
     'LoDTensorArray',
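Migration sketch (not part of this diff): code that imported the removed paddle.fluid.optimizer module switches to paddle.optimizer; the toy model below is illustrative.

import paddle

model = paddle.nn.Linear(4, 4)
# previously: from paddle.fluid import optimizer
opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())

x = paddle.randn([2, 4])
loss = model(x).mean()
loss.backward()
opt.step()
opt.clear_grad()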
python/paddle/fluid/dygraph/__init__.py (3 changes: 0 additions & 3 deletions)

@@ -18,9 +18,6 @@
 from . import tracer
 from .tracer import *
 
-from . import learning_rate_scheduler
-from .learning_rate_scheduler import *
 
 __all__ = []
 __all__ += base.__all__
-__all__ += learning_rate_scheduler.__all__
python/paddle/fluid/dygraph/learning_rate_scheduler.py (180 changes: 0 additions & 180 deletions)

This file was deleted.
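A replacement sketch (not part of this diff): the schedulers under paddle.optimizer.lr take over for the deleted fluid.dygraph.learning_rate_scheduler module; the scheduler choice and values below are illustrative.

import paddle

model = paddle.nn.Linear(4, 4)
scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.1, step_size=10, gamma=0.5)
opt = paddle.optimizer.SGD(learning_rate=scheduler, parameters=model.parameters())

# ... run a training step, then advance the schedule:
scheduler.step()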
