DoubleGrad Cherry-pick PR #3 #41895

Merged: 1 commit, Apr 19, 2022
@@ -22,9 +22,16 @@
 ### Global Variables ###
 ########################
 ops_to_fill_zero_for_empty_grads = set([
-    "split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
-    "sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad",
-    "add_triple_grad"
+    "split_grad",
+    "rnn_grad",
+    "matmul_double_grad",
+    "matmul_triple_grad",
+    "sigmoid_double_grad",
+    "sigmoid_triple_grad",
+    "add_double_grad",
+    "add_triple_grad",
+    "multiply_double_grad",
+    "multiply_triple_grad",
 ])

 # For API dispatch used at python-level
2 changes: 2 additions & 0 deletions paddle/fluid/eager/autograd_meta.h
@@ -107,6 +107,8 @@ class AutogradMeta : public AbstractAutogradMeta {

   GradNodeBase* GradNode() const { return grad_node_.get(); }

+  void ResetGradNode() { grad_node_.reset(); }
+
   void SetSingleOutRankWithSlot(size_t slot_id, size_t rank) {
     out_slot_id_ = slot_id;
     out_rank_ = rank;
20 changes: 11 additions & 9 deletions paddle/fluid/eager/backward.cc
@@ -53,7 +53,7 @@ class GeneralGrad {
     auto* target_node = auto_grad_meta->GetMutableGradNode().get();

     if (orig_to_copied_node_mapping_.count(target_node)) {
-      target_node = orig_to_copied_node_mapping_[target_node];
+      target_node = orig_to_copied_node_mapping_[target_node].get();
     } else {
       VLOG(6) << "Unable to find target node in "
                  "orig_to_copied_node_mapping_, likely indicating an "
@@ -261,7 +261,7 @@

     auto* target_node = auto_grad_meta->GetMutableGradNode().get();
     if (orig_to_copied_node_mapping_.count(target_node)) {
-      target_node = orig_to_copied_node_mapping_[target_node];
+      target_node = orig_to_copied_node_mapping_[target_node].get();
     } else {
       VLOG(6) << "Unable to find target node in "
                  "orig_to_copied_node_mapping_, likely indicating an unused "
@@ -349,12 +349,12 @@

   GradNodeBase* CopyGradNode(const std::shared_ptr<GradNodeBase>& orig_node) {
     if (orig_to_copied_node_mapping_.count(orig_node.get())) {
-      return orig_to_copied_node_mapping_[orig_node.get()];
+      return orig_to_copied_node_mapping_[orig_node.get()].get();
     }
     std::shared_ptr<GradNodeBase> copied_node = orig_node->Copy();

     // Save node and update mapping
-    orig_to_copied_node_mapping_[orig_node.get()] = copied_node.get();
+    orig_to_copied_node_mapping_[orig_node.get()] = copied_node;
     copied_grad_nodes_.push_back(copied_node);

     return copied_node.get();
@@ -379,7 +379,7 @@
         paddle::platform::errors::Fatal(
             "Cannot reconstruct backward graph,"
             "unable to find copied target for certain grad node."));
-    GradNodeBase* copied_node = orig_to_copied_node_mapping_[orig_node];
+    GradNodeBase* copied_node = orig_to_copied_node_mapping_[orig_node].get();

     const std::vector<std::vector<Edge>>& orig_edges = orig_node->GetEdges();
     std::vector<std::vector<Edge>>& copied_edges =
@@ -397,13 +397,12 @@
           std::shared_ptr<GradNodeBase> copied_next_node;
           if (orig_to_copied_node_mapping_.count(orig_next_node.get())) {
             copied_next_node =
-                orig_to_copied_node_mapping_[orig_next_node.get()]
-                    ->shared_from_this();
+                orig_to_copied_node_mapping_[orig_next_node.get()];

           } else {
             copied_next_node = orig_next_node->Copy();
             orig_to_copied_node_mapping_[orig_next_node.get()] =
-                copied_next_node.get();
+                copied_next_node;
             copied_grad_nodes_.push_back(copied_next_node);
           }

@@ -436,7 +435,8 @@
   std::unordered_map<GradNodeBase*, paddle::experimental::Tensor> results_map;

   std::vector<std::shared_ptr<GradNodeBase>> copied_grad_nodes_;
-  std::unordered_map<GradNodeBase*, GradNodeBase*> orig_to_copied_node_mapping_;
+  std::unordered_map<GradNodeBase*, std::shared_ptr<GradNodeBase>>
+      orig_to_copied_node_mapping_;

   DISABLE_COPY_AND_ASSIGN(GeneralGrad);
 };
@@ -534,6 +534,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(

   // GeneralGrad
   bool is_general_grad = !inputs.empty();
+  if (is_general_grad) GeneralGrad::Instance().Clear();

   /* --- Initialization --- */
   // 1. Init queue with starting nodes
@@ -746,6 +747,7 @@
         VLOG(6) << "We get grad_output_tensor with slot: " << i
                 << ", rank: " << j << " as uninitialized or undefined tensor";
       }
+
       VLOG(6) << "Get Edge and grad_output_tensor with slot: " << i
               << ", rank: " << j
               << " 's name is: " << grad_output_tensor.name();
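The backward.cc changes all follow from switching orig_to_copied_node_mapping_ from raw GradNodeBase* values to std::shared_ptr<GradNodeBase>, so the map itself keeps the copied grad nodes alive and .get() is called only where a raw pointer is still needed. A minimal standalone sketch of that ownership pattern, using a generic Node type as a stand-in rather than the actual Paddle classes:

#include <memory>
#include <unordered_map>
#include <vector>

// Stand-in for GradNodeBase; only the ownership pattern matters here.
struct Node {
  virtual ~Node() = default;
};

int main() {
  // Owning map: shared_ptr values keep every copied node alive for as long
  // as the map (here, the GeneralGrad instance) lives.
  std::unordered_map<Node*, std::shared_ptr<Node>> orig_to_copied;
  std::vector<std::shared_ptr<Node>> copied_nodes;

  auto orig = std::make_shared<Node>();
  auto copied = std::make_shared<Node>();

  orig_to_copied[orig.get()] = copied;  // store the shared_ptr itself
  copied_nodes.push_back(copied);

  // Where a raw pointer is still required, call .get() at the use site,
  // as the diff does with orig_to_copied_node_mapping_[target_node].get().
  Node* raw = orig_to_copied[orig.get()].get();
  (void)raw;
  return 0;
}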
2 changes: 1 addition & 1 deletion paddle/fluid/eager/grad_node_info.h
@@ -87,7 +87,7 @@ class GradSlotMeta {
   std::shared_ptr<phi::DenseTensorMeta> meta_ = nullptr;
 };

-class GradNodeBase : public std::enable_shared_from_this<GradNodeBase> {
+class GradNodeBase {
  public:
   GradNodeBase() { VLOG(6) << "Construct GradNodeBase"; }
   GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num);
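Dropping std::enable_shared_from_this from GradNodeBase pairs with the map change above: once the copy mapping owns shared_ptr<GradNodeBase> values directly, shared ownership no longer has to be recovered from a raw pointer via shared_from_this(). A small sketch of the two patterns, with placeholder node types rather than Paddle's:

#include <memory>
#include <unordered_map>

// Old-style node: needed enable_shared_from_this because the copy map
// stored only raw pointers.
struct NodeWithEsft : std::enable_shared_from_this<NodeWithEsft> {};

// New-style node: a plain class is enough once the map owns shared_ptrs.
struct PlainNode {};

int main() {
  // Before: raw-pointer map, shared ownership rebuilt via shared_from_this().
  std::unordered_map<NodeWithEsft*, NodeWithEsft*> raw_map;
  auto a = std::make_shared<NodeWithEsft>();
  raw_map[a.get()] = a.get();
  std::shared_ptr<NodeWithEsft> a_shared = raw_map[a.get()]->shared_from_this();

  // After: owning map, the shared_ptr is simply read back.
  std::unordered_map<PlainNode*, std::shared_ptr<PlainNode>> owning_map;
  auto b = std::make_shared<PlainNode>();
  owning_map[b.get()] = b;
  std::shared_ptr<PlainNode> b_shared = owning_map[b.get()];

  (void)a_shared;
  (void)b_shared;
  return 0;
}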
31 changes: 16 additions & 15 deletions paddle/fluid/eager/tensor_wrapper.h
@@ -79,9 +79,9 @@

     auto* tensor_autograd_meta = EagerUtils::nullable_autograd_meta(tensor);
     if (tensor_autograd_meta) {
-      auto autograd_meta = std::make_shared<AutogradMeta>(
-          Edge(nullptr, EagerUtils::OutRankInfo(tensor)));
-      autograd_meta->SetStopGradient(tensor_autograd_meta->StopGradient());
+      auto autograd_meta =
+          std::make_shared<AutogradMeta>(*tensor_autograd_meta);
+      autograd_meta->ResetGradNode();
       intermidiate_tensor_.set_autograd_meta(autograd_meta);
       weak_grad_node_ = tensor_autograd_meta->GetMutableGradNode();
     }
@@ -98,8 +98,11 @@
     check_inplace_version();

     // if it's full_reserved just return the full copy of tensor
-    paddle::experimental::Tensor recovered_tensor = intermidiate_tensor_;
-    if (!full_reserved_) {
+    if (full_reserved_) {
+      return intermidiate_tensor_;
+    } else {
+      paddle::experimental::Tensor recovered_tensor = intermidiate_tensor_;
+
       std::shared_ptr<GradNodeBase> new_grad_node = weak_grad_node_.lock();
       if (new_grad_node) {
         VLOG(3) << "Recovered TensorWrapper with GradNode "
@@ -109,17 +112,15 @@
       }
       auto* intermediate_autograd_meta =
           EagerUtils::unsafe_autograd_meta(intermidiate_tensor_);
-      auto p_ab_autograd_meta = std::make_shared<AutogradMeta>(
-          Edge(new_grad_node, intermediate_autograd_meta->OutRankInfo()));
-      p_ab_autograd_meta->SetStopGradient(
-          intermediate_autograd_meta->StopGradient());
-
-      recovered_tensor.set_autograd_meta(
-          std::static_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
-              p_ab_autograd_meta));
-    }
+      auto p_ab_autograd_meta =
+          std::make_shared<AutogradMeta>(*intermediate_autograd_meta);
+      if (new_grad_node) {
+        p_ab_autograd_meta->SetGradNode(new_grad_node);
+      }
+      recovered_tensor.set_autograd_meta(p_ab_autograd_meta);

-    return recovered_tensor;
+      return recovered_tensor;
+    }
   }

   void check_inplace_version() {
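The tensor_wrapper.h rewrite changes how autograd metadata travels through a TensorWrapper: instead of building a fresh AutogradMeta from an Edge, the wrapper now copies the source tensor's AutogradMeta (so fields such as stop_gradient carry over), drops the strong grad-node reference via the new ResetGradNode(), and re-attaches the node from weak_grad_node_ when recover() runs. A simplified sketch of that wrap/recover flow with stand-in types, not the Paddle classes:

#include <cassert>
#include <memory>

// Stand-ins for GradNodeBase and AutogradMeta; only the copy/reset/reattach
// flow from the diff is modeled here.
struct GradNode {};

struct Meta {
  bool stop_gradient = false;
  std::shared_ptr<GradNode> grad_node;
  void ResetGradNode() { grad_node.reset(); }
  void SetGradNode(std::shared_ptr<GradNode> n) { grad_node = std::move(n); }
};

int main() {
  auto node = std::make_shared<GradNode>();
  Meta source;
  source.stop_gradient = true;
  source.grad_node = node;

  // Wrapping: copy the whole meta so flags carry over, drop the strong
  // reference to the grad node, and remember the node only weakly.
  auto wrapped = std::make_shared<Meta>(source);
  wrapped->ResetGradNode();
  std::weak_ptr<GradNode> weak_node = source.grad_node;

  // Recovering: copy the wrapped meta again and re-attach the grad node
  // only if it is still alive.
  Meta recovered = *wrapped;
  if (auto alive = weak_node.lock()) {
    recovered.SetGradNode(alive);
  }
  assert(recovered.stop_gradient);
  assert(recovered.grad_node == node);
  return 0;
}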
4 changes: 2 additions & 2 deletions paddle/phi/infermeta/backward.h
@@ -100,6 +100,8 @@ void GatherNdGradInferMeta(const MetaTensor& x,
                            const MetaTensor& out_grad,
                            MetaTensor* x_grad);

+void GeneralUnaryGradInferMeta(const MetaTensor& x, MetaTensor* dx);
+
 void GeneralBinaryGradInferMeta(const MetaTensor& x,
                                 const MetaTensor& y,
                                 MetaTensor* dx,
@@ -132,8 +134,6 @@ void GeneralQuinaryGradInferMeta(const MetaTensor& x,
                                  MetaTensor* dk,
                                  MetaTensor* dl);

-void GeneralUnaryGradInferMeta(const MetaTensor& x, MetaTensor* dx);
-
 void GumbelSoftmaxGradInferMeta(const MetaTensor& out,
                                 const MetaTensor& dout,
                                 int axis,
4 changes: 4 additions & 0 deletions python/paddle/autograd/functional.py
@@ -943,8 +943,10 @@ def func(x, y):
     # [0., 1., 0., 1., 0., 1., 0., 1.]]))

     '''
+
     inputs = _as_tensors(inputs)
     outputs = _as_tensors(func(*inputs))
+
     batch_size = inputs[0].shape[0]
     for input in inputs:
         assert input.shape[
@@ -961,12 +963,14 @@
     for i, flat_output in enumerate(flat_outputs):
         jac_i = list([] for _ in range(fin_size))
         for k in range(flat_output.shape[1]):
+
             row_k = paddle.grad(
                 flat_output[:, k],
                 inputs,
                 create_graph=create_graph,
                 retain_graph=True,
                 allow_unused=allow_unused)
+
             for j in range(fin_size):
                 jac_i[j].append(
                     paddle.reshape(