refine error msg for some autograd code (#8541)
* refine error msg for some autograd code

* refine error msg

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
pingzhuu and mergify[bot] authored Jul 3, 2022
1 parent 7b4cf12 commit 4d106db
Showing 10 changed files with 41 additions and 32 deletions.
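Every hunk below follows one of two conventions: checks whose failure a user can plausibly trigger gain a streamed Error::RuntimeError() message reporting the expected and actual values, while purely internal invariants keep the bare check and are annotated with // NOLINT(maybe-need-error-msg) so the lint rule asking for a message is silenced deliberately. A minimal sketch of the two forms, assuming OneFlow's usual header layout; the helper names and include paths are illustrative, not taken from this commit:

#include <cstddef>

#include "oneflow/core/common/error.h"  // Error::RuntimeError() -- assumed header path
#include "oneflow/core/common/maybe.h"  // Maybe<void>, CHECK_*_OR_RETURN -- assumed header path

namespace oneflow {

// Hypothetical helper (not part of the commit): a user-triggerable check gets a
// streamed RuntimeError message stating the expected and actual values.
Maybe<void> CheckOutGradCount(size_t actual, size_t expected) {
  CHECK_EQ_OR_RETURN(actual, expected)
      << Error::RuntimeError() << "The number of out_grads is expected to be " << expected
      << ", but got " << actual;
  return Maybe<void>::Ok();
}

// Hypothetical helper (not part of the commit): an internal invariant keeps the
// bare check and silences the lint rule that asks for an error message.
Maybe<void> CheckForwardOpExpr(const void* fw_op_expr) {
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  return Maybe<void>::Ok();
}

}  // namespace oneflow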
5 changes: 3 additions & 2 deletions oneflow/core/autograd/gradient_funcs/cublas_fused_mlp.cpp
@@ -51,14 +51,15 @@ class CublasFusedMLP : public OpExprGradFunction<CublasFusedMLPCaptureState> {

Maybe<void> CublasFusedMLP::Init(const OpExpr& op) {
const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}

Maybe<void> CublasFusedMLP::Capture(CublasFusedMLPCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const {
CHECK_OR_RETURN(inputs.size() % 2 == 1) << "Both weight and bias should be passed together. ";
CHECK_OR_RETURN(inputs.size() % 2 == 1)
<< Error::RuntimeError() << "Both weight and bias should be passed together";
int32_t weight_num = (inputs.size() - 1) / 2;
ctx->weight_num = weight_num;
ctx->x_requires_grad = JUST(VectorAt(inputs, 0))->requires_grad();
6 changes: 3 additions & 3 deletions oneflow/core/autograd/gradient_funcs/layer_norm.cpp
@@ -58,7 +58,7 @@ class LayerNorm : public OpExprGradFunction<LayerNormCaptureState> {

Maybe<void> LayerNorm::Init(const OpExpr& op) {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
op_name_ = fw_op_expr->op_name();
return Maybe<void>::Ok();
@@ -73,8 +73,8 @@ Maybe<void> LayerNorm::Capture(LayerNormCaptureState* ctx, const TensorTuple& in
ctx->begin_params_axis = JUST(composed_attrs.GetAttr<int64_t>("begin_params_axis"));
ctx->epsilon = JUST(composed_attrs.GetAttr<double>("epsilon"));

CHECK_EQ_OR_RETURN(inputs.size(), ctx->center + ctx->scale + 1);
CHECK_EQ_OR_RETURN(outputs.size(), 3);
CHECK_EQ_OR_RETURN(inputs.size(), ctx->center + ctx->scale + 1); // NOLINT(maybe-need-error-msg)
CHECK_EQ_OR_RETURN(outputs.size(), 3); // NOLINT(maybe-need-error-msg)

bool has_gamma_diff = ctx->scale && inputs.at(1)->requires_grad();
bool has_beta_diff = ctx->center && inputs.at(2)->requires_grad();
4 changes: 2 additions & 2 deletions oneflow/core/autograd/gradient_funcs/max_pool.cpp
@@ -58,7 +58,7 @@ class MaxPoolNdGrad : public OpExprGradFunction<MaxPoolCaptureState> {

Maybe<void> MaxPoolNdGrad::Init(const OpExpr& op) {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
@@ -85,7 +85,7 @@ Maybe<void> MaxPoolNdGrad::Capture(MaxPoolCaptureState* ctx, const TensorTuple&
Maybe<void> MaxPoolNdGrad::Apply(const MaxPoolCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const {
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
CHECK_LE_OR_RETURN(out_grads.size(), 2);
CHECK_LE_OR_RETURN(out_grads.size(), 2); // NOLINT(maybe-need-error-msg)

int32_t ndims = ctx->kernel_size.size();
const auto& input = ctx->SavedTensors().at(ctx->input_index);
8 changes: 3 additions & 5 deletions oneflow/core/autograd/gradient_funcs/nll.cpp
@@ -40,7 +40,7 @@ class NLLGradFunction : public OpExprGradFunction<NLLCaptureState> {

Maybe<void> NLLGradFunction::Init(const OpExpr& op) {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
@@ -65,12 +65,10 @@ Maybe<void> NLLGradFunction::Apply(const NLLCaptureState* ctx, const TensorTuple
TensorTuple* in_grads) const {
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }

CHECK_EQ_OR_RETURN(out_grads.size(), 2)
<< Error::RuntimeError() << "The number of out_grads is expected to be 2, got "
<< out_grads.size();
CHECK_EQ_OR_RETURN(out_grads.size(), 2); // NOLINT(maybe-need-error-msg)
CHECK_GE_OR_RETURN(ctx->SavedTensors().size(), 2)
<< Error::RuntimeError()
<< "The number of saved tensors is expected to be greater than or equal to 2, got "
<< "The number of saved tensors is expected to be greater than or equal to 2, but got "
<< ctx->SavedTensors().size();
const auto& out_grad = out_grads[0];
const auto& input = ctx->SavedTensors()[0];
8 changes: 5 additions & 3 deletions oneflow/core/autograd/gradient_funcs/normalization.cpp
@@ -43,7 +43,7 @@ class NormalizationGrad : public OpExprGradFunction<NormalizationGradCaptureStat
public:
Maybe<void> Init(const OpExpr& op) override {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
@@ -63,7 +63,7 @@ class NormalizationGrad : public OpExprGradFunction<NormalizationGradCaptureStat
beta = inputs.at(2);
ctx->track_running_stats = false;
} else {
CHECK_EQ_OR_RETURN(inputs.size(), 5);
CHECK_EQ_OR_RETURN(inputs.size(), 5); // NOLINT(maybe-need-error-msg)
gamma = inputs.at(3);
beta = inputs.at(4);
ctx->track_running_stats = true;
@@ -107,7 +107,9 @@ class NormalizationGrad : public OpExprGradFunction<NormalizationGradCaptureStat
}
const auto& results = JUST(functional::NormalizationGrad(y_grad, x, mean, inv_variance, gamma,
ctx->epsilon, ctx->axis));
CHECK_EQ_OR_RETURN(results->size(), 3);
CHECK_EQ_OR_RETURN(results->size(), 3)
<< Error::RuntimeError() << "The number of results is expected to be 3, but got "
<< results->size();

if (ctx->track_running_stats) {
// The normalization op has 5 inputs which are x, moving_mean, moving_variance, gamma and
12 changes: 6 additions & 6 deletions oneflow/core/autograd/gradient_funcs/normalization_add_relu.cpp
@@ -46,7 +46,7 @@ class NormalizationAddReluGrad : public OpExprGradFunction<NormalizationAddReluG
public:
Maybe<void> Init(const OpExpr& op) override {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
@@ -112,7 +112,7 @@ class NormalizationAddReluGrad : public OpExprGradFunction<NormalizationAddReluG
ctx->SaveTensorForBackward(inputs.at(1)); // moving_mean 3
ctx->SaveTensorForBackward(inputs.at(2)); // moving_variance 4
} else {
CHECK_EQ_OR_RETURN(inputs.size(), 6);
CHECK_EQ_OR_RETURN(inputs.size(), 6); // NOLINT(maybe-need-error-msg)
// with add_end
ctx->SaveTensorForBackward(inputs.at(2)); // moving_mean 3
ctx->SaveTensorForBackward(inputs.at(3)); // moving_variance 4
@@ -149,10 +149,10 @@ class NormalizationAddReluGrad : public OpExprGradFunction<NormalizationAddReluG
const auto& results = JUST(functional::NormalizationAddReluGrad(
x, y_grad, mean, inv_variance, gamma, beta, reserve_space, y, ctx->axis, ctx->epsilon,
ctx->has_addend));
CHECK_EQ_OR_RETURN(results->size(),
ctx->has_addend ? 4 : 3)
<< "The result size is incorrect"; // here output includes "gamma_diff" "beta_diff" "dx"
// "addend_diff"
CHECK_EQ_OR_RETURN(results->size(), (ctx->has_addend ? 4 : 3))
<< Error::RuntimeError() << "The number of results is expected to be "
<< (ctx->has_addend ? 4 : 3) << ", but got "
<< results->size(); // here output includes "gamma_diff" "beta_diff" "dx" "addend_diff"

if (ctx->track_running_stats) {
// The normalization op has 5 inputs which are x, moving_mean, moving_variance, gamma and
2 changes: 1 addition & 1 deletion oneflow/core/autograd/gradient_funcs/select_top_n.cpp
@@ -45,7 +45,7 @@ class SelectTopN : public OpExprGradFunction<SelectTopNCaptureState> {

Maybe<void> Apply(const SelectTopNCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
CHECK_EQ_OR_RETURN(ctx->top_n, out_grads.size());
CHECK_EQ_OR_RETURN(ctx->top_n, out_grads.size()); // NOLINT(maybe-need-error-msg)
for (int i = 0; i < ctx->top_n; ++i) {
if (!ctx->requires_grad.at(i)) { continue; }
in_grads->at(i) = out_grads.at(i);
4 changes: 2 additions & 2 deletions oneflow/core/autograd/gradient_funcs/split_like.cpp
@@ -41,14 +41,14 @@ class SplitLike : public OpExprGradFunction<SplitLikeCaptureState> {

Maybe<void> SplitLike::Init(const OpExpr& op) {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}

Maybe<void> SplitLike::Capture(SplitLikeCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const {
CHECK_EQ_OR_RETURN(inputs.size(), outputs.size() + 1);
CHECK_EQ_OR_RETURN(inputs.size(), outputs.size() + 1); // NOLINT(maybe-need-error-msg)
ctx->requires_grad = inputs.at(0)->requires_grad();
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
ComposedAttrMap composed_attrs(attrs, base_attrs_);
8 changes: 5 additions & 3 deletions oneflow/core/autograd/gradient_funcs/stack.cpp
@@ -42,7 +42,7 @@ class Stack : public OpExprGradFunction<StackCaptureState> {

Maybe<void> Stack::Init(const OpExpr& op) {
const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
@@ -61,12 +61,14 @@ Maybe<void> Stack::Capture(StackCaptureState* ctx, const TensorTuple& inputs,

Maybe<void> Stack::Apply(const StackCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const {
CHECK_EQ_OR_RETURN(out_grads.size(), 1);
CHECK_EQ_OR_RETURN(out_grads.size(), 1); // NOLINT(maybe-need-error-msg)
in_grads->resize(ctx->input_num);
TensorTuple like(ctx->input_num);
for (int i = 0; i < ctx->input_num; ++i) { like[i] = ctx->SavedTensors().at(i); }
const auto& results = JUST(functional::StackGrad(out_grads.at(0), like, ctx->axis));
CHECK_EQ_OR_RETURN(results->size(), ctx->input_num);
CHECK_EQ_OR_RETURN(results->size(), ctx->input_num)
<< Error::RuntimeError() << "The number of results (" << results->size()
<< ") must match the number of inputs (" << ctx->input_num << ")";
for (int i = 0; i < ctx->input_num; ++i) {
if (ctx->requires_grad.at(i)) { in_grads->at(i) = results->at(i); }
}
16 changes: 11 additions & 5 deletions oneflow/core/autograd/gradient_funcs/variance.cpp
@@ -45,15 +45,15 @@ class Variance : public OpExprGradFunction<VarianceState> {

Maybe<void> Variance::Init(const OpExpr& op) {
const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}

Maybe<void> Variance::Capture(VarianceState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const {
CHECK_EQ_OR_RETURN(inputs.size(), 1);
CHECK_EQ_OR_RETURN(outputs.size(), 1);
CHECK_EQ_OR_RETURN(inputs.size(), 1); // NOLINT(maybe-need-error-msg)
CHECK_EQ_OR_RETURN(outputs.size(), 1); // NOLINT(maybe-need-error-msg)
ctx->requires_grad = inputs.at(0)->requires_grad();
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
ComposedAttrMap composed_attrs(attrs, base_attrs_);
@@ -70,7 +70,9 @@ Maybe<void> Variance::Apply(const VarianceState* ctx, const TensorTuple& out_gra
const std::shared_ptr<oneflow::one::Tensor>& x = ctx->SavedTensors().at(0);
size_t correction = ctx->unbiased ? 1 : 0;
size_t elem_cnt = 1;
CHECK_OR_RETURN(ctx->axis.size() > 0);
CHECK_OR_RETURN(ctx->axis.size() > 0)
<< Error::RuntimeError() << "The size of the axis must greater than 0, but got "
<< ctx->axis.size();
for (const auto& item : ctx->axis) { elem_cnt *= x->shape()->At(item); }

std::shared_ptr<Tensor> out_grad = out_grads.at(0);
@@ -82,7 +84,11 @@ Maybe<void> Variance::Apply(const VarianceState* ctx, const TensorTuple& out_gra
unsqueeze_vector.insert(unsqueeze_vector.begin() + ctx->axis.at(i), 1);
}
Shape unsqueeze_shape(unsqueeze_vector);
CHECK_EQ_OR_RETURN(unsqueeze_shape.elem_cnt(), out_grad_shape->elem_cnt());
CHECK_EQ_OR_RETURN(unsqueeze_shape.elem_cnt(), out_grad_shape->elem_cnt())
<< Error::RuntimeError()
<< "tensor size mismatch, expected tensor to have the same number of elements, but got "
<< unsqueeze_shape.elem_cnt() << " and " << out_grad_shape->elem_cnt()
<< " elements respectively";
out_grad = JUST(functional::Reshape(out_grad, unsqueeze_shape));
}

