Fix linalg vector norm and clip grad bug (#8007)
* fix reduce_sum scalar check bug

* fix linalg vector norm and clip grad bug

* fix comment

* auto format by CI

* Fix linalg vector norm backward bug (#8015)

* has multi definition bug

* fix bug

* fix comment

* fix bug

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
Co-authored-by: oneflow-ci-bot <ci-bot@oneflow.org>
3 people authored Apr 18, 2022
1 parent dfe4ec0 commit 888ad73
Showing 8 changed files with 79 additions and 12 deletions.
8 changes: 8 additions & 0 deletions oneflow/core/functional/functional_api.yaml
@@ -349,6 +349,14 @@
   signature: "Tensor (Tensor input) => TransposeAllDimFunction"
   bind_python: True
 
+- name: "not_equal_zero"
+  signature: "Tensor (Tensor x) => NotEqualZero"
+  bind_python: False
+
+- name: "not_equal_zero_grad"
+  signature: "Tensor (Tensor x, Tensor dy) => NotEqualZeroGrad"
+  bind_python: False
+
 - name: "reciprocal"
   signature: "Tensor (Tensor x) => Reciprocal"
   bind_python: True
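These YAML entries declare the new functors for code generation; `bind_python: False` keeps them internal, so they are callable from C++ (as `functional::NotEqualZero`) but not exposed under `oneflow._C`. As a sketch of the intended elementwise semantics (my reading of the diff, using numpy rather than OneFlow's actual kernels):

    import numpy as np

    def not_equal_zero(x: np.ndarray) -> np.ndarray:
        # Forward: indicator of x != 0, kept in x's dtype so ReduceSum can consume it.
        return (x != 0).astype(x.dtype)

    def not_equal_zero_grad(x: np.ndarray, dy: np.ndarray) -> np.ndarray:
        # Backward: the indicator is piecewise constant, so the gradient is zero everywhere.
        return np.zeros_like(x)

    x = np.array([[0.0, -2.0, 3.0], [0.0, 0.0, 1.0]])
    print(not_equal_zero(x))              # [[0. 1. 1.] [0. 0. 1.]]
    print(not_equal_zero(x).sum(axis=1))  # [2. 1.] -> per-row L0 "norm"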
3 changes: 1 addition & 2 deletions oneflow/core/functional/impl/math_functor.cpp
@@ -1081,8 +1081,7 @@ class VectorNormFunctor {
     if (ord.IsIntegral() || ord.IsFloatingPoint()) {
       double ord_val = JUST(ord.As<double>());
       if (ord_val == 0) {
-        std::vector<int32_t> dim_column(1, 0);
-        res = JUST(ReduceSum(JUST(ScalarLogicalNotEqual(x, 0)), dim_column, keepdim));
+        res = JUST(ReduceSum(JUST(functional::NotEqualZero(x)), dim, keepdim));
       } else if (ord_val == INFINITY) {
         res = JUST(ReduceMax(JUST(Abs(x)), dim, keepdim));
       } else if (ord_val == -INFINITY) {
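This one-line change is the heart of the fix: the old code reduced over a hard-coded `dim_column = {0}` instead of the caller's `dim`, so `vector_norm(..., ord=0)` counted nonzeros along the wrong axis, and `ScalarLogicalNotEqual` produced a bool-typed tensor with no usable gradient. A numpy illustration of the before/after behavior for a 2x3 input and `dim=1`:

    import numpy as np

    x = np.array([[0.0, -2.0, 3.0],
                  [0.0,  0.0, 1.0]])
    indicator = (x != 0).astype(x.dtype)

    print(indicator.sum(axis=1))  # [2. 1.]    -- fixed: reduce over the requested dim
    print(indicator.sum(axis=0))  # [0. 1. 2.] -- old: always reduced over dim 0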
4 changes: 3 additions & 1 deletion oneflow/core/functional/impl/unary_functor.cpp
@@ -67,7 +67,8 @@ namespace impl {
   OF_PP_MAKE_TUPLE_SEQ("sqrt", Sqrt)     \
   OF_PP_MAKE_TUPLE_SEQ("square", Square) \
   OF_PP_MAKE_TUPLE_SEQ("tan", Tan)       \
-  OF_PP_MAKE_TUPLE_SEQ("tanh", Tanh)
+  OF_PP_MAKE_TUPLE_SEQ("tanh", Tanh)     \
+  OF_PP_MAKE_TUPLE_SEQ("not_equal_zero", NotEqualZero)
 
 #define LOGICAL_FLOAT_UNARY_FUNC_SEQ OF_PP_MAKE_TUPLE_SEQ("logical_not", LogicalNot)
 
@@ -151,6 +152,7 @@ ONEFLOW_FUNCTION_LIBRARY(m) {
   ADD_UNARY_FUNCTOR(Square, "Square");
   ADD_UNARY_FUNCTOR(Tan, "Tan");
   ADD_UNARY_FUNCTOR(Tanh, "Tanh");
+  ADD_UNARY_FUNCTOR(NotEqualZero, "NotEqualZero")
   m.add_functor<LogicalNotFunctor>("LogicalNot");
   m.add_functor<InplaceSinFunctor>("Sin_");
   m.add_functor<InplaceFloorFunctor>("Floor_");
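`ADD_UNARY_FUNCTOR(NotEqualZero, "NotEqualZero")` registers the functor in the function library under its string name, which is what lets `functional::NotEqualZero(x)` in `math_functor.cpp` resolve to the op added above. A toy Python sketch of this string-keyed registry pattern (illustrative only, not OneFlow's actual dispatch code):

    registry = {}

    def add_unary_functor(name, fn):
        # Mirrors ADD_UNARY_FUNCTOR: map a public name to its implementation.
        registry[name] = fn

    add_unary_functor("NotEqualZero", lambda xs: [float(v != 0) for v in xs])

    # The functional layer looks the implementation up by name at call time.
    print(registry["NotEqualZero"]([0.0, 5.0, -1.0]))  # [0.0, 1.0, 1.0]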
31 changes: 29 additions & 2 deletions oneflow/ir/include/OneFlow/OneFlowUserOps.td
@@ -7714,8 +7714,8 @@ def OneFlow_TestUserOpAttrAutoTypeOp : OneFlow_BaseOp<"test_user_op_attr_auto_ty
 #endif // GET_ONEFLOW_TEST_OP_DEFINITIONS
 
 // Group: TRIGONOMETRIC
-// acos, acos_grad, acosh, acosh_grad, asin, asin_grad, asinh, asinh_grad, atan, atan2, atan2_x_grad, atan2_y_grad, atan_grad, atanh, atanh_grad, cos, cos_grad, cosh, cosh_grad, hardtanh, hardtanh_grad, sin, sin_grad, sinh, sinh_grad, tan, tan_grad, tanh, tanh_grad
-// Total: 29
+// acos, acos_grad, acosh, acosh_grad, asin, asin_grad, asinh, asinh_grad, atan, atan2, atan2_x_grad, atan2_y_grad, atan_grad, atanh, atanh_grad, cos, cos_grad, cosh, cosh_grad, hardtanh, hardtanh_grad, sin, sin_grad, sinh, sinh_grad, tan, tan_grad, tanh, tanh_grad, not_equal_zero, not_equal_zero_grad
+// Total: 31
 
 #ifdef GET_ONEFLOW_TRIGONOMETRIC_OP_DEFINITIONS
 
@@ -8122,6 +8122,33 @@ def OneFlow_TanhGradOp : OneFlow_BaseOp<"tanh_grad", [NoSideEffect, DeclareOpInt
   let has_data_type_infer_fn = 1;
 }
 
+def OneFlow_NotEqualZeroOp : OneFlow_BaseOp<"not_equal_zero", [NoSideEffect, DeclareOpInterfaceMethods<UserOpCompatibleInterface>]> {
+  let input = (ins
+    OneFlow_Tensor:$x
+  );
+  let output = (outs
+    OneFlow_Tensor:$y
+  );
+  let has_logical_tensor_desc_infer_fn = 1;
+  let has_physical_tensor_desc_infer_fn = 1;
+  let has_get_sbp_fn = 1;
+  let has_data_type_infer_fn = 1;
+}
+
+def OneFlow_NotEqualZeroGradOp : OneFlow_BaseOp<"not_equal_zero_grad", [NoSideEffect, DeclareOpInterfaceMethods<UserOpCompatibleInterface>]> {
+  let input = (ins
+    OneFlow_Tensor:$x,
+    OneFlow_Tensor:$dy
+  );
+  let output = (outs
+    OneFlow_Tensor:$dx
+  );
+  let has_logical_tensor_desc_infer_fn = 1;
+  let has_physical_tensor_desc_infer_fn = 1;
+  let has_get_sbp_fn = 1;
+  let has_data_type_infer_fn = 1;
+}
+
 #endif // GET_ONEFLOW_TRIGONOMETRIC_OP_DEFINITIONS
 
 // Group: UNARY
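The two ODS records give the MLIR dialect matching definitions: `not_equal_zero` maps `x -> y`, its grad op maps `(x, dy) -> dx`, and both opt into the generated shape, dtype, and SBP inference hooks. For an elementwise op those inference rules are trivial; sketched below in Python (assumed behavior — the real infer functions are generated C++):

    def infer_not_equal_zero(x_shape, x_dtype):
        # Elementwise: y inherits x's shape and dtype unchanged.
        return x_shape, x_dtype

    def infer_not_equal_zero_grad(x_shape, x_dtype, dy_shape):
        # dx mirrors x; dy must be elementwise-compatible with x.
        assert dy_shape == x_shape, "dy must match x"
        return x_shape, x_dtype

    print(infer_not_equal_zero((2, 3), "float32"))  # ((2, 3), 'float32')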
21 changes: 21 additions & 0 deletions oneflow/user/kernels/math_unary_elementwise_func.h
@@ -153,6 +153,13 @@ struct AtanhFunctor<float> {
   }
 };
 
+template<>
+struct NotEqualZeroFunctor<float> {
+  static OF_DEVICE_FUNC float Forward(const float x) { return x != 0; }
+
+  static OF_DEVICE_FUNC float Backward(const float x, const float dy) { return 0.0f; }
+};
+
 template<>
 struct CeilFunctor<float> {
   static OF_DEVICE_FUNC float Forward(const float x) { return MATH_FUNC_F(ceil, x); }
@@ -422,6 +429,13 @@ struct AtanhFunctor<double> {
   }
 };
 
+template<>
+struct NotEqualZeroFunctor<double> {
+  static OF_DEVICE_FUNC double Forward(const double x) { return x != 0; }
+
+  static OF_DEVICE_FUNC double Backward(const double x, const double dy) { return 0.0f; }
+};
+
 template<>
 struct CeilFunctor<double> {
   static OF_DEVICE_FUNC double Forward(const double x) { return MATH_FUNC_D(ceil, x); }
@@ -717,6 +731,13 @@ struct CeilFunctor<half> {
   static OF_HALF_FUNC half Backward(const half x, const half dy) { return GetZeroVal<half>(); }
 };
 
+template<>
+struct NotEqualZeroFunctor<half> {
+  static OF_HALF_FUNC half Forward(const half x) { return x != static_cast<half>(0.0); }
+
+  static OF_HALF_FUNC half Backward(const half x, const half dy) { return GetZeroVal<half>(); }
+};
+
 template<>
 struct CosFunctor<half> {
   static OF_HALF_FUNC half Forward(const half x) { return hcos(x); }
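Each dtype specialization pairs `Forward` with `Backward`; since the indicator function is flat everywhere except the jump at `x == 0` (where the derivative is conventionally taken as zero), every `Backward` here ignores both arguments and returns zero. A quick numerical check of that reasoning:

    def forward(x: float) -> float:
        return float(x != 0)

    # Central finite difference away from the jump at x == 0: the function is flat.
    x, eps = 3.0, 1e-3
    print((forward(x + eps) - forward(x - eps)) / (2 * eps))  # 0.0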
6 changes: 4 additions & 2 deletions oneflow/user/ops/math_unary_elementwise_seq.h
@@ -53,7 +53,8 @@ namespace oneflow {
   OF_PP_MAKE_TUPLE_SEQ("sinh", Sinh)     \
   OF_PP_MAKE_TUPLE_SEQ("sqrt", Sqrt)     \
   OF_PP_MAKE_TUPLE_SEQ("square", Square) \
-  OF_PP_MAKE_TUPLE_SEQ("tan", Tan)
+  OF_PP_MAKE_TUPLE_SEQ("tan", Tan)       \
+  OF_PP_MAKE_TUPLE_SEQ("not_equal_zero", NotEqualZero)
 
 #define MATH_UNARY_ELEMENTWISE_FUNC_SEQ_ODS \
   OF_PP_MAKE_TUPLE_SEQ("abs", Abs)         \
@@ -88,7 +89,8 @@ namespace oneflow {
   OF_PP_MAKE_TUPLE_SEQ("sinh", Sinh)     \
   OF_PP_MAKE_TUPLE_SEQ("sqrt", Sqrt)     \
   OF_PP_MAKE_TUPLE_SEQ("square", Square) \
-  OF_PP_MAKE_TUPLE_SEQ("tan", Tan)
+  OF_PP_MAKE_TUPLE_SEQ("tan", Tan)       \
+  OF_PP_MAKE_TUPLE_SEQ("not_equal_zero", NotEqualZero)
 
 } // namespace oneflow
 
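The new tuple has to be appended to both sequences — `MATH_UNARY_ELEMENTWISE_FUNC_SEQ` drives kernel generation while the `_ODS` variant drives the op-definition side — and each `OF_PP_MAKE_TUPLE_SEQ("name", Name)` entry is expanded once per registration site. Roughly what that expansion stamps out, sketched in Python (illustrative only):

    # Each (op_name, Functor) tuple in the seq macro...
    MATH_UNARY_ELEMENTWISE_FUNC_SEQ = [("tan", "Tan"), ("not_equal_zero", "NotEqualZero")]

    for op_name, functor in MATH_UNARY_ELEMENTWISE_FUNC_SEQ:
        for dtype in ("float", "double", "half"):
            # ...becomes one generated kernel registration per dtype.
            print(f'register op "{op_name}" -> {functor}Functor<{dtype}>')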
9 changes: 4 additions & 5 deletions python/oneflow/nn/utils/clip_grad.py
@@ -120,9 +120,8 @@ def clip_grad_norm_(
             ),
             norm_type,
         )
-        if error_if_nonfinite and (
-            np.isnan(total_norm.to_local().numpy()).all()
-            or np.isinf(total_norm.to_local().numpy()).all()
+        if error_if_nonfinite and flow.logical_or(
+            total_norm.isnan(), total_norm.isinf()
         ):
             raise RuntimeError(
                 f"The total norm of order {norm_type} for gradients from "
@@ -152,8 +151,8 @@
             ),
             norm_type,
         )
-        if error_if_nonfinite and (
-            np.isnan(total_norm.numpy()).all() or np.isinf(total_norm.numpy()).all()
+        if error_if_nonfinite and flow.logical_or(
+            total_norm.isnan(), total_norm.isinf()
         ):
             raise RuntimeError(
                 f"The total norm of order {norm_type} for gradients from "
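Both branches of `clip_grad_norm_` (global tensors, which previously needed `to_local()`, and local ones) now do the non-finite check with OneFlow ops instead of a numpy round-trip, so the norm no longer has to be copied to the host just to test it. The call site is unchanged; a minimal sketch of the guarded path, assuming `oneflow` is installed:

    import oneflow as flow
    import oneflow.nn as nn

    model = nn.Linear(4, 2)
    model(flow.randn(8, 4)).sum().backward()

    # Poison one gradient so the non-finite check fires.
    model.weight.grad[0, 0] = float("inf")
    try:
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0,
                                 error_if_nonfinite=True)
    except RuntimeError as err:
        print("caught:", err)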
9 changes: 9 additions & 0 deletions python/oneflow/test/modules/test_norm.py
@@ -304,6 +304,15 @@ def test_tuple_dim_norm_with_random_data(test_case):
         m = torch.linalg.norm(input, ord=ord, dim=dim, keepdim=keepdim)
         return m
 
+    @autotest(n=5)
+    def test_vector_norm_only_zero_with_random_data(test_case):
+        device = random_device()
+        input = random_tensor(ndim=2).to(device)
+        dim = oneof((-2, -1), (0, 1), (-1, 0))
+        keepdim = random().to(bool)
+        m = torch.linalg.vector_norm(input, ord=0, dim=dim, keepdim=keepdim)
+        return m
+
 
 if __name__ == "__main__":
     unittest.main()
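The new autotest case exercises exactly the fixed path: random 2-D tensors, `ord=0`, and dim pairs that the old hard-coded reduction mishandled, compared against PyTorch over 5 runs. Outside the autotest harness, an equivalent manual forward check might look like this (a sketch, assuming both frameworks are installed):

    import numpy as np
    import oneflow as flow
    import torch

    arr = np.random.randn(3, 4).astype(np.float32)
    for dim in [(-2, -1), (0, 1), (-1, 0)]:
        of_res = flow.linalg.vector_norm(flow.tensor(arr), ord=0, dim=dim)
        th_res = torch.linalg.vector_norm(torch.tensor(arr), ord=0, dim=dim)
        assert np.allclose(of_res.numpy(), th_res.numpy())
    print("ord=0 vector_norm matches PyTorch on all dim pairs")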
