Merge pull request #693 from sony/feature/20200717-disable-grad-inplace
Disable gradient inplace and its check
TE-StephenTiedemann authored Jul 29, 2020
2 parents e0dad67 + 5c80fa4 commit 2361fb5
Showing 24 changed files with 54 additions and 161 deletions.
1 change: 0 additions & 1 deletion include/nbla/computation_graph/function.hpp
@@ -145,7 +145,6 @@ class CgFunction {

void check_data_inplace(int i, CgVariablePtr input,
const vector<CgVariablePtr> &outputs);
void check_grad_inplace(int i, CgVariablePtr input);
void verify_during_forward();
};

26 changes: 0 additions & 26 deletions include/nbla/function.hpp
@@ -218,32 +218,6 @@ class NBLA_API Function {
"This must be implemented for in-place support of this function.");
}

/** Get in-place-level of i-th input variable's grad (see below).
* 0 (NOT_INPLACE): Not in-placed
* 1 (INPLACE_NOT_MODIFY): In-placed but not modified.
* 2 (INPLACE): In-placed and modified.
@param[in] i Input variable index.
@retval Returns 0 by default.
@note If a subclass uses in-place computation, the function must override
this function.
*/
virtual int inplace_grad(int i) const { return NOT_INPLACE; }

/** Get the output variable index where i-th variables' grad in-placed to.
@param[in] i Input variable index.
@note This is only valid if the i-th variable is in-placed.
The maintainer of a sub-class function must override
this function.
*/
virtual int inplace_grad_with(int i) const {
NBLA_ERROR(
error_code::not_implemented,
"This must be implemented for in-place support of this function.");
}

/** A flag for preventing that the graph engine clears buffers of
input variables even if clear_buffer is true and condition mets.
*/
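For reference, a minimal self-contained sketch (simplified stand-ins, not actual nbla declarations) of the interface as it looked before this commit. The level constants follow the removed doc comment: 0 NOT_INPLACE, 1 INPLACE_NOT_MODIFY, 2 INPLACE. Subclasses such as ReLU and LeakyReLU (see the hunks below) previously overrode the grad-side pair with the same pattern as the data-side pair; after this commit only the data-side hooks remain.

#include <cassert>

// Simplified stand-in for nbla::Function; only the in-place hooks are modeled.
struct Function {
  enum { NOT_INPLACE = 0, INPLACE_NOT_MODIFY = 1, INPLACE = 2 };
  virtual ~Function() {}
  virtual int inplace_data(int i) const { return NOT_INPLACE; }  // kept
  virtual int inplace_data_with(int i) const { return 0; }       // kept
  virtual int inplace_grad(int i) const { return NOT_INPLACE; }  // deleted by this commit (shown for reference)
  virtual int inplace_grad_with(int i) const { return 0; }       // deleted by this commit (shown for reference)
};

// Pattern a ReLU-like function used for the hooks it still overrides.
struct ReLULike : Function {
  bool inplace_ = true;
  int inplace_data(int i) const override {
    return inplace_ ? INPLACE : NOT_INPLACE;
  }
  int inplace_data_with(int i) const override { return 0; }
  // Before this commit the class also carried the grad-side twins:
  //   int inplace_grad(int i) const override {
  //     return inplace_ ? INPLACE : NOT_INPLACE;
  //   }
  //   int inplace_grad_with(int i) const override { return 0; }
  // After the commit they are gone and gradients never share an array.
};

int main() {
  ReLULike f;
  assert(f.inplace_data(0) == Function::INPLACE);     // data may still be shared
  assert(f.inplace_grad(0) == Function::NOT_INPLACE); // grad never is, post-change
  return 0;
}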
6 changes: 0 additions & 6 deletions include/nbla/function/add2.hpp
@@ -71,12 +71,6 @@ template <typename T> class Add2 : public BaseFunction<bool> {
// 0 is okay because never be called in the case of i != 0.
return 0;
}
virtual int inplace_grad(int i) const {
if (this->fall_back_func_ || !inplace_ || i > 0)
return Function::NOT_INPLACE;
return Function::INPLACE_NOT_MODIFY;
}
virtual int inplace_grad_with(int i) const { return 0; }

protected:
NBLA_API virtual void setup_impl(const Variables &inputs,
4 changes: 0 additions & 4 deletions include/nbla/function/function_impl.hpp.tmpl
@@ -89,10 +89,6 @@ public:
// }
// virtual int inplace_data_with(int i) const {
// }
// virtual int inplace_grad(int i) const {
// }
// virtual int inplace_grad_with(int i) const {
// }
// TODO: If you want to avoid clearing input buffers in any case, define this function returning true.
// virtual bool prohibit_clear_input_buffers() const {
// return true;
5 changes: 0 additions & 5 deletions include/nbla/function/leaky_relu.hpp
@@ -70,15 +70,10 @@ template <typename T> class LeakyReLU : public BaseFunction<float, bool> {
virtual vector<string> allowed_array_classes() {
return SingletonManager::get<Cpu>()->array_classes();
}
virtual bool grad_depends_output_data(int i, int o) const { return inplace_; }
virtual int inplace_data(int i) const {
return inplace_ ? Function::INPLACE : Function::NOT_INPLACE;
}
virtual int inplace_data_with(int i) const { return 0; }
virtual int inplace_grad(int i) const {
return inplace_ ? Function::INPLACE : Function::NOT_INPLACE;
}
virtual int inplace_grad_with(int i) const { return 0; }

protected:
NBLA_API virtual void setup_impl(const Variables &inputs,
4 changes: 0 additions & 4 deletions include/nbla/function/random_erase.hpp
@@ -109,10 +109,6 @@ class RandomErase
return inplace_ ? Function::INPLACE : Function::NOT_INPLACE;
}
virtual int inplace_data_with(int i) const { return 0; }
virtual int inplace_grad(int i) const {
return inplace_ ? Function::INPLACE : Function::NOT_INPLACE;
}
virtual int inplace_grad_with(int i) const { return 0; }

protected:
NBLA_API virtual void setup_impl(const Variables &inputs,
4 changes: 0 additions & 4 deletions include/nbla/function/relu.hpp
@@ -69,10 +69,6 @@ template <typename T> class ReLU : public BaseFunction<bool> {
return inplace_ ? Function::INPLACE : Function::NOT_INPLACE;
}
virtual int inplace_data_with(int i) const { return 0; }
virtual int inplace_grad(int i) const {
return inplace_ ? Function::INPLACE : Function::NOT_INPLACE;
}
virtual int inplace_grad_with(int i) const { return 0; }

protected:
NBLA_API virtual void setup_impl(const Variables &inputs,
4 changes: 0 additions & 4 deletions include/nbla/function/reshape.hpp
@@ -72,10 +72,6 @@ class Reshape : public BaseFunction<const vector<int> &, bool> {
return inplace_ ? Function::INPLACE_NOT_MODIFY : Function::NOT_INPLACE;
}
virtual int inplace_data_with(int i) const { return 0; }
virtual int inplace_grad(int i) const {
return inplace_ ? Function::INPLACE_NOT_MODIFY : Function::NOT_INPLACE;
}
virtual int inplace_grad_with(int i) const { return 0; }

protected:
NBLA_API virtual void setup_impl(const Variables &inputs,
2 changes: 0 additions & 2 deletions python/src/nnabla/function.pxd.tmpl
@@ -50,8 +50,6 @@ cdef extern from "nbla/function.hpp" namespace "nbla":
cpp_bool grad_depends_output_data(int i, int o) except+
int inplace_data(int i) except+
int inplace_data_with(int i) except+
int inplace_grad(int i) except+
int inplace_grad_with(int i) except+

ctypedef shared_ptr[CFunction] FunctionPtr

6 changes: 0 additions & 6 deletions python/src/nnabla/function.pyx.tmpl
@@ -239,12 +239,6 @@ cdef class Function:
def inplace_data_with(self, int i):
return self.funp.function().get().inplace_data_with(i)

def inplace_grad(self, int i):
return self.funp.function().get().inplace_grad(i)

def inplace_grad_with(self, int i):
return self.funp.function().get().inplace_grad_with(i)

@property
def need_grad(self):
return self.funp.need_grad()
3 changes: 1 addition & 2 deletions python/src/nnabla/utils/cli/profile.py
@@ -150,8 +150,7 @@ def prepare_backward():

def backward():
o.network.backward_function(seq)
in_place_str = ' : in_place' if seq.func.function_instance.inplace_grad(
0) > 0 else ''
in_place_str = ''
profile(config, 'backward_function (%s : %s%s)' % (
seq.func.name, seq.func.function_instance.name, in_place_str), backward, result_dict, synchronize)

2 changes: 1 addition & 1 deletion python/src/nnabla/utils/network.py
@@ -151,7 +151,7 @@ class BackwardSequenceItem:
seq.func = func
for i, v in enumerate(func.variable_inputs):
accum = (
v in backward_sequence.grad_variables or v in backward_sequence.parameters) and not func.function_instance.inplace_grad(i)
v in backward_sequence.grad_variables or v in backward_sequence.parameters)
seq.accum_grad.append(accum)
if not v in backward_sequence.grad_variables:
backward_sequence.grad_variables.append(v)
2 changes: 1 addition & 1 deletion python/test/function/test_interpolate.py
@@ -498,4 +498,4 @@ def test_interpolate_nearest_double_backward(seed, inshape, outsize, scale, sdim
half_pixel, half_pixel_for_nn, channel_last]
backward_function_tester(rng, F.interpolate, ref_interpolate, inputs,
func_name=func_name, func_args=func_args,
atol_f=1e-6, atol_b=1e-2, atol_accum=1e-2, dstep=2e-3, ctx=ctx)
atol_f=1e-6, atol_b=5e-2, atol_accum=5e-2, dstep=1e-3, ctx=ctx)
4 changes: 0 additions & 4 deletions python/test/nbla_test_utils.py
@@ -628,10 +628,6 @@ def reset_ograds():
continue
f = o[0].parent

# If input's grad is inplaced, the test doesn't work correctly.
if f.inplace_grad(i):
continue

# Prepare function inputs
finputs = list(filter(lambda x: x is not None, vinputs))

2 changes: 0 additions & 2 deletions python/test/test_variable.py
@@ -113,8 +113,6 @@ def test_reshape():
assert np.all(v2_s.g == 1)
v2.d = 1
assert np.all(v2_s.d == 1)
v2.g = 1.5
assert np.all(v2_s.g == 1.5)

# Check unlink
v2_su = v2.reshape((3, 4, 2), unlink=True)
22 changes: 0 additions & 22 deletions src/nbla/computation_graph/function.cpp
@@ -94,27 +94,6 @@ void CgFunction::check_data_inplace(int i, CgVariablePtr input,
}
}

void CgFunction::check_grad_inplace(int i, CgVariablePtr input) {
if (!input->need_grad_state()) {
return;
}
auto f = this->function();
int inplace_level = f->inplace_grad(i);
if (inplace_level == Function::INPLACE) {
NBLA_CHECK(input->parent(), error_code::value,
"A grad array of a root variable in a graph cannot be "
"in-placed (%d-th input of '%s').",
i, f->name().c_str());
}
if (inplace_level >= Function::INPLACE_NOT_MODIFY) {
NBLA_CHECK(input->function_reference_count() < 2, error_code::value,
"In-placing grad at a variable which branches"
" is prohibited. %d-th input "
"grad of `%s` (depth=%d) is inplaced.",
i, f->name().c_str(), this->rank());
}
}

void CgFunction::verify_during_forward() {
for (auto o : this->outputs()) {
o->set_allow_modify_data(true);
@@ -123,7 +102,6 @@ void CgFunction::verify_during_forward() {
auto outputs = this->outputs();
for (int i = 0; i < inputs.size(); ++i) {
this->check_data_inplace(i, inputs[i], outputs);
this->check_grad_inplace(i, inputs[i]);
}
}

10 changes: 0 additions & 10 deletions src/nbla/computation_graph/variable.cpp
@@ -289,9 +289,6 @@ class BackwardCallback {
if (f->inplace_data(i)) {
clear[f->inplace_data_with(i)].first = false;
}
if (f->inplace_grad(i)) {
clear[f->inplace_grad_with(i)].second = false;
}
}
for (int o = 0; o < outputs.size(); ++o) {
if (prohibit_clear[o] || outputs[o]->persistent()) {
@@ -364,13 +361,6 @@ class BackwardCallback {
it->second = true;
}
}
if (func->function()->inplace_grad(i)) {
auto inplaced = outputs[func->function()->inplace_grad_with(i)];
auto it2 = vseen_.find(inplaced);
if (it2 == vseen_.end() || it2->second) {
it->second = true;
}
}
}
return ret;
}
3 changes: 1 addition & 2 deletions src/nbla/function.cpp
@@ -148,8 +148,7 @@ void Function::backward(const Variables &inputs, const Variables &outputs,
// array instance.
if (!this->prohibit_zero_input_grad()) {
for (int i = 0; i < inputs.size(); i++) {
if (propagate_down[i] && !accum[i] &&
(this->inplace_grad(i) == Function::NOT_INPLACE)) {
if (propagate_down[i] && !accum[i]) {
inputs[i]->grad()->zero();
}
}
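A framework-free sketch (illustration only, and only a plausible reading of the dropped guard) of the aliasing case the removed inplace_grad(i) == NOT_INPLACE condition protected against: when an input gradient was in-placed onto the output gradient, zeroing the input grad before backward would also have wiped the incoming gradient. Since this commit removes gradient in-placing entirely, dx and dy are always distinct buffers and the unconditional zeroing is safe.

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  // Old behaviour when a function declared grad in-placing: the input grad
  // pointer dx aliased the output grad pointer dy on the same array.
  std::vector<float> shared(4, 1.0f);
  float *dy = shared.data();
  float *dx = shared.data();
  // What the engine now does unconditionally for propagate_down && !accum:
  std::fill(dx, dx + 4, 0.0f);
  // With aliasing this also destroys the incoming gradient, which is
  // presumably why the old code skipped the zeroing for in-placed grads.
  assert(dy[0] == 0.0f);
  return 0;
}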
17 changes: 6 additions & 11 deletions src/nbla/function/generic/add2.cpp
@@ -29,7 +29,6 @@ void Add2<T>::setup_impl(const Variables &inputs, const Variables &outputs) {
outputs[0]->reshape(inputs[0]->shape(), true);
if (inplace_) {
outputs[0]->data()->set_array(inputs[0]->data()->array());
outputs[0]->grad()->set_array(inputs[0]->grad()->array());
}
return;
}
@@ -69,16 +68,12 @@ void Add2<T>::backward_impl(const Variables &inputs, const Variables &outputs,

for (int i = 0; i < 2; ++i) {
if (propagate_down[i]) {
T *dx = inputs[i]->cast_grad_and_get_pointer<T>(
this->ctx_, !((i == 0 && inplace_) || accum[i]));
// dx == dy at i = 1 never happens.
if (dx != dy) {
// Not in-place
if (accum[i])
add2_backward_cpu<T, true>(size, dx, dy);
else
add2_backward_cpu<T, false>(size, dx, dy);
}
T *dx = inputs[i]->cast_grad_and_get_pointer<T>(this->ctx_,
!(i == 0 || accum[i]));
if (accum[i])
add2_backward_cpu<T, true>(size, dx, dy);
else
add2_backward_cpu<T, false>(size, dx, dy);
}
}
}
46 changes: 33 additions & 13 deletions src/nbla/function/generic/fused_batch_normalization.cpp
@@ -26,6 +26,27 @@ namespace nbla {
NBLA_REGISTER_FUNCTION_SOURCE(FusedBatchNormalization, const vector<int> &,
float, float, bool, const string &);

namespace fused_batch_normalization {
// These functions are special cases for the fused batch normalization
template <typename T>
void relu_backward(int size, T *dx, const T *dy, const T *y) {
for (int i = 0; i < size; i++) {
if (y[i] > 0)
dx[i] = dy[i];
else
dx[i] = T(0);
}
}

template <typename T>
void add2_backward(int size, T *dx1, const T *dx, bool accum) {
bool accum_bn = false; // Whatever since it's inplaced.
for (int i = 0; i < size; i++) {
dx1[i] = accum ? dx1[i] + dx[i] : dx[i];
}
}
}

template <class T>
void FusedBatchNormalization<T>::setup_impl(const Variables &inputs,
const Variables &outputs) {
@@ -75,23 +96,22 @@ void FusedBatchNormalization<T>::backward_impl(
bool prop_down_bn =
std::accumulate(propagate_down.begin(), propagate_down.begin() + 3, false,
std::logical_or<bool>());

bool accum_relu = false; // Whatever because inout are inplaced.
auto relu = create_ReLU(this->ctx_, true);
relu->setup(Variables{outputs[0]}, Variables{outputs[0]}); // Inplace
relu->backward(Variables{outputs[0]}, Variables{outputs[0]},
{prop_down_add2 || prop_down_bn}, {accum_relu});
auto y = outputs[0]->get_data_pointer<T>(this->ctx_);
auto dx = outputs[0]->cast_grad_and_get_pointer<T>(this->ctx_);
auto dy = outputs[0]->get_grad_pointer<T>(this->ctx_);
auto size = outputs[0]->size();
if (prop_down_add2 || prop_down_bn) {
fused_batch_normalization::relu_backward(size, dx, dy, y);
}

// 2. Perform Add2 backward
// NOTE: Output buffer are re-used by inplacing.
// NOTE: Output buffer for the first operand of the addition are re-used by
// inplacing,
// nothing done for it.
if (prop_down_add2) {
auto add2 = create_Add2(this->ctx_, true);
bool accum_bn = false; // Whatever since it's inplaced.
add2->setup(Variables{outputs[0], inputs[5]}, Variables{outputs[0]});
add2->backward(Variables{outputs[0], inputs[5]}, Variables{outputs[0]},
{prop_down_bn, prop_down_add2}, {accum_bn, accum[5]});
auto dx1 = inputs[5]->cast_grad_and_get_pointer<T>(this->ctx_);
fused_batch_normalization::add2_backward(size, dx1, dx, accum[5]);
}

// 3. Perform BN backward
Variables inputs_bn(inputs.begin(), inputs.begin() + 5);
vector<bool> prop_down_bn_inputs(propagate_down.begin(),
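A standalone sketch (illustration only; the helpers are condensed copies of the ones added above, and the small main() with its made-up values is purely hypothetical) of how the new local helpers compose for y = ReLU(bn_out + z): relu_backward masks the incoming gradient by (y > 0) in place on the output's grad buffer, then add2_backward routes that result into the residual input's grad (inputs[5] in the real code), accumulating when requested. The BN backward itself is unchanged and consumes the masked gradient afterwards.

#include <cassert>

template <typename T>
void relu_backward(int size, T *dx, const T *dy, const T *y) {
  for (int i = 0; i < size; i++)
    dx[i] = (y[i] > 0) ? dy[i] : T(0);
}

template <typename T>
void add2_backward(int size, T *dx1, const T *dx, bool accum) {
  for (int i = 0; i < size; i++)
    dx1[i] = accum ? dx1[i] + dx[i] : dx[i];
}

int main() {
  float y[3] = {0.f, 2.f, 3.f};   // fused output after the ReLU
  float dy[3] = {1.f, 1.f, 1.f};  // incoming gradient on outputs[0]
  float dz[3] = {5.f, 5.f, 5.f};  // existing gradient of the addend input
  relu_backward(3, dy, dy, y);    // step 1: in place, dx aliases dy here
  add2_backward(3, dz, dy, true); // step 2: accum == true, so add
  assert(dy[0] == 0.f && dy[1] == 1.f && dy[2] == 1.f);
  assert(dz[0] == 5.f && dz[1] == 6.f && dz[2] == 6.f);
  return 0;                       // step 3 (BN backward) is unchanged
}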
16 changes: 4 additions & 12 deletions src/nbla/function/generic/leaky_relu.cpp
@@ -34,7 +34,6 @@ void LeakyReLU<T>::setup_impl(const Variables &inputs,
alpha_ > 0, error_code::value,
"Alpha must be greater than zero with inplace option being true.");
outputs[0]->data()->set_array(inputs[0]->data()->array());
outputs[0]->grad()->set_array(inputs[0]->grad()->array());
}
}

@@ -78,18 +77,11 @@ void LeakyReLU<T>::backward_impl(const Variables &inputs,
return;
}
const T *x = inputs[0]->get_data_pointer<T>(this->ctx_);
T *dx = inputs[0]->cast_grad_and_get_pointer<T>(this->ctx_,
!(inplace_ || accum[0]));
T *dx = inputs[0]->cast_grad_and_get_pointer<T>(this->ctx_, !accum[0]);
const T *dy = outputs[0]->get_grad_pointer<T>(this->ctx_);
if (dx != dy) {
// not in-place
if (accum[0])
leaky_relu_backward_cpu<T, true>(inputs[0]->size(), alpha_, dx, dy, x);
else
leaky_relu_backward_cpu<T, false>(inputs[0]->size(), alpha_, dx, dy, x);
} else {
// in-place
if (accum[0])
leaky_relu_backward_cpu<T, true>(inputs[0]->size(), alpha_, dx, dy, x);
else
leaky_relu_backward_cpu<T, false>(inputs[0]->size(), alpha_, dx, dy, x);
}
}
}
4 changes: 1 addition & 3 deletions src/nbla/function/generic/random_erase.cpp
@@ -212,7 +212,6 @@ void RandomErase<T>::setup_impl(const Variables &inputs,
outputs[0]->reshape(inputs[0]->shape(), true);
if (inplace_) {
outputs[0]->data()->set_array(inputs[0]->data()->array());
outputs[0]->grad()->set_array(inputs[0]->grad()->array());
}

rgen_ = std::mt19937((seed_ == -1 ? std::random_device()() : seed_));
@@ -289,8 +288,7 @@ void RandomErase<T>::backward_impl(const Variables &inputs,
auto H = shape[base_axis_ + 1];
auto W = shape[base_axis_ + 2];

T *g_x = inputs[0]->cast_grad_and_get_pointer<T>(this->ctx_,
!(inplace_ || accum[0]));
T *g_x = inputs[0]->cast_grad_and_get_pointer<T>(this->ctx_, !accum[0]);
const T *g_y = outputs[0]->get_grad_pointer<T>(this->ctx_);

// STE