From a8e341b1407455003cf30b08520afcbe174d59e3 Mon Sep 17 00:00:00 2001
From: Hao Jin
Date: Wed, 1 Aug 2018 23:49:16 +0000
Subject: [PATCH] add handling for grad req type other than kNullOp for indices

---
 src/operator/tensor/indexing_op.h      |  9 +++++++--
 tests/python/unittest/test_operator.py | 27 ++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index edaf9397303a..1daf0a2cb18a 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -1034,8 +1034,8 @@ void TakeOpBackward(const nnvm::NodeAttrs& attrs,
   using namespace mshadow::expr;
   CHECK_EQ(inputs.size(), 2U);
   CHECK_EQ(outputs.size(), 2U);
-  CHECK_EQ(req[take_::kIdx], kNullOp)
-    << "take layer doesn't support gradient into index";
+  CHECK_NE(req[take_::kIdx], kAddTo)
+    << "take layer doesn't support gradient of req type kAddTo to index";
 
   const TakeParam& param = nnvm::get<TakeParam>(attrs.parsed);
 
@@ -1052,6 +1052,11 @@ void TakeOpBackward(const nnvm::NodeAttrs& attrs,
       const TShape& arrshape = outputs[0].shape_;
       const TShape& oshape = inputs[0].shape_;
 
+      if (req[take_::kIdx] != kNullOp) {
+        mxnet_op::Kernel<mxnet_op::set_zero, xpu>::Launch(
+          s, idxshape.Size(), outputs[take_::kIdx].dptr<IType>());
+      }
+
       const int actual_axis = param.axis + ((param.axis < 0) ? arrshape.ndim() : 0);
 
       int idxndim = idxshape.ndim();
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index c395199e8ea4..aeda36d8b487 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -3810,6 +3810,31 @@ def check_output_n_grad(data_shape, idx_shape, axis, mode):
             exe.backward([mx.nd.array(grad_out)])
             assert_almost_equal(exe.grad_dict['a'].asnumpy(), grad_in)
 
+    def check_autograd_req():
+        row_len = 2
+        col_len = 8
+        shape = (row_len, col_len)
+        sc = mx.nd.random.uniform(-1.0, 1.0, shape=shape, dtype="float32")
+        sc.attach_grad()
+        i = mx.nd.array([0], dtype="int64")
+        j = mx.nd.array([0], dtype="int64")
+        with mx.autograd.record(train_mode=True):
+            xs = []
+            for _ in range(row_len):
+                x_i = []
+                for _ in range(col_len):
+                    x_ij = sc.take(i).squeeze(axis=0).take(j).squeeze(axis=0)
+                    x_i.append(x_ij)
+                    j = j + 1
+                i = i + 1
+                j = j - col_len  # reset j
+                xs.append(mx.nd.stack(*x_i))
+            x = mx.nd.stack(*xs)
+            x = x.sum()
+
+        x.backward()
+        assert_almost_equal(np.ones(sc.grad.shape), sc.grad.asnumpy())
+
     for mode in ['clip', 'wrap']:
         for data_ndim in range(1, 5):
             for idx_ndim in range(1, 4):
@@ -3822,6 +3847,8 @@ def check_output_n_grad(data_shape, idx_shape, axis, mode):
                         idx_shape += (np.random.randint(low=1, high=5), )
                     check_output_n_grad(data_shape, idx_shape, axis, mode)
 
+    check_autograd_req()
+
 
 @with_seed()
 def test_grid_generator():
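
---
Illustrative note (not part of the applied diff): the snippet below is a condensed sketch,
modeled on the new check_autograd_req test, of the situation this patch enables. It assumes
an MXNet build that includes this change; the toy array and variable names are made up for
the example. Because the index array is produced by a recorded op (i + 1), the backward pass
assigns it a non-null write-type gradient request; TakeOpBackward now fills that gradient
with zeros instead of failing the old CHECK_EQ(req[take_::kIdx], kNullOp).

    import mxnet as mx

    # Hypothetical toy data; any 2-D array works for this illustration.
    data = mx.nd.array([[1.0, 2.0], [3.0, 4.0]])
    data.attach_grad()
    i = mx.nd.array([0], dtype="int64")

    with mx.autograd.record(train_mode=True):
        rows = []
        for _ in range(2):
            # After the first iteration, i is the output of a recorded op (i + 1),
            # so backward propagates a (zero) gradient into it; before this patch
            # that tripped the CHECK on the index gradient request.
            rows.append(data.take(i).squeeze(axis=0))
            i = i + 1
        out = mx.nd.stack(*rows).sum()

    out.backward()
    print(data.grad.asnumpy())  # expected: all ones, one contribution per row

Note that kAddTo remains unsupported for the index input, which is why the CHECK is relaxed
to CHECK_NE(req[take_::kIdx], kAddTo) rather than removed entirely.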