Refactor sparse tensor code (apache#99)
* Initial check-in; test_sparse_ndarray passes

* Fix test failure

* Clean up

* Clean up

* Move init backend op to ndarray_utils

* Fix lint

* Eliminate circular dependency on headers

* More refactor

* Fix gpu build and consolidate Slice for dense and sparse

* Clean up

* More refactor

* Clean up

* Fix gpu build

* Fix comment
reminisce authored and eric-haibin-lin committed Jun 19, 2017
1 parent b2b3af2 commit 24105cf
Showing 42 changed files with 934 additions and 900 deletions.
12 changes: 0 additions & 12 deletions include/mxnet/c_api.h
@@ -391,18 +391,6 @@ MXNET_DLL int MXNDArraySlice(NDArrayHandle handle,
mx_uint slice_end,
NDArrayHandle *out);

/*!
* \brief Slice the NDArray with non-default storage along axis 0.
* \param handle the handle to the NDArray
* \param slice_begin The beginning index of slice
* \param slice_end The ending index of slice
* \param out The NDArrayHandle of sliced NDArray
* \return 0 when success, -1 when failure happens
*/
MXNET_DLL int MXNDArraySliceEx(NDArrayHandle handle,
mx_uint slice_begin,
mx_uint slice_end,
NDArrayHandle out);
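// Illustrative sketch, not part of this commit: with MXNDArraySliceEx gone,
// the MXNDArraySlice entry point above serves both default and sparse storage
// ("consolidate Slice for dense and sparse" in the commit message). A
// hypothetical C++ caller, assuming <mxnet/c_api.h> is included:
NDArrayHandle SliceRows(NDArrayHandle src, mx_uint begin, mx_uint end) {
  NDArrayHandle out = nullptr;
  // Slice rows [begin, end); on failure MXGetLastError() holds the message.
  if (MXNDArraySlice(src, begin, end, &out) != 0) return nullptr;
  return out;
}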
/*!
* \brief Index the NDArray along axis 0.
* \param handle the handle to the NDArray
13 changes: 0 additions & 13 deletions include/mxnet/io.h
@@ -44,19 +44,6 @@ class IIterator : public dmlc::DataIter<DType> {
}
}; // class IIterator

/*!
* \brief iterator type
* \param DType data type
*/
template<typename DType>
class SparseIIterator : public IIterator<DType> {
public:
/*! \brief storage type of the data or label */
virtual const NDArrayStorageType GetStorageType(bool is_data) const = 0;
/*! \brief shape of the data or label */
virtual const TShape GetShape(bool is_data) const = 0;
}; // class SparseIIterator

/*! \brief a single data instance */
struct DataInst {
/*! \brief unique id for instance */
153 changes: 10 additions & 143 deletions include/mxnet/ndarray.h
@@ -28,16 +28,6 @@
#endif

namespace mxnet {
// forward declarations
class NDArray;

namespace op {
template<typename xpu>
void FillZerosRspImpl(mshadow::Stream<xpu> *s, NDArray *dst);

template<typename xpu>
void CastStorageComputeImpl(mshadow::Stream<xpu> *s, const NDArray& input, const NDArray& output);
};

namespace ndarray {
template<typename from_xpu, typename to_xpu>
@@ -202,7 +192,7 @@ class NDArray {
* nnz that is smaller than nnz1+nnz2. Therefore, the storage shape's size
* needs to be shrunk from nnz1+nnz2 to nnz.
*/
inline void SetStorageShape(const TShape& sshape) {
inline void set_storage_shape(const TShape& sshape) {
CHECK(storage_type() != kDefaultStorage);
ptr_->storage_shape = sshape;
}
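// Illustrative sketch, not part of this commit: an operator that allocates a
// row-sparse output with the upper bound nnz1 + nnz2 shrinks it once the true
// nnz is known. Hypothetical member shown only to demonstrate the intended
// call pattern; it assumes the storage_shape() accessor of this class.
inline void ShrinkRowSparseTo(size_t nnz) {
  CHECK_EQ(storage_type(), kRowSparseStorage);
  TShape sshape = storage_shape();   // allocated with the nnz upper bound
  sshape[0] = nnz;                   // shrink the leading (row) dimension
  set_storage_shape(sshape);
  set_aux_shape(rowsparse::kIdx, mshadow::Shape1(nnz));
}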
@@ -222,7 +212,7 @@
* for the final result. After the operation is done, the exact size of
* the shape is known and need to be reset using this function.
*/
inline void SetAuxShape(size_t i, const TShape& shape) const {
inline void set_aux_shape(size_t i, const TShape& shape) const {
ptr_->aux_shapes[i] = shape;
}

@@ -269,19 +259,6 @@
CHECK(!is_none());
return ptr_->aux_types[i];
}
/*!
* \return the number of aux data used for given storage type
*/
static size_t NumAuxData(NDArrayStorageType stype) {
size_t num = 0;
switch (stype) {
case kDefaultStorage: num = 0; break;
case kCSRStorage: num = 2; break;
case kRowSparseStorage: num = 1; break;
default: LOG(FATAL) << "Unknown storage type" << stype; break;
}
return num;
}

inline NDArrayStorageType storage_type() const {
if (is_none()) return kUndefinedStorage;
@@ -452,13 +429,6 @@ class NDArray {
*/
NDArray Slice(index_t begin, index_t end) const;

/*!
* \brief Slice a NDArray with non-default storage
* \param begin begin index in first dim (inclusive)
* \param end end index in first dim (exclusive)
* \return sliced NDArray
*/
void SliceEx(index_t begin, index_t end, NDArray *dst) const;
/*!
* \brief Index a NDArray
* \param idx the index
@@ -603,13 +573,7 @@

/*! \brief default constructor */
Chunk() : static_data(true), delay_alloc(false) {}
/*
if (data.dev_mask() == cpu::kDevMask) {
shandle.ctx = Context::CPU();
} else {
CHECK_EQ(data.dev_mask(), gpu::kDevMask);
shandle.ctx = Context::GPU(dev_id);
*/

/*! \brief construct a new chunk */
Chunk(TShape shape, Context ctx_, bool delay_alloc_, int dtype)
: static_data(false), delay_alloc(true), ctx(ctx_) {
@@ -744,7 +708,7 @@ class NDArray {
}
}, shandle.ctx, var);
}
};
}; // struct Chunk

void SetTBlob() const {
CHECK(ptr_ != nullptr);
@@ -767,7 +731,6 @@
#endif
}


#if MKL_EXPERIMENTAL == 1
std::shared_ptr<MKLMemHolder> Mkl_mem_;
#endif
@@ -789,7 +752,12 @@
* this situation.
*/
mutable TBlob tblob_;
};
}; // class NDArray

/*!
* \return the number of aux data used for given storage type
*/
size_t num_aux_data(NDArrayStorageType stype);
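// Sketch, not part of this diff: the definition of num_aux_data lives outside
// the headers shown here, but a faithful version mirrors the static
// NDArray::NumAuxData member removed above:
size_t num_aux_data(NDArrayStorageType stype) {
  size_t num = 0;
  switch (stype) {
    case kDefaultStorage: num = 0; break;    // dense: no auxiliary arrays
    case kCSRStorage: num = 2; break;        // CSR: indptr + column indices
    case kRowSparseStorage: num = 1; break;  // row-sparse: row index array
    default: LOG(FATAL) << "Unknown storage type " << stype; break;
  }
  return num;
}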

/*!
* \brief issue a copy operation from one NDArray to another
@@ -805,107 +773,6 @@ class NDArray {
*/
void CopyFromTo(const NDArray &from, NDArray *to, int priority = 0);

// Make a copy of a CSR NDArray
template<typename from_xpu, typename to_xpu>
inline void CopyFromToCsrImpl(const NDArray from, NDArray *to, RunContext ctx) {
using namespace mshadow;
CHECK_EQ(from.storage_type(), to->storage_type()) << "Copying with different storage type";
// if source storage is not initialized, fill destination with zeros
auto s = ctx.get_stream<to_xpu>();
if (!from.storage_initialized()) {
// TODO(haibin) implement FillZerosCsrImpl
// op::FillZerosCsrImpl<to_xpu>(s, to);
return;
}
// Allocate storage
to->CheckAndAllocAuxData(csr::kIndPtr, from.aux_shape(csr::kIndPtr));
to->CheckAndAllocAuxData(csr::kIdx, from.aux_shape(csr::kIdx));
to->CheckAndAllocData(from.aux_shape(csr::kIdx));
// FIXME This is a naive implementation for CSR copy. It, however, is
// not efficient when the source CSR is sliced. In that case, we're copying
// a superset of values and indices of the slice.
// Ideally, we should truncate the values and indices array, and adjust indptr
// accordingly.
TBlob val = to->data();
TBlob indptr = to->aux_data(csr::kIndPtr);
TBlob idx = to->aux_data(csr::kIdx);
ndarray::Copy<from_xpu, to_xpu>(from.data(), &val,
from.ctx(), to->ctx(), ctx);
ndarray::Copy<from_xpu, to_xpu>(from.aux_data(csr::kIndPtr), &indptr,
from.ctx(), to->ctx(), ctx);
ndarray::Copy<from_xpu, to_xpu>(from.aux_data(csr::kIdx), &idx,
from.ctx(), to->ctx(), ctx);
}

// Make a copy of a row-sparse NDArray
template<typename from_xpu, typename to_xpu>
inline void CopyFromToRspImpl(const NDArray from, NDArray *to, RunContext ctx) {
using namespace mshadow;
CHECK_EQ(from.storage_type(), to->storage_type()) << "Copying with different storage type";
// if source is zeros, fill destination with zeros, too
auto s = ctx.get_stream<to_xpu>();
if (!from.storage_initialized()) {
op::FillZerosRspImpl<to_xpu>(s, to);
return;
}
auto aux_shape = from.aux_shape(rowsparse::kIdx);
to->CheckAndAlloc({aux_shape});
TBlob val = to->data();
TBlob idx = to->aux_data(rowsparse::kIdx);
ndarray::Copy<from_xpu, to_xpu>(from.data(), &val,
from.ctx(), to->ctx(), ctx);
ndarray::Copy<from_xpu, to_xpu>(from.aux_data(rowsparse::kIdx), &idx,
from.ctx(), to->ctx(), ctx);
}

// Make a copy of a dense NDArray
template<typename from_xpu, typename to_xpu>
inline void CopyFromToDnsImpl(const NDArray from, NDArray *to, RunContext ctx) {
using namespace mshadow;
CHECK_EQ(from.storage_type(), to->storage_type()) << "Copying with different storage type";
TBlob tmp = to->data();
ndarray::Copy<from_xpu, to_xpu>(from.data(), &tmp,
from.ctx(), to->ctx(), ctx);
}

// Make a copy of an NDArray based on storage type
template<typename from_xpu, typename to_xpu>
void CopyFromToImpl(const NDArray from, NDArray *to, RunContext ctx) {
using namespace std;
using namespace mshadow;
// if storage type doesn't match, cast the storage first
auto from_stype = from.storage_type();
auto to_stype = to->storage_type();
NDArray casted_nd;
if (from_stype != to_stype) {
TShape shape = from.shape();
auto from_ctx = from.ctx();
auto s = ctx.get_stream<from_xpu>();
// TODO(haibin) inplace conversion
if (to_stype == kDefaultStorage) {
casted_nd = NDArray(shape, from_ctx);
} else {
casted_nd = NDArray(to_stype, shape, from_ctx);
}
op::CastStorageComputeImpl<from_xpu>(s, from, casted_nd);
} else {
casted_nd = from;
}
if (to_stype == kDefaultStorage) {
CopyFromToDnsImpl<from_xpu, to_xpu>(casted_nd, to, ctx);
} else if (to_stype == kRowSparseStorage) {
CopyFromToRspImpl<from_xpu, to_xpu>(casted_nd, to, ctx);
} else if (to_stype == kCSRStorage) {
CopyFromToCsrImpl<from_xpu, to_xpu>(casted_nd, to, ctx);
} else {
LOG(FATAL) << "unknown storage type" << to_stype;
}
if (is_same<from_xpu, mshadow::gpu>::value || is_same<to_xpu, mshadow::gpu>::value) {
// Wait GPU kernel to complete
ctx.get_stream<gpu>()->Wait();
}
}
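// Illustrative usage, not part of this commit: because CopyFromToImpl casts
// storage first when the source and destination storage types differ, copying
// a row-sparse array into a dense one needs no special handling at the call
// site. Hypothetical helper; argument names are assumed:
inline void CopyRowSparseToDense(const NDArray &rsp, NDArray *dense) {
  // `rsp` uses kRowSparseStorage, `dense` uses default (dense) storage.
  CopyFromTo(rsp, dense);   // the copy is pushed to the engine asynchronously
  dense->WaitToRead();      // block until the cast + copy have completed
}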

/*!
* \brief Perform elementwise sum over each data from source, store result into out.
* \param source the ndarray we want to sum
3 changes: 0 additions & 3 deletions include/mxnet/op_attr_types.h
@@ -17,9 +17,6 @@
#include "./operator.h"
#include "./ndarray.h"

#define FCOMP_EX_CPU "FComputeEx<cpu>"
#define FCOMP_EX_GPU "FComputeEx<gpu>"

namespace mxnet {

using nnvm::NodeAttrs;
2 changes: 2 additions & 0 deletions python/mxnet/__init__.py
@@ -9,6 +9,7 @@
from . import contrib
from . import ndarray
from . import sparse_ndarray
from . import ndarray_utils
from . import name
# use mx.sym as short for symbol
from . import symbol as sym
@@ -20,6 +21,7 @@
# use mx.nd as short for mx.ndarray
from . import ndarray as nd
from . import sparse_ndarray as sparse_nd
from . import ndarray_utils as nd_utils
# use mx.rnd as short for mx.random
from . import random as rnd
from . import random
1 change: 0 additions & 1 deletion python/mxnet/contrib/autograd.py
@@ -8,7 +8,6 @@
from ..base import _LIB, check_call, string_types
from ..base import mx_uint, NDArrayHandle, c_array
# pylint: disable= unused-import
from ..sparse_ndarray import SparseNDArray
from ..ndarray import NDArray, zeros_like
from ..symbol import _GRAD_REQ_MAP

5 changes: 2 additions & 3 deletions python/mxnet/module/module.py
@@ -7,9 +7,9 @@
import logging
import warnings

import mxnet as mx
from .. import context as ctx
from .. import ndarray as nd
from .. import sparse_ndarray as sparse_nd
from .. import optimizer as opt

from .executor_group import DataParallelExecutorGroup
@@ -399,7 +399,7 @@ def bind(self, data_shapes, label_shapes=None, for_training=True,
else:
assert self._arg_params is None and self._aux_params is None
param_arrays = [
sparse_nd.zeros(x[0].storage_type, x[0].shape, dtype=x[0].dtype)
mx.nd.zeros(shape=x[0].shape, dtype=x[0].dtype, storage_type=x[0].storage_type)
for x in self._exec_group.param_arrays
]
self._arg_params = {name:arr for name, arr in zip(self._param_names, param_arrays)}
@@ -413,7 +413,6 @@ def bind(self, data_shapes, label_shapes=None, for_training=True,
if shared_module is not None and shared_module.optimizer_initialized:
self.borrow_optimizer(shared_module)


def reshape(self, data_shapes, label_shapes=None):
"""Reshapes the module for new input shapes.