This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Add some ops #1

Merged 12 commits on Jun 21, 2015
12 changes: 8 additions & 4 deletions Makefile
@@ -40,15 +40,16 @@ endif

ifneq ($(ADD_CFLAGS), NONE)
CFLAGS += $(ADD_CFLAGS)
CFLAGS += -DDMLC_USE_CXX11=1
endif

ifneq ($(ADD_LDFLAGS), NONE)
LDFLAGS += $(ADD_LDFLAGS)
endif

OBJ = storage.o narray_op_cpu.o
OBJCXX11 = engine.o narray.o
CUOBJ = narray_op_gpu.o
OBJ = storage.o narray_op_cpu.o operator.o operator_cpu.o
OBJCXX11 = engine.o narray.o
CUOBJ = narray_op_gpu.o operator_gpu.o

LIB_DEP = $(DMLC_CORE)/libdmlc.a

@@ -64,6 +65,9 @@ engine.o: src/dag_engine/simple_engine.cc
narray.o: src/narray/narray.cc
narray_op_cpu.o: src/narray/narray_op_cpu.cc src/narray/narray_op-inl.h
narray_op_gpu.o: src/narray/narray_op_gpu.cu src/narray/narray_op-inl.h
operator.o: src/operator/operator.cc
operator_cpu.o: src/operator/operator_cpu.cc
operator_gpu.o: src/operator/operator_gpu.cu

$(BIN) :
$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS)
@@ -72,7 +76,7 @@ $(OBJ) :
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )

$(OBJCXX11) :
$(CXX) -std=c++0x -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
$(CXX) -std=c++11 -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )

$(SLIB) :
$(CXX) $(CFLAGS) -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS)
17 changes: 9 additions & 8 deletions include/mxnet/narray.h
@@ -7,6 +7,7 @@
#define MXNET_NARRAY_H_
#include <memory>
#include <dmlc/base.h>
#include <dmlc/logging.h>
#include "./base.h"
#include "./storage.h"
#include "./tensor_blob.h"
@@ -25,7 +26,7 @@ class NArray {
/*! \brief default constructor */
NArray() {}
/*!
* \brief constructing a new dynamic NArray
* \param shape the shape of array
* \param ctx context of NArray
*/
@@ -34,16 +35,16 @@
}
/*!
* \brief constructing a static NArray that shares data with TBlob
* Use with caution: allocate ONLY ONE NArray for each TBlob,
* make sure the memory region is available throughout the life of the NArray
* \param data the memory content of static data
* \param dev_id the device id this tensor sits at
*/
NArray(const TBlob &data, int dev_id)
: ptr_(new Chunk(data, dev_id)) {
}
/*!
* \return the shape of current NArray
*/
inline const TShape &shape() const {
return ptr_->data.shape_;
@@ -57,7 +58,7 @@
/*! \return whether this narray is not initialized */
inline bool is_empty() const {
return ptr_.get() == nullptr;
}

private:
/*! \brief the real data chunk that backs NArray */
@@ -79,7 +80,7 @@
Chunk() : static_data(true), delay_alloc(false) {
var = DAGEngine::Get()->NewVar();
}
/*! \brief construct from static data */
Chunk(const TBlob &data, int dev_id)
: data(data),
static_data(true),
@@ -118,14 +119,14 @@
/*! \brief internal data of NArray */
std::shared_ptr<Chunk> ptr_;
/*!
* \brief constructing a new dynamic NArray
* \param shape the shape of array
* \param ctx context of NArray
* \param delay_alloc whether delay the allocation
*/
NArray(const TShape &shape, Context ctx, bool delay_alloc)
: ptr_(new Chunk(shape, ctx, delay_alloc)) {
}
// add friend to helper functions
template<typename OP>
friend NArray BinaryEWise(const NArray &lhs, const NArray &rhs);
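
For reference, a minimal usage sketch of the static-data constructor shown above, assuming dev_id 0 and a caller-managed TBlob; the helper name is illustrative and not part of this diff:

#include <mxnet/narray.h>

// The memory behind 'blob' must stay valid for the whole lifetime of the
// NArray, and only one NArray should ever be created for this TBlob
// (see the caution in the constructor comment above).
inline mxnet::NArray WrapBlob(const mxnet::TBlob &blob) {
  mxnet::NArray arr(blob, /*dev_id=*/0);  // shares the memory, no copy
  CHECK(!arr.is_empty());                 // CHECK comes from dmlc/logging.h
  return arr;  // copies share the same internal Chunk, so returning is cheap
}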
62 changes: 55 additions & 7 deletions include/mxnet/operator.h
@@ -1,13 +1,13 @@
/*!
* Copyright (c) 2015 by Contributors
* \file operator.h
* \brief operator interface of mxnet
* \brief static operator interface of mxnet
*/
#ifndef MXNET_OPERATOR_H_
#define MXNET_OPERATOR_H_
// this file will be seen by cuda, no c++11 for now
#include <dmlc/base.h>
#include "./base.h"
#include "./narray.h"
#include "./tensor_blob.h"

namespace mxnet {
@@ -38,24 +38,64 @@ class Operator {
/*! \brief add to the provided space */
kAddTo = 3
};
/*! \brief the types of input arguments the operator takes */
enum ArgType {
/*! \brief data argument */
kDataArg = 0,
/*! \brief weight argument */
kWeightArg = 1,
/*! \brief bias argument */
kBiasArg = 2
};
enum Property {
/*! \brief Op contains internal state, won't influence engine schedule */
kContainInteralState = 1,
/*! \brief Op forward requires random numbers, will influence engine schedule */
kForwardRequireRnd = 2,
};
/*!
* \brief get the types of the input arguments of this operator
* \return a vector corresponding to the type of each argument
* the order is the same as the order of inputs in Forward, InferShape and Backward
*/
virtual std::vector<ArgType> DescribeArgs() const {
// by default, most layers only have one data argument
return std::vector<ArgType>(1, kDataArg);
}
/*!
* \brief describe the properties of the op
* \return a bit map encoded in an int
*/
virtual int DescribeProperty() const {
// by default, most layers only contain internal state
return kContainInteralState;
}
/*!
* \brief set param for the operator from string
* \param name parameter name
* \param val string for configuration
*/
virtual void SetParam(const char *name, const char *val) {}
/*!
* \brief infer the shape of output given the input data
* \brief infer the shapes of outputs and unknown input arguments
* \param in_shape the shapes of the input arguments of the operator
* this should be of the same length as the vector returned by DescribeArgs
* in_shape allows unknown elements, which are identified by shape.ndim() == 0.
* For unknown shapes, InferShape will try to fill in the correct TShape in in_shape;
* for known shapes, InferShape will check shape consistency.
*
* common practice: set the shape of the data input; the weights' shapes can usually be inferred
*
* \param out_shape the shapes of the outputs of the operator
* InferShape will modify the vector to fill in the output TShapes
*/
virtual void InferShape(const std::vector<TShape> &in_shape,
virtual void InferShape(std::vector<TShape> *in_shape,
std::vector<TShape> *out_shape) = 0;
/*!
* \brief perform a forward operation of operator, save the output to TBlob
* \param opt option on Forward such as whether this is training phase
* \param ctx runtime context
* \param in_data array of input data
* \param in_data array of input data, it is const
* \param out_data array of output data,
* the space of TBlob in out_data must be pre-allocated with InferShape
*/
@@ -71,13 +111,21 @@
* \param out_grad array of output gradients; there could be three possible TBlobs
* in each element of the array
* \param req request types of the gradient saving operation,
* only the in-place request will change the input data
* \sa GradReqType
*/
virtual void Backward(RunContext ctx,
const std::vector<TBlob> &grad_next,
const std::vector<TBlob> &in_data,
const std::vector<TBlob> &out_grad,
const std::vector<GradReqType> req);
const std::vector<GradReqType> &req);

/*!
* \brief factory function, creates a new operator
* \param type the type of operator
* \param ctx the context (device type) of the operator
*/
static Operator *Create(const char *type, Context ctx);
};
} // namespace mxnet
#endif // MXNET_OPERATOR_H_
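
A short calling sketch for the revised interface, showing how the pointer-based InferShape is meant to be driven. The operator name "relu" and the default-constructed Context are assumptions; only the signatures come from this header:

#include <vector>
#include <mxnet/operator.h>

inline void InferShapesExample(const mxnet::TShape &data_shape) {
  mxnet::Context ctx;  // assumed: a default-constructed context means CPU
  mxnet::Operator *op = mxnet::Operator::Create("relu", ctx);  // "relu" is hypothetical
  // one entry per input argument, in DescribeArgs() order; unknown entries
  // keep ndim() == 0 and are filled in by InferShape, known ones are checked
  std::vector<mxnet::TShape> in_shape(op->DescribeArgs().size());
  in_shape[0] = data_shape;  // the kDataArg slot must be given
  std::vector<mxnet::TShape> out_shape;
  op->InferShape(&in_shape, &out_shape);
  // out_shape now holds one TShape per output; output TBlobs can be
  // pre-allocated from it before calling Forward
  delete op;
}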
7 changes: 3 additions & 4 deletions src/dag_engine/simple_engine.cc
@@ -1,19 +1,18 @@
#include <dmlc/logging.h>
#include <mxnet/dag_engine.h>

namespace mxnet {
class SimpleEngine : public DAGEngine {
public:
virtual void Push(AsyncOp exec_fun,
Context exec_ctx,
const std::vector<Variable> &use_vars,
const std::vector<Variable> &mutate_vars) {
// cannot schedule async using naive way because deps are not captured
LOG(FATAL) << "cannot schedule async operations";
}
virtual void Push(Op exec_fun,
Context exec_ctx,
const std::vector<Variable> &use_vars,
const std::vector<Variable> &mutate_vars) {
exec_fun(RunContext());
}
@@ -25,7 +24,7 @@ class SimpleEngine : public DAGEngine {
// that have the info about the variable
// use ptr directly instead of ID because this avoids an indirect mapping
return NULL;
}
};
// implements the singleton factory
DAGEngine* DAGEngine::Get() {
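
A sketch of how a synchronous op is pushed through this engine; with SimpleEngine the function simply runs on the calling thread, as the Push body above shows. The default-constructed Context, the nested Variable typedef, and the lambda-to-Op conversion are assumptions:

#include <vector>
#include <mxnet/dag_engine.h>

inline void PushExample() {
  using namespace mxnet;
  DAGEngine *engine = DAGEngine::Get();
  DAGEngine::Variable var = engine->NewVar();            // tracks one piece of data
  std::vector<DAGEngine::Variable> use_vars;             // nothing is read
  std::vector<DAGEngine::Variable> mutate_vars = {var};  // 'var' is written
  engine->Push([](RunContext rctx) {
                 // the actual computation would run here
               },
               Context(),  // assumed default (CPU) context
               use_vars, mutate_vars);
}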
2 changes: 1 addition & 1 deletion src/narray/narray_op-inl.h
@@ -19,7 +19,7 @@ namespace mxnet {
namespace narray {
// true implementation
template<typename xpu, typename OP>
inline void Eval_(const TBlob &lhs, const TBlob &rhs, TBlob ret, RunContext ctx) {
using namespace mshadow::expr;
mshadow::Stream<xpu> *s = static_cast<mshadow::Stream<xpu>*>(ctx.stream);
ret.FlatTo2D<xpu, real_t>(s)
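
For context, a sketch of how the templated kernel above might be specialized on CPU; the mshadow::op::plus functor name is an assumption about mshadow's built-in binary ops, not something introduced by this diff:

#include <mshadow/tensor.h>
#include "./narray_op-inl.h"  // assumed to be included from the same directory

namespace mxnet {
namespace narray {
// elementwise ret = lhs + rhs on the CPU, dispatched through Eval_
inline void EvalPlusCpu(const TBlob &lhs, const TBlob &rhs,
                        TBlob ret, RunContext ctx) {
  Eval_<mshadow::cpu, mshadow::op::plus>(lhs, rhs, ret, ctx);
}
}  // namespace narray
}  // namespace mxnet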
60 changes: 60 additions & 0 deletions src/operator/activation_op-inl.h
@@ -0,0 +1,60 @@
/*!
* Copyright (c) 2015 by Contributors
* \file activation_op-inl.h
* \brief activation operator of mxnet
*/

#ifndef MXNET_OPERATOR_ACTIVATION_OP_INL_H_
#define MXNET_OPERATOR_ACTIVATION_OP_INL_H_

#include <vector>
#include <dmlc/logging.h>
#include <mxnet/operator.h>
#include "./operator_common.h"

namespace mxnet {
namespace op {
template<typename xpu, typename ForwardOp, typename BackOp>
class ActivationOp : public Operator {
public:
virtual void InferShape(std::vector<TShape> *in_shape,
std::vector<TShape> *out_shape) {
CHECK(in_shape->size() == 1) << "Only 1 input is allowed";
CHECK((*in_shape)[0].ndim() != 0 ) << "Require data shape to be known";
out_shape->clear();
out_shape->push_back((*in_shape)[0]);
}
virtual void Forward(Option opt,
RunContext ctx,
const std::vector<TBlob> &in_data,
const std::vector<TBlob> &out_data) {
CHECK(out_data.size() == 1);
CHECK(in_data.size() == 1);
mshadow::Stream<xpu> *stream = \
static_cast<mshadow::Stream<xpu> *>(ctx.stream);
mshadow::Tensor<xpu, 2> in = in_data[0].FlatTo2D<xpu, real_t>(stream);
mshadow::Tensor<xpu, 2> out = out_data[0].FlatTo2D<xpu, real_t>(stream);
out = mshadow::expr::F<ForwardOp>(in);
}
virtual void Backward(RunContext ctx,
const std::vector<TBlob> &grad_next,
const std::vector<TBlob> &in_data,
const std::vector<TBlob> &out_grad,
const std::vector<GradReqType> &req) {
CHECK(grad_next.size() == 1);
CHECK(in_data.size() == 1);
CHECK(out_grad.size() == 1);
CHECK(req.size() == 1);
mshadow::Stream<xpu> *stream = \
static_cast<mshadow::Stream<xpu> *>(ctx.stream);
mshadow::Tensor<xpu, 2> grad = grad_next[0].FlatTo2D<xpu, real_t>(stream);
mshadow::Tensor<xpu, 2> data = in_data[0].FlatTo2D<xpu, real_t>(stream);
mshadow::Tensor<xpu, 2> out = out_grad[0].FlatTo2D<xpu, real_t>(stream);
Assign(out, req[0], mshadow::expr::F<BackOp>(
mshadow::expr::F<ForwardOp>(data)) * grad);
}
}; // class ActivationOp
} // namespace op
} // namespace mxnet

#endif // MXNET_OPERATOR_ACTIVATION_OP_INL_H_
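
To make the template parameters concrete, a sketch of a CPU ReLU instantiation; the mshadow-style Map functors below are illustrative and are not part of this pull request:

#include <mshadow/tensor.h>
#include "./activation_op-inl.h"

namespace mxnet {
namespace op {
struct relu {  // hypothetical forward functor
  MSHADOW_XINLINE static real_t Map(real_t a) {
    return a > 0.0f ? a : 0.0f;
  }
};
struct relu_grad {  // hypothetical backward functor
  MSHADOW_XINLINE static real_t Map(real_t a) {
    return a > 0.0f ? 1.0f : 0.0f;
  }
};
// Forward applies relu; Backward computes relu_grad(relu(x)) * grad_next,
// which is exactly what ActivationOp::Backward assembles above.
inline Operator *CreateReluOpCpu() {
  return new ActivationOp<mshadow::cpu, relu, relu_grad>();
}
}  // namespace op
}  // namespace mxnet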