apache · zhanghang1989 · Mar 28, 2018 · Mar 30, 2018 · piiswrong · Mar 28, 2018
diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h
@@ -164,7 +164,12 @@ inline void SetShapeType(const Context& ctx,
     NDArrayStorageType storage_type = static_cast<NDArrayStorageType>(out_storage_types[i]);
     if (outputs[i]->is_none()) {
       if (storage_type == kDefaultStorage) {
-        *outputs[i] = NDArray(out_shapes[i], ctx, true, out_types[i]);
+        if (outputs.size() == inputs.size()) {
+          // TODO FIXME (Hang Zhang), find a proper way to handle multi-device ctx
+          *outputs[i] = NDArray(out_shapes[i], inputs[i]->ctx(), true, out_types[i]);
+        } else {
+          *outputs[i] = NDArray(out_shapes[i], ctx, true, out_types[i]);
+        }
       } else {
         *outputs[i] = NDArray(storage_type, out_shapes[i], ctx, true, out_types[i]);
       }

diff --git a/src/operator/allreduce-inl.h b/src/operator/allreduce-inl.h
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ /*!
+ * Copyright (c) 2018 by Contributors
+ * \file allreduce-inl.h
+ * \brief all reduce operator
+ * \author Hang Zhang
+ */
+#ifndef MXNET_OPERATOR_ALL_REDUCE_INL_H_
+#define MXNET_OPERATOR_ALL_REDUCE_INL_H_
+
+#include <dmlc/logging.h>
+#include <dmlc/parameter.h>
+#include <mxnet/operator.h>
+#include <mxnet/ndarray.h>
+#include <map>
+#include <vector>
+#include <string>
+#include <utility>
+#include "../ndarray/ndarray_function.h"
+#include "./operator_common.h"
+#include "./mxnet_op.h"
+#include "./mshadow_op.h"
+#include "../kvstore/comm.h"
+
+namespace mxnet {
+namespace op {
+
+/*! \brief Parameters of the AllReduce operator: the number of arrays being reduced. */
+struct AllReduceOpParam : public dmlc::Parameter<AllReduceOpParam> {
+  int num_args;  // number of input arrays; declared below with a lower bound of 1
+  DMLC_DECLARE_PARAMETER(AllReduceOpParam) {
+    DMLC_DECLARE_FIELD(num_args).set_lower_bound(1).describe("Number of inputs to be allreduced.");
+  }
+};  // struct AllReduceOpParam
+
+/*!
+ * \brief Sum all inputs element-wise and copy the total into every output.
+ * \note outputs[0] fixes the shape, context and dtype of all temporaries; `req` is only
+ *       size-checked, so every output is overwritten with the sum.
+ */
+template<typename xpu>
+inline void AllReduceOpForwardEx(const nnvm::NodeAttrs& attrs,
+                                 const OpContext &ctx,
+                                 const std::vector<NDArray> &inputs,
+                                 const std::vector<OpReqType> &req,
+                                 const std::vector<NDArray> &outputs) {
+  using namespace mshadow;
+  using namespace mshadow::expr;
+  CHECK_EQ(inputs.size(), outputs.size());
+  CHECK_EQ(inputs.size(), req.size());
+  const NDArray &ref = outputs[0];  // template for every temporary array allocated below
+  const size_t num = inputs.size();
+  // Step 1: copy each input into its own temporary array and remember its blob.
+  std::vector<NDArray> staged(num);
+  std::vector<TBlob> addends(num);
+  NDArray total(ref.shape(), ref.ctx(), false, ref.dtype());
+  for (size_t k = 0; k < num; ++k) {
+    staged[k] = NDArray(ref.shape(), ref.ctx(), false, ref.dtype());
+    TBlob dst = staged[k].data();
+    ndarray::Copy<xpu, xpu>(inputs[k].data(), &dst,
+                            inputs[k].ctx(), staged[k].ctx(), ctx.run_ctx);
+    addends[k] = staged[k].data();
+  }
+  // Step 2: add the staged copies together into `total`.
+  TBlob sum_blob = total.data();
+  ndarray::ElementwiseSum<xpu>(addends, &sum_blob, ctx.run_ctx);
+  // Step 3: copy the total into each output array.
+  for (const NDArray &target : outputs) {
+    TBlob dst = target.data();
+    ndarray::Copy<xpu, xpu>(total.data(), &dst,
+                            total.ctx(), target.ctx(), ctx.run_ctx);
+  }
+}
+
+
+/*! \brief Shape inference: reconcile the shapes of input i and output i in both directions. */
+inline bool AllReduceShape(const nnvm::NodeAttrs& attrs,
+                           std::vector<TShape> *in_attrs,
+                           std::vector<TShape> *out_attrs) {
+  CHECK_EQ(in_attrs->size(), out_attrs->size());
+  const int num = static_cast<int>(in_attrs->size());
+  for (int k = 0; k < num; ++k) {  // first pass: shape of input k goes to output k
+    SHAPE_ASSIGN_CHECK(*out_attrs, k, (*in_attrs)[k]);
+  }
+  for (int k = 0; k < num; ++k) {  // second pass: shape of output k goes back to input k
+    SHAPE_ASSIGN_CHECK(*in_attrs, k, (*out_attrs)[k]);
+  }
+  return true;
+}
+
+/*!
+ * \brief Type inference: input 0 must already be typed; dtype i is reconciled between in and out.
+ */
+inline bool AllReduceType(const nnvm::NodeAttrs& attrs,
+                            std::vector<int>* in_attrs,
+                            std::vector<int>* out_attrs) {
+  CHECK_NE((*in_attrs)[0], -1) << "First input must have specified type";
+  const int num = static_cast<int>(in_attrs->size());
+  for (int k = 0; k < num; ++k) {  // first pass: dtype of input k goes to output k
+    TYPE_ASSIGN_CHECK(*out_attrs, k, (*in_attrs)[k]);
+  }
+  for (int k = 0; k < num; ++k) {  // second pass: dtype of output k goes back to input k
+    TYPE_ASSIGN_CHECK(*in_attrs, k, (*out_attrs)[k]);
+  }
+  return true;
+}
+
+/*! \brief Storage inference: always dispatch to FComputeEx; every output is default storage. */
+inline bool AllReduceStorageType(const nnvm::NodeAttrs& attrs,
+                                 const int dev_mask,
+                                 DispatchMode* dispatch_mode,
+                                 std::vector<int>* in_attrs,
+                                 std::vector<int>* out_attrs) {
+  CHECK_EQ(in_attrs->size(), out_attrs->size());
+  *dispatch_mode = DispatchMode::kFComputeEx;
+  // Inputs whose storage type is still -1 become default storage; all others are left as is.
+  for (size_t k = 0; k < in_attrs->size(); ++k) {
+    if ((*in_attrs)[k] == -1) (*in_attrs)[k] = kDefaultStorage;
+  }
+  out_attrs->assign(out_attrs->size(), static_cast<int>(kDefaultStorage));
+  return true;
+}
+
+}  // namespace op
+}  // namespace mxnet
+#endif  // MXNET_OPERATOR_ALL_REDUCE_INL_H_
diff --git a/src/operator/allreduce.cc b/src/operator/allreduce.cc
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ /*!
+ * Copyright (c) 2018 by Contributors
+ * \file allreduce.cc
+ * \brief all reduce operator
+ * \author Hang Zhang
+ */
+
+#include "./allreduce-inl.h"
+
+namespace mxnet {
+namespace op {
+// Register the AllReduceOpParam struct (declared in allreduce-inl.h) with dmlc.
+DMLC_REGISTER_PARAMETER(AllReduceOpParam);
+
+// Forward operator: num_args inputs, num_args outputs, computed by AllReduceOpForwardEx.
+NNVM_REGISTER_OP(AllReduce)
+.describe(R"code(Sum the input arrays element-wise and return the sum once per input.
+
+Takes ``num_args`` arrays; output ``i`` has the shape and dtype of input ``i`` and holds the sum.
+)code" ADD_FILELINE)
+.set_attr_parser(ParamParser<AllReduceOpParam>)
+.set_num_inputs([](const nnvm::NodeAttrs& attrs) {
+    return static_cast<uint32_t>(dmlc::get<AllReduceOpParam>(attrs.parsed).num_args);
+  })
+.set_num_outputs([](const nnvm::NodeAttrs& attrs) {
+    return static_cast<uint32_t>(dmlc::get<AllReduceOpParam>(attrs.parsed).num_args);
+  })
+.set_attr<nnvm::FInferShape>("FInferShape", AllReduceShape)
+.set_attr<nnvm::FInferType>("FInferType", AllReduceType)
+.set_attr<FInferStorageType>("FInferStorageType", AllReduceStorageType)
+.set_attr<std::string>("key_var_num_args", "num_args")
+.set_attr<FComputeEx>("FComputeEx<cpu>", AllReduceOpForwardEx<cpu>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_AllReduce"})
+// In-place option pairs input i with output i for every i in [0, num_args).
+.set_attr<nnvm::FInplaceOption>("FInplaceOption", [](const NodeAttrs& attrs){
+  const uint32_t n = dmlc::get<AllReduceOpParam>(attrs.parsed).num_args;
+  std::vector<std::pair<int, int> > ret;
+  for (uint32_t i = 0; i < n; i++) ret.push_back(std::pair<int, int>(i, i));
+  return ret;
+})
+.add_argument("data", "NDArray-or-Symbol[]", "List of arrays to allreduce");
+
+// Backward operator: num_args inputs and outputs, input i paired in place with output i,
+// and computed by the same AllReduceOpForwardEx routine as the forward operator.
+NNVM_REGISTER_OP(_backward_AllReduce)
+.set_attr_parser(ParamParser<AllReduceOpParam>)
+.set_num_inputs([](const nnvm::NodeAttrs& attrs) {
+    return static_cast<uint32_t>(dmlc::get<AllReduceOpParam>(attrs.parsed).num_args);
+  })
+.set_num_outputs([](const nnvm::NodeAttrs& attrs) {
+    return static_cast<uint32_t>(dmlc::get<AllReduceOpParam>(attrs.parsed).num_args);
+  })
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<FInferStorageType>("FInferStorageType", AllReduceStorageType)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption", [](const NodeAttrs& attrs){
+  const uint32_t count = dmlc::get<AllReduceOpParam>(attrs.parsed).num_args;
+  std::vector<std::pair<int, int> > pairs;
+  for (uint32_t k = 0; k < count; ++k) {
+    pairs.emplace_back(k, k);
+  }
+  return pairs;
+})
+.set_attr<FComputeEx>("FComputeEx<cpu>", AllReduceOpForwardEx<cpu>);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/allreduce.cu b/src/operator/allreduce.cu
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ /*!
+ * Copyright (c) 2018 by Contributors
+ * \file allreduce.cu
+ * \brief all reduce operator
+ * \author Hang Zhang
+ */
+
+#include "./allreduce-inl.h"
+
+namespace mxnet {
+namespace op {
+// GPU registration: AllReduce and _backward_AllReduce both use AllReduceOpForwardEx<gpu>.
+NNVM_REGISTER_OP(AllReduce)
+.set_attr<FComputeEx>("FComputeEx<gpu>", AllReduceOpForwardEx<gpu>);
+
+NNVM_REGISTER_OP(_backward_AllReduce)
+.set_attr<FComputeEx>("FComputeEx<gpu>", AllReduceOpForwardEx<gpu>);
+
+}  // namespace op
+}  // namespace mxnet