From f0c72642b04e4e7ab4ff92a4a3fa4c56835e11af Mon Sep 17 00:00:00 2001
From: "Yuan, Pengxin"
Date: Thu, 24 Jan 2019 17:47:26 -0800
Subject: [PATCH 01/10] mkl_func test with erf & log ops, build success

---
 src/operator/mkl_functions-inl.h              | 126 ++++++++++++++++++
 src/operator/tensor/elemwise_unary_op.h       |  76 ++++++-----
 .../tensor/elemwise_unary_op_basic.cc         |  30 ++++-
 3 files changed, 193 insertions(+), 39 deletions(-)
 create mode 100644 src/operator/mkl_functions-inl.h

diff --git a/src/operator/mkl_functions-inl.h b/src/operator/mkl_functions-inl.h
new file mode 100644
index 000000000000..c31accf967c4
--- /dev/null
+++ b/src/operator/mkl_functions-inl.h
@@ -0,0 +1,126 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * Copyright (c) 2018 by Contributors
 * \file mkl_functions-inl.h
 * \brief Wrapper functions for MKL (VML) vector math routines
 * \author
*/
#ifndef MXNET_OPERATOR_MKL_FUNCTIONS_H_
#define MXNET_OPERATOR_MKL_FUNCTIONS_H_

#if MSHADOW_USE_MKL == 1
#include "mkl.h"

namespace mxnet {
namespace op {
namespace mkl_func {

MSHADOW_XINLINE
static bool check_size(const size_t n) {
  const size_t MKL_INT_MAX = (sizeof(MKL_INT) == sizeof(int)) ?
INT_MAX : LLONG_MAX; + return (n <= MKL_INT_MAX); +} + +MSHADOW_XINLINE +static bool check_type(const int t) { + return (t == mshadow::kFloat32 || t == mshadow::kFloat64); +} + +#define MXNET_MKL_UNARY_MATH_FUNC(name, func) \ + struct name : public mxnet_op::tunable { \ + template \ + MSHADOW_XINLINE static void Map(const index_t n, const DType *src, float *dst) { \ + vs##func(static_cast(n), reinterpret_cast(src), dst); \ + } \ + MSHADOW_XINLINE static void Map(const index_t n, const double *src, double *dst) { \ + vd##func(static_cast(n), src, dst); \ + } \ + } + + +#define MXNET_MKL_BINARY_MATH_FUNC(name, func) \ + struct name : public mxnet_op::tunable { \ + template \ + MSHADOW_XINLINE static void Map(const index_t n, \ + const DType *a, \ + const DType *b, \ + float *c) { \ + vs##func(static_cast(n), \ + reinterpret_cast(a), \ + reinterpret_cast(b), \ + c); \ + } \ + MSHADOW_XINLINE static void Map(const index_t n, \ + const double *a, \ + const double *b, \ + double *c) { \ + vd##func(static_cast(n), a, b, c); \ + } \ + } + +MXNET_MKL_UNARY_MATH_FUNC(erf, Erf); +MXNET_MKL_UNARY_MATH_FUNC(exp, Exp); +MXNET_MKL_UNARY_MATH_FUNC(exp2, Exp2); +MXNET_MKL_UNARY_MATH_FUNC(exp10, Exp10); +MXNET_MKL_UNARY_MATH_FUNC(expm1, Expm1); +MXNET_MKL_UNARY_MATH_FUNC(log, Ln); +MXNET_MKL_UNARY_MATH_FUNC(log2, Log2); +MXNET_MKL_UNARY_MATH_FUNC(log10, Log10); +MXNET_MKL_UNARY_MATH_FUNC(log1p, Log1p); + +MXNET_MKL_UNARY_MATH_FUNC(sin, Sin); +MXNET_MKL_UNARY_MATH_FUNC(cos, Cos); +MXNET_MKL_UNARY_MATH_FUNC(tan, Tan); +MXNET_MKL_UNARY_MATH_FUNC(asin, Asin); +MXNET_MKL_UNARY_MATH_FUNC(acos, Acos); +MXNET_MKL_UNARY_MATH_FUNC(atan, Atan); + +MXNET_MKL_UNARY_MATH_FUNC(sinh, Sinh); +MXNET_MKL_UNARY_MATH_FUNC(cosh, Cosh); +MXNET_MKL_UNARY_MATH_FUNC(tanh, Tanh); +MXNET_MKL_UNARY_MATH_FUNC(asinh, Asinh); +MXNET_MKL_UNARY_MATH_FUNC(acosh, Acosh); +MXNET_MKL_UNARY_MATH_FUNC(atanh, Atanh); + +MXNET_MKL_UNARY_MATH_FUNC(sqrt, Sqrt); +MXNET_MKL_UNARY_MATH_FUNC(abs, Abs); +MXNET_MKL_UNARY_MATH_FUNC(cbrt, Cbrt); +MXNET_MKL_UNARY_MATH_FUNC(round, Round); +MXNET_MKL_UNARY_MATH_FUNC(ceil, Ceil); +MXNET_MKL_UNARY_MATH_FUNC(floor, Floor); +MXNET_MKL_UNARY_MATH_FUNC(trunc, Trunc); + +MXNET_MKL_UNARY_MATH_FUNC(lgamma, LGamma); +MXNET_MKL_UNARY_MATH_FUNC(tgamma, TGamma); +MXNET_MKL_UNARY_MATH_FUNC(square, Sqr); + +MXNET_MKL_BINARY_MATH_FUNC(add, Add); +MXNET_MKL_BINARY_MATH_FUNC(sub, Sub); +MXNET_MKL_BINARY_MATH_FUNC(mul, Mul); +MXNET_MKL_BINARY_MATH_FUNC(pow, Pow); +MXNET_MKL_BINARY_MATH_FUNC(hypot, Hypot); + + +} // namespace mkl_func +} // namespace op +} // namespace mxnet +#endif // MSHADOW_USE_MKL == 1 +#endif // MXNET_OPERATOR_MKL_FUNCTIONS_H_ diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index 3085f6d2256a..9338fc60352b 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -35,9 +35,10 @@ #include "../mxnet_op.h" #include "../elemwise_op_common.h" #include "../../ndarray/ndarray_function.h" + #if MSHADOW_USE_MKL == 1 -#include "mkl.h" -#endif +#include "../mkl_functions-inl.h" +#endif // MSHADOW_USE_MKL == 1 namespace mxnet { namespace op { @@ -354,42 +355,27 @@ class UnaryOp : public OpBase { } #if MSHADOW_USE_MKL == 1 - static inline void MKLLog(MKL_INT size, const float* pIn, float* pOut) { - vsLn(size, pIn, pOut); - } - - static inline void MKLLog(MKL_INT size, const double* pIn, double* pOut) { - vdLn(size, pIn, pOut); - } -#endif - - template - static void LogCompute(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - 
const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - if (req[0] == kNullOp) return; - // if defined MSHADOW_USE_MKL then call mkl log when req is KWriteTo, type_flag - // is mshadow::kFloat32 or mshadow::kFloat64 and data size less than or equal MKL_INT_MAX -#if MSHADOW_USE_MKL == 1 - auto type_flag = inputs[0].type_flag_; - const size_t MKL_INT_MAX = (sizeof(MKL_INT) == sizeof(int)) ? INT_MAX : LLONG_MAX; - size_t input_size = inputs[0].Size(); - if (req[0] == kWriteTo && - input_size <= MKL_INT_MAX && - (type_flag == mshadow::kFloat32 || type_flag == mshadow::kFloat64)) { - MSHADOW_SGL_DBL_TYPE_SWITCH(type_flag, DType, { - MKLLog(input_size, inputs[0].dptr(), outputs[0].dptr()); - }); - } else { - Compute(attrs, ctx, inputs, req, outputs); - } -#else - Compute(attrs, ctx, inputs, req, outputs); -#endif +template +static void MKL_Compute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs){ + if (req[0] == kNullOp) return; + auto type_flag = inputs[0].type_flag_; + size_t input_size = inputs[0].Size(); + if (req[0] == kWriteTo && + mkl_func::check_size(input_size) && + mkl_func::check_type(type_flag)){ + // set DType as float or double according to type_flag + MSHADOW_SGL_DBL_TYPE_SWITCH(type_flag, DType, { + MKL_OP::Map(input_size, inputs[0].dptr(), outputs[0].dptr()); + }); + } else { + Compute(attrs, ctx, inputs, req, outputs); } -}; + } +#endif // MSHADOW_USE_MKL == 1 /*! \brief Map legacy unary_bwd to backward_grad */ template @@ -562,6 +548,22 @@ struct ReshapeLikeParam : public dmlc::Parameter { }) \ .add_argument("data", "NDArray-or-Symbol", "The input array.") +/*! \bried MKL Unary compute. + * With this macro means mxnet compile with MKL to accelerate math function with mkl. + * Will Register FCompute with UnaryOp::MKL_Compute() to compelet the math function. + */ +#define MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \ + NNVM_REGISTER_OP(__name$) \ + .set_num_inputs(1) \ + .set_num_outputs(1) \ + .set_attr("FInferShape", ElemwiseShape<1, 1>) \ + .set_attr("FInferType", ElemwiseType<1, 1>) \ + .set_attr("FInplaceOption", \ + [](const NodeAttrs& attrs){ \ + return std::vector >{{0, 0}}; \ + }) \ + .add_argument("data", "NDArray-or-Symbol", "The input array.") + /*! \brief Unary compute, with FComputeEx for csr and rsp available */ #define MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(__name$, __xpu$, __kernel$) \ MXNET_OPERATOR_REGISTER_UNARY(__name$) \ diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index 4aaf4dfd33c4..084820f6bffe 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -899,8 +899,19 @@ The storage type of ``cbrt`` output depends upon the input storage type: MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_cbrt, unary_bwd); - // erf +#if MSHADOW_USE_MKL == 1 +MXNET_MKL_OPERATOR_REGISTER_UNARY(erf) +.describe(R"code(Returns element-wise gauss error function of the input. + +Example:: + + erf([0, -1., 10.]) = [0., -0.8427, 1.] + +)code" ADD_FILELINE) +.set_attr("FCompute", UnaryOp::MKL_Compute) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_erf"}); +#else MXNET_OPERATOR_REGISTER_UNARY(erf) .describe(R"code(Returns element-wise gauss error function of the input. 
@@ -911,6 +922,7 @@ Example:: )code" ADD_FILELINE) .set_attr("FCompute", UnaryOp::Compute) .set_attr("FGradient", ElemwiseGradUseIn{"_backward_erf"}); +#endif // MSHADOW_USE_MKL == 1 MXNET_OPERATOR_REGISTER_BINARY(_backward_erf) .set_attr("FCompute", @@ -970,6 +982,18 @@ The storage type of ``exp`` output is always dense .set_attr("FGradient", ElemwiseGradUseOut{"_mul"}); // log +#if MSHADOW_USE_MKL == 1 +MXNET_MKL_OPERATOR_REGISTER_UNARY(log) +.describe(R"code(Returns element-wise Natural logarithmic value of the input. + +The natural logarithm is logarithm in base *e*, so that ``log(exp(x)) = x`` + +The storage type of ``log`` output is always dense + +)code" ADD_FILELINE) +.set_attr("FCompute", UnaryOp::MKL_Compute) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_log"}); +#else MXNET_OPERATOR_REGISTER_UNARY(log) MXNET_ADD_SPARSE_OP_ALIAS(log) .describe(R"code(Returns element-wise Natural logarithmic value of the input. @@ -979,8 +1003,10 @@ The natural logarithm is logarithm in base *e*, so that ``log(exp(x)) = x`` The storage type of ``log`` output is always dense )code" ADD_FILELINE) -.set_attr("FCompute", UnaryOp::LogCompute) +.set_attr("FCompute", UnaryOp::Compute) .set_attr("FGradient", ElemwiseGradUseIn{"_backward_log"}); +#endif // MSHADOW_USE_MKL == 1 + // log10 MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(log10, cpu, mshadow_op::log10) From 931177741a1b84799141f03cc31e08a9830783f3 Mon Sep 17 00:00:00 2001 From: Tao Lv Date: Fri, 25 Jan 2019 10:11:20 +0800 Subject: [PATCH 02/10] fix lint and build issues --- src/operator/mkl_functions-inl.h | 9 +++--- src/operator/tensor/elemwise_unary_op.h | 40 ++++++++++++------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/operator/mkl_functions-inl.h b/src/operator/mkl_functions-inl.h index c31accf967c4..f3615f4ad17e 100644 --- a/src/operator/mkl_functions-inl.h +++ b/src/operator/mkl_functions-inl.h @@ -23,8 +23,8 @@ * \brief * \author */ -#ifndef MXNET_OPERATOR_MKL_FUNCTIONS_H_ -#define MXNET_OPERATOR_MKL_FUNCTIONS_H_ +#ifndef MXNET_OPERATOR_MKL_FUNCTIONS_INL_H_ +#define MXNET_OPERATOR_MKL_FUNCTIONS_INL_H_ #if MSHADOW_USE_MKL == 1 #include "mkl.h" @@ -54,7 +54,6 @@ static bool check_type(const int t) { vd##func(static_cast(n), src, dst); \ } \ } - #define MXNET_MKL_BINARY_MATH_FUNC(name, func) \ struct name : public mxnet_op::tunable { \ @@ -74,7 +73,7 @@ static bool check_type(const int t) { double *c) { \ vd##func(static_cast(n), a, b, c); \ } \ - } + } MXNET_MKL_UNARY_MATH_FUNC(erf, Erf); MXNET_MKL_UNARY_MATH_FUNC(exp, Exp); @@ -123,4 +122,4 @@ MXNET_MKL_BINARY_MATH_FUNC(hypot, Hypot); } // namespace op } // namespace mxnet #endif // MSHADOW_USE_MKL == 1 -#endif // MXNET_OPERATOR_MKL_FUNCTIONS_H_ +#endif // MXNET_OPERATOR_MKL_FUNCTIONS_INL_H_ diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index 9338fc60352b..00c381f77589 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -355,27 +355,28 @@ class UnaryOp : public OpBase { } #if MSHADOW_USE_MKL == 1 -template -static void MKL_Compute(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs){ - if (req[0] == kNullOp) return; - auto type_flag = inputs[0].type_flag_; - size_t input_size = inputs[0].Size(); - if (req[0] == kWriteTo && - mkl_func::check_size(input_size) && - mkl_func::check_type(type_flag)){ - // set DType as float or double according to type_flag - 
MSHADOW_SGL_DBL_TYPE_SWITCH(type_flag, DType, { - MKL_OP::Map(input_size, inputs[0].dptr(), outputs[0].dptr()); - }); - } else { - Compute(attrs, ctx, inputs, req, outputs); + template + static void MKL_Compute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + if (req[0] == kNullOp) return; + auto type_flag = inputs[0].type_flag_; + size_t input_size = inputs[0].Size(); + if (req[0] == kWriteTo && + mkl_func::check_size(input_size) && + mkl_func::check_type(type_flag)) { + // set DType as float or double according to type_flag + MSHADOW_SGL_DBL_TYPE_SWITCH(type_flag, DType, { + MKL_OP::Map(input_size, inputs[0].dptr(), outputs[0].dptr()); + }); + } else { + Compute(attrs, ctx, inputs, req, outputs); + } } - } #endif // MSHADOW_USE_MKL == 1 +}; /*! \brief Map legacy unary_bwd to backward_grad */ template @@ -586,7 +587,6 @@ struct ReshapeLikeParam : public dmlc::Parameter { #define MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(__name$, __xpu$, __kernel$) \ MXNET_OPERATOR_REGISTER_UNARY(__name$) \ .set_attr("FCompute<" #__xpu$ ">", UnaryOp::Compute<__xpu$, __kernel$>) - } // namespace op } // namespace mxnet From a79f7dbdd3dfb37e21be0a20dfc411a78136be50 Mon Sep 17 00:00:00 2001 From: "Wu, Shufan" Date: Fri, 22 Feb 2019 05:18:29 -0800 Subject: [PATCH 03/10] Try to add support to sparse array --- src/operator/tensor/elemwise_unary_op.h | 57 ++++++++++++++++- .../tensor/elemwise_unary_op_basic.cc | 39 ++++++++++++ tests/python/gpu/test_operator_gpu.py | 63 +++++++++++++++++++ 3 files changed, 158 insertions(+), 1 deletion(-) diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index 00c381f77589..0aa49119b094 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -265,6 +265,27 @@ class UnaryOp : public OpBase { } } +#if MSHADOW_USE_MKL == 1 + template + static void MKL_ComputeEx(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 1U) + << "Invalid input, only one input is allowed"; + CHECK_EQ(outputs.size(), 1U) + << "Invalid output, only one output is allowed"; + CHECK_NE(inputs[0].storage_type(), kDefaultStorage) + << "Operation requires a sparse output storage type"; + CHECK_NE(outputs[0].storage_type(), kDefaultStorage) + << "Operation requires a sparse output storage type"; + if (inputs[0].storage_shape().Size()) { + MapToFCompute(attrs, ctx, inputs, req, outputs, MKL_Compute); + } + } +#endif + template static void ComputeWithHalf2(const nnvm::NodeAttrs &attrs, const OpContext &ctx, @@ -371,6 +392,12 @@ class UnaryOp : public OpBase { MSHADOW_SGL_DBL_TYPE_SWITCH(type_flag, DType, { MKL_OP::Map(input_size, inputs[0].dptr(), outputs[0].dptr()); }); + } else if (req[0] == kWriteInplace && + mkl_func::check_size(input_size) && + mkl_func::check_type(type_flag)) { + MSHADOW_SGL_DBL_TYPE_SWITCH(type_flag, DType, { + MKL_OP::Map(input_size, inputs[0].dptr(), inputs[0].dptr()); + }); } else { Compute(attrs, ctx, inputs, req, outputs); } @@ -565,6 +592,34 @@ struct ReshapeLikeParam : public dmlc::Parameter { }) \ .add_argument("data", "NDArray-or-Symbol", "The input array.") +#if MSHADOW_USE_MKL == 1 + /*! \bried MKL Unary compute. + * * With this macro means mxnet compile with MKL to accelerate math function with mkl. + * * Will Register FCompute with UnaryOp::MKL_Compute() to compelet the math function. 
+ */ + #define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(__name$, __xpu$, __kernel$, __mkl_kernel$) \ + MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \ + MXNET_ADD_SPARSE_OP_ALIAS(__name$) \ + .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) \ + .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \ + .set_attr("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>) + + /*! \bried MKL Unary compute. + * * With this macro means mxnet compile with MKL to accelerate math function with mkl. + * * Will Register FCompute with UnaryOp::MKL_Compute() to compelet the math function. + */ + #define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP(__name$, __xpu$, __kernel$, __mkl_kernel$) \ + MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \ + MXNET_ADD_SPARSE_OP_ALIAS(__name$) \ + .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, false>) \ + .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \ + .set_attr("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kerbel$>) + + #define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(__name$, __xpu$, __kernel$, __mkl_kernel$) \ + MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \ + .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) +#endif + /*! \brief Unary compute, with FComputeEx for csr and rsp available */ #define MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(__name$, __xpu$, __kernel$) \ MXNET_OPERATOR_REGISTER_UNARY(__name$) \ @@ -582,7 +637,7 @@ struct ReshapeLikeParam : public dmlc::Parameter { .set_attr("FComputeEx<" #__xpu$ ">", UnaryOp::ComputeEx<__xpu$, __kernel$>) /*! \brief Unary compute, dense result. - * FInferStorageType attr is not set using this macro. By default DefaultStorageType is used. + * * FInferStorageType attr is not set using this macro. By default DefaultStorageType is used. */ #define MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(__name$, __xpu$, __kernel$) \ MXNET_OPERATOR_REGISTER_UNARY(__name$) \ diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index 084820f6bffe..c26f3949143f 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -812,6 +812,26 @@ The storage type of ``fix`` output depends upon the input storage type: .set_attr("FGradient", MakeZeroGradNodes); // square +#if MSHADOW_USE_MKL == 1 +MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(square, cpu, mshadow_op::square, mkl_func::square) +.describe(R"code(Returns element-wise squared value of the input. + +.. math:: + square(x) = x^2 + +Example:: + + square([2, 3, 4]) = [4, 9, 16] + +The storage type of ``square`` output depends upon the input storage type: + + - square(default) = default + - square(row_sparse) = row_sparse + - square(csr) = csr + +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_square"}); +#else MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(square, cpu, mshadow_op::square) .describe(R"code(Returns element-wise squared value of the input. 
@@ -830,6 +850,7 @@ The storage type of ``square`` output depends upon the input storage type: )code" ADD_FILELINE) .set_attr("FGradient", ElemwiseGradUseIn{"_backward_square"}); +#endif MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_square, unary_bwd); @@ -965,6 +986,23 @@ MXNET_OPERATOR_REGISTER_BINARY(_backward_rcbrt) unary_bwd>); // exp +#if MSHADOW_USE_MKL == 1 +MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(exp, cpu, mshadow_op::exp, mkl_func::exp) +MXNET_ADD_SPARSE_OP_ALIAS(exp) +.describe(R"code(Returns element-wise exponential value of the input. + +.. math:: + exp(x) = e^x \approx 2.718^x + +Example:: + + exp([0, 1, 2]) = [1., 2.71828175, 7.38905621] + +The storage type of ``exp`` output is always dense + +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseOut{"_mul"}); +#else MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(exp, cpu, mshadow_op::exp) MXNET_ADD_SPARSE_OP_ALIAS(exp) .describe(R"code(Returns element-wise exponential value of the input. @@ -980,6 +1018,7 @@ The storage type of ``exp`` output is always dense )code" ADD_FILELINE) .set_attr("FGradient", ElemwiseGradUseOut{"_mul"}); +#endif // log #if MSHADOW_USE_MKL == 1 diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index 4dbf82edd3f5..2007b93ef5ee 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -2121,6 +2121,69 @@ def test_context_num_gpus(): # Test that num_gpus reports at least one GPU, as the test is run on a GPU host. assert mx.context.num_gpus() > 0 +def math_log(shape, dtype, check_value): + np_x = np.random.rand(shape[0], shape[1]) + x = mx.nd.array(np_x, dtype=dtype) + mx.nd.waitall() + y = mx.nd.log(data=x) + y.wait_to_read() + if check_value: + x_ = x.as_in_context(mx.cpu()) + mx.nd.waitall() + y_ = mx.nd.log(data=x_) + y_.wait_to_read() + assert_almost_equal(y.asnumpy(), y_.asnumpy()) + +def math_erf(shape, dtype, check_value): + np_x = np.random.rand(shape[0], shape[1]) + x = mx.nd.array(np_x, dtype=dtype) + mx.nd.waitall() + y = mx.nd.erf(data=x) + y.wait_to_read() + if check_value: + x_ = x.as_in_context(mx.cpu()) + mx.nd.waitall() + y_ = mx.nd.erf(data=x_) + y_.wait_to_read() + assert_almost_equal(y.asnumpy(), y_.asnumpy()) + +def math_square(shape, dtype, check_value): + np_x = np.random.rand(shape[0], shape[1]) + x = mx.nd.array(np_x, dtype=dtype) + mx.nd.waitall() + y = mx.nd.square(data=x) + y.wait_to_read() + if check_value: + x_ = x.as_in_context(mx.cpu()) + mx.nd.waitall() + y_ = mx.nd.square(data=x_) + y_.wait_to_read() + assert_almost_equal(y.asnumpy(), y_.asnumpy()) + +def run_math(op, shape, dtype="float32", check_value=True): + run_num = 10 + for i in range(run_num): + if op == 'log': + math_log(shape=shape, dtype=dtype, check_value=check_value) + elif op == 'erf': + math_erf(shape=shape, dtype=dtype, check_value=check_value) + elif op == 'square': + math_square(shape=shape, dtype=dtype, check_value=check_value) + +@with_seed() +def test_math(): + ops = ['log', 'erf', 'square'] + check_value= True + lshape = 1000 + rshapes = [1, 10, 100, 1000, 10000] + dtypes = ["float32", "float64"] + for rshape in rshapes: + shape = (lshape, rshape) + print("shape:(%d, %d), " % (lshape, rshape), end="") + for dtype in dtypes: + for op in ops: + run_math(op, shape, dtype, check_value=check_value) + if __name__ == '__main__': import nose nose.runmodule() From 015fd0a6ae68a0b0c6d15de5aad8cfc4dd7cbadd Mon Sep 17 00:00:00 2001 From: Tao Lv Date: Sun, 3 Mar 2019 12:57:40 +0800 Subject: [PATCH 04/10] 
fix build --- src/operator/tensor/elemwise_unary_op.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index 0aa49119b094..600803c953f6 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -385,19 +385,13 @@ class UnaryOp : public OpBase { if (req[0] == kNullOp) return; auto type_flag = inputs[0].type_flag_; size_t input_size = inputs[0].Size(); - if (req[0] == kWriteTo && + if ((req[0] == kWriteTo || req[0] == kWriteInplace) && mkl_func::check_size(input_size) && mkl_func::check_type(type_flag)) { // set DType as float or double according to type_flag MSHADOW_SGL_DBL_TYPE_SWITCH(type_flag, DType, { MKL_OP::Map(input_size, inputs[0].dptr(), outputs[0].dptr()); }); - } else if (req[0] == kWriteInplace && - mkl_func::check_size(input_size) && - mkl_func::check_type(type_flag)) { - MSHADOW_SGL_DBL_TYPE_SWITCH(type_flag, DType, { - MKL_OP::Map(input_size, inputs[0].dptr(), inputs[0].dptr()); - }); } else { Compute(attrs, ctx, inputs, req, outputs); } @@ -584,7 +578,7 @@ struct ReshapeLikeParam : public dmlc::Parameter { NNVM_REGISTER_OP(__name$) \ .set_num_inputs(1) \ .set_num_outputs(1) \ - .set_attr("FInferShape", ElemwiseShape<1, 1>) \ + .set_attr("FInferShape", ElemwiseShape<1, 1>) \ .set_attr("FInferType", ElemwiseType<1, 1>) \ .set_attr("FInplaceOption", \ [](const NodeAttrs& attrs){ \ From 672be6a715b59a6c01f745a039d758c8ecf325fe Mon Sep 17 00:00:00 2001 From: Tao Lv Date: Thu, 18 Apr 2019 14:39:26 +0800 Subject: [PATCH 05/10] add functions --- src/operator/mkl_functions-inl.h | 151 +++++++++++++++++++----- src/operator/tensor/elemwise_unary_op.h | 48 ++++---- 2 files changed, 146 insertions(+), 53 deletions(-) diff --git a/src/operator/mkl_functions-inl.h b/src/operator/mkl_functions-inl.h index f3615f4ad17e..b224d08ab126 100644 --- a/src/operator/mkl_functions-inl.h +++ b/src/operator/mkl_functions-inl.h @@ -44,36 +44,31 @@ static bool check_type(const int t) { return (t == mshadow::kFloat32 || t == mshadow::kFloat64); } -#define MXNET_MKL_UNARY_MATH_FUNC(name, func) \ - struct name : public mxnet_op::tunable { \ - template \ - MSHADOW_XINLINE static void Map(const index_t n, const DType *src, float *dst) { \ - vs##func(static_cast(n), reinterpret_cast(src), dst); \ - } \ - MSHADOW_XINLINE static void Map(const index_t n, const double *src, double *dst) { \ - vd##func(static_cast(n), src, dst); \ - } \ - } +#define MXNET_MKL_UNARY_MATH_FUNC(name, func) \ +struct name { \ + MSHADOW_XINLINE static void Vectorize(const index_t n, const float *src, float *dst) { \ + vs##func(static_cast(n), src, dst); \ + } \ + MSHADOW_XINLINE static void Vectorize(const index_t n, const double *src, double *dst) { \ + vd##func(static_cast(n), src, dst); \ + } \ +}; -#define MXNET_MKL_BINARY_MATH_FUNC(name, func) \ - struct name : public mxnet_op::tunable { \ - template \ - MSHADOW_XINLINE static void Map(const index_t n, \ - const DType *a, \ - const DType *b, \ - float *c) { \ - vs##func(static_cast(n), \ - reinterpret_cast(a), \ - reinterpret_cast(b), \ - c); \ - } \ - MSHADOW_XINLINE static void Map(const index_t n, \ - const double *a, \ - const double *b, \ - double *c) { \ - vd##func(static_cast(n), a, b, c); \ - } \ - } +#define MXNET_MKL_BINARY_MATH_FUNC(name, func) \ +struct name { \ + MSHADOW_XINLINE static void Vectorize(const index_t n, \ + const float *a, \ + const float *b, \ + float *c) { \ + vs##func(static_cast(n), a, b, c); \ + } 
\ + MSHADOW_XINLINE static void Vectorize(const index_t n, \ + const double *a, \ + const double *b, \ + double *c) { \ + vd##func(static_cast(n), a, b, c); \ + } \ +}; MXNET_MKL_UNARY_MATH_FUNC(erf, Erf); MXNET_MKL_UNARY_MATH_FUNC(exp, Exp); @@ -118,6 +113,104 @@ MXNET_MKL_BINARY_MATH_FUNC(pow, Pow); MXNET_MKL_BINARY_MATH_FUNC(hypot, Hypot); +template +MSHADOW_XINLINE static void sub_(index_t n, DType *in, DType b, DType *dst) { + for (index_t i = 0; i < n; i++) + dst[i] = in[i] - b; +} + +template +MSHADOW_XINLINE static void div_(index_t n, DType *in, DType b, DType *dst) { + for (index_t i = 0; i < n; i++) + dst[i] = in[i] / b; +} + +template +MSHADOW_XINLINE static void sum_(index_t n, DType *in, DType *dst) { + // dst[0] = cblas_sasum(n, in, 1); + DType sum = 0.0f; + for (index_t i = 0; i < n; i++) + sum += in[i]; + + dst[0] = sum; +} + +template +MSHADOW_XINLINE static void max_(int n, DType * __restrict__ in, DType *dst) { + dst[0] = in[0]; + for (int i = 1; i < n; i++) + dst[0] = (dst[0] < in[i]) ? in[i] : dst[0]; +} + +// LayerNorm on the last dimension +template +MSHADOW_XINLINE static void LayerNormLastDim(const index_t m, + const index_t n, + const DType *a, + const DType *b, + const DType *ws, + const DType *gamma, + const DType *beta, + const DType *mean, + const DType *var, + const DType eps) { +#pragma omp parallel for + for (index_t i = 0; i < m; i++) { + DType* in_offset = a + i * n; + DType* out_offset = b + i * n; + DType* ws_offset = ws + i * n; + + sum_(n, in_offset, &(mean[i])); + mean[i] /= n; + sub_(n, in_offset, mean[i], out_offset); + square(n, out_offset, ws_offset); + sum_(n, ws_offset, &(var[i])); + var[i] = sqrt(var[i] / n + eps); + + mul(n, out_offset, gamma, out_offset); + div_(n, out_offset, var[i], out_offset); + add(n, out_offset, beta, out_offset); + } +} + +// softmax on the last dimension +template +MSHADOW_XINLINE static void SoftmaxLastDim(const index_t m, + const index_t n, + const DType *a, + const DType *b) { +#pragma omp paralle for + for (index_t i = 0; i < m; i++) { + DType* in_offset = a + i * n; + DType* out_offset = b + i * n; + + exp(n, in_offset, out_offset); + float sum = 0.0f; + sum_(n, out_offset, &sum); + div_(n, out_offset, sum, out_offset); + } +} + +template +MSHADOW_XINLINE static void LogSoftmaxLastDim(const index_t m, + const index_t n, + const DType *a, + const DType *b) { +#pragma parallel for + for (index_t i = 0; i < m; i++) { + DType* in_offset = a + i * n; + DType* out_offset = b + i * n; + + DType b, logsum; + max_(n, in_offset, &b); + sub_(n, in_offset, b, out_offset); + exp(n, out_offset, out_offset); + sum_(n, out_offset, &logsum); + logsum = b + logf(logsum); + sub_(n, in_offset, logsum, out_offset); + } +} + } // namespace mkl_func } // namespace op } // namespace mxnet diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index 600803c953f6..d2d221bbd628 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -390,7 +390,7 @@ class UnaryOp : public OpBase { mkl_func::check_type(type_flag)) { // set DType as float or double according to type_flag MSHADOW_SGL_DBL_TYPE_SWITCH(type_flag, DType, { - MKL_OP::Map(input_size, inputs[0].dptr(), outputs[0].dptr()); + MKL_OP::Vectorize(input_size, inputs[0].dptr(), outputs[0].dptr()); }); } else { Compute(attrs, ctx, inputs, req, outputs); @@ -562,7 +562,7 @@ struct ReshapeLikeParam : public dmlc::Parameter { NNVM_REGISTER_OP(__name$) \ .set_num_inputs(1) \ .set_num_outputs(1) \ - 
.set_attr("FInferShape", ElemwiseShape<1, 1>) \ + .set_attr("FInferShape", ElemwiseShape<1, 1>) \ .set_attr("FInferType", ElemwiseType<1, 1>) \ .set_attr("FInplaceOption", \ [](const NodeAttrs& attrs){ \ @@ -578,7 +578,7 @@ struct ReshapeLikeParam : public dmlc::Parameter { NNVM_REGISTER_OP(__name$) \ .set_num_inputs(1) \ .set_num_outputs(1) \ - .set_attr("FInferShape", ElemwiseShape<1, 1>) \ + .set_attr("FInferShape", ElemwiseShape<1, 1>) \ .set_attr("FInferType", ElemwiseType<1, 1>) \ .set_attr("FInplaceOption", \ [](const NodeAttrs& attrs){ \ @@ -591,27 +591,27 @@ struct ReshapeLikeParam : public dmlc::Parameter { * * With this macro means mxnet compile with MKL to accelerate math function with mkl. * * Will Register FCompute with UnaryOp::MKL_Compute() to compelet the math function. */ - #define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(__name$, __xpu$, __kernel$, __mkl_kernel$) \ - MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \ - MXNET_ADD_SPARSE_OP_ALIAS(__name$) \ - .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) \ - .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \ - .set_attr("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>) - - /*! \bried MKL Unary compute. - * * With this macro means mxnet compile with MKL to accelerate math function with mkl. - * * Will Register FCompute with UnaryOp::MKL_Compute() to compelet the math function. - */ - #define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP(__name$, __xpu$, __kernel$, __mkl_kernel$) \ - MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \ - MXNET_ADD_SPARSE_OP_ALIAS(__name$) \ - .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, false>) \ - .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \ - .set_attr("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kerbel$>) - - #define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(__name$, __xpu$, __kernel$, __mkl_kernel$) \ - MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \ - .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) +#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(__name$, __xpu$, __kernel$, __mkl_kernel$) \ + MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \ + MXNET_ADD_SPARSE_OP_ALIAS(__name$) \ + .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) \ + .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \ + .set_attr("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>) + +/*! \bried MKL Unary compute. + * * With this macro means mxnet compile with MKL to accelerate math function with mkl. + * * Will Register FCompute with UnaryOp::MKL_Compute() to compelet the math function. +*/ +#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP(__name$, __xpu$, __kernel$, __mkl_kernel$) \ + MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \ + MXNET_ADD_SPARSE_OP_ALIAS(__name$) \ + .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, false>)\ + .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \ + .set_attr("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kerbel$>) + +#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(__name$, __xpu$, __kernel$, __mkl_kernel$)\ + MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \ + .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) #endif /*! 
\brief Unary compute, with FComputeEx for csr and rsp available */ From c69a25c7ec5694a7002a3e44bc2e6821156246d4 Mon Sep 17 00:00:00 2001 From: shufan wu Date: Sat, 9 Mar 2019 16:16:55 +0800 Subject: [PATCH 06/10] Fix review comments --- src/operator/tensor/elemwise_unary_op.h | 42 +++++++++---------- .../tensor/elemwise_unary_op_basic.cc | 35 ++++------------ 2 files changed, 30 insertions(+), 47 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index d2d221bbd628..26938404018c 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -591,27 +591,27 @@ struct ReshapeLikeParam : public dmlc::Parameter { * * With this macro means mxnet compile with MKL to accelerate math function with mkl. * * Will Register FCompute with UnaryOp::MKL_Compute() to compelet the math function. */ -#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(__name$, __xpu$, __kernel$, __mkl_kernel$) \ - MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \ - MXNET_ADD_SPARSE_OP_ALIAS(__name$) \ - .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) \ - .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \ - .set_attr("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>) - -/*! \bried MKL Unary compute. - * * With this macro means mxnet compile with MKL to accelerate math function with mkl. - * * Will Register FCompute with UnaryOp::MKL_Compute() to compelet the math function. -*/ -#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP(__name$, __xpu$, __kernel$, __mkl_kernel$) \ - MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \ - MXNET_ADD_SPARSE_OP_ALIAS(__name$) \ - .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, false>)\ - .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \ - .set_attr("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kerbel$>) - -#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(__name$, __xpu$, __kernel$, __mkl_kernel$)\ - MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \ - .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) + #define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(__name$, __xpu$, __kernel$, __mkl_kernel$) \ + MXNET_OPERATOR_REGISTER_UNARY(__name$) \ + MXNET_ADD_SPARSE_OP_ALIAS(__name$) \ + .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) \ + .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \ + .set_attr("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>) + + /*! \bried MKL Unary compute. + * * With this macro means mxnet compile with MKL to accelerate math function with mkl. + * * Will Register FCompute with UnaryOp::MKL_Compute() to compelet the math function. 
+ */ + #define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP(__name$, __xpu$, __kernel$, __mkl_kernel$) \ + MXNET_OPERATOR_REGISTER_UNARY(__name$) \ + MXNET_ADD_SPARSE_OP_ALIAS(__name$) \ + .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, false>) \ + .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \ + .set_attr("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kerbel$>) + + #define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(__name$, __xpu$, __kernel$, __mkl_kernel$) \ + MXNET_OPERATOR_REGISTER_UNARY(__name$) \ + .set_attr("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) #endif /*! \brief Unary compute, with FComputeEx for csr and rsp available */ diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index dc876a02a2b4..8e37a488eebc 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -929,9 +929,9 @@ The storage type of ``cbrt`` output depends upon the input storage type: MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_cbrt, unary_bwd); + // erf -#if MSHADOW_USE_MKL == 1 -MXNET_MKL_OPERATOR_REGISTER_UNARY(erf) +MXNET_OPERATOR_REGISTER_UNARY(erf) .describe(R"code(Returns element-wise gauss error function of the input. Example:: @@ -939,20 +939,13 @@ Example:: erf([0, -1., 10.]) = [0., -0.8427, 1.] )code" ADD_FILELINE) +#if MSHADOW_USE_MKL == 1 .set_attr("FCompute", UnaryOp::MKL_Compute) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_erf"}); #else -MXNET_OPERATOR_REGISTER_UNARY(erf) -.describe(R"code(Returns element-wise gauss error function of the input. - -Example:: - - erf([0, -1., 10.]) = [0., -0.8427, 1.] - -)code" ADD_FILELINE) .set_attr("FCompute", UnaryOp::Compute) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_erf"}); #endif // MSHADOW_USE_MKL == 1 +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_erf"}); + MXNET_OPERATOR_REGISTER_BINARY(_backward_erf) .set_attr("FCompute", @@ -1030,18 +1023,6 @@ The storage type of ``exp`` output is always dense #endif // log -#if MSHADOW_USE_MKL == 1 -MXNET_MKL_OPERATOR_REGISTER_UNARY(log) -.describe(R"code(Returns element-wise Natural logarithmic value of the input. - -The natural logarithm is logarithm in base *e*, so that ``log(exp(x)) = x`` - -The storage type of ``log`` output is always dense - -)code" ADD_FILELINE) -.set_attr("FCompute", UnaryOp::MKL_Compute) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_log"}); -#else MXNET_OPERATOR_REGISTER_UNARY(log) MXNET_ADD_SPARSE_OP_ALIAS(log) .describe(R"code(Returns element-wise Natural logarithmic value of the input. 
@@ -1051,10 +1032,12 @@ The natural logarithm is logarithm in base *e*, so that ``log(exp(x)) = x`` The storage type of ``log`` output is always dense )code" ADD_FILELINE) +#if MSHADOW_USE_MKL == 1 +.set_attr("FCompute", UnaryOp::MKL_Compute) +#else .set_attr("FCompute", UnaryOp::Compute) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_log"}); #endif // MSHADOW_USE_MKL == 1 - +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_log"}); // log10 MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(log10, cpu, mshadow_op::log10) From 2c5c20c694f440a8a9b9335a634a76d70402f03d Mon Sep 17 00:00:00 2001 From: shufan wu Date: Sat, 9 Mar 2019 16:34:04 +0800 Subject: [PATCH 07/10] remove unecessary code --- src/operator/tensor/elemwise_unary_op.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index 26938404018c..82a9aa9fe7a1 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -570,22 +570,6 @@ struct ReshapeLikeParam : public dmlc::Parameter { }) \ .add_argument("data", "NDArray-or-Symbol", "The input array.") -/*! \bried MKL Unary compute. - * With this macro means mxnet compile with MKL to accelerate math function with mkl. - * Will Register FCompute with UnaryOp::MKL_Compute() to compelet the math function. - */ -#define MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \ - NNVM_REGISTER_OP(__name$) \ - .set_num_inputs(1) \ - .set_num_outputs(1) \ - .set_attr("FInferShape", ElemwiseShape<1, 1>) \ - .set_attr("FInferType", ElemwiseType<1, 1>) \ - .set_attr("FInplaceOption", \ - [](const NodeAttrs& attrs){ \ - return std::vector >{{0, 0}}; \ - }) \ - .add_argument("data", "NDArray-or-Symbol", "The input array.") - #if MSHADOW_USE_MKL == 1 /*! \bried MKL Unary compute. * * With this macro means mxnet compile with MKL to accelerate math function with mkl. 
From b1b635558f68a1fec7f1bc287e7d2a1e9f5e7265 Mon Sep 17 00:00:00 2001 From: shufan wu Date: Sun, 10 Mar 2019 09:22:18 +0800 Subject: [PATCH 08/10] Update test case --- tests/python/gpu/test_operator_gpu.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index b037218dc07f..08902563c9b0 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -2200,7 +2200,7 @@ def test_context_num_gpus(): assert mx.context.num_gpus() > 0 def math_log(shape, dtype, check_value): - np_x = np.random.rand(shape[0], shape[1]) + np_x = np.random.rand(*tuple(shape)) x = mx.nd.array(np_x, dtype=dtype) mx.nd.waitall() y = mx.nd.log(data=x) @@ -2213,7 +2213,7 @@ def math_log(shape, dtype, check_value): assert_almost_equal(y.asnumpy(), y_.asnumpy()) def math_erf(shape, dtype, check_value): - np_x = np.random.rand(shape[0], shape[1]) + np_x = np.random.rand(*tuple(shape)) x = mx.nd.array(np_x, dtype=dtype) mx.nd.waitall() y = mx.nd.erf(data=x) @@ -2226,7 +2226,7 @@ def math_erf(shape, dtype, check_value): assert_almost_equal(y.asnumpy(), y_.asnumpy()) def math_square(shape, dtype, check_value): - np_x = np.random.rand(shape[0], shape[1]) + np_x = np.random.rand(*tuple(shape)) x = mx.nd.array(np_x, dtype=dtype) mx.nd.waitall() y = mx.nd.square(data=x) @@ -2252,12 +2252,9 @@ def run_math(op, shape, dtype="float32", check_value=True): def test_math(): ops = ['log', 'erf', 'square'] check_value= True - lshape = 1000 - rshapes = [1, 10, 100, 1000, 10000] + shape_lst = [[1000], [100,1000], [10,100,100], [10,100,100,100]] dtypes = ["float32", "float64"] - for rshape in rshapes: - shape = (lshape, rshape) - print("shape:(%d, %d), " % (lshape, rshape), end="") + for shape in shape_lst: for dtype in dtypes: for op in ops: run_math(op, shape, dtype, check_value=check_value) From f96c34a326c70a846fb7a3bca58d712af59d1817 Mon Sep 17 00:00:00 2001 From: shufan wu Date: Mon, 11 Mar 2019 11:56:17 +0800 Subject: [PATCH 09/10] minor fix --- tests/python/gpu/test_operator_gpu.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index 08902563c9b0..533d7b0ed655 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -2202,40 +2202,28 @@ def test_context_num_gpus(): def math_log(shape, dtype, check_value): np_x = np.random.rand(*tuple(shape)) x = mx.nd.array(np_x, dtype=dtype) - mx.nd.waitall() y = mx.nd.log(data=x) - y.wait_to_read() if check_value: x_ = x.as_in_context(mx.cpu()) - mx.nd.waitall() y_ = mx.nd.log(data=x_) - y_.wait_to_read() assert_almost_equal(y.asnumpy(), y_.asnumpy()) def math_erf(shape, dtype, check_value): np_x = np.random.rand(*tuple(shape)) x = mx.nd.array(np_x, dtype=dtype) - mx.nd.waitall() y = mx.nd.erf(data=x) - y.wait_to_read() if check_value: x_ = x.as_in_context(mx.cpu()) - mx.nd.waitall() y_ = mx.nd.erf(data=x_) - y_.wait_to_read() assert_almost_equal(y.asnumpy(), y_.asnumpy()) def math_square(shape, dtype, check_value): np_x = np.random.rand(*tuple(shape)) x = mx.nd.array(np_x, dtype=dtype) - mx.nd.waitall() y = mx.nd.square(data=x) - y.wait_to_read() if check_value: x_ = x.as_in_context(mx.cpu()) - mx.nd.waitall() y_ = mx.nd.square(data=x_) - y_.wait_to_read() assert_almost_equal(y.asnumpy(), y_.asnumpy()) def run_math(op, shape, dtype="float32", check_value=True): From 06c51e974cbaa70939e6ac82c98e5ee1b9a6bdf3 Mon Sep 17 
00:00:00 2001
From: shufan wu
Date: Thu, 18 Apr 2019 22:02:25 +0800
Subject: [PATCH 10/10] move the position of MKL_Compute

---
 src/operator/tensor/elemwise_unary_op.h | 43 ++++++++++++-------------
 1 file changed, 21 insertions(+), 22 deletions(-)

diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h
index 82a9aa9fe7a1..279efcf97084 100644
--- a/src/operator/tensor/elemwise_unary_op.h
+++ b/src/operator/tensor/elemwise_unary_op.h
@@ -266,6 +266,27 @@ class UnaryOp : public OpBase {
   }
 
 #if MSHADOW_USE_MKL == 1
+  template<typename OP, typename MKL_OP>
+  static void MKL_Compute(const nnvm::NodeAttrs& attrs,
+                          const OpContext& ctx,
+                          const std::vector<TBlob>& inputs,
+                          const std::vector<OpReqType>& req,
+                          const std::vector<TBlob>& outputs) {
+    if (req[0] == kNullOp) return;
+    auto type_flag = inputs[0].type_flag_;
+    size_t input_size = inputs[0].Size();
+    if ((req[0] == kWriteTo || req[0] == kWriteInplace) &&
+        mkl_func::check_size(input_size) &&
+        mkl_func::check_type(type_flag)) {
+      // set DType as float or double according to type_flag
+      MSHADOW_SGL_DBL_TYPE_SWITCH(type_flag, DType, {
+        MKL_OP::Vectorize(input_size, inputs[0].dptr<DType>(), outputs[0].dptr<DType>());
+      });
+    } else {
+      Compute<cpu, OP>(attrs, ctx, inputs, req, outputs);
+    }
+  }
+
   template<typename OP, typename MKL_OP>
   static void MKL_ComputeEx(const nnvm::NodeAttrs& attrs,
                             const OpContext& ctx,
@@ -375,28 +396,6 @@ class UnaryOp : public OpBase {
     }
   }
 
-#if MSHADOW_USE_MKL == 1
-  template<typename OP, typename MKL_OP>
-  static void MKL_Compute(const nnvm::NodeAttrs& attrs,
-                          const OpContext& ctx,
-                          const std::vector<TBlob>& inputs,
-                          const std::vector<OpReqType>& req,
-                          const std::vector<TBlob>& outputs) {
-    if (req[0] == kNullOp) return;
-    auto type_flag = inputs[0].type_flag_;
-    size_t input_size = inputs[0].Size();
-    if ((req[0] == kWriteTo || req[0] == kWriteInplace) &&
-        mkl_func::check_size(input_size) &&
-        mkl_func::check_type(type_flag)) {
-      // set DType as float or double according to type_flag
-      MSHADOW_SGL_DBL_TYPE_SWITCH(type_flag, DType, {
-        MKL_OP::Vectorize(input_size, inputs[0].dptr<DType>(), outputs[0].dptr<DType>());
-      });
-    } else {
-      Compute<cpu, OP>(attrs, ctx, inputs, req, outputs);
-    }
-  }
-#endif  // MSHADOW_USE_MKL == 1
 };
 
 /*! \brief Map legacy unary_bwd to backward_grad */
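
Reviewer note (not part of the patch series): the GPU test added above compares mx.nd.log/erf/square on GPU against the CPU path, so it would not catch a wrong result from the MKL-accelerated CPU kernels themselves. A minimal CPU-only sanity check against NumPy/SciPy references is sketched below. It assumes an MKL-enabled MXNet build and an installed SciPy; the helper name check_mkl_math is illustrative only.

# Illustrative sketch, not part of this patch series.
# Compares the MKL-accelerated CPU log/erf/square against NumPy/SciPy references.
import numpy as np
import mxnet as mx
from scipy.special import erf as scipy_erf

def check_mkl_math(shape=(1000, 100), dtype="float32", rtol=1e-5, atol=1e-6):
    # keep inputs positive so log is well defined
    np_x = np.random.rand(*shape).astype(dtype) + 0.5
    x = mx.nd.array(np_x, ctx=mx.cpu(), dtype=dtype)
    cases = {
        "log": (mx.nd.log, np.log),
        "erf": (mx.nd.erf, scipy_erf),
        "square": (mx.nd.square, np.square),
    }
    for name, (mx_op, np_op) in cases.items():
        y = mx_op(x).asnumpy()
        np.testing.assert_allclose(y, np_op(np_x), rtol=rtol, atol=atol)
        print("%s ok for shape %s, dtype %s" % (name, shape, dtype))

if __name__ == "__main__":
    for dt in ("float32", "float64"):
        check_mkl_math(dtype=dt)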