diff --git a/docs/develop_guides/nnadapter.md b/docs/develop_guides/nnadapter.md
index 2ca5dfce224..cd7476a6d9e 100644
--- a/docs/develop_guides/nnadapter.md
+++ b/docs/develop_guides/nnadapter.md
@@ -1102,6 +1102,14 @@ typedef struct Device {
       W_out = (W_in - 1) * stride_width - padding_width_left - padding_width_right + (dilation_width * (filter_width - 1) + 1) + output_padding_width

+- NNADAPTER_COS
+
+  Element-wise cosine: `output` = cos(`input`).
+  - Inputs:
+    - 0: input, the input operand, of type NNADAPTER_FLOAT32 or NNADAPTER_QUANT_INT8_SYMM_PER_LAYER.
+  - Outputs:
+    - 0: output, the output operand, with the same shape and type as the input operand `input`.
+
 - NNADAPTER_CUM_SUM

   Computes the cumulative sum of the elements along the given axis `axis`.
@@ -1222,6 +1230,16 @@ typedef struct Device {
   - Outputs:
     - 0: output, the output operand, with the same shape and type as the input operand `input`.

+- NNADAPTER_FLOOR_DIV
+
+  Element-wise division rounded toward negative infinity: `output` = floor(`input0` / `input1`), following the same broadcasting rules as Numpy: https://numpy.org/doc/stable/user/basics.broadcasting.html.
+  - Inputs:
+    - 0: input0, input operand 0, of type NNADAPTER_FLOAT32 or NNADAPTER_QUANT_INT8_SYMM_PER_LAYER.
+    - 1: input1, input operand 1, with the same type as input operand `input0`.
+    - 2: fuse_code, the fused activation function type, shape: [1], type: NNADAPTER_INT32; any NNAdapterFuseCode value: NNADAPTER_FUSED_NONE, NNADAPTER_FUSED_RELU, NNADAPTER_FUSED_RELU1 or NNADAPTER_FUSED_RELU6.
+  - Outputs:
+    - 0: output, the output operand; its shape is the result of broadcasting the shapes of `input0` and `input1`, and its type is the same as `input0` and `input1`.
+
 - NNADAPTER_FULLY_CONNECTED

   Fully connected layer: `output` = activation(`input` * `weight`' + `bias`).
@@ -1678,6 +1696,14 @@ typedef struct Device {
   - Outputs:
     - 0: output, the output operand, with the same shape and type as the input operand `input`.

+- NNADAPTER_SIN
+
+  Element-wise sine: `output` = sin(`input`).
+  - Inputs:
+    - 0: input, the input operand, of type NNADAPTER_FLOAT32 or NNADAPTER_QUANT_INT8_SYMM_PER_LAYER.
+  - Outputs:
+    - 0: output, the output operand, with the same shape and type as the input operand `input`.
+
 - NNADAPTER_SLICE

   Produces slices of `input` along multiple axes, similar to numpy: https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html. For each axis `axes[i]`, a slice of `input` is taken using `starts[i]`, `ends[i]` and `step` as the start, end and stride. If `starts[i]` or `ends[i]` is negative, the dimension `dims[axes[i]]` of the corresponding axis `axes[i]` of `input` is added to it. If `starts[i]` or `ends[i]` is greater than `dims[axes[i]]`, it is truncated to `dims[axes[i]] - 1`. If the dimension `dims[axes[i]]` is unknown, it is recommended to set `ends[i]` to `INT_MAX` (or `INT_MIN` when slicing in reverse).
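Note (editor's example, not part of the patch): the floor in the formula above is significant for operands of opposite sign, which is exactly where C++'s truncating division differs. A standalone check:

    #include <cmath>
    #include <cstdio>

    int main() {
      float a = -7.0f, b = 2.0f;
      // trunc rounds toward zero, floor rounds toward negative infinity.
      std::printf("trunc(a/b) = %g\n", std::trunc(a / b));  // -3
      std::printf("floor(a/b) = %g\n", std::floor(a / b));  // -4
      return 0;
    }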
diff --git a/lite/backends/nnadapter/nnadapter/include/nnadapter/nnadapter.h b/lite/backends/nnadapter/nnadapter/include/nnadapter/nnadapter.h
index fba2d2cbc00..26560453659 100644
--- a/lite/backends/nnadapter/nnadapter/include/nnadapter/nnadapter.h
+++ b/lite/backends/nnadapter/nnadapter/include/nnadapter/nnadapter.h
@@ -540,6 +540,21 @@ typedef enum {
    */
   NNADAPTER_CONV_2D_TRANSPOSE,

+  /**
+   * Performs element-wise cosine calculation.
+   * The output is calculated using this formula: `output` = cos(`input`)
+   *
+   * Inputs:
+   * * 0: input, a NNADAPTER_FLOAT32,
+   *      NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
+   *
+   * Outputs:
+   * * 0: output, a tensor of the same shape and type as `input`.
+   *
+   * Available since version 1.
+   */
+  NNADAPTER_COS,
+
   /**
    * Performs cumulative sum of the input elements along the given `axis`.
    *
@@ -784,6 +799,28 @@ typedef enum {
    */
   NNADAPTER_FLOOR,

+  /**
+   * Performs element-wise binary floor divide (with Numpy-style broadcasting
+   * https://numpy.org/doc/stable/user/basics.broadcasting.html).
+   * The output is calculated using this formula:
+   * `output` = floor(`input0` / `input1`)
+   *
+   * Inputs:
+   * * 0: input0, a NNADAPTER_FLOAT32, NNADAPTER_QUANT_INT8_SYMM_PER_LAYER
+   *      tensor.
+   * * 1: input1, a tensor of the compatible shape and the same type as
+   *      `input0`.
+   * * 2: fuse_code, a NNADAPTER_INT32 tensor of shape [1], specifies the
+   *      activation to the result, must be one of NNAdapterFuseCode values.
+   *
+   * Outputs:
+   * * 0: output, a tensor of the compatible shape and type as `input0` and
+   *      `input1`.
+   *
+   * Available since version 1.
+   */
+  NNADAPTER_FLOOR_DIV,
+
   /**
    * Add a fully connected layer.
    * The output is calculated using this formula:
@@ -1759,6 +1796,21 @@ typedef enum {
    */
   NNADAPTER_SIGMOID,

+  /**
+   * Performs element-wise sine calculation.
+   * The output is calculated using this formula: `output` = sin(`input`)
+   *
+   * Inputs:
+   * * 0: input, a NNADAPTER_FLOAT32,
+   *      NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
+   *
+   * Outputs:
+   * * 0: output, a tensor of the same shape and type as `input`.
+   *
+   * Available since version 1.
+   */
+  NNADAPTER_SIN,
+
   /**
    * Produces a slice of `input` along multiple axes. Similar to numpy:
    * https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html.
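Note (editor's sketch): the Numpy broadcasting rule referenced twice above aligns shapes from the trailing dimension and requires each aligned pair to be equal or to contain a 1. `BroadcastShape` below is a hypothetical helper, not an NNAdapter API:

    #include <algorithm>
    #include <cstdint>
    #include <stdexcept>
    #include <vector>

    std::vector<int32_t> BroadcastShape(std::vector<int32_t> x,
                                        std::vector<int32_t> y) {
      if (x.size() < y.size()) std::swap(x, y);  // make x the longer shape
      std::vector<int32_t> out(x);
      auto offset = x.size() - y.size();
      for (size_t i = 0; i < y.size(); i++) {
        auto a = x[offset + i], b = y[i];
        // Aligned dimensions must match, or one of them must be 1.
        if (a != b && a != 1 && b != 1) throw std::runtime_error("incompatible");
        out[offset + i] = std::max(a, b);
      }
      return out;  // e.g. [2,3,4] and [3,1] -> [2,3,4]
    }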
diff --git a/lite/backends/nnadapter/nnadapter/include/nnadapter/operation/math/elementwise.h b/lite/backends/nnadapter/nnadapter/include/nnadapter/operation/math/elementwise.h
index 46099f72e7c..109c7b9f029 100644
--- a/lite/backends/nnadapter/nnadapter/include/nnadapter/operation/math/elementwise.h
+++ b/lite/backends/nnadapter/nnadapter/include/nnadapter/operation/math/elementwise.h
@@ -58,6 +58,32 @@ static int elementwise(ElementwiseTypeCode eltwise_type,
     for (int64_t i = 0; i < output_count; i++) {
       output_data[i] = broadcasted_input0_data[i] * broadcasted_input1_data[i];
     }
+  } else if (eltwise_type == FLOOR_DIV) {
+    for (int64_t i = 0; i < output_count; i++) {
+      output_data[i] = static_cast<T>(
+          ::floor(broadcasted_input0_data[i] / broadcasted_input1_data[i]));
+    }
+  } else if (eltwise_type == DIV) {
+    for (int64_t i = 0; i < output_count; i++) {
+      output_data[i] = broadcasted_input0_data[i] / broadcasted_input1_data[i];
+    }
+  } else if (eltwise_type == MAX) {
+    for (int64_t i = 0; i < output_count; i++) {
+      output_data[i] = broadcasted_input0_data[i] > broadcasted_input1_data[i]
+                           ? broadcasted_input0_data[i]
+                           : broadcasted_input1_data[i];
+    }
+  } else if (eltwise_type == MIN) {
+    for (int64_t i = 0; i < output_count; i++) {
+      output_data[i] = broadcasted_input0_data[i] > broadcasted_input1_data[i]
+                           ? broadcasted_input1_data[i]
+                           : broadcasted_input0_data[i];
+    }
+  } else if (eltwise_type == POW) {
+    for (int64_t i = 0; i < output_count; i++) {
+      output_data[i] =
+          ::pow(broadcasted_input0_data[i], broadcasted_input1_data[i]);
+    }
   } else {
     return -1;
   }
diff --git a/lite/backends/nnadapter/nnadapter/include/nnadapter/operation/math/utility.h b/lite/backends/nnadapter/nnadapter/include/nnadapter/operation/math/utility.h
index 056c3675a4f..4bb041f8fbf 100644
--- a/lite/backends/nnadapter/nnadapter/include/nnadapter/operation/math/utility.h
+++ b/lite/backends/nnadapter/nnadapter/include/nnadapter/operation/math/utility.h
@@ -47,7 +47,8 @@ typedef enum {
   DIV = 4,
   MAX = 5,
   MIN = 6,
-  POW = 7
+  POW = 7,
+  FLOOR_DIV = 8
 } ElementwiseTypeCode;

 // Fused activation function types
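Note (editor's sketch, not part of the patch): combined with a fuse_code, the FLOOR_DIV branch above amounts to the following per-element computation once both inputs are broadcast to a common shape; the real kernel dispatches on eltwise_type and applies the fused activation separately:

    #include <cmath>
    #include <vector>

    template <typename T>
    std::vector<T> FloorDivRelu(const std::vector<T>& x,
                                const std::vector<T>& y) {
      std::vector<T> out(x.size());
      for (size_t i = 0; i < x.size(); i++) {
        // Divide, round toward negative infinity, then apply the fused ReLU.
        T v = static_cast<T>(::floor(x[i] / y[i]));
        out[i] = v > T(0) ? v : T(0);  // NNADAPTER_FUSED_RELU
      }
      return out;
    }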
diff --git a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/all.h b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/all.h
index 587b330452e..fdafb65cd91 100644
--- a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/all.h
+++ b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/all.h
@@ -31,6 +31,7 @@ REGISTER_CONVERTER(CLIP, ConvertClip)
 REGISTER_CONVERTER(CONCAT, ConvertConcat)
 REGISTER_CONVERTER(CONV_2D, ConvertConv2D)
 REGISTER_CONVERTER(CONV_2D_TRANSPOSE, ConvertConv2DTranspose)
+REGISTER_CONVERTER(COS, ConvertUnaryActivations)
 REGISTER_CONVERTER(CUM_SUM, ConvertCumSum)
 REGISTER_CONVERTER(DEFORMABLE_CONV_2D, ConvertDeformableConv2d)
 REGISTER_CONVERTER(DEQUANTIZE, ConvertDequantize)
@@ -42,6 +43,7 @@ REGISTER_CONVERTER(FILL, ConvertFill)
 REGISTER_CONVERTER(FILL_LIKE, ConvertFillLike)
 REGISTER_CONVERTER(FLATTEN, ConvertFlatten)
 REGISTER_CONVERTER(FLOOR, ConvertUnaryActivations)
+REGISTER_CONVERTER(FLOOR_DIV, ConvertElementwise)
 REGISTER_CONVERTER(FULLY_CONNECTED, ConvertFullyConnected)
 REGISTER_CONVERTER(GATHER, ConvertGather)
 REGISTER_CONVERTER(GELU, ConvertGelu)
@@ -82,6 +84,7 @@ REGISTER_CONVERTER(RESIZE_NEAREST, ConvertResizeNearest)
 REGISTER_CONVERTER(ROI_ALIGN, ConvertRoiAlign)
 REGISTER_CONVERTER(SHAPE, ConvertShape)
 REGISTER_CONVERTER(SIGMOID, ConvertUnaryActivations)
+REGISTER_CONVERTER(SIN, ConvertUnaryActivations)
 REGISTER_CONVERTER(SLICE, ConvertSlice)
 REGISTER_CONVERTER(SOFTMAX, ConvertSoftmax)
 REGISTER_CONVERTER(SOFTPLUS, ConvertSoftplus)
diff --git a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/elementwise.cc b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/elementwise.cc
index d9df425174c..5d52d594586 100644
--- a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/elementwise.cc
+++ b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/elementwise.cc
@@ -49,6 +49,7 @@ int ConvertElementwise(Converter* converter, core::Operation* operation) {
     CONVERT_ELEMENTWISE(MAX, Maximum);
     CONVERT_ELEMENTWISE(MIN, Minimum);
     CONVERT_ELEMENTWISE(POW, Pow);
+    CONVERT_ELEMENTWISE(FLOOR_DIV, FloorDiv);
 #undef CONVERT_ELEMENTWISE
     default:
       NNADAPTER_LOG(FATAL) << "Unsupported element-wise operation type "
diff --git a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/expand.cc b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/expand.cc
index 8432349069b..3c2bf7eab14 100644
--- a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/expand.cc
+++ b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/expand.cc
@@ -63,10 +63,10 @@ int ConvertExpand(Converter* converter, core::Operation* operation) {
                                 shape_data);
     shape_operator = converter->AddInt32ConstantOperator(expand_shape);
   } else {
-    NNADAPTER_LOG(FATAL) << "Unsupported shape lifetime: "
-                         << OperandLifetimeCodeToString(
-                                shape_operand->type.lifetime);
-    return NNADAPTER_INVALID_PARAMETER;
+    shape_operator = converter->GetMappedOperator(shape_operand);
+    if (!shape_operator) {
+      shape_operator = converter->ConvertOperand(shape_operand);
+    }
   }
   auto expand_op = converter->AddOperator<ge::op::Expand>(output_operand);
   SET_INPUT(expand_op, x, input_operator);
diff --git a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/mat_mul.cc b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/mat_mul.cc
index 34d1f5a3af6..bf6f8ccf553 100644
--- a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/mat_mul.cc
+++ b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/mat_mul.cc
@@ -24,7 +24,6 @@ int ConvertMatMul(Converter* converter, core::Operation* operation) {
   MAT_MUL_OPERATION_EXTRACT_INPUTS_OUTPUTS
   // TODO(zhupengyang): support by reshape or squeeze
   NNADAPTER_CHECK_NE(x_operand->type.dimensions.count, 1);
-  NNADAPTER_CHECK_NE(y_operand->type.dimensions.count, 1);

   // Convert to GE operators
   auto x_operator = converter->GetMappedOperator(x_operand);
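Note: the expand.cc change above and the reshape.cc change below replace a hard failure with the same lookup-or-convert fallback. Factored out, the pattern looks roughly like this (editor's sketch reusing the converter types visible in these diffs, not code from the patch):

    // Editor's sketch: resolve an operand to a GE operator, converting it
    // the first time it is seen. 'Converter', 'Operator' and 'core::Operand'
    // are the types already used in the diffs above.
    std::shared_ptr<Operator> GetOrConvert(Converter* converter,
                                           core::Operand* operand) {
      auto op = converter->GetMappedOperator(operand);   // already converted?
      if (!op) op = converter->ConvertOperand(operand);  // convert on first use
      return op;
    }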
diff --git a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/reshape.cc b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/reshape.cc
index 9779cad5c68..8ff79b85c89 100644
--- a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/reshape.cc
+++ b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/reshape.cc
@@ -73,10 +73,17 @@ int ConvertReshape(Converter* converter, core::Operation* operation) {
   }
   std::shared_ptr<Operator> shape_operator = nullptr;
   if (IsTemporaryShapeOperand(shape_operand)) {
-    auto& temporary_shape = *(GetTemporaryShape(shape_operand));
-    auto shape_count = temporary_shape.count;
-    auto shape = GetShape(input_operand, output_operand, shape_count);
-    shape_operator = converter->AddInt32ConstantOperator(shape);
+    if (IsOperandWithDynamicShape(shape_operand)) {
+      shape_operator = converter->GetMappedOperator(shape_operand);
+      if (!shape_operator) {
+        shape_operator = converter->ConvertOperand(shape_operand);
+      }
+    } else {
+      auto& temporary_shape = *(GetTemporaryShape(shape_operand));
+      auto shape_count = temporary_shape.count;
+      auto shape = GetShape(input_operand, output_operand, shape_count);
+      shape_operator = converter->AddInt32ConstantOperator(shape);
+    }
   } else if (IsConstantOperand(shape_operand)) {
     auto shape_count = shape_operand->length / sizeof(int32_t);
     auto shape = GetShape(input_operand, output_operand, shape_count);
diff --git a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/unary_activations.cc b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/unary_activations.cc
index 64c034fc33b..643949c7612 100644
--- a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/unary_activations.cc
+++ b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/unary_activations.cc
@@ -44,6 +44,8 @@ int ConvertUnaryActivations(Converter* converter, core::Operation* operation) {
     CONVERT_UNARY_ACTIVATION(EXP, Exp);
     CONVERT_UNARY_ACTIVATION(FLOOR, Floor);
     CONVERT_UNARY_ACTIVATION(SQUARE, Square);
+    CONVERT_UNARY_ACTIVATION(SIN, Sin);
+    CONVERT_UNARY_ACTIVATION(COS, Cos);
 #undef CONVERT_UNARY_ACTIVATION
     default:
       NNADAPTER_LOG(FATAL) << "Unsupported activation operation type "
diff --git a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/model_client.cc b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/model_client.cc
index e8b563fe95d..d34d28b119b 100644
--- a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/model_client.cc
+++ b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/model_client.cc
@@ -14,6 +14,9 @@

 #include "driver/huawei_ascend_npu/model_client.h"
+#if defined(LITE_WITH_PYTHON)
+#include <pybind11/pybind11.h>
+#endif
 #include
 #include
 #include
@@ -286,6 +289,9 @@ bool AclModelClient::Process(uint32_t input_count,
                              std::vector<NNAdapterOperandType>* output_types,
                              core::Argument* output_arguments,
                              DynamicShapeMode dynamic_shape_mode) {
+#if defined(LITE_WITH_PYTHON)
+  pybind11::gil_scoped_release no_gil;
+#endif
   if (!model_desc_) {
     NNADAPTER_LOG(FATAL) << "No ACL model is loaded.";
     return false;
diff --git a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/utility.cc b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/utility.cc
index a6ea9b67594..7244136396c 100644
--- a/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/utility.cc
+++ b/lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/utility.cc
@@ -13,6 +13,9 @@
 // limitations under the License.

 #include "driver/huawei_ascend_npu/utility.h"
+#if defined(LITE_WITH_PYTHON)
+#include <pybind11/pybind11.h>
+#endif
 #include
 #include  // NOLINT
 #include
@@ -237,6 +240,9 @@ bool BuildOMModelToBuffer(
     const std::string& optional_shape_str,
     const DynamicShapeMode dynamic_shape_mode,
     AscendConfigParams* config_params) {
+#if defined(LITE_WITH_PYTHON)
+  pybind11::gil_scoped_release no_gil;
+#endif
   // Should initialize the GE graph builder before model building
   InitializeGraphBuilder(config_params);
   // Convert the CANN IR graph to the CANN om model
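Note: the two GIL guards above matter when Lite is driven from a Python process: ACL model building and inference are long native calls, and holding the Python GIL across them would stall every other Python thread. A minimal standalone illustration of the pybind11 pattern (editor's example; assumes only pybind11 itself):

    #include <pybind11/pybind11.h>

    long long busy_work() {
      // Other Python threads may run while this native loop executes.
      pybind11::gil_scoped_release no_gil;
      long long acc = 0;
      for (int i = 0; i < 1 << 20; i++) acc += i;
      return acc;  // the GIL is re-acquired when 'no_gil' goes out of scope
    }

    PYBIND11_MODULE(gil_demo, m) { m.def("busy_work", &busy_work); }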
diff --git a/lite/backends/nnadapter/nnadapter/src/operation/all.h b/lite/backends/nnadapter/nnadapter/src/operation/all.h
index c3f4ae335e2..faa1d788992 100644
--- a/lite/backends/nnadapter/nnadapter/src/operation/all.h
+++ b/lite/backends/nnadapter/nnadapter/src/operation/all.h
@@ -65,6 +65,10 @@ REGISTER_OPERATION(CONV_2D_TRANSPOSE,
                    PrepareConv2DTranspose,
                    ExecuteConv2DTranspose)
 REGISTER_OPERATION(CUM_SUM, ValidateCumSum, PrepareCumSum, ExecuteCumSum)
+REGISTER_OPERATION(COS,
+                   ValidateUnaryActivations,
+                   PrepareUnaryActivations,
+                   ExecuteUnaryActivations)
 REGISTER_OPERATION(DEFORMABLE_CONV_2D,
                    ValidateDeformableConv2D,
                    PrepareDeformableConv2D,
@@ -95,6 +99,10 @@ REGISTER_OPERATION(FLOOR,
                    ValidateUnaryActivations,
                    PrepareUnaryActivations,
                    ExecuteUnaryActivations)
+REGISTER_OPERATION(FLOOR_DIV,
+                   ValidateElementwise,
+                   PrepareElementwise,
+                   ExecuteElementwise)
 REGISTER_OPERATION(FULLY_CONNECTED,
                    ValidateFullyConnected,
                    PrepareFullyConnected,
@@ -216,6 +224,10 @@ REGISTER_OPERATION(SIGMOID,
                    ValidateUnaryActivations,
                    PrepareUnaryActivations,
                    ExecuteUnaryActivations)
+REGISTER_OPERATION(SIN,
+                   ValidateUnaryActivations,
+                   PrepareUnaryActivations,
+                   ExecuteUnaryActivations)
 REGISTER_OPERATION(SLICE, ValidateSlice, PrepareSlice, ExecuteSlice)
 REGISTER_OPERATION(SOFTMAX, ValidateSoftmax, PrepareSoftmax, ExecuteSoftmax)
 REGISTER_OPERATION(SOFTPLUS, ValidateSoftplus, PrepareSoftplus, ExecuteSoftplus)
diff --git a/lite/backends/nnadapter/nnadapter/src/operation/cast.cc b/lite/backends/nnadapter/nnadapter/src/operation/cast.cc
index bce9035b122..21c06eee836 100644
--- a/lite/backends/nnadapter/nnadapter/src/operation/cast.cc
+++ b/lite/backends/nnadapter/nnadapter/src/operation/cast.cc
@@ -24,24 +24,93 @@ namespace nnadapter {
 namespace operation {

+template <typename InType, typename OutType>
+OutType TransOp(InType in) {
+  return static_cast<OutType>(in);
+}
+
 NNADAPTER_EXPORT bool ValidateCast(const core::Operation* operation) {
   return false;
 }

+NNADAPTER_EXPORT int ExecuteCast(core::Operation* operation) {
+  CAST_OPERATION_EXTRACT_INPUTS_OUTPUTS
+
+  // Allocate and calculate the output operands
+  auto output_buffer = AllocateOperand(output_operand);
+  auto size = ProductionOfDimensions(input_operand->type.dimensions.data,
+                                     input_operand->type.dimensions.count);
+  auto input_precision = input_operand->type.precision;
+  if (input_precision == NNADAPTER_INT32 && dtype == NNADAPTER_INT32) {
+    auto input_data = reinterpret_cast<int32_t*>(input_operand->buffer);
+    auto output_data = reinterpret_cast<int32_t*>(output_buffer);
+    memcpy(output_data, input_data, sizeof(int32_t) * size);
+  } else if (input_precision == NNADAPTER_INT64 && dtype == NNADAPTER_INT64) {
+    auto input_data = reinterpret_cast<int64_t*>(input_operand->buffer);
+    auto output_data = reinterpret_cast<int64_t*>(output_buffer);
+    memcpy(output_data, input_data, sizeof(int64_t) * size);
+  } else if (input_precision == NNADAPTER_FLOAT32 &&
+             dtype == NNADAPTER_FLOAT32) {
+    auto input_data = reinterpret_cast<float*>(input_operand->buffer);
+    auto output_data = reinterpret_cast<float*>(output_buffer);
+    memcpy(output_data, input_data, sizeof(float) * size);
+  } else if (input_precision == NNADAPTER_INT32 && dtype == NNADAPTER_INT64) {
+    auto input_data = reinterpret_cast<int32_t*>(input_operand->buffer);
+    auto output_data = reinterpret_cast<int64_t*>(output_buffer);
+    std::transform(
+        input_data, input_data + size, output_data, TransOp<int32_t, int64_t>);
+  } else if (input_precision == NNADAPTER_INT64 && dtype == NNADAPTER_INT32) {
+    auto input_data = reinterpret_cast<int64_t*>(input_operand->buffer);
+    auto output_data = reinterpret_cast<int32_t*>(output_buffer);
+    std::transform(
+        input_data, input_data + size, output_data, TransOp<int64_t, int32_t>);
+  } else if (input_precision == NNADAPTER_FLOAT32 &&
+             dtype == NNADAPTER_INT64) {
+    auto input_data = reinterpret_cast<float*>(input_operand->buffer);
+    auto output_data = reinterpret_cast<int64_t*>(output_buffer);
+    std::transform(
+        input_data, input_data + size, output_data, TransOp<float, int64_t>);
+  } else if (input_precision == NNADAPTER_INT64 &&
+             dtype == NNADAPTER_FLOAT32) {
+    auto input_data = reinterpret_cast<int64_t*>(input_operand->buffer);
+    auto output_data = reinterpret_cast<float*>(output_buffer);
+    std::transform(
+        input_data, input_data + size, output_data, TransOp<int64_t, float>);
+  } else if (input_precision == NNADAPTER_INT32 &&
+             dtype == NNADAPTER_FLOAT32) {
+    auto input_data = reinterpret_cast<int32_t*>(input_operand->buffer);
+    auto output_data = reinterpret_cast<float*>(output_buffer);
+    std::transform(
+        input_data, input_data + size, output_data, TransOp<int32_t, float>);
+  } else if (input_precision == NNADAPTER_FLOAT32 &&
+             dtype == NNADAPTER_INT32) {
+    auto input_data = reinterpret_cast<float*>(input_operand->buffer);
+    auto output_data = reinterpret_cast<int32_t*>(output_buffer);
+    std::transform(
+        input_data, input_data + size, output_data, TransOp<float, int32_t>);
+  } else {
+    NNADAPTER_LOG(FATAL) << "Unsupported input precision code("
+                         << OperandPrecisionCodeToString(input_precision)
+                         << ") and output precision code("
+                         << OperandPrecisionCodeToString(dtype) << ") for "
+                         << OperationTypeToString(operation->type)
+                         << " is found!";
+  }
+  return NNADAPTER_NO_ERROR;
+}
+
 NNADAPTER_EXPORT int PrepareCast(core::Operation* operation) {
   CAST_OPERATION_EXTRACT_INPUTS_OUTPUTS

   // Infer the shape and type of output operands
   CopyOperandTypeExceptQuantParams(&output_operand->type, input_operand->type);
   output_operand->type.precision = dtype;
-  SetTemporaryShape(output_operand, input_operand->type.dimensions);
+  if (IsTemporaryShapeOperand(input_operand)) {
+    SetTemporaryShape(output_operand, input_operand->type.dimensions);
+  } else if (IsConstantOperand(input_operand)) {
+    ExecuteCast(operation);
+    output_operand->type.lifetime = NNADAPTER_CONSTANT_COPY;
+  }
+
   NNADAPTER_VLOG(5) << "output: " << OperandToString(output_operand);
   return NNADAPTER_NO_ERROR;
 }

-NNADAPTER_EXPORT int ExecuteCast(core::Operation* operation) {
-  return NNADAPTER_FEATURE_NOT_SUPPORTED;
-}
-
 }  // namespace operation
 }  // namespace nnadapter
diff --git a/lite/backends/nnadapter/nnadapter/src/operation/elementwise.cc b/lite/backends/nnadapter/nnadapter/src/operation/elementwise.cc
index 51468b977fd..7b3e4b1e8b0 100644
--- a/lite/backends/nnadapter/nnadapter/src/operation/elementwise.cc
+++ b/lite/backends/nnadapter/nnadapter/src/operation/elementwise.cc
@@ -18,6 +18,7 @@
 #include "core/types.h"
 #include "operation/math/elementwise.h"
 #include "utility/debug.h"
+#include "utility/hints.h"
 #include "utility/logging.h"
 #include "utility/micros.h"
 #include "utility/modeling.h"
@@ -79,42 +80,17 @@ NNADAPTER_EXPORT void CalcEltwiseBinaryOperationsOutputSize(
 static std::unordered_map<NNAdapterOperationType, math::ElementwiseTypeCode>
     kSupportedElementwise = {{NNADAPTER_ADD, math::ADD},
                              {NNADAPTER_SUB, math::SUB},
-                             {NNADAPTER_MUL, math::MUL}};
+                             {NNADAPTER_MUL, math::MUL},
+                             {NNADAPTER_DIV, math::DIV},
+                             {NNADAPTER_MAX, math::MAX},
+                             {NNADAPTER_MIN, math::MIN},
+                             {NNADAPTER_POW, math::POW},
+                             {NNADAPTER_FLOOR_DIV, math::FLOOR_DIV}};

 NNADAPTER_EXPORT bool ValidateElementwise(const core::Operation* operation) {
   return kSupportedElementwise.count(operation->type) > 0;
 }

-NNADAPTER_EXPORT int PrepareElementwise(core::Operation* operation) {
-  ELEMENTWISE_OPERATION_EXTRACT_INPUTS_OUTPUTS
-
-  // Infer the shape and type of output operands
-  if (IsConstantOperand(input0_operand) && !IsConstantOperand(input1_operand)) {
-    input0_operand->type.dimensions.dynamic_count =
-        input1_operand->type.dimensions.dynamic_count;
-    for (size_t i = 0; i < input0_operand->type.dimensions.dynamic_count; i++) {
-      for (size_t j = 0; j < input1_operand->type.dimensions.count; j++) {
-        input0_operand->type.dimensions.dynamic_data[i][j] = 1;
-      }
-    }
-  } else if (IsConstantOperand(input1_operand) &&
-             !IsConstantOperand(input0_operand)) {
-    input1_operand->type.dimensions.dynamic_count =
-        input0_operand->type.dimensions.dynamic_count;
-    for (size_t i = 0; i < input1_operand->type.dimensions.dynamic_count; i++) {
-      for (size_t j = 0; j < input0_operand->type.dimensions.count; j++) {
-        input1_operand->type.dimensions.dynamic_data[i][j] = 1;
-      }
-    }
-  }
-
-  CalcEltwiseBinaryOperationsOutputSize(
-      input0_operand->type, input1_operand->type, &output_operand->type);
-  output_operand->type.precision = input0_operand->type.precision;
-  NNADAPTER_VLOG(5) << "output: " << OperandToString(output_operand);
-  return NNADAPTER_NO_ERROR;
-}
-
 NNADAPTER_EXPORT int ExecuteElementwise(core::Operation* operation) {
   if (!kSupportedElementwise.count(operation->type))
     return NNADAPTER_FEATURE_NOT_SUPPORTED;
@@ -244,5 +220,154 @@ NNADAPTER_EXPORT int ExecuteElementwise(core::Operation* operation) {
   return NNADAPTER_NO_ERROR;
 }

+NNADAPTER_EXPORT int PrepareElementwise(core::Operation* operation) {
+  ELEMENTWISE_OPERATION_EXTRACT_INPUTS_OUTPUTS
+
+  // Infer the shape and type of output operands
+  if (IsConstantOperand(input0_operand) &&
+      !IsConstantOperand(input1_operand)) {
+    input0_operand->type.dimensions.dynamic_count =
+        input1_operand->type.dimensions.dynamic_count;
+    for (size_t i = 0; i < input0_operand->type.dimensions.dynamic_count;
+         i++) {
+      for (size_t j = 0; j < input1_operand->type.dimensions.count; j++) {
+        input0_operand->type.dimensions.dynamic_data[i][j] = 1;
+      }
+    }
+  } else if (IsConstantOperand(input1_operand) &&
+             !IsConstantOperand(input0_operand)) {
+    input1_operand->type.dimensions.dynamic_count =
+        input0_operand->type.dimensions.dynamic_count;
+    for (size_t i = 0; i < input1_operand->type.dimensions.dynamic_count;
+         i++) {
+      for (size_t j = 0; j < input0_operand->type.dimensions.count; j++) {
+        input1_operand->type.dimensions.dynamic_data[i][j] = 1;
+      }
+    }
+  }
+
+  CalcEltwiseBinaryOperationsOutputSize(
+      input0_operand->type, input1_operand->type, &output_operand->type);
+  output_operand->type.precision = input0_operand->type.precision;
+  auto eltwise_type = kSupportedElementwise[operation->type];
+  if (IsConstantOperand(input0_operand) && IsConstantOperand(input1_operand)) {
+    ExecuteElementwise(operation);
+    output_operand->type.lifetime = NNADAPTER_CONSTANT_COPY;
+  } else if (IsTemporaryShapeOperand(input0_operand) &&
+             IsTemporaryShapeOperand(input1_operand)) {
+    auto& temporary_shape0 = *(GetTemporaryShape(input0_operand));
+    auto& temporary_shape1 = *(GetTemporaryShape(input1_operand));
+    NNADAPTER_CHECK(temporary_shape0.data);
+    NNADAPTER_CHECK(temporary_shape0.data[0]);
+    NNADAPTER_CHECK(temporary_shape1.data);
+    NNADAPTER_CHECK(temporary_shape1.data[0]);
+    NNAdapterOperandDimensionType dimension_type;
+    dimension_type.count = output_operand->type.dimensions.data[0];
+    dimension_type.dynamic_count =
+        input0_operand->type.dimensions.dynamic_count;
+    int status = -1;
+    status = math::elementwise(
+        eltwise_type,
+        temporary_shape0.data,
+        std::vector<int32_t>({static_cast<int32_t>(temporary_shape0.count)}),
+        temporary_shape1.data,
+        std::vector<int32_t>({static_cast<int32_t>(temporary_shape1.count)}),
+        static_cast<math::FuseCode>(fuse_code),
+        dimension_type.data);
+    NNADAPTER_CHECK_EQ(status, 0);
+    for (uint32_t i = 0; i < dimension_type.dynamic_count; i++) {
+      status = math::elementwise(
+          eltwise_type,
+          temporary_shape0.dynamic_data[i],
+          std::vector<int32_t>({static_cast<int32_t>(temporary_shape0.count)}),
+          temporary_shape1.dynamic_data[i],
+          std::vector<int32_t>({static_cast<int32_t>(temporary_shape1.count)}),
+          static_cast<math::FuseCode>(fuse_code),
+          dimension_type.dynamic_data[i]);
+      NNADAPTER_CHECK_EQ(status, 0);
+    }
+    output_operand->type.lifetime = NNADAPTER_TEMPORARY_SHAPE;
+    SetTemporaryShape(output_operand, dimension_type);
+  } else if (IsTemporaryShapeOperand(input0_operand) &&
+             IsConstantOperand(input1_operand)) {
+    auto& temporary_shape = *(GetTemporaryShape(input0_operand));
+    NNADAPTER_CHECK(temporary_shape.data);
+    NNADAPTER_CHECK(temporary_shape.data[0]);
+    auto& input1_type = input1_operand->type;
+    auto input1_shape = std::vector<int32_t>(
+        input1_type.dimensions.data,
+        input1_type.dimensions.data + input1_type.dimensions.count);
+    const auto input1_buffer = input1_operand->buffer;
+    NNADAPTER_CHECK(input1_buffer);
+    const auto input1_data = reinterpret_cast<int32_t*>(input1_buffer);
+    NNAdapterOperandDimensionType dimension_type;
+    dimension_type.count = output_operand->type.dimensions.data[0];
+    dimension_type.dynamic_count =
+        input0_operand->type.dimensions.dynamic_count;
+    int status = -1;
+    status = math::elementwise(
+        eltwise_type,
+        temporary_shape.data,
+        std::vector<int32_t>({static_cast<int32_t>(temporary_shape.count)}),
+        input1_data,
+        input1_shape,
+        static_cast<math::FuseCode>(fuse_code),
+        dimension_type.data);
+    NNADAPTER_CHECK_EQ(status, 0);
+    for (uint32_t i = 0; i < dimension_type.dynamic_count; i++) {
+      status = math::elementwise(
+          eltwise_type,
+          temporary_shape.dynamic_data[i],
+          std::vector<int32_t>({static_cast<int32_t>(temporary_shape.count)}),
+          input1_data,
+          input1_shape,
+          static_cast<math::FuseCode>(fuse_code),
+          dimension_type.dynamic_data[i]);
+      NNADAPTER_CHECK_EQ(status, 0);
+    }
+    output_operand->type.lifetime = NNADAPTER_TEMPORARY_SHAPE;
+    SetTemporaryShape(output_operand, dimension_type);
+  } else if (IsTemporaryShapeOperand(input1_operand) &&
+             IsConstantOperand(input0_operand)) {
+    auto& temporary_shape = *(GetTemporaryShape(input1_operand));
+    NNADAPTER_CHECK(temporary_shape.data);
+    NNADAPTER_CHECK(temporary_shape.data[0]);
+    auto& input0_type = input0_operand->type;
+    auto input0_shape = std::vector<int32_t>(
+        input0_type.dimensions.data,
+        input0_type.dimensions.data + input0_type.dimensions.count);
+    const auto input0_buffer = input0_operand->buffer;
+    NNADAPTER_CHECK(input0_buffer);
+    const auto input0_data = reinterpret_cast<int32_t*>(input0_buffer);
+    NNAdapterOperandDimensionType dimension_type;
+    dimension_type.count = output_operand->type.dimensions.data[0];
+    dimension_type.dynamic_count =
+        input1_operand->type.dimensions.dynamic_count;
+    int status = -1;
+    status = math::elementwise(
+        eltwise_type,
+        input0_data,
+        input0_shape,
+        temporary_shape.data,
+        std::vector<int32_t>({static_cast<int32_t>(temporary_shape.count)}),
+        static_cast<math::FuseCode>(fuse_code),
+        dimension_type.data);
+    NNADAPTER_CHECK_EQ(status, 0);
+    for (uint32_t i = 0; i < dimension_type.dynamic_count; i++) {
+      status = math::elementwise(
+          eltwise_type,
+          input0_data,
+          input0_shape,
+          temporary_shape.dynamic_data[i],
+          std::vector<int32_t>({static_cast<int32_t>(temporary_shape.count)}),
+          static_cast<math::FuseCode>(fuse_code),
+          dimension_type.dynamic_data[i]);
+      NNADAPTER_CHECK_EQ(status, 0);
+    }
+    output_operand->type.lifetime = NNADAPTER_TEMPORARY_SHAPE;
+    SetTemporaryShape(output_operand, dimension_type);
+  }
+
+  NNADAPTER_VLOG(5) << "output: " << OperandToString(output_operand);
+  return NNADAPTER_NO_ERROR;
+}
+
 }  // namespace operation
 }  // namespace nnadapter
diff --git a/lite/backends/nnadapter/nnadapter/src/operation/slice.cc b/lite/backends/nnadapter/nnadapter/src/operation/slice.cc
index c0770b03143..98bc7449c23 100644
--- a/lite/backends/nnadapter/nnadapter/src/operation/slice.cc
+++ b/lite/backends/nnadapter/nnadapter/src/operation/slice.cc
@@ -63,6 +63,16 @@ NNADAPTER_EXPORT int ExecuteSlice(core::Operation* operation) {
                            steps,
                            reinterpret_cast<float*>(output_buffer));
       break;
+    case NNADAPTER_INT64:
+      status = math::slice(reinterpret_cast<int64_t*>(input_operand->buffer),
+                           input_shape,
+                           axes_count,
+                           axes,
+                           starts,
+                           ends,
+                           steps,
+                           reinterpret_cast<int64_t*>(output_buffer));
+      break;
     default:
       NNADAPTER_LOG(FATAL) << "Unsupported precision code("
                            << OperandPrecisionCodeToString(input_precision)
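Note (editor's sketch): for the new NNADAPTER_INT64 case, the per-axis strided-slice arithmetic is the familiar numpy rule; a standalone 1-D version (math::slice itself is more general and multi-axis):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    std::vector<int64_t> Slice1D(const std::vector<int64_t>& in,
                                 int start, int end, int step) {
      auto dim = static_cast<int>(in.size());
      if (start < 0) start += dim;  // negative indices wrap around
      if (end < 0) end += dim;
      end = std::min(end, dim);     // clamp an oversized end index
      std::vector<int64_t> out;
      for (int i = start; i < end; i += step) out.push_back(in[i]);
      return out;  // e.g. {0,1,2,3,4}, start=-4, end=INT_MAX, step=2 -> {1,3}
    }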
diff --git a/lite/backends/nnadapter/nnadapter/src/operation/split.cc b/lite/backends/nnadapter/nnadapter/src/operation/split.cc
index 94bc1b8c542..b3060d2ce96 100644
--- a/lite/backends/nnadapter/nnadapter/src/operation/split.cc
+++ b/lite/backends/nnadapter/nnadapter/src/operation/split.cc
@@ -39,10 +39,9 @@ NNADAPTER_EXPORT int PrepareSplit(core::Operation* operation) {
   for (size_t i = 0; i < output_count; i++) {
     CopyOperandTypeExceptQuantParams(&output_operands[i]->type,
                                      input_operand->type);
-
     auto& out_dimensions = output_operands[i]->type.dimensions;
     out_dimensions.data[axis] = split[i];
-    for (uint32_t j = 0; j < out_dimensions.dynamic_count; i++) {
+    for (uint32_t j = 0; j < out_dimensions.dynamic_count; j++) {
       out_dimensions.dynamic_data[j][axis] = split[i];
     }
diff --git a/lite/backends/nnadapter/nnadapter/src/utility/debug.cc b/lite/backends/nnadapter/nnadapter/src/utility/debug.cc
index e00120c9c10..16bc4fb4dc3 100644
--- a/lite/backends/nnadapter/nnadapter/src/utility/debug.cc
+++ b/lite/backends/nnadapter/nnadapter/src/utility/debug.cc
@@ -198,6 +198,7 @@ NNADAPTER_EXPORT std::string Visualize(core::Model* model) {
       case NNADAPTER_MUL:
       case NNADAPTER_POW:
      case NNADAPTER_SUB:
+      case NNADAPTER_FLOOR_DIV:
         input_args = {"input0", "input1", "fuse_code"};
         output_args = {"output"};
         break;
@@ -303,6 +304,8 @@ NNADAPTER_EXPORT std::string Visualize(core::Model* model) {
       case NNADAPTER_SQUARE:
       case NNADAPTER_SWISH:
       case NNADAPTER_TANH:
+      case NNADAPTER_SIN:
+      case NNADAPTER_COS:
         input_args = {"input"};
         output_args = {"output"};
         break;
@@ -696,6 +699,7 @@ NNADAPTER_EXPORT std::string OperationTypeToString(
     NNADAPTER_TYPE_TO_STRING(CONCAT);
     NNADAPTER_TYPE_TO_STRING(CONV_2D);
     NNADAPTER_TYPE_TO_STRING(CONV_2D_TRANSPOSE);
+    NNADAPTER_TYPE_TO_STRING(COS);
     NNADAPTER_TYPE_TO_STRING(CUM_SUM);
     NNADAPTER_TYPE_TO_STRING(DEFORMABLE_CONV_2D);
     NNADAPTER_TYPE_TO_STRING(DEQUANTIZE);
@@ -707,6 +711,7 @@ NNADAPTER_EXPORT std::string OperationTypeToString(
     NNADAPTER_TYPE_TO_STRING(FILL_LIKE);
     NNADAPTER_TYPE_TO_STRING(FLATTEN);
     NNADAPTER_TYPE_TO_STRING(FLOOR);
+    NNADAPTER_TYPE_TO_STRING(FLOOR_DIV);
     NNADAPTER_TYPE_TO_STRING(FULLY_CONNECTED);
     NNADAPTER_TYPE_TO_STRING(GATHER);
     NNADAPTER_TYPE_TO_STRING(GELU);
@@ -750,6 +755,7 @@ NNADAPTER_EXPORT std::string OperationTypeToString(
     NNADAPTER_TYPE_TO_STRING(ROI_ALIGN);
     NNADAPTER_TYPE_TO_STRING(SHAPE);
     NNADAPTER_TYPE_TO_STRING(SIGMOID);
+    NNADAPTER_TYPE_TO_STRING(SIN);
     NNADAPTER_TYPE_TO_STRING(SLICE);
     NNADAPTER_TYPE_TO_STRING(STACK);
     NNADAPTER_TYPE_TO_STRING(SOFTMAX);
diff --git a/lite/kernels/nnadapter/converter/all.h b/lite/kernels/nnadapter/converter/all.h
index e95c52b162e..315c197ab74 100644
--- a/lite/kernels/nnadapter/converter/all.h
+++ b/lite/kernels/nnadapter/converter/all.h
@@ -507,6 +507,12 @@ REGISTER_CONVERTER(roi_align, ConvertRoiAlign, "cambricon_mlu,kunlunxin_xtcl");
 REGISTER_CONVERTER(multiclass_nms3,
                    ConvertMulticlassNms,
                    "cambricon_mlu,intel_openvino");
+REGISTER_CONVERTER(sin, ConvertUnaryActivations, "huawei_ascend_npu");
+REGISTER_CONVERTER(cos, ConvertUnaryActivations, "huawei_ascend_npu");
+REGISTER_CONVERTER(silu, ConvertUnaryActivations, "huawei_ascend_npu");
+REGISTER_CONVERTER(elementwise_floordiv,
+                   ConvertElementwise,
+                   "huawei_ascend_npu");
 // TODO(shentanyue): open later
 // REGISTER_CONVERTER(roi_align, ConvertRoiAlign, "huawei_ascend_npu");
 // REGISTER_CONVERTER(grid_sample, ConvertGridSample, "huawei_ascend_npu");
diff --git a/lite/kernels/nnadapter/converter/elementwise.cc b/lite/kernels/nnadapter/converter/elementwise.cc
index 70500c8466b..ec542df9a68 100644
--- a/lite/kernels/nnadapter/converter/elementwise.cc
+++ b/lite/kernels/nnadapter/converter/elementwise.cc
@@ -139,6 +139,9 @@ int ConvertElementwise(Converter* converter, OpInfo* op, Scope* scope) {
   } else if (op_type == "elementwise_pow" ||
              op_type == "fusion_elementwise_pow_activation") {
     eltwise_operation_type = NNADAPTER_POW;
+  } else if (op_type == "elementwise_floordiv" ||
+             op_type == "fusion_elementwise_floordiv_activation") {
+    eltwise_operation_type = NNADAPTER_FLOOR_DIV;
   } else {
     LOG(WARNING) << "Unsupported elementwise op type: " << op_type;
     return UNSUPPORTED_FEATURE;
diff --git a/lite/kernels/nnadapter/utility.cc b/lite/kernels/nnadapter/utility.cc
index b57111bc9c2..cfba7123cb7 100644
--- a/lite/kernels/nnadapter/utility.cc
+++ b/lite/kernels/nnadapter/utility.cc
@@ -544,6 +544,12 @@ NNAdapterOperationType ConvertUnaryActTypeToNNOperationType(
     unary_act_op_code = NNADAPTER_FLOOR;
   } else if (unary_act_op_type == "square") {
     unary_act_op_code = NNADAPTER_SQUARE;
+  } else if (unary_act_op_type == "sin") {
+    unary_act_op_code = NNADAPTER_SIN;
+  } else if (unary_act_op_type == "cos") {
+    unary_act_op_code = NNADAPTER_COS;
+  } else if (unary_act_op_type == "silu") {
+    unary_act_op_code = NNADAPTER_SWISH;
   } else {
     LOG(WARNING) << "Unable to convert a unary activation type("
                  << unary_act_op_type << ") to a NNAdapter operation type!";
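Note: mapping Paddle's "silu" to NNADAPTER_SWISH is sound because silu(x) = x * sigmoid(x), which is swish(x) with beta = 1. A standalone check (editor's example, not part of the patch):

    #include <cmath>
    #include <cstdio>

    float silu(float x) { return x / (1.0f + std::exp(-x)); }
    float swish(float x, float beta) { return x / (1.0f + std::exp(-beta * x)); }

    int main() {
      for (float x : {-2.0f, -0.5f, 0.0f, 1.5f}) {
        // The two columns agree for every x when beta == 1.
        std::printf("x=%5.2f  silu=%8.5f  swish(beta=1)=%8.5f\n",
                    x, silu(x), swish(x, 1.0f));
      }
      return 0;
    }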