Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HuaweiAscendNPU] Add cos/sin/silu/floordiv ops, and fix python-gil conflict for Ascend #9768

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions docs/develop_guides/nnadapter.md
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,14 @@ typedef struct Device {

W_out = (W_in - 1) * stride_width - padding_width_left - padding_width_right + (dilation_width * (filter_width - 1) + 1)) + 1 + output_padding_width

- NNADAPTER_COS

逐元素取余弦值: `output` = cos(`input`) 。
- 输入:
- 0 : input ,输入操作数,类型: NNADAPTER_FLOAT32 、 NNADAPTER_QUANT_INT8_SYMM_PER_LAYER 。
- 输出:
- 0 : output ,输出操作数,与输入操作数 `input` 的形状和类型相同。

- NNADAPTER_CUM_SUM

沿给定 `axis` 轴计算累加和。
Expand Down Expand Up @@ -1222,6 +1230,16 @@ typedef struct Device {
- 输出:
- 0 : output ,输出操作数,与输入操作数 `input` 的形状和类型相同。

- NNADAPTER_FLOOR_DIV

逐元素相除并向下取整: `output` = floor(`input0` / `input1`) ,广播规则与 Numpy https://numpy.org/doc/stable/user/basics.broadcasting.html 相同。
- 输入:
- 0 : input0 ,输入操作数 0 ,类型: NNADAPTER_FLOAT32 、 NNADAPTER_QUANT_INT8_SYMM_PER_LAYER 。
- 1 : input1 ,输入操作数 1 ,类型与输入操作数 `input0` 相同。
- 2 : fuse_code ,融合的激活函数类型,形状: [1] ,类型: NNADAPTER_INT32 ,取值: NNAdapterFuseCode 类型的任意值, NNADAPTER_FUSED_NONE 、 NNADAPTER_FUSED_RELU 、 NNADAPTER_FUSED_RELU1 、 NNADAPTER_FUSED_RELU6 。
- 输出:
- 0 : output ,输出操作数,形状:由输入操作数 `input0` 和 `input1` 广播后的形状决定,类型与输入操作数 `input0` 和 `input1` 相同。

- NNADAPTER_FULLY_CONNECTED

全链接层: `output` = activation(`input` * `weight`' + `bias`) 。
Expand Down Expand Up @@ -1678,6 +1696,14 @@ typedef struct Device {
- 输出:
- 0 : output ,输出操作数,与输入操作数 `input` 的形状和类型相同。

- NNADAPTER_SIN

逐元素取正弦值: `output` = sin(`input`) 。
- 输入:
- 0 : input ,输入操作数,类型: NNADAPTER_FLOAT32 、 NNADAPTER_QUANT_INT8_SYMM_PER_LAYER 。
- 输出:
- 0 : output ,输出操作数,与输入操作数 `input` 的形状和类型相同。

- NNADAPTER_SLICE

沿着多个轴生成 `input` 的片段。类似 numpy : https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html ,沿着 `axes` 的每个轴以 `starts` 、 `ends` 、 `step` 为起始、终止、步长获取 `input` 的片段。如果 `starts[i]` 、 `ends[i]` 为负数,则需要加上输入 `input` 对应轴 `axes[i]` 的维度 `dims[axes[i]]` 。如果 `starts[i]` 或 `ends[i]` 的值大于 `dims[axes[i]]` ,将被截断到 `dims[axes[i]] - 1` 。如果 `dims[axes[i]]` 维度未知,建议将 `ends[i]` 设置为 `INT_MAX` ,反向则设置为 `INT_MIN` 。
Expand Down
52 changes: 52 additions & 0 deletions lite/backends/nnadapter/nnadapter/include/nnadapter/nnadapter.h
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,21 @@ typedef enum {
*/
NNADAPTER_CONV_2D_TRANSPOSE,

/**
* Performs element-wise cosine calculation.
* The output is calculated using this formula: `output` = cos(`input`)
*
* Inputs:
* * 0: input, a NNADAPTER_FLOAT32,
* NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
*
* Outputs:
* * 0: output, a tensor of the same shape and type as `input`.
*
* Available since version 1.
*/
NNADAPTER_COS,

/**
* Performs cumulative sum of the input elements along the given `axis`.
*
Expand Down Expand Up @@ -784,6 +799,28 @@ typedef enum {
*/
NNADAPTER_FLOOR,

/**
* Performs element-wise binary floor divide(with Numpy-style broadcasting
* https://numpy.org/doc/stable/user/basics.broadcasting.html).
* The output is calculated using this formula:
* `output` = floor (`input0` / `input1`)
* Inputs:
* * 0: input0, a NNADAPTER_FLOAT32, NNADAPTER_QUANT_INT8_SYMM_PER_LAYER
* tensor.
* * 1: input1, a tensor of the compatible shape and the same type as
* `input0`.
* * 2: fuse_code, a NNADAPTER_INT32 tensor of shape [1], specifies the
* activation to the
* result, must be one of NNAdapterFuseCode values.
*
* Outputs:
* * 0: output, a tensor of the compatible shape and type as `input0` and
* `input1`.
*
* Available since version 1.
*/
NNADAPTER_FLOOR_DIV,

/**
* Add a fully connected layer.
* The output is calculated using this formula:
Expand Down Expand Up @@ -1759,6 +1796,21 @@ typedef enum {
*/
NNADAPTER_SIGMOID,

/**
* Performs element-wise sine calculation.
* The output is calculated using this formula: `output` = sin(`input`)
*
* Inputs:
* * 0: input, a NNADAPTER_FLOAT32,
* NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
*
* Outputs:
* * 0: output, a tensor of the same shape and type as `input`.
*
* Available since version 1.
*/
NNADAPTER_SIN,

/**
* Produces a slice of `input` along multiple axes. Similar to numpy:
* https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,32 @@ static int elementwise(ElementwiseTypeCode eltwise_type,
for (int64_t i = 0; i < output_count; i++) {
output_data[i] = broadcasted_input0_data[i] * broadcasted_input1_data[i];
}
} else if (eltwise_type == FLOOR_DIV) {
    for (int64_t i = 0; i < output_count; i++) {
      // Use ::floor (round toward negative infinity), not ::trunc (round
      // toward zero): the NNADAPTER_FLOOR_DIV spec defines
      // output = floor(input0 / input1), and the two differ for negative
      // quotients, e.g. floor(-7.0 / 2.0) = -4 but trunc(-7.0 / 2.0) = -3.
      // NOTE(review): for integral T the division itself already truncates
      // before ::floor is applied — confirm only float inputs reach here.
      output_data[i] = static_cast<T>(
          ::floor(broadcasted_input0_data[i] / broadcasted_input1_data[i]));
    }
  } else if (eltwise_type == DIV) {
    // Plain element-wise division.
    for (int64_t i = 0; i < output_count; i++) {
      output_data[i] = broadcasted_input0_data[i] / broadcasted_input1_data[i];
    }
  } else if (eltwise_type == MAX) {
    // Element-wise maximum of the two broadcasted inputs.
    for (int64_t i = 0; i < output_count; i++) {
      output_data[i] = broadcasted_input0_data[i] > broadcasted_input1_data[i]
                           ? broadcasted_input0_data[i]
                           : broadcasted_input1_data[i];
    }
  } else if (eltwise_type == MIN) {
    // Element-wise minimum of the two broadcasted inputs.
    for (int64_t i = 0; i < output_count; i++) {
      output_data[i] = broadcasted_input0_data[i] > broadcasted_input1_data[i]
                           ? broadcasted_input1_data[i]
                           : broadcasted_input0_data[i];
    }
  } else if (eltwise_type == POW) {
    // Element-wise power: output = input0 ^ input1.
    for (int64_t i = 0; i < output_count; i++) {
      output_data[i] =
          ::pow(broadcasted_input0_data[i], broadcasted_input1_data[i]);
    }
  } else {
    // Unknown element-wise type code: report failure to the caller.
    return -1;
  }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ typedef enum {
DIV = 4,
MAX = 5,
MIN = 6,
POW = 7
POW = 7,
FLOOR_DIV = 8
} ElementwiseTypeCode;

// Fused activation function types
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ REGISTER_CONVERTER(CLIP, ConvertClip)
REGISTER_CONVERTER(CONCAT, ConvertConcat)
REGISTER_CONVERTER(CONV_2D, ConvertConv2D)
REGISTER_CONVERTER(CONV_2D_TRANSPOSE, ConvertConv2DTranspose)
REGISTER_CONVERTER(COS, ConvertUnaryActivations)
REGISTER_CONVERTER(CUM_SUM, ConvertCumSum)
REGISTER_CONVERTER(DEFORMABLE_CONV_2D, ConvertDeformableConv2d)
REGISTER_CONVERTER(DEQUANTIZE, ConvertDequantize)
Expand All @@ -42,6 +43,7 @@ REGISTER_CONVERTER(FILL, ConvertFill)
REGISTER_CONVERTER(FILL_LIKE, ConvertFillLike)
REGISTER_CONVERTER(FLATTEN, ConvertFlatten)
REGISTER_CONVERTER(FLOOR, ConvertUnaryActivations)
REGISTER_CONVERTER(FLOOR_DIV, ConvertElementwise)
REGISTER_CONVERTER(FULLY_CONNECTED, ConvertFullyConnected)
REGISTER_CONVERTER(GATHER, ConvertGather)
REGISTER_CONVERTER(GELU, ConvertGelu)
Expand Down Expand Up @@ -82,6 +84,7 @@ REGISTER_CONVERTER(RESIZE_NEAREST, ConvertResizeNearest)
REGISTER_CONVERTER(ROI_ALIGN, ConvertRoiAlign)
REGISTER_CONVERTER(SHAPE, ConvertShape)
REGISTER_CONVERTER(SIGMOID, ConvertUnaryActivations)
REGISTER_CONVERTER(SIN, ConvertUnaryActivations)
REGISTER_CONVERTER(SLICE, ConvertSlice)
REGISTER_CONVERTER(SOFTMAX, ConvertSoftmax)
REGISTER_CONVERTER(SOFTPLUS, ConvertSoftplus)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ int ConvertElementwise(Converter* converter, core::Operation* operation) {
CONVERT_ELEMENTWISE(MAX, Maximum);
CONVERT_ELEMENTWISE(MIN, Minimum);
CONVERT_ELEMENTWISE(POW, Pow);
CONVERT_ELEMENTWISE(FLOOR_DIV, FloorDiv);
#undef CONVERT_ELEMENTWISE
default:
NNADAPTER_LOG(FATAL) << "Unsupported element-wise operation type "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ int ConvertExpand(Converter* converter, core::Operation* operation) {
shape_data);
shape_operator = converter->AddInt32ConstantOperator(expand_shape);
} else {
NNADAPTER_LOG(FATAL) << "Unsupported shape lifetime: "
<< OperandLifetimeCodeToString(
shape_operand->type.lifetime);
return NNADAPTER_INVALID_PARAMETER;
shape_operator = converter->GetMappedOperator(shape_operand);
if (!shape_operator) {
shape_operator = converter->ConvertOperand(shape_operand);
}
}
auto expand_op = converter->AddOperator<ge::op::Expand>(output_operand);
SET_INPUT(expand_op, x, input_operator);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ int ConvertMatMul(Converter* converter, core::Operation* operation) {
MAT_MUL_OPERATION_EXTRACT_INPUTS_OUTPUTS
// TODO(zhupengyang): support by reshape or squeeze
NNADAPTER_CHECK_NE(x_operand->type.dimensions.count, 1);
NNADAPTER_CHECK_NE(y_operand->type.dimensions.count, 1);

// Convert to GE operators
auto x_operator = converter->GetMappedOperator(x_operand);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,17 @@ int ConvertReshape(Converter* converter, core::Operation* operation) {
}
std::shared_ptr<Operator> shape_operator = nullptr;
if (IsTemporaryShapeOperand(shape_operand)) {
auto& temporary_shape = *(GetTemporaryShape(shape_operand));
auto shape_count = temporary_shape.count;
auto shape = GetShape(input_operand, output_operand, shape_count);
shape_operator = converter->AddInt32ConstantOperator(shape);
if (IsOperandWithDynamicShape(shape_operand)) {
shape_operator = converter->GetMappedOperator(shape_operand);
if (!shape_operator) {
shape_operator = converter->ConvertOperand(shape_operand);
}
} else {
auto& temporary_shape = *(GetTemporaryShape(shape_operand));
auto shape_count = temporary_shape.count;
auto shape = GetShape(input_operand, output_operand, shape_count);
shape_operator = converter->AddInt32ConstantOperator(shape);
}
} else if (IsConstantOperand(shape_operand)) {
auto shape_count = shape_operand->length / sizeof(int32_t);
auto shape = GetShape(input_operand, output_operand, shape_count);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ int ConvertUnaryActivations(Converter* converter, core::Operation* operation) {
CONVERT_UNARY_ACTIVATION(EXP, Exp);
CONVERT_UNARY_ACTIVATION(FLOOR, Floor);
CONVERT_UNARY_ACTIVATION(SQUARE, Square);
CONVERT_UNARY_ACTIVATION(SIN, Sin);
CONVERT_UNARY_ACTIVATION(COS, Cos);
#undef CONVERT_UNARY_ACTIVATION
default:
NNADAPTER_LOG(FATAL) << "Unsupported activation operation type "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

#include "driver/huawei_ascend_npu/model_client.h"

#if defined(LITE_WITH_PYTHON)
#include <pybind11/pybind11.h>
#endif
#include <memory>
#include <sstream>
#include <string>
Expand Down Expand Up @@ -286,6 +289,9 @@ bool AclModelClient::Process(uint32_t input_count,
std::vector<NNAdapterOperandType>* output_types,
core::Argument* output_arguments,
DynamicShapeMode dynamic_shape_mode) {
#if defined(LITE_WITH_PYTHON)
pybind11::gil_scoped_release no_gil;
#endif
if (!model_desc_) {
NNADAPTER_LOG(FATAL) << "No ACL model is loaded.";
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
// limitations under the License.

#include "driver/huawei_ascend_npu/utility.h"
#if defined(LITE_WITH_PYTHON)
#include <pybind11/pybind11.h>
#endif
#include <map>
#include <mutex> // NOLINT
#include <utility>
Expand Down Expand Up @@ -237,6 +240,9 @@ bool BuildOMModelToBuffer(
const std::string& optional_shape_str,
const DynamicShapeMode dynamic_shape_mode,
AscendConfigParams* config_params) {
#if defined(LITE_WITH_PYTHON)
pybind11::gil_scoped_release no_gil;
#endif
// Should initialize the GE graph builder before model building
InitializeGraphBuilder(config_params);
// Convert the CANN IR graph to the CANN om model
Expand Down
12 changes: 12 additions & 0 deletions lite/backends/nnadapter/nnadapter/src/operation/all.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ REGISTER_OPERATION(CONV_2D_TRANSPOSE,
PrepareConv2DTranspose,
ExecuteConv2DTranspose)
REGISTER_OPERATION(CUM_SUM, ValidateCumSum, PrepareCumSum, ExecuteCumSum)
REGISTER_OPERATION(COS,
ValidateUnaryActivations,
PrepareUnaryActivations,
ExecuteUnaryActivations)
REGISTER_OPERATION(DEFORMABLE_CONV_2D,
ValidateDeformableConv2D,
PrepareDeformableConv2D,
Expand Down Expand Up @@ -95,6 +99,10 @@ REGISTER_OPERATION(FLOOR,
ValidateUnaryActivations,
PrepareUnaryActivations,
ExecuteUnaryActivations)
REGISTER_OPERATION(FLOOR_DIV,
ValidateElementwise,
PrepareElementwise,
ExecuteElementwise)
REGISTER_OPERATION(FULLY_CONNECTED,
ValidateFullyConnected,
PrepareFullyConnected,
Expand Down Expand Up @@ -216,6 +224,10 @@ REGISTER_OPERATION(SIGMOID,
ValidateUnaryActivations,
PrepareUnaryActivations,
ExecuteUnaryActivations)
REGISTER_OPERATION(SIN,
ValidateUnaryActivations,
PrepareUnaryActivations,
ExecuteUnaryActivations)
REGISTER_OPERATION(SLICE, ValidateSlice, PrepareSlice, ExecuteSlice)
REGISTER_OPERATION(SOFTMAX, ValidateSoftmax, PrepareSoftmax, ExecuteSoftmax)
REGISTER_OPERATION(SOFTPLUS, ValidateSoftplus, PrepareSoftplus, ExecuteSoftplus)
Expand Down
Loading