Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HuaweiAscendNPU] Add cos/sin/silu/floordiv ops, and fix python-gil conflict for Ascend #9768

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions docs/develop_guides/nnadapter.md
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,14 @@ typedef struct Device {

W_out = (W_in - 1) * stride_width - padding_width_left - padding_width_right + (dilation_width * (filter_width - 1) + 1)) + 1 + output_padding_width

- NNADAPTER_COS

逐元素取余弦值: `output` = cos(`input`) 。
- 输入:
- 0 : input ,输入操作数,类型: NNADAPTER_FLOAT32 、 NNADAPTER_QUANT_INT8_SYMM_PER_LAYER 。
- 输出:
- 0 : output ,输出操作数,与输入操作数 `input` 的形状和类型相同。

- NNADAPTER_CUM_SUM

沿给定 `axis` 轴计算累加和。
Expand Down Expand Up @@ -1222,6 +1230,16 @@ typedef struct Device {
- 输出:
- 0 : output ,输出操作数,与输入操作数 `input` 的形状和类型相同。

- NNADAPTER_FLOOR_DIV

逐元素相除并向下取整: `output` = floor(`input0` / `input1`) ,广播规则与 Numpy https://numpy.org/doc/stable/user/basics.broadcasting.html 相同。
- 输入:
- 0 : input0 ,输入操作数 0 ,类型: NNADAPTER_FLOAT32 、 NNADAPTER_QUANT_INT8_SYMM_PER_LAYER 。
- 1 : input1 ,输入操作数 1 ,类型与输入操作数 `input0` 相同。
- 2 : fuse_code ,融合的激活函数类型,形状: [1] ,类型: NNADAPTER_INT32 ,取值: NNAdapterFuseCode 类型的任意值, NNADAPTER_FUSED_NONE 、 NNADAPTER_FUSED_RELU 、 NNADAPTER_FUSED_RELU1 、 NNADAPTER_FUSED_RELU6 。
- 输出:
- 0 : output ,输出操作数,形状:由输入操作数 `input0` 和 `input1` 广播后的形状决定,类型与输入操作数 `input0` 和 `input1` 相同。

- NNADAPTER_FULLY_CONNECTED

全链接层: `output` = activation(`input` * `weight`' + `bias`) 。
Expand Down Expand Up @@ -1678,6 +1696,14 @@ typedef struct Device {
- 输出:
- 0 : output ,输出操作数,与输入操作数 `input` 的形状和类型相同。

- NNADAPTER_SIN

逐元素取正弦值: `output` = sin(`input`) 。
- 输入:
- 0 : input ,输入操作数,类型: NNADAPTER_FLOAT32 、 NNADAPTER_QUANT_INT8_SYMM_PER_LAYER 。
- 输出:
- 0 : output ,输出操作数,与输入操作数 `input` 的形状和类型相同。

- NNADAPTER_SLICE

沿着多个轴生成 `input` 的片段。类似 numpy : https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html ,沿着 `axes` 的每个轴以 `starts` 、 `ends` 、 `step` 为起始、终止、步长获取 `input` 的片段。如果 `starts[i]` 、 `ends[i]` 为负数,则需要加上输入 `input` 对应轴 `axes[i]` 的维度 `dims[axes[i]]` 。如果 `starts[i]` 或 `ends[i]` 的值大于 `dims[axes[i]]` ,将被截断到 `dims[axes[i]] - 1` 。如果 `dims[axes[i]]` 维度未知,建议将 `ends[i]` 设置为 `INT_MAX` ,反向则设置为 `INT_MIN` 。
Expand Down
52 changes: 52 additions & 0 deletions lite/backends/nnadapter/nnadapter/include/nnadapter/nnadapter.h
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,21 @@ typedef enum {
*/
NNADAPTER_CONV_2D_TRANSPOSE,

/**
* Performs element-wise cosine calculation.
* The output is calculated using this formula: `output` = cos(`input`)
*
* Inputs:
* * 0: input, a NNADAPTER_FLOAT32,
* NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
*
* Outputs:
* * 0: output, a tensor of the same shape and type as `input`.
*
* Available since version 1.
*/
NNADAPTER_COS,

/**
* Performs cumulative sum of the input elements along the given `axis`.
*
Expand Down Expand Up @@ -784,6 +799,28 @@ typedef enum {
*/
NNADAPTER_FLOOR,

/**
* Performs element-wise binary floor divide(with Numpy-style broadcasting
* https://numpy.org/doc/stable/user/basics.broadcasting.html).
* The output is calculated using this formula:
* `output` = floor (`input0` / `input1`)
* Inputs:
* * 0: input0, a NNADAPTER_FLOAT32, NNADAPTER_QUANT_INT8_SYMM_PER_LAYER
* tensor.
* * 1: input1, a tensor of the compatible shape and the same type as
* `input0`.
* * 2: fuse_code, a NNADAPTER_INT32 tensor of shape [1], specifies the
* activation to the
* result, must be one of NNAdapterFuseCode values.
*
* Outputs:
* * 0: output, a tensor of the compatible shape and type as `input0` and
* `input1`.
*
* Available since version 1.
*/
NNADAPTER_FLOOR_DIV,

/**
* Add a fully connected layer.
* The output is calculated using this formula:
Expand Down Expand Up @@ -1759,6 +1796,21 @@ typedef enum {
*/
NNADAPTER_SIGMOID,

/**
* Performs element-wise sine calculation.
* The output is calculated using this formula: `output` = sin(`input`)
*
* Inputs:
* * 0: input, a NNADAPTER_FLOAT32,
* NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
*
* Outputs:
* * 0: output, a tensor of the same shape and type as `input`.
*
* Available since version 1.
*/
NNADAPTER_SIN,

/**
* Produces a slice of `input` along multiple axes. Similar to numpy:
* https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,32 @@ static int elementwise(ElementwiseTypeCode eltwise_type,
for (int64_t i = 0; i < output_count; i++) {
output_data[i] = broadcasted_input0_data[i] * broadcasted_input1_data[i];
}
} else if (eltwise_type == FLOOR_DIV) {
    for (int64_t i = 0; i < output_count; i++) {
      // Use ::floor (round toward negative infinity), not ::trunc (round
      // toward zero): the NNADAPTER_FLOOR_DIV spec defines
      // output = floor(input0 / input1), and the two differ for negative
      // quotients, e.g. floor(-7.0 / 2.0) = -4 but trunc(-7.0 / 2.0) = -3.
      // NOTE(review): for integral T the division itself already truncates
      // before ::floor is applied — confirm only float inputs reach here.
      output_data[i] = static_cast<T>(
          ::floor(broadcasted_input0_data[i] / broadcasted_input1_data[i]));
    }
  } else if (eltwise_type == DIV) {
    // Plain element-wise division.
    for (int64_t i = 0; i < output_count; i++) {
      output_data[i] = broadcasted_input0_data[i] / broadcasted_input1_data[i];
    }
  } else if (eltwise_type == MAX) {
    // Element-wise maximum of the two broadcasted inputs.
    for (int64_t i = 0; i < output_count; i++) {
      output_data[i] = broadcasted_input0_data[i] > broadcasted_input1_data[i]
                           ? broadcasted_input0_data[i]
                           : broadcasted_input1_data[i];
    }
  } else if (eltwise_type == MIN) {
    // Element-wise minimum of the two broadcasted inputs.
    for (int64_t i = 0; i < output_count; i++) {
      output_data[i] = broadcasted_input0_data[i] > broadcasted_input1_data[i]
                           ? broadcasted_input1_data[i]
                           : broadcasted_input0_data[i];
    }
  } else if (eltwise_type == POW) {
    // Element-wise power: output = input0 ^ input1.
    for (int64_t i = 0; i < output_count; i++) {
      output_data[i] =
          ::pow(broadcasted_input0_data[i], broadcasted_input1_data[i]);
    }
  } else {
    // Unknown element-wise type code: report failure to the caller.
    return -1;
  }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ typedef enum {
DIV = 4,
MAX = 5,
MIN = 6,
POW = 7
POW = 7,
FLOOR_DIV = 8
} ElementwiseTypeCode;

// Fused activation function types
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ REGISTER_CONVERTER(CLIP, ConvertClip)
REGISTER_CONVERTER(CONCAT, ConvertConcat)
REGISTER_CONVERTER(CONV_2D, ConvertConv2D)
REGISTER_CONVERTER(CONV_2D_TRANSPOSE, ConvertConv2DTranspose)
REGISTER_CONVERTER(COS, ConvertUnaryActivations)
REGISTER_CONVERTER(CUM_SUM, ConvertCumSum)
REGISTER_CONVERTER(DEFORMABLE_CONV_2D, ConvertDeformableConv2d)
REGISTER_CONVERTER(DEQUANTIZE, ConvertDequantize)
Expand All @@ -42,6 +43,7 @@ REGISTER_CONVERTER(FILL, ConvertFill)
REGISTER_CONVERTER(FILL_LIKE, ConvertFillLike)
REGISTER_CONVERTER(FLATTEN, ConvertFlatten)
REGISTER_CONVERTER(FLOOR, ConvertUnaryActivations)
REGISTER_CONVERTER(FLOOR_DIV, ConvertElementwise)
REGISTER_CONVERTER(FULLY_CONNECTED, ConvertFullyConnected)
REGISTER_CONVERTER(GATHER, ConvertGather)
REGISTER_CONVERTER(GELU, ConvertGelu)
Expand Down Expand Up @@ -82,6 +84,7 @@ REGISTER_CONVERTER(RESIZE_NEAREST, ConvertResizeNearest)
REGISTER_CONVERTER(ROI_ALIGN, ConvertRoiAlign)
REGISTER_CONVERTER(SHAPE, ConvertShape)
REGISTER_CONVERTER(SIGMOID, ConvertUnaryActivations)
REGISTER_CONVERTER(SIN, ConvertUnaryActivations)
REGISTER_CONVERTER(SLICE, ConvertSlice)
REGISTER_CONVERTER(SOFTMAX, ConvertSoftmax)
REGISTER_CONVERTER(SOFTPLUS, ConvertSoftplus)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ int ConvertElementwise(Converter* converter, core::Operation* operation) {
CONVERT_ELEMENTWISE(MAX, Maximum);
CONVERT_ELEMENTWISE(MIN, Minimum);
CONVERT_ELEMENTWISE(POW, Pow);
CONVERT_ELEMENTWISE(FLOOR_DIV, FloorDiv);
#undef CONVERT_ELEMENTWISE
default:
NNADAPTER_LOG(FATAL) << "Unsupported element-wise operation type "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ int ConvertExpand(Converter* converter, core::Operation* operation) {
shape_data);
shape_operator = converter->AddInt32ConstantOperator(expand_shape);
} else {
NNADAPTER_LOG(FATAL) << "Unsupported shape lifetime: "
<< OperandLifetimeCodeToString(
shape_operand->type.lifetime);
return NNADAPTER_INVALID_PARAMETER;
shape_operator = converter->GetMappedOperator(shape_operand);
if (!shape_operator) {
shape_operator = converter->ConvertOperand(shape_operand);
}
}
auto expand_op = converter->AddOperator<ge::op::Expand>(output_operand);
SET_INPUT(expand_op, x, input_operator);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ int ConvertMatMul(Converter* converter, core::Operation* operation) {
MAT_MUL_OPERATION_EXTRACT_INPUTS_OUTPUTS
// TODO(zhupengyang): support by reshape or squeeze
NNADAPTER_CHECK_NE(x_operand->type.dimensions.count, 1);
NNADAPTER_CHECK_NE(y_operand->type.dimensions.count, 1);

// Convert to GE operators
auto x_operator = converter->GetMappedOperator(x_operand);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,17 @@ int ConvertReshape(Converter* converter, core::Operation* operation) {
}
std::shared_ptr<Operator> shape_operator = nullptr;
if (IsTemporaryShapeOperand(shape_operand)) {
auto& temporary_shape = *(GetTemporaryShape(shape_operand));
auto shape_count = temporary_shape.count;
auto shape = GetShape(input_operand, output_operand, shape_count);
shape_operator = converter->AddInt32ConstantOperator(shape);
if (IsOperandWithDynamicShape(shape_operand)) {
shape_operator = converter->GetMappedOperator(shape_operand);
if (!shape_operator) {
shape_operator = converter->ConvertOperand(shape_operand);
}
} else {
auto& temporary_shape = *(GetTemporaryShape(shape_operand));
auto shape_count = temporary_shape.count;
auto shape = GetShape(input_operand, output_operand, shape_count);
shape_operator = converter->AddInt32ConstantOperator(shape);
}
} else if (IsConstantOperand(shape_operand)) {
auto shape_count = shape_operand->length / sizeof(int32_t);
auto shape = GetShape(input_operand, output_operand, shape_count);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ int ConvertUnaryActivations(Converter* converter, core::Operation* operation) {
CONVERT_UNARY_ACTIVATION(EXP, Exp);
CONVERT_UNARY_ACTIVATION(FLOOR, Floor);
CONVERT_UNARY_ACTIVATION(SQUARE, Square);
CONVERT_UNARY_ACTIVATION(SIN, Sin);
CONVERT_UNARY_ACTIVATION(COS, Cos);
#undef CONVERT_UNARY_ACTIVATION
default:
NNADAPTER_LOG(FATAL) << "Unsupported activation operation type "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

#include "driver/huawei_ascend_npu/model_client.h"

#if defined(LITE_WITH_PYTHON)
#include <pybind11/pybind11.h>
#endif
#include <memory>
#include <sstream>
#include <string>
Expand Down Expand Up @@ -286,6 +289,9 @@ bool AclModelClient::Process(uint32_t input_count,
std::vector<NNAdapterOperandType>* output_types,
core::Argument* output_arguments,
DynamicShapeMode dynamic_shape_mode) {
#if defined(LITE_WITH_PYTHON)
pybind11::gil_scoped_release no_gil;
#endif
if (!model_desc_) {
NNADAPTER_LOG(FATAL) << "No ACL model is loaded.";
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
// limitations under the License.

#include "driver/huawei_ascend_npu/utility.h"
#if defined(LITE_WITH_PYTHON)
#include <pybind11/pybind11.h>
#endif
#include <map>
#include <mutex> // NOLINT
#include <utility>
Expand Down Expand Up @@ -237,6 +240,9 @@ bool BuildOMModelToBuffer(
const std::string& optional_shape_str,
const DynamicShapeMode dynamic_shape_mode,
AscendConfigParams* config_params) {
#if defined(LITE_WITH_PYTHON)
pybind11::gil_scoped_release no_gil;
#endif
// Should initialize the GE graph builder before model building
InitializeGraphBuilder(config_params);
// Convert the CANN IR graph to the CANN om model
Expand Down
12 changes: 12 additions & 0 deletions lite/backends/nnadapter/nnadapter/src/operation/all.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ REGISTER_OPERATION(CONV_2D_TRANSPOSE,
PrepareConv2DTranspose,
ExecuteConv2DTranspose)
REGISTER_OPERATION(CUM_SUM, ValidateCumSum, PrepareCumSum, ExecuteCumSum)
REGISTER_OPERATION(COS,
ValidateUnaryActivations,
PrepareUnaryActivations,
ExecuteUnaryActivations)
REGISTER_OPERATION(DEFORMABLE_CONV_2D,
ValidateDeformableConv2D,
PrepareDeformableConv2D,
Expand Down Expand Up @@ -95,6 +99,10 @@ REGISTER_OPERATION(FLOOR,
ValidateUnaryActivations,
PrepareUnaryActivations,
ExecuteUnaryActivations)
REGISTER_OPERATION(FLOOR_DIV,
ValidateElementwise,
PrepareElementwise,
ExecuteElementwise)
REGISTER_OPERATION(FULLY_CONNECTED,
ValidateFullyConnected,
PrepareFullyConnected,
Expand Down Expand Up @@ -216,6 +224,10 @@ REGISTER_OPERATION(SIGMOID,
ValidateUnaryActivations,
PrepareUnaryActivations,
ExecuteUnaryActivations)
REGISTER_OPERATION(SIN,
ValidateUnaryActivations,
PrepareUnaryActivations,
ExecuteUnaryActivations)
REGISTER_OPERATION(SLICE, ValidateSlice, PrepareSlice, ExecuteSlice)
REGISTER_OPERATION(SOFTMAX, ValidateSoftmax, PrepareSoftmax, ExecuteSoftmax)
REGISTER_OPERATION(SOFTPLUS, ValidateSoftplus, PrepareSoftplus, ExecuteSoftplus)
Expand Down
Loading