diff --git a/python/tvm/relay/backend/contrib/ethosu/codegen.py b/python/tvm/relay/backend/contrib/ethosu/codegen.py index fe7bfc3fcdd0..f968d6a1f385 100644 --- a/python/tvm/relay/backend/contrib/ethosu/codegen.py +++ b/python/tvm/relay/backend/contrib/ethosu/codegen.py @@ -31,6 +31,8 @@ from tvm.relay.backend.contrib.ethosu.op import op_attrs from tvm.relay.backend.contrib.ethosu import op +from . import _ffi_api + class OptimizeLUTs(ExprMutator): """A pass to merge an identity operator with a LUT based activation function with @@ -299,6 +301,17 @@ def __call__(self, *args, **kwargs): pass +def IdentityOptimizer(): # pylint: disable=invalid-name + """Pass that removes redundant identities + + Return + ------ + Pass + The module pass. + """ + return _ffi_api.IdentityOptimizer() + + @tvm._ffi.register_func("relay.ext.ethos-u.constant_updater") def constant_updater(expr, symbol): # pylint: disable=unused-argument """ @@ -330,6 +343,7 @@ def relay_to_tir_func(ext_func: relay.Function) -> tvm.tir.PrimFunc: mod["main"] = ext_func mod = LegalizeEthosU()(mod) mod = LUTsOptimizer()(mod) + mod = IdentityOptimizer()(mod) mod = LayoutOptimizer()(mod) mod = relay.transform.InferType()(mod) # We are currently using copy_constants scheduler In the long run, diff --git a/src/relay/backend/contrib/ethosu/codegen.cc b/src/relay/backend/contrib/ethosu/codegen.cc index 0fdbb7063e3f..ca41ccd14257 100644 --- a/src/relay/backend/contrib/ethosu/codegen.cc +++ b/src/relay/backend/contrib/ethosu/codegen.cc @@ -38,6 +38,7 @@ #include #include +#include "../../../op/contrib/ethosu/op_attrs.h" #include "../../../op/make_op.h" #include "utils.h" @@ -100,6 +101,81 @@ tvm::transform::Pass RelayToTIR() { return tvm::transform::CreateModulePass(pass_func, 0, "relay.contrib.ethos-u.RelayToTIR", {}); } +/*! + * \brief This mutator removes identity operations that are not necessary. Specifically, an + * identity operation can be removed when it is immediately followed by an NPU compute + * operation. 
+ */ +class RemoveRedundantIdentities : public MixedModeMutator { + public: + Expr Rewrite_(const CallNode* pre, const Expr& post) override { + Call call = Downcast(post); + + // only consider rewrite if current op is an NPU compute op. + if (!call->op->IsInstance()) { + return post; + } + const auto* op = call->op.as(); + std::string op_name = op->name; + if (op_name.substr(0, 15) != "contrib.ethosu." || op_name == "contrib.ethosu.identity") { + return post; + } + + // check if we can rewrite parent identity operations to current call. + bool needs_rewrite = false; + Array new_args; + for (const auto& arg : call->args) { + if (const auto* parent_callnode = arg.as()) { + if (const auto* parent_op = parent_callnode->op.as()) { + Call parent_call = GetRef(parent_callnode); + if (parent_op->name == "contrib.ethosu.identity" && IdentityDoesNothing(parent_call)) { + needs_rewrite = true; + new_args.push_back(parent_call->args[0]); + continue; + } + } + } + new_args.push_back(arg); + } + + if (needs_rewrite) { + return Call(call->op, new_args, call->attrs, call->type_args); + } + return post; + } + + private: + bool IdentityDoesNothing(const Call& call) { + const auto* attrs = call->attrs.as(); + bool does_not_requantize = attrs->ifm_scale == 1.0 && attrs->ifm_zero_point == 0 && + attrs->ofm_scale == 1.0 && attrs->ofm_zero_point == 0; + bool has_no_activation = attrs->activation == "NONE"; + return does_not_requantize && has_no_activation; + } +}; + +/*! + * \brief A pass to remove redundant identity operations. 
+ */ +tvm::transform::Pass IdentityOptimizer() { + runtime::TypedPackedFunc pass_func = + [=](IRModule mod, transform::PassContext ctx) { + for (auto gv : mod->GetGlobalVars()) { + Function main_func = Downcast(mod->Lookup(gv)); + auto new_main_body = RemoveRedundantIdentities().VisitExpr(main_func->body); + if (!new_main_body.same_as(main_func->body)) { + Function new_main_func = WithFields(main_func, main_func->params, new_main_body); + mod->Update(gv, new_main_func); + } + } + return mod; + }; + return tvm::transform::CreateModulePass(pass_func, 0, + "relay.backend.contrib.ethos-u.IdentityOptimizer", {}); +} + +TVM_REGISTER_GLOBAL("relay.ext.ethos-u.IdentityOptimizer").set_body_typed(IdentityOptimizer); + /*! * \brief This function lowers the IRModule with PrimFunc * with the target of the microNPU to a C-source runtime module diff --git a/src/relay/op/contrib/ethosu/binary_elementwise.cc b/src/relay/op/contrib/ethosu/binary_elementwise.cc index 9a681b7cdc88..618daeea00e8 100644 --- a/src/relay/op/contrib/ethosu/binary_elementwise.cc +++ b/src/relay/op/contrib/ethosu/binary_elementwise.cc @@ -24,6 +24,7 @@ #include #include "common.h" +#include "op_attrs.h" namespace tvm { namespace relay { @@ -31,103 +32,6 @@ namespace op { namespace contrib { namespace ethosu { -/*! 
\brief Attributes used by the Ethos(TM)-U NPU binary elementwise operators */ -struct EthosuBinaryElementwiseAttrs : public tvm::AttrsNode { - String operator_type; - double ifm_scale; - int ifm_zero_point; - double ifm2_scale; - int ifm2_zero_point; - double ofm_scale; - int ofm_zero_point; - IndexExpr ifm_channels; - IndexExpr ifm2_channels; - bool reversed_operands; - String activation; - int clip_min; - int clip_max; - String rounding_mode; - String ifm_layout; - String ifm2_layout; - String ofm_layout; - String ofm_dtype; - - TVM_DECLARE_ATTRS(EthosuBinaryElementwiseAttrs, "relay.attrs.EthosuBinaryElementwiseAttrs") { - TVM_ATTR_FIELD(operator_type) - .describe( - "The type of the binary elementwise operator." - "'ADD'" - "'SUB'" - "'MUL'" - "'MIN'" - "'MAX'" - "'SHR'" - "'SHL'"); - TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ifm_zero_point) - .describe("The quantization zero point for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ifm2_scale) - .describe("The quantization scale for the Input Feature Map tensor 2."); - TVM_ATTR_FIELD(ifm2_zero_point) - .describe("The quantization zero point for the Input Feature Map tensor 2."); - TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ofm_zero_point) - .describe("The quantization zero point for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ifm_channels).describe("The number of the Input Feature Map channels."); - TVM_ATTR_FIELD(ifm2_channels).describe("The number of the Input Feature Map 2 channels."); - TVM_ATTR_FIELD(reversed_operands) - .describe("True if IFM2 is the first operand and IFM is the second operand.") - .set_default(false); - TVM_ATTR_FIELD(activation) - .describe( - "The activation function to use. " - "'NONE' - no activation function. " - "'CLIP' - clip the output between clip_min and clip_max. " - "'TANH' - tanh activation function. 
" - "'SIGMOID' - sigmoid activation function. " - "'LUT' - use a look-up table to perform the activation function." - "Available activations for activation type:" - "{int8, uint8}: 'NONE', 'CLIP', 'TANH', 'SIGMOID', 'LUT'" - "{int32}: 'NONE'") - .set_default("NONE"); - TVM_ATTR_FIELD(clip_min) - .describe("The minimum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(clip_max) - .describe("The maximum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(rounding_mode) - .describe( - "The rounding mode to apply to the Output Feature Map tensor. " - "'TFL' - Tensorflow Lite rounding scheme. " - "'TRUNCATE' - Truncate towards zero." - "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") - .set_default("TFL"); - TVM_ATTR_FIELD(ifm_layout) - .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") - .set_default("NHWC"); - TVM_ATTR_FIELD(ifm2_layout) - .describe("The layout of the Input Feature Map tensor 2. Can be 'NHWC' or 'NHCWB16'.") - .set_default("NHWC"); - TVM_ATTR_FIELD(ofm_layout) - .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") - .set_default("NHWC"); - TVM_ATTR_FIELD(ofm_dtype).describe( - "The Output Feature Map tensor type." 
- "MUL, ADD, SUB {IFM}->{OFM}:" - " {uint8, int8 int32} -> {uint8, int8, int32}, any pairing" - "MAX, MIN:" - " IFM and OFM must be of the same type, one of:" - " {int8, uint8}" - "SHR {IFM}->{OFM}:" - " {int32}->{int8, uint8, int32}, any pairing" - "SHL:" - " {int32}->{int32} only"); - } -}; - -TVM_REGISTER_NODE_TYPE(EthosuBinaryElementwiseAttrs); - bool EthosuBinaryElementwiseRel(const Array& types, int num_inputs, const Attrs& attrs, const TypeReporter& reporter) { const int ifm_index = 0; diff --git a/src/relay/op/contrib/ethosu/convolution.cc b/src/relay/op/contrib/ethosu/convolution.cc index 90bbf90d13c7..96e15709f3d2 100644 --- a/src/relay/op/contrib/ethosu/convolution.cc +++ b/src/relay/op/contrib/ethosu/convolution.cc @@ -31,6 +31,7 @@ #include "../../../qnn/utils.h" #include "common.h" +#include "op_attrs.h" namespace tvm { namespace relay { @@ -38,90 +39,6 @@ namespace op { namespace contrib { namespace ethosu { -/*! \brief Attributes used by the Ethos(TM)-U NPU convolution operator */ -struct EthosuConv2DAttrs : public tvm::AttrsNode { - double ifm_scale; - int ifm_zero_point; - int weight_zero_point; - double ofm_scale; - int ofm_zero_point; - Array kernel_shape; - IndexExpr ofm_channels; - Array strides; - Array padding; - Array dilation; - String activation; - int clip_min; - int clip_max; - String rounding_mode; - String upscale; - String ifm_layout; - String ofm_layout; - - TVM_DECLARE_ATTRS(EthosuConv2DAttrs, "relay.attrs.EthosuConv2DAttrs") { - TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ifm_zero_point) - .describe("The quantization zero point for the Input Feature Map tensor."); - TVM_ATTR_FIELD(weight_zero_point) - .describe("The quantization zero point for the weight tensor."); - TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ofm_zero_point) - .describe("The quantization zero point for the Output Feature Map 
tensor."); - TVM_ATTR_FIELD(kernel_shape) - .describe("The 2 dimensional kernel shape as (kernel_height, kernel_width).") - .set_default(NullValue>()); - TVM_ATTR_FIELD(ofm_channels) - .describe("The number of the Output Feature Map channels.") - .set_default(NullValue()); - TVM_ATTR_FIELD(strides) - .set_default(Array({1, 1})) - .describe("The 2 dimensional strides as (stride_height, stride_width)."); - TVM_ATTR_FIELD(padding) - .set_default(Array({0, 0, 0, 0})) - .describe("The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right)."); - TVM_ATTR_FIELD(dilation) - .set_default(Array({1, 1})) - .describe("The 2 dimensional dilation as (dilation_height, dilation_width)."); - TVM_ATTR_FIELD(activation) - .describe( - "The activation function to use. " - "'NONE' - no activation function. " - "'CLIP' - clip the output between clip_min and clip_max. " - "'TANH' - tanh activation function. " - "'SIGMOID' - sigmoid activation function. " - "'LUT' - use a look-up table to perform the activation function.") - .set_default("NONE"); - TVM_ATTR_FIELD(clip_min) - .describe("The minimum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(clip_max) - .describe("The maximum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(rounding_mode) - .describe( - "The rounding mode to apply to the Output Feature Map tensor. " - "'TFL' - Tensorflow Lite rounding scheme. " - "'TRUNCATE' - Truncate towards zero." - "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") - .set_default("TFL"); - TVM_ATTR_FIELD(upscale) - .describe( - "The 2x2 upscaling mode to apply to the Input Feature Map tensor. " - "'NONE' - no upscaling. " - "'NEAREST' - upscale using nearest neighbour. " - "'ZEROS' - upscale using zeros.") - .set_default("NONE"); - TVM_ATTR_FIELD(ifm_layout) - .set_default("NHWC") - .describe("The layout of the Input Feature Map tensor. 
Can be 'NHWC' or 'NHCWB16'."); - TVM_ATTR_FIELD(ofm_layout) - .set_default("NHWC") - .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'."); - } -}; - -TVM_REGISTER_NODE_TYPE(EthosuConv2DAttrs); - bool EthosuConv2DRel(const Array& types, int num_inputs, const Attrs& attrs, const TypeReporter& reporter) { CHECK_EQ(types.size(), 5); diff --git a/src/relay/op/contrib/ethosu/depthwise.cc b/src/relay/op/contrib/ethosu/depthwise.cc index 7e9fed5041be..f33141d31e74 100644 --- a/src/relay/op/contrib/ethosu/depthwise.cc +++ b/src/relay/op/contrib/ethosu/depthwise.cc @@ -30,6 +30,7 @@ #include "../../../qnn/utils.h" #include "../../nn/convolution.h" #include "common.h" +#include "op_attrs.h" namespace tvm { namespace relay { @@ -37,94 +38,6 @@ namespace op { namespace contrib { namespace ethosu { -/*! \brief Attributes used by the Ethos(TM)-U NPU depthwise operator */ -struct EthosuDepthwiseConv2DAttrs : public tvm::AttrsNode { - double ifm_scale; - int ifm_zero_point; - int weight_zero_point; - double ofm_scale; - int ofm_zero_point; - Array kernel_shape; - IndexExpr ofm_channels; - Array strides; - Array padding; - Array dilation; - String activation; - int clip_min; - int clip_max; - String rounding_mode; - String upscale; - String ifm_layout; - String ofm_layout; - String ofm_dtype; - - TVM_DECLARE_ATTRS(EthosuDepthwiseConv2DAttrs, "relay.attrs.EthosuDepthwiseConv2DAttrs") { - TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ifm_zero_point) - .describe("The quantization zero point for the Output Feature Map tensor."); - TVM_ATTR_FIELD(weight_zero_point) - .describe("The quantization zero point for the weight tensor."); - TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ofm_zero_point) - .describe("The quantization zero point for the Output Feature Map tensor."); - TVM_ATTR_FIELD(kernel_shape) - .describe("The 2 
dimensional kernel shape as (kernel_height, kernel_width).") - .set_default(NullValue >()); - TVM_ATTR_FIELD(ofm_channels) - .describe("The number of OFM channels.") - .set_default(NullValue()); - TVM_ATTR_FIELD(strides) - .describe("The 2 dimensional strides as (stride_height, stride_width).") - .set_default(Array({1, 1})); - TVM_ATTR_FIELD(padding) - .describe("The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right)") - .set_default(Array({0, 0, 0, 0})); - TVM_ATTR_FIELD(dilation) - .describe("The 2 dimensional dilation as (dilation_height, dilation_width).") - .set_default(Array({1, 1})); - TVM_ATTR_FIELD(activation) - .describe( - "Description: The activation function to use." - "'NONE' - no activation function." - "'CLIP' - clip the output between clip_min and clip_max." - "'TANH - tanh activation function." - "'SIGMOID' - sigmoid activation function." - "'LUT' - use a look-up table to perform the activation function.") - .set_default("NONE"); - TVM_ATTR_FIELD(clip_min) - .describe("The minimum clipping value if activation = CLIP.") - .set_default(0); - TVM_ATTR_FIELD(clip_max) - .describe("The maximum clipping value if activation = CLIP.") - .set_default(0); - TVM_ATTR_FIELD(rounding_mode) - .describe( - "The rounding mode to apply to the Output Feature Map tensor. " - "'TFL' - Tensorflow Lite rounding scheme. " - "'TRUNCATE' - Truncate towards zero." - "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") - .set_default("TFL"); - TVM_ATTR_FIELD(upscale) - .describe( - "The 2x2 upscaling mode to apply to the Input Feature Map tensor. " - "'NONE' - no upscaling. " - "'NEAREST' - upscale using nearest neighbour. " - "'ZEROS' - upscale using zeros.") - .set_default("NONE"); - TVM_ATTR_FIELD(ifm_layout) - .set_default("NHWC") - .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'."); - TVM_ATTR_FIELD(ofm_layout) - .set_default("NHWC") - .describe("The layout of the Output Feature Map tensor. 
Can be 'NHWC' or 'NHCWB16'."); - TVM_ATTR_FIELD(ofm_dtype) - .describe("The Output Feature Map tensor data type. Can be 'int8', 'uint8' or 'int16'.") - .set_default("int8"); - } -}; - -TVM_REGISTER_NODE_TYPE(EthosuDepthwiseConv2DAttrs); - bool EthosuDepthwiseConv2DRel(const Array& types, int num_inputs, const Attrs& attrs, const TypeReporter& reporter) { ICHECK_EQ(types.size(), 5); diff --git a/src/relay/op/contrib/ethosu/identity.cc b/src/relay/op/contrib/ethosu/identity.cc index 350e8028f201..9b00978d43d8 100644 --- a/src/relay/op/contrib/ethosu/identity.cc +++ b/src/relay/op/contrib/ethosu/identity.cc @@ -24,6 +24,7 @@ #include #include "common.h" +#include "op_attrs.h" namespace tvm { namespace relay { @@ -31,34 +32,6 @@ namespace op { namespace contrib { namespace ethosu { -/*! \brief Attributes used by the NPU identity operator */ -struct EthosuIdentityAttrs : public tvm::AttrsNode { - double ifm_scale; - int ifm_zero_point; - double ofm_scale; - int ofm_zero_point; - String activation; - - TVM_DECLARE_ATTRS(EthosuIdentityAttrs, "relay.attrs.EthosuIdentityAttrs") { - TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ifm_zero_point) - .describe("The quantization zero point for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ofm_zero_point) - .describe("The quantization zero point for the Output Feature Map tensor."); - TVM_ATTR_FIELD(activation) - .describe( - "The activation function to use. " - "'NONE' - no activation function. " - "'TANH' - tanh activation function. " - "'SIGMOID' - sigmoid activation function. 
" - "'LUT' - use a look-up table to perform the activation function.") - .set_default("NONE"); - } -}; - -TVM_REGISTER_NODE_TYPE(EthosuIdentityAttrs); - bool EthosuIdentityRel(const Array& types, int num_inputs, const Attrs& attrs, const TypeReporter& reporter) { const int ifm_index = 0; diff --git a/src/relay/op/contrib/ethosu/op_attrs.h b/src/relay/op/contrib/ethosu/op_attrs.h new file mode 100644 index 000000000000..4b039f6f060d --- /dev/null +++ b/src/relay/op/contrib/ethosu/op_attrs.h @@ -0,0 +1,480 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/op/contrib/ethosu/op_attrs.h + * \brief Attributes for the Arm(R) Ethos(TM)-U NPU operators. + */ + +#ifndef TVM_RELAY_OP_CONTRIB_ETHOSU_OP_ATTRS_H_ +#define TVM_RELAY_OP_CONTRIB_ETHOSU_OP_ATTRS_H_ + +#include + +namespace tvm { +namespace relay { +namespace op { +namespace contrib { +namespace ethosu { + +/*! 
\brief Attributes used by the Ethos(TM)-U NPU binary elementwise operators */ +struct EthosuBinaryElementwiseAttrs : public tvm::AttrsNode { + String operator_type; + double ifm_scale; + int ifm_zero_point; + double ifm2_scale; + int ifm2_zero_point; + double ofm_scale; + int ofm_zero_point; + IndexExpr ifm_channels; + IndexExpr ifm2_channels; + bool reversed_operands; + String activation; + int clip_min; + int clip_max; + String rounding_mode; + String ifm_layout; + String ifm2_layout; + String ofm_layout; + String ofm_dtype; + + TVM_DECLARE_ATTRS(EthosuBinaryElementwiseAttrs, "relay.attrs.EthosuBinaryElementwiseAttrs") { + TVM_ATTR_FIELD(operator_type) + .describe( + "The type of the binary elementwise operator." + "'ADD'" + "'SUB'" + "'MUL'" + "'MIN'" + "'MAX'" + "'SHR'" + "'SHL'"); + TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); + TVM_ATTR_FIELD(ifm_zero_point) + .describe("The quantization zero point for the Input Feature Map tensor."); + TVM_ATTR_FIELD(ifm2_scale) + .describe("The quantization scale for the Input Feature Map tensor 2."); + TVM_ATTR_FIELD(ifm2_zero_point) + .describe("The quantization zero point for the Input Feature Map tensor 2."); + TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); + TVM_ATTR_FIELD(ofm_zero_point) + .describe("The quantization zero point for the Output Feature Map tensor."); + TVM_ATTR_FIELD(ifm_channels).describe("The number of the Input Feature Map channels."); + TVM_ATTR_FIELD(ifm2_channels).describe("The number of the Input Feature Map 2 channels."); + TVM_ATTR_FIELD(reversed_operands) + .describe("True if IFM2 is the first operand and IFM is the second operand.") + .set_default(false); + TVM_ATTR_FIELD(activation) + .describe( + "The activation function to use. " + "'NONE' - no activation function. " + "'CLIP' - clip the output between clip_min and clip_max. " + "'TANH' - tanh activation function. 
" + "'SIGMOID' - sigmoid activation function. " + "'LUT' - use a look-up table to perform the activation function." + "Available activations for activation type:" + "{int8, uint8}: 'NONE', 'CLIP', 'TANH', 'SIGMOID', 'LUT'" + "{int32}: 'NONE'") + .set_default("NONE"); + TVM_ATTR_FIELD(clip_min) + .describe("The minimum clipping value if activation = 'CLIP'.") + .set_default(0); + TVM_ATTR_FIELD(clip_max) + .describe("The maximum clipping value if activation = 'CLIP'.") + .set_default(0); + TVM_ATTR_FIELD(rounding_mode) + .describe( + "The rounding mode to apply to the Output Feature Map tensor. " + "'TFL' - Tensorflow Lite rounding scheme. " + "'TRUNCATE' - Truncate towards zero." + "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") + .set_default("TFL"); + TVM_ATTR_FIELD(ifm_layout) + .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") + .set_default("NHWC"); + TVM_ATTR_FIELD(ifm2_layout) + .describe("The layout of the Input Feature Map tensor 2. Can be 'NHWC' or 'NHCWB16'.") + .set_default("NHWC"); + TVM_ATTR_FIELD(ofm_layout) + .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") + .set_default("NHWC"); + TVM_ATTR_FIELD(ofm_dtype).describe( + "The Output Feature Map tensor type." + "MUL, ADD, SUB {IFM}->{OFM}:" + " {uint8, int8 int32} -> {uint8, int8, int32}, any pairing" + "MAX, MIN:" + " IFM and OFM must be of the same type, one of:" + " {int8, uint8}" + "SHR {IFM}->{OFM}:" + " {int32}->{int8, uint8, int32}, any pairing" + "SHL:" + " {int32}->{int32} only"); + } +}; + +TVM_REGISTER_NODE_TYPE(EthosuBinaryElementwiseAttrs); + +/*! 
\brief Attributes used by the Ethos(TM)-U NPU convolution operator */ +struct EthosuConv2DAttrs : public tvm::AttrsNode { + double ifm_scale; + int ifm_zero_point; + int weight_zero_point; + double ofm_scale; + int ofm_zero_point; + Array kernel_shape; + IndexExpr ofm_channels; + Array strides; + Array padding; + Array dilation; + String activation; + int clip_min; + int clip_max; + String rounding_mode; + String upscale; + String ifm_layout; + String ofm_layout; + + TVM_DECLARE_ATTRS(EthosuConv2DAttrs, "relay.attrs.EthosuConv2DAttrs") { + TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); + TVM_ATTR_FIELD(ifm_zero_point) + .describe("The quantization zero point for the Input Feature Map tensor."); + TVM_ATTR_FIELD(weight_zero_point) + .describe("The quantization zero point for the weight tensor."); + TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); + TVM_ATTR_FIELD(ofm_zero_point) + .describe("The quantization zero point for the Output Feature Map tensor."); + TVM_ATTR_FIELD(kernel_shape) + .describe("The 2 dimensional kernel shape as (kernel_height, kernel_width).") + .set_default(NullValue>()); + TVM_ATTR_FIELD(ofm_channels) + .describe("The number of the Output Feature Map channels.") + .set_default(NullValue()); + TVM_ATTR_FIELD(strides) + .set_default(Array({1, 1})) + .describe("The 2 dimensional strides as (stride_height, stride_width)."); + TVM_ATTR_FIELD(padding) + .set_default(Array({0, 0, 0, 0})) + .describe("The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right)."); + TVM_ATTR_FIELD(dilation) + .set_default(Array({1, 1})) + .describe("The 2 dimensional dilation as (dilation_height, dilation_width)."); + TVM_ATTR_FIELD(activation) + .describe( + "The activation function to use. " + "'NONE' - no activation function. " + "'CLIP' - clip the output between clip_min and clip_max. " + "'TANH' - tanh activation function. 
" + "'SIGMOID' - sigmoid activation function. " + "'LUT' - use a look-up table to perform the activation function.") + .set_default("NONE"); + TVM_ATTR_FIELD(clip_min) + .describe("The minimum clipping value if activation = 'CLIP'.") + .set_default(0); + TVM_ATTR_FIELD(clip_max) + .describe("The maximum clipping value if activation = 'CLIP'.") + .set_default(0); + TVM_ATTR_FIELD(rounding_mode) + .describe( + "The rounding mode to apply to the Output Feature Map tensor. " + "'TFL' - Tensorflow Lite rounding scheme. " + "'TRUNCATE' - Truncate towards zero." + "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") + .set_default("TFL"); + TVM_ATTR_FIELD(upscale) + .describe( + "The 2x2 upscaling mode to apply to the Input Feature Map tensor. " + "'NONE' - no upscaling. " + "'NEAREST' - upscale using nearest neighbour. " + "'ZEROS' - upscale using zeros.") + .set_default("NONE"); + TVM_ATTR_FIELD(ifm_layout) + .set_default("NHWC") + .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'."); + TVM_ATTR_FIELD(ofm_layout) + .set_default("NHWC") + .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'."); + } +}; + +TVM_REGISTER_NODE_TYPE(EthosuConv2DAttrs); + +/*! 
\brief Attributes used by the Ethos(TM)-U NPU depthwise operator */ +struct EthosuDepthwiseConv2DAttrs : public tvm::AttrsNode { + double ifm_scale; + int ifm_zero_point; + int weight_zero_point; + double ofm_scale; + int ofm_zero_point; + Array kernel_shape; + IndexExpr ofm_channels; + Array strides; + Array padding; + Array dilation; + String activation; + int clip_min; + int clip_max; + String rounding_mode; + String upscale; + String ifm_layout; + String ofm_layout; + String ofm_dtype; + + TVM_DECLARE_ATTRS(EthosuDepthwiseConv2DAttrs, "relay.attrs.EthosuDepthwiseConv2DAttrs") { + TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); + TVM_ATTR_FIELD(ifm_zero_point) + .describe("The quantization zero point for the Output Feature Map tensor."); + TVM_ATTR_FIELD(weight_zero_point) + .describe("The quantization zero point for the weight tensor."); + TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); + TVM_ATTR_FIELD(ofm_zero_point) + .describe("The quantization zero point for the Output Feature Map tensor."); + TVM_ATTR_FIELD(kernel_shape) + .describe("The 2 dimensional kernel shape as (kernel_height, kernel_width).") + .set_default(NullValue>()); + TVM_ATTR_FIELD(ofm_channels) + .describe("The number of OFM channels.") + .set_default(NullValue()); + TVM_ATTR_FIELD(strides) + .describe("The 2 dimensional strides as (stride_height, stride_width).") + .set_default(Array({1, 1})); + TVM_ATTR_FIELD(padding) + .describe("The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right)") + .set_default(Array({0, 0, 0, 0})); + TVM_ATTR_FIELD(dilation) + .describe("The 2 dimensional dilation as (dilation_height, dilation_width).") + .set_default(Array({1, 1})); + TVM_ATTR_FIELD(activation) + .describe( + "Description: The activation function to use." + "'NONE' - no activation function." + "'CLIP' - clip the output between clip_min and clip_max." 
+ "'TANH - tanh activation function." + "'SIGMOID' - sigmoid activation function." + "'LUT' - use a look-up table to perform the activation function.") + .set_default("NONE"); + TVM_ATTR_FIELD(clip_min) + .describe("The minimum clipping value if activation = CLIP.") + .set_default(0); + TVM_ATTR_FIELD(clip_max) + .describe("The maximum clipping value if activation = CLIP.") + .set_default(0); + TVM_ATTR_FIELD(rounding_mode) + .describe( + "The rounding mode to apply to the Output Feature Map tensor. " + "'TFL' - Tensorflow Lite rounding scheme. " + "'TRUNCATE' - Truncate towards zero." + "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") + .set_default("TFL"); + TVM_ATTR_FIELD(upscale) + .describe( + "The 2x2 upscaling mode to apply to the Input Feature Map tensor. " + "'NONE' - no upscaling. " + "'NEAREST' - upscale using nearest neighbour. " + "'ZEROS' - upscale using zeros.") + .set_default("NONE"); + TVM_ATTR_FIELD(ifm_layout) + .set_default("NHWC") + .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'."); + TVM_ATTR_FIELD(ofm_layout) + .set_default("NHWC") + .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'."); + TVM_ATTR_FIELD(ofm_dtype) + .describe("The Output Feature Map tensor data type. Can be 'int8', 'uint8' or 'int16'.") + .set_default("int8"); + } +}; + +TVM_REGISTER_NODE_TYPE(EthosuDepthwiseConv2DAttrs); + +/*! 
\brief Attributes used by the NPU identity operator */ +struct EthosuIdentityAttrs : public tvm::AttrsNode { + double ifm_scale; + int ifm_zero_point; + double ofm_scale; + int ofm_zero_point; + String activation; + + TVM_DECLARE_ATTRS(EthosuIdentityAttrs, "relay.attrs.EthosuIdentityAttrs") { + TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); + TVM_ATTR_FIELD(ifm_zero_point) + .describe("The quantization zero point for the Input Feature Map tensor."); + TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); + TVM_ATTR_FIELD(ofm_zero_point) + .describe("The quantization zero point for the Output Feature Map tensor."); + TVM_ATTR_FIELD(activation) + .describe( + "The activation function to use. " + "'NONE' - no activation function. " + "'TANH' - tanh activation function. " + "'SIGMOID' - sigmoid activation function. " + "'LUT' - use a look-up table to perform the activation function.") + .set_default("NONE"); + } +}; + +TVM_REGISTER_NODE_TYPE(EthosuIdentityAttrs); + +/*! \brief Attributes used by the Ethos(TM)-U NPU pooling operator */ +struct EthosuPoolingAttrs : public tvm::AttrsNode { + String pooling_type; + double ifm_scale; + int ifm_zero_point; + double ofm_scale; + int ofm_zero_point; + Array pool_shape; + IndexExpr ofm_channels; + Array strides; + Array padding; + String activation; + int clip_min; + int clip_max; + String rounding_mode; + String upscale; + String ifm_layout; + String ofm_layout; + + TVM_DECLARE_ATTRS(EthosuPoolingAttrs, "relay.attrs.EthosuPoolingAttrs") { + TVM_ATTR_FIELD(pooling_type) + .describe("The type of the pooling. 
'AVG' - average pool, 'MAX' - max pool."); + TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); + TVM_ATTR_FIELD(ifm_zero_point) + .describe("The quantization zero point for the Input Feature Map tensor."); + TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); + TVM_ATTR_FIELD(ofm_zero_point) + .describe("The quantization zero point for the Output Feature Map tensor."); + TVM_ATTR_FIELD(pool_shape) + .describe("The 2 dimensional pool shape as (pool_shape_height, pool_shape_width).") + .set_default(NullValue>()); + TVM_ATTR_FIELD(ofm_channels) + .describe(" The number of the Output Feature Map channels.") + .set_default(NullValue()); + TVM_ATTR_FIELD(strides) + .set_default(Array({1, 1})) + .describe("The 2 dimensional strides as (stride_height, stride_width)."); + TVM_ATTR_FIELD(padding) + .describe("The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right).") + .set_default(Array({0, 0, 0, 0})); + TVM_ATTR_FIELD(activation) + .describe( + "The activation function to use. " + "'NONE' - no activation function. " + "'CLIP' - clip the output between clip_min and clip_max. " + "'TANH' - tanh activation function. " + "'SIGMOID' - sigmoid activation function. " + "'LUT' - use a look-up table to perform the activation function.") + .set_default("NONE"); + TVM_ATTR_FIELD(clip_min) + .describe("The minimum clipping value if activation = 'CLIP'.") + .set_default(0); + TVM_ATTR_FIELD(clip_max) + .describe("The maximum clipping value if activation = 'CLIP'.") + .set_default(0); + TVM_ATTR_FIELD(rounding_mode) + .describe( + "The rounding mode to apply to the Output Feature Map tensor. " + "'TFL' - Tensorflow Lite rounding scheme. " + "'TRUNCATE' - Truncate towards zero." 
+ "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") + .set_default("TFL"); + TVM_ATTR_FIELD(upscale) + .describe( + "The 2x2 upscaling mode to apply to the Input Feature Map tensor. " + "'NONE' - no upscaling. " + "'NEAREST' - upscale using nearest neighbour. " + "'ZEROS' - upscale using zeros.") + .set_default("NONE"); + TVM_ATTR_FIELD(ifm_layout) + .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") + .set_default("NHWC"); + TVM_ATTR_FIELD(ofm_layout) + .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") + .set_default("NHWC"); + } +}; + +TVM_REGISTER_NODE_TYPE(EthosuPoolingAttrs); + +/*! \brief Attributes used by the NPU unary elementwise operator */ +struct EthosuUnaryElementwiseAttrs : public tvm::AttrsNode { + String operator_type; + double ifm_scale; + int ifm_zero_point; + double ofm_scale; + int ofm_zero_point; + IndexExpr ofm_channels; + String activation; + int clip_min; + int clip_max; + String rounding_mode; + String ifm_layout; + String ofm_layout; + + TVM_DECLARE_ATTRS(EthosuUnaryElementwiseAttrs, "relay.attrs.EthosuUnaryElementwiseAttrs") { + TVM_ATTR_FIELD(operator_type) + .describe( + "The type of the unary elementwise operator." + "'ABS'" + "'CLZ'"); + TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); + TVM_ATTR_FIELD(ifm_zero_point) + .describe("The quantization zero point for the Input Feature Map tensor."); + TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); + TVM_ATTR_FIELD(ofm_zero_point) + .describe("The quantization zero point for the Output Feature Map tensor."); + TVM_ATTR_FIELD(ofm_channels).describe("The number of OFM channels."); + TVM_ATTR_FIELD(activation) + .describe( + "The activation function to use. " + "'NONE' - no activation function. " + "'CLIP' - clip the output between clip_min and clip_max. " + "'TANH' - tanh activation function. 
" + "'SIGMOID' - sigmoid activation function. " + "'LUT' - use a look-up table to perform the activation function.") + .set_default("NONE"); + TVM_ATTR_FIELD(clip_min) + .describe("The minimum clipping value if activation = 'CLIP'.") + .set_default(0); + TVM_ATTR_FIELD(clip_max) + .describe("The maximum clipping value if activation = 'CLIP'.") + .set_default(0); + TVM_ATTR_FIELD(rounding_mode) + .describe( + "The rounding mode to apply to the Output Feature Map tensor. " + "'TFL' - Tensorflow Lite rounding scheme. " + "'TRUNCATE' - Truncate towards zero." + "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") + .set_default("TFL"); + TVM_ATTR_FIELD(ifm_layout) + .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") + .set_default("NHWC"); + TVM_ATTR_FIELD(ofm_layout) + .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") + .set_default("NHWC"); + } +}; + +TVM_REGISTER_NODE_TYPE(EthosuUnaryElementwiseAttrs); + +} // namespace ethosu +} // namespace contrib +} // namespace op +} // namespace relay +} // namespace tvm + +#endif // TVM_RELAY_OP_CONTRIB_ETHOSU_OP_ATTRS_H_ diff --git a/src/relay/op/contrib/ethosu/pooling.cc b/src/relay/op/contrib/ethosu/pooling.cc index 3175e4ddffc4..8ad5909f0c17 100644 --- a/src/relay/op/contrib/ethosu/pooling.cc +++ b/src/relay/op/contrib/ethosu/pooling.cc @@ -24,6 +24,7 @@ #include #include "common.h" +#include "op_attrs.h" namespace tvm { namespace relay { @@ -31,86 +32,6 @@ namespace op { namespace contrib { namespace ethosu { -/*! 
\brief Attributes used by the Ethos(TM)-U NPU pooling operator */ -struct EthosuPoolingAttrs : public tvm::AttrsNode { - String pooling_type; - double ifm_scale; - int ifm_zero_point; - double ofm_scale; - int ofm_zero_point; - Array pool_shape; - IndexExpr ofm_channels; - Array strides; - Array padding; - String activation; - int clip_min; - int clip_max; - String rounding_mode; - String upscale; - String ifm_layout; - String ofm_layout; - - TVM_DECLARE_ATTRS(EthosuPoolingAttrs, "relay.attrs.EthosuPoolingAttrs") { - TVM_ATTR_FIELD(pooling_type) - .describe("The type of the pooling. 'AVG' - average pool, 'MAX' - max pool."); - TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ifm_zero_point) - .describe("The quantization zero point for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ofm_zero_point) - .describe("The quantization zero point for the Output Feature Map tensor."); - TVM_ATTR_FIELD(pool_shape) - .describe("The 2 dimensional pool shape as (pool_shape_height, pool_shape_width).") - .set_default(NullValue >()); - TVM_ATTR_FIELD(ofm_channels) - .describe(" The number of the Output Feature Map channels.") - .set_default(NullValue()); - TVM_ATTR_FIELD(strides) - .set_default(Array({1, 1})) - .describe("The 2 dimensional strides as (stride_height, stride_width)."); - TVM_ATTR_FIELD(padding) - .describe("The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right).") - .set_default(Array({0, 0, 0, 0})); - TVM_ATTR_FIELD(activation) - .describe( - "The activation function to use. " - "'NONE' - no activation function. " - "'CLIP' - clip the output between clip_min and clip_max. " - "'TANH' - tanh activation function. " - "'SIGMOID' - sigmoid activation function. 
" - "'LUT' - use a look-up table to perform the activation function.") - .set_default("NONE"); - TVM_ATTR_FIELD(clip_min) - .describe("The minimum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(clip_max) - .describe("The maximum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(rounding_mode) - .describe( - "The rounding mode to apply to the Output Feature Map tensor. " - "'TFL' - Tensorflow Lite rounding scheme. " - "'TRUNCATE' - Truncate towards zero." - "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") - .set_default("TFL"); - TVM_ATTR_FIELD(upscale) - .describe( - "The 2x2 upscaling mode to apply to the Input Feature Map tensor. " - "'NONE' - no upscaling. " - "'NEAREST' - upscale using nearest neighbour. " - "'ZEROS' - upscale using zeros.") - .set_default("NONE"); - TVM_ATTR_FIELD(ifm_layout) - .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") - .set_default("NHWC"); - TVM_ATTR_FIELD(ofm_layout) - .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") - .set_default("NHWC"); - } -}; - -TVM_REGISTER_NODE_TYPE(EthosuPoolingAttrs); - bool EthosuPoolingRel(const Array& types, int num_inputs, const Attrs& attrs, const TypeReporter& reporter) { int ifm_index = 0; diff --git a/src/relay/op/contrib/ethosu/unary_elementwise.cc b/src/relay/op/contrib/ethosu/unary_elementwise.cc index a346f095283c..62a8a6a85ab3 100644 --- a/src/relay/op/contrib/ethosu/unary_elementwise.cc +++ b/src/relay/op/contrib/ethosu/unary_elementwise.cc @@ -24,6 +24,7 @@ #include #include "common.h" +#include "op_attrs.h" namespace tvm { namespace relay { @@ -31,67 +32,6 @@ namespace op { namespace contrib { namespace ethosu { -/*! 
\brief Attributes used by the NPU unary elementwise operator */ -struct EthosuUnaryElementwiseAttrs : public tvm::AttrsNode { - String operator_type; - double ifm_scale; - int ifm_zero_point; - double ofm_scale; - int ofm_zero_point; - IndexExpr ofm_channels; - String activation; - int clip_min; - int clip_max; - String rounding_mode; - String ifm_layout; - String ofm_layout; - - TVM_DECLARE_ATTRS(EthosuUnaryElementwiseAttrs, "relay.attrs.EthosuUnaryElementwiseAttrs") { - TVM_ATTR_FIELD(operator_type) - .describe( - "The type of the unary elementwise operator." - "'ABS'" - "'CLZ'"); - TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ifm_zero_point) - .describe("The quantization zero point for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ofm_zero_point) - .describe("The quantization zero point for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ofm_channels).describe("The number of OFM channels."); - TVM_ATTR_FIELD(activation) - .describe( - "The activation function to use. " - "'NONE' - no activation function. " - "'CLIP' - clip the output between clip_min and clip_max. " - "'TANH' - tanh activation function. " - "'SIGMOID' - sigmoid activation function. " - "'LUT' - use a look-up table to perform the activation function.") - .set_default("NONE"); - TVM_ATTR_FIELD(clip_min) - .describe("The minimum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(clip_max) - .describe("The maximum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(rounding_mode) - .describe( - "The rounding mode to apply to the Output Feature Map tensor. " - "'TFL' - Tensorflow Lite rounding scheme. " - "'TRUNCATE' - Truncate towards zero." 
- "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") - .set_default("TFL"); - TVM_ATTR_FIELD(ifm_layout) - .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") - .set_default("NHWC"); - TVM_ATTR_FIELD(ofm_layout) - .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") - .set_default("NHWC"); - } -}; - -TVM_REGISTER_NODE_TYPE(EthosuUnaryElementwiseAttrs); - bool EthosuUnaryElementwiseRel(const Array& types, int num_inputs, const Attrs& attrs, const TypeReporter& reporter) { const int ifm_index = 0; diff --git a/tests/python/contrib/test_ethosu/test_identity_optimizer.py b/tests/python/contrib/test_ethosu/test_identity_optimizer.py new file mode 100644 index 000000000000..833b8d089dc8 --- /dev/null +++ b/tests/python/contrib/test_ethosu/test_identity_optimizer.py @@ -0,0 +1,321 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Test the identity optimizer pass that removes redundant identity +operations from the microNPU codegen. 
+""" +import pytest + +pytest.importorskip("ethosu.vela") + +import tensorflow as tf + +import tvm +from tvm import relay +from tvm.relay.op.contrib.ethosu import partition_for_ethosu +from tvm.relay.backend.contrib.ethosu.codegen import relay_to_tir_func +from tvm.relay.backend.contrib.ethosu.codegen import IdentityOptimizer + +from . import infra +from .test_codegen import _compare_tvm_with_tflite + + +def _optimize(expr, optimize=True): + """Create IRModule and run identity optimizer pass.""" + mod = tvm.IRModule.from_expr(expr) + mod = relay.transform.InferType()(mod) + if optimize: + mod = IdentityOptimizer()(mod) + entry = mod["main"] + return entry if isinstance(expr, relay.Function) else entry.body + + +def _assert_structural_equal(a, b): + """Check structural equality of two Relay expressions.""" + reason = ( + "Actual and expected relay functions are not equal. " + "IdentityOptimizer is not correctly removing redundant " + "identity operations." + ) + assert tvm.ir.structural_equal(a, b), reason + + +def test_simple_reshape_identity_removal(): + """Check identity is removed when there is a reshape in + the graph and a compute operation follows.""" + + def get_graph(get_expected=False): + x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + x = infra.make_ethosu_conv2d(x, 4, 4, (1, 1), (0, 0), (1, 1), (1, 1)) + x = relay.reshape(x, newshape=(1, 4, 4, 1)) + if not get_expected: + x = infra.make_ethosu_identity(x) + x = infra.make_ethosu_unary_elementwise(x, 1, "ABS") + return relay.Function(relay.analysis.free_vars(x), x) + + actual = _optimize(get_graph()) + expected = _optimize(get_graph(get_expected=True), optimize=False) + _assert_structural_equal(actual, expected) + + +def test_simple_strided_slice_identity_removal(): + """Check identity is removed when there is a strided slice + in the graph and a compute operation follows.""" + + def get_graph(get_expected=False): + x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + x = 
infra.make_ethosu_pooling(x, "MAX", (1, 1), 4, (1, 1), (0, 0)) + x = relay.strided_slice(x, begin=[0, 0, 0, 0], end=[1, 2, 2, 2]) + if not get_expected: + x = infra.make_ethosu_identity(x) + x = infra.make_ethosu_pooling(x, "MAX", (1, 1), 2, (1, 1), (0, 0)) + return relay.Function(relay.analysis.free_vars(x), x) + + actual = _optimize(get_graph()) + expected = _optimize(get_graph(get_expected=True), optimize=False) + _assert_structural_equal(actual, expected) + + +def test_no_identity(): + """Check the graph is not affected when there is no identity in the graph.""" + + def get_graph(): + x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + x = infra.make_ethosu_conv2d(x, 4, 4, (1, 1), (0, 0), (1, 1), (1, 1)) + x = infra.make_ethosu_pooling(x, "MAX", (1, 1), 4, (1, 1), (0, 0)) + x = infra.make_ethosu_depthwise_conv2d(x, 4, (1, 1), (0, 0), (1, 1), (1, 1)) + x = infra.make_ethosu_unary_elementwise(x, 4, "ABS") + return relay.Function(relay.analysis.free_vars(x), x) + + actual = _optimize(get_graph()) + expected = _optimize(get_graph(), optimize=False) + _assert_structural_equal(actual, expected) + + +def test_reshape_last(): + """Check that an identity as a leaf of the graph is not removed.""" + + def get_graph(): + x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + x = infra.make_ethosu_conv2d(x, 4, 4, (1, 1), (0, 0), (1, 1), (1, 1)) + x = relay.reshape(x, newshape=(1, 4, 4, 1)) + x = infra.make_ethosu_identity(x) + return relay.Function(relay.analysis.free_vars(x), x) + + actual = _optimize(get_graph()) + expected = _optimize(get_graph(), optimize=False) + _assert_structural_equal(actual, expected) + + +def test_requantize_identity_no_removal(): + """Check that an identity that actually performs a requantize isn't removed.""" + + def get_graph(): + x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + x = relay.reshape(x, newshape=(1, 1, 4, 4)) + x = infra.make_ethosu_identity( + x, ifm_scale=0.5, ifm_zero_point=1, ofm_scale=0.3, ofm_zero_point=2 + ) + x = 
infra.make_ethosu_unary_elementwise(x, 4, "ABS") + return relay.Function(relay.analysis.free_vars(x), x) + + actual = _optimize(get_graph()) + expected = _optimize(get_graph(), optimize=False) + _assert_structural_equal(actual, expected) + + +def test_activation_identity_no_removal(): + """Check that an identity with an activation isn't removed.""" + + def get_graph(): + x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + x = relay.reshape(x, newshape=(1, 1, 4, 4)) + x = infra.make_ethosu_identity(x, activation="LUT") + x = infra.make_ethosu_unary_elementwise(x, 4, "ABS") + return relay.Function(relay.analysis.free_vars(x), x) + + actual = _optimize(get_graph()) + expected = _optimize(get_graph(), optimize=False) + _assert_structural_equal(actual, expected) + + +def test_multiple_output_identity(): + """Check that an identity is removed when it has multiple outputs.""" + + def get_graph(get_expected=False): + x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + if not get_expected: + x = infra.make_ethosu_identity(x) + y = infra.make_ethosu_unary_elementwise(x, 4, "ABS") + z = infra.make_ethosu_unary_elementwise(x, 4, "ABS") + out = relay.concatenate((y, z), axis=0) + return relay.Function(relay.analysis.free_vars(x), out) + + actual = _optimize(get_graph()) + expected = _optimize(get_graph(get_expected=True), optimize=False) + _assert_structural_equal(actual, expected) + + +def test_many_output_identity(): + """Check an identity with many outputs. 
It cannot be removed due + to having a strided slice as output.""" + + def get_graph(get_expected=False): + x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + x = relay.reshape(x, newshape=(1, 1, 4, 4)) + identity = infra.make_ethosu_identity(x) + outputs = [] + for _ in range(4): + ifm = x if get_expected else identity + outputs.append(infra.make_ethosu_unary_elementwise(ifm, 4, "ABS")) + outputs.append(relay.strided_slice(identity, begin=(0, 0, 0, 0), end=(1, 1, 4, 4))) + out = relay.concatenate(outputs, axis=0) + return relay.Function(relay.analysis.free_vars(out), out) + + actual = _optimize(get_graph()) + expected = _optimize(get_graph(get_expected=True), optimize=False) + _assert_structural_equal(actual, expected) + + +def test_identity_before_concatenate_no_removal(): + """Check that an identity isn't removed when the operator + following it is a concatenate operation.""" + + def get_graph(): + x = relay.var("x", shape=(1, 1, 4, 4), dtype="int8") + y = relay.var("y", shape=(1, 2, 2, 4), dtype="int8") + z = relay.var("z", shape=(1, 2, 2, 4), dtype="int8") + x = relay.reshape(x, newshape=(1, 2, 2, 4)) + y = relay.strided_slice(y, begin=(0, 0, 0, 0), end=(1, 2, 2, 4)) + x = infra.make_ethosu_identity(x) + y = infra.make_ethosu_identity(y) + out = relay.concatenate([x, y, z], axis=0) + return relay.Function(relay.analysis.free_vars(out), out) + + actual = _optimize(get_graph()) + expected = _optimize(get_graph(), optimize=False) + _assert_structural_equal(actual, expected) + + +def test_identity_removal_with_multiple_transform_ops(): + """Check that only an identity directly parent to a compute + operation is removed.""" + + def get_graph(get_expected=False): + x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + x = relay.strided_slice(x, begin=[0, 0, 0, 0], end=[1, 2, 2, 2]) + x = infra.make_ethosu_identity(x) + x = relay.reshape(x, newshape=(1, 1, 1, 8)) + if not get_expected: + x = infra.make_ethosu_identity(x) + x = 
infra.make_ethosu_unary_elementwise(x, 8, "ABS") + return relay.Function(relay.analysis.free_vars(x), x) + + actual = _optimize(get_graph()) + expected = _optimize(get_graph(get_expected=True), optimize=False) + _assert_structural_equal(actual, expected) + + +def test_identity_removal_on_binary_elementwise(): + """Check identities before binary elementwise are removed correctly.""" + + def get_graph(get_expected=False): + x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + y = relay.var("y", shape=(1, 2, 2, 4), dtype="int8") + if not get_expected: + x = infra.make_ethosu_identity(x) + y = infra.make_ethosu_identity(y) + z = infra.make_ethosu_binary_elementwise(x, y, 4, 4, "ADD", "int8") + return relay.Function(relay.analysis.free_vars(z), z) + + actual = _optimize(get_graph()) + expected = _optimize(get_graph(get_expected=True), optimize=False) + _assert_structural_equal(actual, expected) + + +def test_identity_single_removal_on_binary_elementwise(): + """Check that identity on the second input of the binary elementwise + operation is removed while the other input has no identity.""" + + def get_graph(get_expected=False): + x = relay.var("x", shape=(1, 4, 1, 4), dtype="int8") + y = relay.var("y", shape=(1, 2, 2, 4), dtype="int8") + y = relay.reshape(y, newshape=(1, 4, 1, 4)) + if not get_expected: + y = infra.make_ethosu_identity(y) + z = infra.make_ethosu_binary_elementwise(x, y, 4, 4, "ADD", "int8") + return relay.Function(relay.analysis.free_vars(z), z) + + actual = _optimize(get_graph()) + expected = _optimize(get_graph(get_expected=True), optimize=False) + _assert_structural_equal(actual, expected) + + +def test_identity_optimizer_runs_in_compilation_pipeline(): + """Checks that the identity optimization pass is run as part of the NPU compilation pipeline.""" + + def get_graph(): + x = relay.var("x", shape=(1, 4, 4, 4), dtype="int8") + x = relay.reshape(x, newshape=(1, 1, 16, 4)) + x = relay.nn.max_pool2d(x, layout="NHWC") + func = 
relay.Function(relay.analysis.free_vars(x), x) + return tvm.IRModule.from_expr(func) + + mod = get_graph() + mod = partition_for_ethosu(mod) + + external_gv_name = mod["main"].body.op.name_hint + external_func = mod[external_gv_name] + prim_func = relay_to_tir_func(external_func) + + # Check for hints in the TIR prim func that the identity optimization pass + # has run. There should not be an identity in the prim func. + assert prim_func.body.value.args[0] == "ethosu_pooling" + + +def test_same_output(): + """Check that the output remains the same when the identity + optimizer pass removes some identities inserted during legalization.""" + ifm_shapes = [(1, 1, 25, 8), (1, 5, 5, 8)] + + @tf.function + def model(x, y): + x = tf.reshape(x, (1, 5, 5, 8)) + z = tf.add(x, y) + z = tf.reshape(z, (1, 1, 25, 8)) + return z + + _compare_tvm_with_tflite(model, ifm_shapes, "ethos-u55-256") + + +def test_multi_output_identity_has_same_output(): + """Check that the output remains the same with an identity with + multiple outputs.""" + ifm_shape = (1, 1, 64, 16) + + @tf.function + def model(x): + x = tf.reshape(x, (1, 8, 8, 16)) + outputs = [] + for _ in range(4): + outputs.append(tf.nn.max_pool2d(x, 1, 1, "VALID")) + outputs.append(tf.reshape(x, (1, 8, 8, 16))) + y = tf.concat(outputs, axis=0) + return y + + _compare_tvm_with_tflite(model, [ifm_shape], "ethos-u55-256")