Skip to content

Commit

Permalink
[microNPU] Introduce a pass to remove redundant identity operations (#…
Browse files Browse the repository at this point in the history
…10254)

* [microNPU] Introduce a pass to remove redundant identity operations

Introduces a pass that aims to remove identity operations,
introduced during legalization, that are immediately followed by an NPU
compute operation, e.g. Convolution.

Change-Id: Ia3b2c7bebf8cba1f827af8e3f3335677ba8f6371

* fix lint

Change-Id: Idf9341ce757b849f8819944dab2fb3b1496a2caf

* Addressing comments

Changes in test_identity_optimizer.py:
* Fixed typo in docstring
* Removed print
* Fixed same output test to use correct input shape

Changes in codegen.cc:
* Remove unnecessary constructor

Change-Id: Ie4a053725110ce52d8be039ca1ce48084bc66545

* skip tests when required packages are not available

Change-Id: I0a88d92dd31ca3dd07a2a495f18c10a2ebf2fc9e

* support multiple output identities and add more tests

Change-Id: Ib54031fe1c70159728876a23f96b72adb2ea17b0
  • Loading branch information
lhutton1 authored Mar 8, 2022
1 parent acf8be7 commit 5e81389
Show file tree
Hide file tree
Showing 10 changed files with 897 additions and 438 deletions.
14 changes: 14 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
from tvm.relay.backend.contrib.ethosu.op import op_attrs
from tvm.relay.backend.contrib.ethosu import op

from . import _ffi_api


class OptimizeLUTs(ExprMutator):
"""A pass to merge an identity operator with a LUT based activation function with
Expand Down Expand Up @@ -299,6 +301,17 @@ def __call__(self, *args, **kwargs):
pass


def IdentityOptimizer():  # pylint: disable=invalid-name
    """Pass that removes redundant identities.

    The pass is implemented on the C++ side and looked up through the
    FFI registry; this wrapper only exposes it to Python.

    Returns
    -------
    Pass
        The module pass.
    """
    return _ffi_api.IdentityOptimizer()


@tvm._ffi.register_func("relay.ext.ethos-u.constant_updater")
def constant_updater(expr, symbol): # pylint: disable=unused-argument
"""
Expand Down Expand Up @@ -330,6 +343,7 @@ def relay_to_tir_func(ext_func: relay.Function) -> tvm.tir.PrimFunc:
mod["main"] = ext_func
mod = LegalizeEthosU()(mod)
mod = LUTsOptimizer()(mod)
mod = IdentityOptimizer()(mod)
mod = LayoutOptimizer()(mod)
mod = relay.transform.InferType()(mod)
# We are currently using copy_constants scheduler In the long run,
Expand Down
76 changes: 76 additions & 0 deletions src/relay/backend/contrib/ethosu/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <utility>
#include <vector>

#include "../../../op/contrib/ethosu/op_attrs.h"
#include "../../../op/make_op.h"
#include "utils.h"

Expand Down Expand Up @@ -100,6 +101,81 @@ tvm::transform::Pass RelayToTIR() {
return tvm::transform::CreateModulePass(pass_func, 0, "relay.contrib.ethos-u.RelayToTIR", {});
}

/*!
 * \brief This mutator removes identity operations that are not necessary. Specifically, an
 * identity operation can be removed when it is immediately followed by an NPU compute
 * operation.
 */
class RemoveRedundantIdentities : public MixedModeMutator {
 public:
  Expr Rewrite_(const CallNode* pre, const Expr& post) override {
    Call call = Downcast<Call>(post);

    // only consider rewrite if current op is an NPU compute op.
    if (!call->op->IsInstance<OpNode>()) {
      return post;
    }
    const auto* op = call->op.as<OpNode>();
    std::string op_name = op->name;
    // Keep the prefix in one place so its length can never drift out of sync
    // with a hard-coded substr() character count.
    static const std::string kEthosuOpPrefix = "contrib.ethosu.";
    if (op_name.compare(0, kEthosuOpPrefix.size(), kEthosuOpPrefix) != 0 ||
        op_name == "contrib.ethosu.identity") {
      return post;
    }

    // check if we can rewrite parent identity operations to current call.
    bool needs_rewrite = false;
    Array<Expr> new_args;
    for (const auto& arg : call->args) {
      if (const auto* parent_callnode = arg.as<CallNode>()) {
        if (const auto* parent_op = parent_callnode->op.as<OpNode>()) {
          Call parent_call = GetRef<Call>(parent_callnode);
          // Bypass the identity by wiring its input straight into the compute op.
          if (parent_op->name == "contrib.ethosu.identity" && IdentityDoesNothing(parent_call)) {
            needs_rewrite = true;
            new_args.push_back(parent_call->args[0]);
            continue;
          }
        }
      }
      new_args.push_back(arg);
    }

    if (needs_rewrite) {
      return Call(call->op, new_args, call->attrs, call->type_args);
    }
    return post;
  }

 private:
  /*!
   * \brief True when the identity neither requantizes nor applies an activation,
   * i.e. removing it cannot change the numerical behaviour of the graph.
   */
  bool IdentityDoesNothing(const Call& call) {
    const auto* attrs = call->attrs.as<tvm::relay::op::contrib::ethosu::EthosuIdentityAttrs>();
    // Defensive: a malformed identity call without the expected attrs type would
    // otherwise cause a null-pointer dereference below. Treat it as "not removable".
    if (attrs == nullptr) {
      return false;
    }
    bool does_not_requantize = attrs->ifm_scale == 1.0 && attrs->ifm_zero_point == 0 &&
                               attrs->ofm_scale == 1.0 && attrs->ofm_zero_point == 0;
    bool has_no_activation = attrs->activation == "NONE";
    return does_not_requantize && has_no_activation;
  }
};

/*!
 * \brief A pass to remove redundant identity operations.
 *
 * Applies RemoveRedundantIdentities to the body of every global function
 * in the module, updating the module only when a body actually changed.
 */
tvm::transform::Pass IdentityOptimizer() {
  runtime::TypedPackedFunc<IRModule(IRModule, transform::PassContext)> pass_func =
      [=](IRModule mod, transform::PassContext ctx) {
        for (auto global_var : mod->GetGlobalVars()) {
          Function func = Downcast<Function>(mod->Lookup(global_var));
          Expr optimized_body = RemoveRedundantIdentities().VisitExpr(func->body);
          // Only rewrite the global when the mutator produced a new body.
          if (!optimized_body.same_as(func->body)) {
            mod->Update(global_var, WithFields(func, func->params, optimized_body));
          }
        }
        return mod;
      };
  return tvm::transform::CreateModulePass(pass_func, 0,
                                          "relay.backend.contrib.ethos-u.IdentityOptimizer", {});
}

TVM_REGISTER_GLOBAL("relay.ext.ethos-u.IdentityOptimizer").set_body_typed(IdentityOptimizer);

/*!
* \brief This function lowers the IRModule with PrimFunc
* with the target of the microNPU to a C-source runtime module
Expand Down
98 changes: 1 addition & 97 deletions src/relay/op/contrib/ethosu/binary_elementwise.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,110 +24,14 @@
#include <tvm/relay/op.h>

#include "common.h"
#include "op_attrs.h"

namespace tvm {
namespace relay {
namespace op {
namespace contrib {
namespace ethosu {

/*! \brief Attributes used by the Ethos(TM)-U NPU binary elementwise operators */
struct EthosuBinaryElementwiseAttrs : public tvm::AttrsNode<EthosuBinaryElementwiseAttrs> {
  // Elementwise operation selector: ADD, SUB, MUL, MIN, MAX, SHR or SHL.
  String operator_type;
  // Quantization parameters (scale / zero point) for the two input feature
  // maps (IFM, IFM2) and the output feature map (OFM).
  double ifm_scale;
  int ifm_zero_point;
  double ifm2_scale;
  int ifm2_zero_point;
  double ofm_scale;
  int ofm_zero_point;
  // Channel counts of the two inputs.
  IndexExpr ifm_channels;
  IndexExpr ifm2_channels;
  // When true, IFM2 is treated as the first operand and IFM as the second.
  bool reversed_operands;
  // Optional fused activation and its clip bounds (bounds only apply to 'CLIP').
  String activation;
  int clip_min;
  int clip_max;
  // Rounding scheme applied to the OFM: 'TFL', 'TRUNCATE' or 'NATURAL'.
  String rounding_mode;
  // Tensor layouts: 'NHWC' or 'NHCWB16'.
  String ifm_layout;
  String ifm2_layout;
  String ofm_layout;
  // OFM element type; valid pairings depend on operator_type (see describe below).
  String ofm_dtype;

  TVM_DECLARE_ATTRS(EthosuBinaryElementwiseAttrs, "relay.attrs.EthosuBinaryElementwiseAttrs") {
    TVM_ATTR_FIELD(operator_type)
        .describe(
            "The type of the binary elementwise operator."
            "'ADD'"
            "'SUB'"
            "'MUL'"
            "'MIN'"
            "'MAX'"
            "'SHR'"
            "'SHL'");
    TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor.");
    TVM_ATTR_FIELD(ifm_zero_point)
        .describe("The quantization zero point for the Input Feature Map tensor.");
    TVM_ATTR_FIELD(ifm2_scale)
        .describe("The quantization scale for the Input Feature Map tensor 2.");
    TVM_ATTR_FIELD(ifm2_zero_point)
        .describe("The quantization zero point for the Input Feature Map tensor 2.");
    TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor.");
    TVM_ATTR_FIELD(ofm_zero_point)
        .describe("The quantization zero point for the Output Feature Map tensor.");
    TVM_ATTR_FIELD(ifm_channels).describe("The number of the Input Feature Map channels.");
    TVM_ATTR_FIELD(ifm2_channels).describe("The number of the Input Feature Map 2 channels.");
    TVM_ATTR_FIELD(reversed_operands)
        .describe("True if IFM2 is the first operand and IFM is the second operand.")
        .set_default(false);
    TVM_ATTR_FIELD(activation)
        .describe(
            "The activation function to use. "
            "'NONE' - no activation function. "
            "'CLIP' - clip the output between clip_min and clip_max. "
            "'TANH' - tanh activation function. "
            "'SIGMOID' - sigmoid activation function. "
            "'LUT' - use a look-up table to perform the activation function."
            "Available activations for activation type:"
            "{int8, uint8}: 'NONE', 'CLIP', 'TANH', 'SIGMOID', 'LUT'"
            "{int32}: 'NONE'")
        .set_default("NONE");
    TVM_ATTR_FIELD(clip_min)
        .describe("The minimum clipping value if activation = 'CLIP'.")
        .set_default(0);
    TVM_ATTR_FIELD(clip_max)
        .describe("The maximum clipping value if activation = 'CLIP'.")
        .set_default(0);
    TVM_ATTR_FIELD(rounding_mode)
        .describe(
            "The rounding mode to apply to the Output Feature Map tensor. "
            "'TFL' - Tensorflow Lite rounding scheme. "
            "'TRUNCATE' - Truncate towards zero."
            "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.")
        .set_default("TFL");
    TVM_ATTR_FIELD(ifm_layout)
        .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.")
        .set_default("NHWC");
    TVM_ATTR_FIELD(ifm2_layout)
        .describe("The layout of the Input Feature Map tensor 2. Can be 'NHWC' or 'NHCWB16'.")
        .set_default("NHWC");
    TVM_ATTR_FIELD(ofm_layout)
        .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.")
        .set_default("NHWC");
    TVM_ATTR_FIELD(ofm_dtype).describe(
        "The Output Feature Map tensor type."
        "MUL, ADD, SUB {IFM}->{OFM}:"
        " {uint8, int8 int32} -> {uint8, int8, int32}, any pairing"
        "MAX, MIN:"
        " IFM and OFM must be of the same type, one of:"
        " {int8, uint8}"
        "SHR {IFM}->{OFM}:"
        " {int32}->{int8, uint8, int32}, any pairing"
        "SHL:"
        " {int32}->{int32} only");
  }
};

TVM_REGISTER_NODE_TYPE(EthosuBinaryElementwiseAttrs);

bool EthosuBinaryElementwiseRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
const TypeReporter& reporter) {
const int ifm_index = 0;
Expand Down
85 changes: 1 addition & 84 deletions src/relay/op/contrib/ethosu/convolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,97 +31,14 @@

#include "../../../qnn/utils.h"
#include "common.h"
#include "op_attrs.h"

namespace tvm {
namespace relay {
namespace op {
namespace contrib {
namespace ethosu {

/*! \brief Attributes used by the Ethos(TM)-U NPU convolution operator */
struct EthosuConv2DAttrs : public tvm::AttrsNode<EthosuConv2DAttrs> {
  // Quantization parameters (scale / zero point) for the input feature map
  // (IFM), the weights and the output feature map (OFM).
  double ifm_scale;
  int ifm_zero_point;
  int weight_zero_point;
  double ofm_scale;
  int ofm_zero_point;
  // Kernel geometry: (kernel_height, kernel_width) and output channel count.
  Array<IndexExpr> kernel_shape;
  IndexExpr ofm_channels;
  // Spatial parameters: strides (H, W), padding (top, left, bottom, right)
  // and dilation (H, W).
  Array<IndexExpr> strides;
  Array<IndexExpr> padding;
  Array<IndexExpr> dilation;
  // Optional fused activation and its clip bounds (bounds only apply to 'CLIP').
  String activation;
  int clip_min;
  int clip_max;
  // Rounding scheme applied to the OFM: 'TFL', 'TRUNCATE' or 'NATURAL'.
  String rounding_mode;
  // Optional 2x2 upscaling of the IFM: 'NONE', 'NEAREST' or 'ZEROS'.
  String upscale;
  // Tensor layouts: 'NHWC' or 'NHCWB16'.
  String ifm_layout;
  String ofm_layout;

  TVM_DECLARE_ATTRS(EthosuConv2DAttrs, "relay.attrs.EthosuConv2DAttrs") {
    TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor.");
    TVM_ATTR_FIELD(ifm_zero_point)
        .describe("The quantization zero point for the Input Feature Map tensor.");
    TVM_ATTR_FIELD(weight_zero_point)
        .describe("The quantization zero point for the weight tensor.");
    TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor.");
    TVM_ATTR_FIELD(ofm_zero_point)
        .describe("The quantization zero point for the Output Feature Map tensor.");
    TVM_ATTR_FIELD(kernel_shape)
        .describe("The 2 dimensional kernel shape as (kernel_height, kernel_width).")
        .set_default(NullValue<Array<IndexExpr>>());
    TVM_ATTR_FIELD(ofm_channels)
        .describe("The number of the Output Feature Map channels.")
        .set_default(NullValue<IndexExpr>());
    TVM_ATTR_FIELD(strides)
        .set_default(Array<IndexExpr>({1, 1}))
        .describe("The 2 dimensional strides as (stride_height, stride_width).");
    TVM_ATTR_FIELD(padding)
        .set_default(Array<IndexExpr>({0, 0, 0, 0}))
        .describe("The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right).");
    TVM_ATTR_FIELD(dilation)
        .set_default(Array<IndexExpr>({1, 1}))
        .describe("The 2 dimensional dilation as (dilation_height, dilation_width).");
    TVM_ATTR_FIELD(activation)
        .describe(
            "The activation function to use. "
            "'NONE' - no activation function. "
            "'CLIP' - clip the output between clip_min and clip_max. "
            "'TANH' - tanh activation function. "
            "'SIGMOID' - sigmoid activation function. "
            "'LUT' - use a look-up table to perform the activation function.")
        .set_default("NONE");
    TVM_ATTR_FIELD(clip_min)
        .describe("The minimum clipping value if activation = 'CLIP'.")
        .set_default(0);
    TVM_ATTR_FIELD(clip_max)
        .describe("The maximum clipping value if activation = 'CLIP'.")
        .set_default(0);
    TVM_ATTR_FIELD(rounding_mode)
        .describe(
            "The rounding mode to apply to the Output Feature Map tensor. "
            "'TFL' - Tensorflow Lite rounding scheme. "
            "'TRUNCATE' - Truncate towards zero."
            "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.")
        .set_default("TFL");
    TVM_ATTR_FIELD(upscale)
        .describe(
            "The 2x2 upscaling mode to apply to the Input Feature Map tensor. "
            "'NONE' - no upscaling. "
            "'NEAREST' - upscale using nearest neighbour. "
            "'ZEROS' - upscale using zeros.")
        .set_default("NONE");
    TVM_ATTR_FIELD(ifm_layout)
        .set_default("NHWC")
        .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.");
    TVM_ATTR_FIELD(ofm_layout)
        .set_default("NHWC")
        .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.");
  }
};

TVM_REGISTER_NODE_TYPE(EthosuConv2DAttrs);

bool EthosuConv2DRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
const TypeReporter& reporter) {
CHECK_EQ(types.size(), 5);
Expand Down
Loading

0 comments on commit 5e81389

Please sign in to comment.