Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[microNPU] Introduce a pass to remove redundant identity operations #10254

Merged
merged 5 commits
Mar 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
from tvm.relay.backend.contrib.ethosu.op import op_attrs
from tvm.relay.backend.contrib.ethosu import op

from . import _ffi_api


class OptimizeLUTs(ExprMutator):
"""A pass to merge an identity operator with a LUT based activation function with
Expand Down Expand Up @@ -299,6 +301,17 @@ def __call__(self, *args, **kwargs):
pass


def IdentityOptimizer():  # pylint: disable=invalid-name
    """Pass that removes redundant identity operations.

    An identity operation is considered redundant when it performs no
    requantization and has no activation function, and its result is
    consumed directly by another NPU compute operation. The actual
    rewriting is implemented in C++ and exposed through the FFI.

    Returns
    -------
    Pass
        The module pass.
    """
    return _ffi_api.IdentityOptimizer()


@tvm._ffi.register_func("relay.ext.ethos-u.constant_updater")
def constant_updater(expr, symbol): # pylint: disable=unused-argument
"""
Expand Down Expand Up @@ -330,6 +343,7 @@ def relay_to_tir_func(ext_func: relay.Function) -> tvm.tir.PrimFunc:
mod["main"] = ext_func
mod = LegalizeEthosU()(mod)
mod = LUTsOptimizer()(mod)
mod = IdentityOptimizer()(mod)
mod = LayoutOptimizer()(mod)
mod = relay.transform.InferType()(mod)
# We are currently using copy_constants scheduler In the long run,
Expand Down
76 changes: 76 additions & 0 deletions src/relay/backend/contrib/ethosu/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <utility>
#include <vector>

#include "../../../op/contrib/ethosu/op_attrs.h"
#include "../../../op/make_op.h"
#include "utils.h"

Expand Down Expand Up @@ -100,6 +101,81 @@ tvm::transform::Pass RelayToTIR() {
return tvm::transform::CreateModulePass(pass_func, 0, "relay.contrib.ethos-u.RelayToTIR", {});
}

/*!
 * \brief This mutator removes identity operations that are not necessary. Specifically, an
 * identity operation can be removed when it is immediately followed by an NPU compute
 * operation.
 */
class RemoveRedundantIdentities : public MixedModeMutator {
 public:
  Expr Rewrite_(const CallNode* pre, const Expr& post) override {
    Call call = Downcast<Call>(post);

    // only consider rewrite if current op is an NPU compute op.
    if (!call->op->IsInstance<OpNode>()) {
      return post;
    }
    const auto* op = call->op.as<OpNode>();
    std::string op_name = op->name;
    // Use the prefix itself rather than a hard-coded length so the check cannot
    // silently drift out of sync with the literal.
    static const std::string npu_prefix = "contrib.ethosu.";
    static const std::string identity_name = "contrib.ethosu.identity";
    if (op_name.rfind(npu_prefix, 0) != 0 || op_name == identity_name) {
      return post;
    }

    // check if we can rewrite parent identity operations to current call.
    bool needs_rewrite = false;
    Array<Expr> new_args;
    for (const auto& arg : call->args) {
      if (const auto* parent_callnode = arg.as<CallNode>()) {
        if (const auto* parent_op = parent_callnode->op.as<OpNode>()) {
          Call parent_call = GetRef<Call>(parent_callnode);
          if (parent_op->name == identity_name && IdentityDoesNothing(parent_call)) {
            // Bypass the identity: feed its input straight into the current call.
            needs_rewrite = true;
            new_args.push_back(parent_call->args[0]);
            continue;
          }
        }
      }
      new_args.push_back(arg);
    }

    if (needs_rewrite) {
      return Call(call->op, new_args, call->attrs, call->type_args);
    }
    return post;
  }

 private:
  /*!
   * \brief Check whether an identity call is a pure pass-through, i.e. it
   * performs no requantization and applies no activation function.
   */
  bool IdentityDoesNothing(const Call& call) {
    const auto* attrs = call->attrs.as<tvm::relay::op::contrib::ethosu::EthosuIdentityAttrs>();
    // Conservatively keep the identity if its attributes are missing or of an
    // unexpected type rather than dereferencing a null pointer.
    if (attrs == nullptr) {
      return false;
    }
    bool does_not_requantize = attrs->ifm_scale == 1.0 && attrs->ifm_zero_point == 0 &&
                               attrs->ofm_scale == 1.0 && attrs->ofm_zero_point == 0;
    bool has_no_activation = attrs->activation == "NONE";
    return does_not_requantize && has_no_activation;
  }
};

/*!
 * \brief A pass to remove redundant identity operations.
 */
tvm::transform::Pass IdentityOptimizer() {
  runtime::TypedPackedFunc<IRModule(IRModule, transform::PassContext)> pass_func =
      [=](IRModule mod, transform::PassContext ctx) {
        // Run the identity-removal mutator over the body of every global
        // function, updating the module only when something actually changed.
        for (auto global_var : mod->GetGlobalVars()) {
          Function func = Downcast<Function>(mod->Lookup(global_var));
          Expr optimized_body = RemoveRedundantIdentities().VisitExpr(func->body);
          if (!optimized_body.same_as(func->body)) {
            Function updated_func = WithFields(func, func->params, optimized_body);
            mod->Update(global_var, updated_func);
          }
        }
        return mod;
      };
  return tvm::transform::CreateModulePass(pass_func, 0,
                                          "relay.backend.contrib.ethos-u.IdentityOptimizer", {});
}

TVM_REGISTER_GLOBAL("relay.ext.ethos-u.IdentityOptimizer").set_body_typed(IdentityOptimizer);

/*!
* \brief This function lowers the IRModule with PrimFunc
* with the target of the microNPU to a C-source runtime module
Expand Down
98 changes: 1 addition & 97 deletions src/relay/op/contrib/ethosu/binary_elementwise.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,110 +24,14 @@
#include <tvm/relay/op.h>

#include "common.h"
#include "op_attrs.h"

namespace tvm {
namespace relay {
namespace op {
namespace contrib {
namespace ethosu {

/*! \brief Attributes used by the Ethos(TM)-U NPU binary elementwise operators */
struct EthosuBinaryElementwiseAttrs : public tvm::AttrsNode<EthosuBinaryElementwiseAttrs> {
  // Operator selector (ADD/SUB/MUL/MIN/MAX/SHR/SHL).
  String operator_type;
  // Quantization parameters for the two input feature maps and the output.
  double ifm_scale;
  int ifm_zero_point;
  double ifm2_scale;
  int ifm2_zero_point;
  double ofm_scale;
  int ofm_zero_point;
  // Channel counts for the two input feature maps.
  IndexExpr ifm_channels;
  IndexExpr ifm2_channels;
  bool reversed_operands;
  // Activation function and its clip bounds (used when activation == 'CLIP').
  String activation;
  int clip_min;
  int clip_max;
  String rounding_mode;
  // Tensor layouts ('NHWC' or 'NHCWB16') and output element type.
  String ifm_layout;
  String ifm2_layout;
  String ofm_layout;
  String ofm_dtype;

  TVM_DECLARE_ATTRS(EthosuBinaryElementwiseAttrs, "relay.attrs.EthosuBinaryElementwiseAttrs") {
    TVM_ATTR_FIELD(operator_type)
        .describe(
            "The type of the binary elementwise operator."
            "'ADD'"
            "'SUB'"
            "'MUL'"
            "'MIN'"
            "'MAX'"
            "'SHR'"
            "'SHL'");
    TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor.");
    TVM_ATTR_FIELD(ifm_zero_point)
        .describe("The quantization zero point for the Input Feature Map tensor.");
    TVM_ATTR_FIELD(ifm2_scale)
        .describe("The quantization scale for the Input Feature Map tensor 2.");
    TVM_ATTR_FIELD(ifm2_zero_point)
        .describe("The quantization zero point for the Input Feature Map tensor 2.");
    TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor.");
    TVM_ATTR_FIELD(ofm_zero_point)
        .describe("The quantization zero point for the Output Feature Map tensor.");
    TVM_ATTR_FIELD(ifm_channels).describe("The number of the Input Feature Map channels.");
    TVM_ATTR_FIELD(ifm2_channels).describe("The number of the Input Feature Map 2 channels.");
    TVM_ATTR_FIELD(reversed_operands)
        .describe("True if IFM2 is the first operand and IFM is the second operand.")
        .set_default(false);
    TVM_ATTR_FIELD(activation)
        .describe(
            "The activation function to use. "
            "'NONE' - no activation function. "
            "'CLIP' - clip the output between clip_min and clip_max. "
            "'TANH' - tanh activation function. "
            "'SIGMOID' - sigmoid activation function. "
            "'LUT' - use a look-up table to perform the activation function."
            "Available activations for activation type:"
            "{int8, uint8}: 'NONE', 'CLIP', 'TANH', 'SIGMOID', 'LUT'"
            "{int32}: 'NONE'")
        .set_default("NONE");
    TVM_ATTR_FIELD(clip_min)
        .describe("The minimum clipping value if activation = 'CLIP'.")
        .set_default(0);
    TVM_ATTR_FIELD(clip_max)
        .describe("The maximum clipping value if activation = 'CLIP'.")
        .set_default(0);
    TVM_ATTR_FIELD(rounding_mode)
        .describe(
            "The rounding mode to apply to the Output Feature Map tensor. "
            "'TFL' - Tensorflow Lite rounding scheme. "
            "'TRUNCATE' - Truncate towards zero."
            "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.")
        .set_default("TFL");
    TVM_ATTR_FIELD(ifm_layout)
        .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.")
        .set_default("NHWC");
    TVM_ATTR_FIELD(ifm2_layout)
        .describe("The layout of the Input Feature Map tensor 2. Can be 'NHWC' or 'NHCWB16'.")
        .set_default("NHWC");
    TVM_ATTR_FIELD(ofm_layout)
        .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.")
        .set_default("NHWC");
    TVM_ATTR_FIELD(ofm_dtype).describe(
        "The Output Feature Map tensor type."
        "MUL, ADD, SUB {IFM}->{OFM}:"
        " {uint8, int8 int32} -> {uint8, int8, int32}, any pairing"
        "MAX, MIN:"
        " IFM and OFM must be of the same type, one of:"
        " {int8, uint8}"
        "SHR {IFM}->{OFM}:"
        " {int32}->{int8, uint8, int32}, any pairing"
        "SHL:"
        " {int32}->{int32} only");
  }
};

TVM_REGISTER_NODE_TYPE(EthosuBinaryElementwiseAttrs);

bool EthosuBinaryElementwiseRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
const TypeReporter& reporter) {
const int ifm_index = 0;
Expand Down
85 changes: 1 addition & 84 deletions src/relay/op/contrib/ethosu/convolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,97 +31,14 @@

#include "../../../qnn/utils.h"
#include "common.h"
#include "op_attrs.h"

namespace tvm {
namespace relay {
namespace op {
namespace contrib {
namespace ethosu {

/*! \brief Attributes used by the Ethos(TM)-U NPU convolution operator */
struct EthosuConv2DAttrs : public tvm::AttrsNode<EthosuConv2DAttrs> {
  // Quantization parameters for the input feature map, weights and output.
  double ifm_scale;
  int ifm_zero_point;
  int weight_zero_point;
  double ofm_scale;
  int ofm_zero_point;
  // Convolution geometry: kernel (height, width), output channels,
  // strides, padding (top, left, bottom, right) and dilation.
  Array<IndexExpr> kernel_shape;
  IndexExpr ofm_channels;
  Array<IndexExpr> strides;
  Array<IndexExpr> padding;
  Array<IndexExpr> dilation;
  // Activation function and its clip bounds (used when activation == 'CLIP').
  String activation;
  int clip_min;
  int clip_max;
  String rounding_mode;
  // Optional 2x2 input upscaling mode and tensor layouts.
  String upscale;
  String ifm_layout;
  String ofm_layout;

  TVM_DECLARE_ATTRS(EthosuConv2DAttrs, "relay.attrs.EthosuConv2DAttrs") {
    TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor.");
    TVM_ATTR_FIELD(ifm_zero_point)
        .describe("The quantization zero point for the Input Feature Map tensor.");
    TVM_ATTR_FIELD(weight_zero_point)
        .describe("The quantization zero point for the weight tensor.");
    TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor.");
    TVM_ATTR_FIELD(ofm_zero_point)
        .describe("The quantization zero point for the Output Feature Map tensor.");
    TVM_ATTR_FIELD(kernel_shape)
        .describe("The 2 dimensional kernel shape as (kernel_height, kernel_width).")
        .set_default(NullValue<Array<IndexExpr>>());
    TVM_ATTR_FIELD(ofm_channels)
        .describe("The number of the Output Feature Map channels.")
        .set_default(NullValue<IndexExpr>());
    TVM_ATTR_FIELD(strides)
        .set_default(Array<IndexExpr>({1, 1}))
        .describe("The 2 dimensional strides as (stride_height, stride_width).");
    TVM_ATTR_FIELD(padding)
        .set_default(Array<IndexExpr>({0, 0, 0, 0}))
        .describe("The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right).");
    TVM_ATTR_FIELD(dilation)
        .set_default(Array<IndexExpr>({1, 1}))
        .describe("The 2 dimensional dilation as (dilation_height, dilation_width).");
    TVM_ATTR_FIELD(activation)
        .describe(
            "The activation function to use. "
            "'NONE' - no activation function. "
            "'CLIP' - clip the output between clip_min and clip_max. "
            "'TANH' - tanh activation function. "
            "'SIGMOID' - sigmoid activation function. "
            "'LUT' - use a look-up table to perform the activation function.")
        .set_default("NONE");
    TVM_ATTR_FIELD(clip_min)
        .describe("The minimum clipping value if activation = 'CLIP'.")
        .set_default(0);
    TVM_ATTR_FIELD(clip_max)
        .describe("The maximum clipping value if activation = 'CLIP'.")
        .set_default(0);
    TVM_ATTR_FIELD(rounding_mode)
        .describe(
            "The rounding mode to apply to the Output Feature Map tensor. "
            "'TFL' - Tensorflow Lite rounding scheme. "
            "'TRUNCATE' - Truncate towards zero."
            "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.")
        .set_default("TFL");
    TVM_ATTR_FIELD(upscale)
        .describe(
            "The 2x2 upscaling mode to apply to the Input Feature Map tensor. "
            "'NONE' - no upscaling. "
            "'NEAREST' - upscale using nearest neighbour. "
            "'ZEROS' - upscale using zeros.")
        .set_default("NONE");
    TVM_ATTR_FIELD(ifm_layout)
        .set_default("NHWC")
        .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.");
    TVM_ATTR_FIELD(ofm_layout)
        .set_default("NHWC")
        .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.");
  }
};

TVM_REGISTER_NODE_TYPE(EthosuConv2DAttrs);

bool EthosuConv2DRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
const TypeReporter& reporter) {
CHECK_EQ(types.size(), 5);
Expand Down
Loading