From 2cb769560bfb1a67cc37c18895023b76af61d68e Mon Sep 17 00:00:00 2001 From: Luke Hutton Date: Fri, 25 Mar 2022 07:05:45 +0000 Subject: [PATCH] [microNPU] Remove identity operations between non-compute operations (#10411) Builds upon the work in #10254 to remove identity operations sandwiched between two non-compute operations (reshape/strided slice - concatenate is handled differently), under certain conditions. Specifically, an identity operation is not removed when the dimensionality between the two non-compute operations is reduced, due to non-congruent values being accessed incorrectly. For example, ``` strided_slice(dims=4) -> identity -> reshape(dims=4) ``` becomes... ``` strided_slice -> reshape ``` but, ``` strided_slice(dims=4) -> identity -> reshape(dims=2) ``` remains as... ``` strided_slice -> identity -> reshape ``` Change-Id: Ie28ba384fcb3230d6f4651c0c19e2b9526ebcc42 --- .../relay/backend/contrib/ethosu/codegen.py | 1 + src/relay/backend/contrib/ethosu/codegen.cc | 61 ++++++++++++++++--- .../test_ethosu/test_identity_optimizer.py | 47 ++++++++++++-- 3 files changed, 97 insertions(+), 12 deletions(-) diff --git a/python/tvm/relay/backend/contrib/ethosu/codegen.py b/python/tvm/relay/backend/contrib/ethosu/codegen.py index e8b5cc23aff2..d06622e646ce 100644 --- a/python/tvm/relay/backend/contrib/ethosu/codegen.py +++ b/python/tvm/relay/backend/contrib/ethosu/codegen.py @@ -347,6 +347,7 @@ def relay_to_tir(mod: tvm.ir.IRModule) -> tvm.ir.IRModule: mod = OutlineCompilerFunctions("ethos-u")(mod) mod = LegalizeEthosU()(mod) mod = LUTsOptimizer()(mod) + mod = relay.transform.InferType()(mod) mod = IdentityOptimizer()(mod) mod = LayoutOptimizer()(mod) mod = relay.transform.InferType()(mod) diff --git a/src/relay/backend/contrib/ethosu/codegen.cc b/src/relay/backend/contrib/ethosu/codegen.cc index 7044669d23b5..dfcf54f7b76c 100644 --- a/src/relay/backend/contrib/ethosu/codegen.cc +++ b/src/relay/backend/contrib/ethosu/codegen.cc @@ -115,13 +115,13 @@ class RemoveRedundantIdentities : public MixedModeMutator { Expr Rewrite_(const CallNode* pre, const Expr& post) override { Call call = Downcast(post); - // only consider rewrite if current op is an NPU compute op. + // don't consider rewrite if current op is an identity or concatenate. if (!call->op->IsInstance()) { return post; } const auto* op = call->op.as(); std::string op_name = op->name; - if (op_name.substr(0, 15) != "contrib.ethosu." || op_name == "contrib.ethosu.identity") { + if (op_name == "contrib.ethosu.identity" || op_name == "concatenate") { return post; } @@ -129,10 +129,19 @@ class RemoveRedundantIdentities : public MixedModeMutator { bool needs_rewrite = false; Array new_args; for (const auto& arg : call->args) { - if (const auto* parent_callnode = arg.as()) { + Expr current_arg = arg; + + // expand tuple to get parent op if we run into one - nested tuples are not supported. + if (const auto* tuple_get_item = arg.as()) { + const auto* tuple = tuple_get_item->tuple.as(); + current_arg = tuple->fields[tuple_get_item->index]; + } + + if (const auto* parent_callnode = current_arg.as()) { if (const auto* parent_op = parent_callnode->op.as()) { Call parent_call = GetRef(parent_callnode); - if (parent_op->name == "contrib.ethosu.identity" && IdentityDoesNothing(parent_call)) { + if (parent_op->name == "contrib.ethosu.identity" && IdentityDoesNothing(parent_call) && + CheckIdentityBetweenTransformOperations(call, parent_call)) { needs_rewrite = true; new_args.push_back(parent_call->args[0]); continue; @@ -143,7 +152,10 @@ class RemoveRedundantIdentities : public MixedModeMutator { } if (needs_rewrite) { - return Call(call->op, new_args, call->attrs, call->type_args); + Call new_call = Call(call->op, new_args, call->attrs, call->type_args); + // since we are only removing an identity, we know the type information has not changed + new_call->checked_type_ = call->checked_type_; + return new_call; } return post; } @@ -156,6 +168,41 @@ class RemoveRedundantIdentities : public MixedModeMutator { bool has_no_activation = attrs->activation == "NONE"; return does_not_requantize && has_no_activation; } + + bool CheckIdentityBetweenTransformOperations(const Call& call, const Call& identity_call) { + const auto* op = call->op.as(); + std::vector nc_ops = {"reshape", "strided_slice"}; + + if (op && (std::find(nc_ops.begin(), nc_ops.end(), op->name) != nc_ops.end())) { + // check if the parent to identity operation is also a non-compute operation, + // if it isn't we can safely remove the identity in question by returning true. + const auto* identity_arg = identity_call->args[0].as(); + if (!identity_arg) { + return true; + } + const auto* identity_arg_op = identity_arg->op.as(); + if (!identity_arg_op || + !(std::find(nc_ops.begin(), nc_ops.end(), identity_arg_op->name) != nc_ops.end())) { + return true; + } + + const auto* call_tt = call->checked_type_.as(); + const auto* identity_arg_tt = identity_arg->checked_type_.as(); + CHECK(call_tt && identity_arg_tt) + << "InferType should be run before RemoveRedundantIdentities"; + + // we can only remove the identity operation if the second non-compute operation + // in the sequence does not reduce the dimensionality of the output to the first + // non-compute operation. Doing so could lead to data being accessed incorrectly + // by the subsequent compute operation due to the reduction in dimensionality. + size_t first_transform_op_dims = identity_arg_tt->shape.size(); + size_t second_transform_op_dims = call_tt->shape.size(); + if (second_transform_op_dims < first_transform_op_dims) { + return false; + } + } + return true; + } }; /*! @@ -177,8 +224,8 @@ tvm::transform::Pass IdentityOptimizer() { } return mod; }; - return tvm::transform::CreateModulePass(pass_func, 0, - "relay.backend.contrib.ethos-u.IdentityOptimizer", {}); + return tvm::transform::CreateModulePass( + pass_func, 0, "relay.backend.contrib.ethos-u.IdentityOptimizer", {"InferType"}); } TVM_REGISTER_GLOBAL("relay.ext.ethos-u.IdentityOptimizer").set_body_typed(IdentityOptimizer); diff --git a/tests/python/contrib/test_ethosu/test_identity_optimizer.py b/tests/python/contrib/test_ethosu/test_identity_optimizer.py index a2bb4f465a8a..8a42fe85991f 100644 --- a/tests/python/contrib/test_ethosu/test_identity_optimizer.py +++ b/tests/python/contrib/test_ethosu/test_identity_optimizer.py @@ -179,12 +179,14 @@ def test_many_output_identity(): def get_graph(get_expected=False): x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") x = relay.reshape(x, newshape=(1, 1, 4, 4)) - identity = infra.make_ethosu_identity(x) + if not get_expected: + x = infra.make_ethosu_identity(x) outputs = [] for _ in range(4): - ifm = x if get_expected else identity - outputs.append(infra.make_ethosu_unary_elementwise(ifm, 4, "ABS")) - outputs.append(relay.strided_slice(identity, begin=(0, 0, 0, 0), end=(1, 1, 4, 4))) + outputs.append(infra.make_ethosu_unary_elementwise(x, 4, "ABS")) + ss = relay.strided_slice(x, begin=(0, 0, 0, 0), end=(1, 1, 4, 4)) + identity_2 = infra.make_ethosu_identity(ss) + outputs.append(identity_2) out = relay.concatenate(outputs, axis=0) return relay.Function(relay.analysis.free_vars(out), out) @@ -220,7 +222,8 @@ def test_identity_removal_with_multiple_transform_ops(): def get_graph(get_expected=False): x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") x = relay.strided_slice(x, begin=[0, 0, 0, 0], end=[1, 2, 2, 2]) - x = infra.make_ethosu_identity(x) + if not get_expected: + x = infra.make_ethosu_identity(x) x = relay.reshape(x, newshape=(1, 1, 1, 8)) if not get_expected: x = infra.make_ethosu_identity(x) @@ -267,6 +270,25 @@ def get_graph(get_expected=False): _assert_structural_equal(actual, expected) +def test_multiple_transform_ops_with_reduction_in_dimensionality(): + """Removal of an identity operation between two transform operations is usually okay. + However, if the dimensionality of the input is reduced by the second transformation + operation, it can lead to an output mismatch. Checking that the pass doesn't remove + an identity given this case.""" + + def get_graph(): + x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + x = relay.strided_slice(x, begin=(0, 0, 0, 0), end=(1, 2, 2, 2)) + x = infra.make_ethosu_identity(x) + x = relay.reshape(x, newshape=(1, 2, 4)) + x = infra.make_ethosu_identity(x) + return relay.Function(relay.analysis.free_vars(x), x) + + actual = _optimize(get_graph()) + expected = _optimize(get_graph(), optimize=False) + _assert_structural_equal(actual, expected) + + def test_identity_optimizer_runs_in_compilation_pipeline(): """Checks that the identity optimization pass is run as part of the NPU compilation pipeline.""" @@ -320,3 +342,18 @@ def model(x): return y _compare_tvm_with_tflite(model, [ifm_shape], "ethos-u55-256") + + +def test_multiple_transform_ops_same_output(): + """Check case of identity removal between transform ops and + then without, making sure they have the same output.""" + ifm_shape = (1, 2, 2, 4) + + @tf.function + def model(x): + x = tf.reshape(x, (1, 1, 4, 4)) + x = tf.slice(x, (0, 0, 0, 0), (1, 1, 4, 3)) + x = tf.reshape(x, (12,)) + return x + + _compare_tvm_with_tflite(model, [ifm_shape], "ethos-u55-256")