Skip to content
This repository has been archived by the owner on Nov 25, 2022. It is now read-only.

Commit

Permalink
[ETHOSN] Add support for Requantize (apache#12384)
Browse files Browse the repository at this point in the history
This commit adds support for the requantize operator for the Arm(R) Ethos(TM)-N NPU.
  • Loading branch information
NicolaLancellotti authored and xinetzone committed Nov 25, 2022
1 parent 66d519b commit 6525394
Show file tree
Hide file tree
Showing 6 changed files with 183 additions and 0 deletions.
14 changes: 14 additions & 0 deletions python/tvm/relay/op/contrib/ethosn.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,12 @@ def qnn_leaky_relu_pattern():
pattern = is_op("qnn.quantize")(pattern, is_constant(), is_constant())
return pattern

def qnn_requantize_pattern():
    """Match a qnn.requantize call whose four quantization params are constants."""
    return is_op("qnn.requantize")(
        wildcard(), is_constant(), is_constant(), is_constant(), is_constant()
    )

def check_conv2d(extract):
"""Check if a conv2d is supported by Ethos-N."""
if not ethosn_available():
Expand Down Expand Up @@ -202,6 +208,13 @@ def check_leaky_relu(extract):

return support.leaky_relu(extract)

def check_requantize(extract):
    """Check if requantize is supported."""
    if ethosn_available():
        # Delegate the decision to the Support Library via the registered hook.
        return support.requantize(extract)
    return False

return [
("ethos-n.qnn_conv2d", qnn_conv_pattern(), check_conv2d),
("ethos-n.qnn_avg_pool2d", qnn_avg_pool2d_pattern(), check_avg_pool2d),
Expand All @@ -210,6 +223,7 @@ def check_leaky_relu(extract):
("ethos-n.qnn_mean", qnn_mean_pattern(), check_mean),
("ethos-n.qnn_tanh", qnn_tanh_pattern(), check_tanh),
("ethos-n.qnn_leaky_relu", qnn_leaky_relu_pattern(), check_leaky_relu),
("ethos-n.qnn_requantize", qnn_requantize_pattern(), check_requantize),
]


Expand Down
39 changes: 39 additions & 0 deletions src/relay/backend/contrib/ethosn/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ void InferTensorsVisitor::InferCall(const CallNode* cn) {
params.input_info = GetTensorInfo(tensor_table_, call);
err = EthosnAPI::Relu(call, &params);
tensor_table_[cn->args[0]] = {params.input_info};
} else if (IsEthosnFunc(call, "ethos-n.qnn_requantize")) {
RequantizeParams params;
err += EthosnAPI::Requantize(cn->op.as<FunctionNode>()->body, &params);
tensor_table_[cn->args[0]] = {params.input_info};
} else {
err = EthosnError("unknown operator");
}
Expand Down Expand Up @@ -314,6 +318,9 @@ sl::TensorsAndId ConstructNetworkVisitor::HandleCall(const CallNode* cn) {
} else if (IsEthosnOp(call, "clip")) {
if ((err = MakeReluLayer(call, &tensor))) ReportFatalError(call, err);
return MakeOps(tensor);
} else if (IsEthosnFunc(call, "ethos-n.qnn_requantize")) {
if ((err = MakeRequantizeLayer(call, &tensor))) ReportFatalError(call, err);
return MakeOps(tensor);
} else {
ReportFatalError(call, EthosnError("unknown operator"));
return {};
Expand Down Expand Up @@ -596,6 +603,24 @@ EthosnError ConstructNetworkVisitor::MakeReluLayer(const Call& call,
return EthosnError();
}

EthosnError ConstructNetworkVisitor::MakeRequantizeLayer(const Call& call,
                                                         sl::TensorAndId<sl::Operand>* out) {
  // Extract the Support Library requantize parameters from the composite function body.
  RequantizeParams params;
  params.input_info = GetTensorInfo(tensor_table_, call);
  EthosnError err = EthosnAPI::Requantize(call->op.as<FunctionNode>()->body, &params);
  if (err) {
    return err;
  }

  // The single input operand was constructed when visiting the producer call.
  auto input_operand = operand_table_[call->args[0]][0];

  try {
    *out = AddRequantize(network_, *input_operand, params.requantize_info);
  } catch (const sl::NotSupportedException& e) {
    // Translate Support Library rejections into an EthosnError for uniform reporting.
    return EthosnError(e.what());
  }
  return EthosnError();
}

runtime::Module EthosnCompiler::CreateRuntimeModule(const ObjectRef& ref) {
std::vector<runtime::ethosn::OrderedCompiledNetwork> cmms;
if (ref->IsInstance<FunctionNode>()) {
Expand Down Expand Up @@ -918,6 +943,20 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.relu")
err += EthosnError(reason);
});

// Python-facing hook (used by check_requantize in the Ethos-N pattern table) that asks
// the Support Library whether a given qnn.requantize composite is supported.
TVM_REGISTER_GLOBAL("relay.ethos-n.support.requantize")
    .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) {
      // args[0] is the extracted composite call to query.
      Call call = args[0];
      RequantizeParams params;
      // Convert the Relay call into Support Library tensor/requantize infos.
      auto err = EthosnAPI::Requantize(call, &params);
      err += EthosnCompiler::SupportedSetup();
      // Buffer the Support Library fills with the rejection reason, if any.
      char reason[kReasonMaxLength];
      reason[0] = '\0';
      *rv = !err && EthosnCompiler::GetSupported()->IsRequantizeSupported(
                        params.requantize_info, params.input_info, &params.output_info, reason,
                        sizeof(reason));
      // NOTE(review): err is accumulated but not returned from this lambda — presumably
      // EthosnError reports/raises on accumulation or destruction; confirm.
      err += EthosnError(reason);
    });

TVM_REGISTER_GLOBAL("relay.ethos-n.query").set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) {
#if defined ETHOSN_HW
*rv = true;
Expand Down
1 change: 1 addition & 0 deletions src/relay/backend/contrib/ethosn/codegen_ethosn.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ class ConstructNetworkVisitor : public MixedModeVisitor, private ErrorReportingP
EthosnError MakeDepthToSpaceLayer(const Call& call, sl::TensorAndId<sl::Operand>* out);
EthosnError MakeReluLayer(const Call& call, sl::TensorAndId<sl::Operand>* out);
EthosnError MakeLeakyReLULayer(const Call& call, sl::TensorAndId<sl::Operand>* out);
EthosnError MakeRequantizeLayer(const Call& call, sl::TensorAndId<sl::Operand>* out);

/*! \brief A look-up table from Expr to layers. */
std::map<Expr, std::vector<std::shared_ptr<sl::Operand>>> operand_table_;
Expand Down
35 changes: 35 additions & 0 deletions src/relay/backend/contrib/ethosn/ethosn_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "ethosn_api_version.h"
#include "ethosn_support_library/Support.hpp"
#include "ethosn_support_library/SupportQueries.hpp"
#include "tvm/relay/qnn/attrs.h"

namespace tvm {
namespace relay {
Expand Down Expand Up @@ -676,6 +677,40 @@ EthosnError EthosnAPI::Relu(const Expr& expr, ReluParams* params) {
return err;
}

/*!
 * \brief Extract the Support Library requantize params from a Relay qnn.requantize call.
 *
 * Converts the input tensor type, the four constant quantization parameters
 * (input/output scale and zero point) and the out_dtype attribute into
 * Support Library infos, populating params->input_info, params->requantize_info
 * and params->output_info.
 */
EthosnError EthosnAPI::Requantize(const Expr& expr, RequantizeParams* params) {
  Call call = Downcast<Call>(expr);
  const auto* input_dtype = call->args[0]->checked_type().as<TensorTypeNode>();
  sl::TensorShape input_tensor_shape = {1, 1, 1, 1};
  sl::DataType input_data_type;
  EthosnError err = Tvm2Npu(input_dtype->shape, &input_tensor_shape);
  err += Tvm2Npu(input_dtype->dtype, &input_data_type);

  // qnn.requantize args: data, input_scale, input_zero_point, output_scale, output_zero_point.
  float input_sc, output_sc;
  int input_zp, output_zp;
  err += AsConstant(call->args[1], &input_sc);
  err += AsConstant(call->args[2], &input_zp);
  err += AsConstant(call->args[3], &output_sc);
  err += AsConstant(call->args[4], &output_zp);

  sl::QuantizationInfo input_q_info;
  err += Tvm2Npu(input_zp, input_sc, &input_q_info);
  params->input_info =
      sl::TensorInfo(input_tensor_shape, input_data_type, sl::DataFormat::NHWC, input_q_info);

  sl::QuantizationInfo requantize_q_info;
  err += Tvm2Npu(output_zp, output_sc, &requantize_q_info);
  params->requantize_info = sl::RequantizeInfo(requantize_q_info);

  // Honour the out_dtype attribute of qnn.requantize. Without this the output tensor
  // silently keeps the input data type, which is wrong for e.g. int8 -> uint8 requantize.
  const auto* requantize_attrs = call->attrs.as<qnn::RequantizeAttrs>();
  if (requantize_attrs != nullptr) {
    sl::DataType output_data_type;
    err += Tvm2Npu(requantize_attrs->out_dtype, &output_data_type);
    params->requantize_info.m_OutputDataType = output_data_type;
  }

  // The output shares the input's shape/format; quantization (and data type, when
  // specified) come from the requantize info.
  sl::TensorInfo output_info = params->input_info;
  output_info.m_QuantizationInfo = params->requantize_info.m_OutputQuantizationInfo;
  if (params->requantize_info.m_OutputDataType.has_value()) {
    output_info.m_DataType = params->requantize_info.m_OutputDataType.value();
  }
  params->output_info = output_info;

  return err;
}

EthosnError EthosnAPI::Tvm2Npu(const Array<IndexExpr>& padding, sl::Padding* npu_padding) {
std::array<uint32_t, 4> dim;
if (EthosnError err = AsArray<IndexExpr, uint32_t>(padding, &dim)) {
Expand Down
8 changes: 8 additions & 0 deletions src/relay/backend/contrib/ethosn/ethosn_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ struct ReluParams {
sl::TensorInfo output_info;
};

/*! \brief Parameters extracted from a Relay qnn.requantize call for the Support Library. */
struct RequantizeParams {
  /*! \brief Output quantization (and optional output data type) for the requantize op. */
  sl::RequantizeInfo requantize_info;
  /*! \brief Tensor info of the input operand. */
  sl::TensorInfo input_info;
  /*! \brief Tensor info of the produced output. */
  sl::TensorInfo output_info;
};

/*!
* \brief A wrapper around std::stringstream to build an EthosnError.
*/
Expand Down Expand Up @@ -233,6 +239,8 @@ class EthosnAPI {
static EthosnError DepthToSpace(const Expr& expr, DepthToSpaceParams* params);
/*! \brief Extract the Support Library relu params from a Relay relu call */
static EthosnError Relu(const Expr& expr, ReluParams* params);
/*! \brief Extract the Support Library requantize params from a Relay qnn.requantize call */
static EthosnError Requantize(const Expr& expr, RequantizeParams* params);

private:
/*! \brief Convert a TVM IndexExpr array to a SL tensor shape */
Expand Down
86 changes: 86 additions & 0 deletions tests/python/contrib/test_ethosn/test_requantize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Arm(R) Ethos(TM)-N integration requantize tests"""

import pytest
import numpy as np
import tvm
from tvm import relay
from tvm.testing import requires_ethosn
from . import infrastructure as tei


def _get_model(shape, input_zp, input_sc, output_zp, output_sc, in_dtype, out_dtype):
    """Build a Relay qnn.requantize expression with constant quantization params."""
    data = relay.var("a", shape=shape, dtype=in_dtype)
    return relay.qnn.op.requantize(
        data=data,
        input_scale=relay.const(input_sc, "float32"),
        input_zero_point=relay.const(input_zp, "int32"),
        output_scale=relay.const(output_sc, "float32"),
        output_zero_point=relay.const(output_zp, "int32"),
        out_dtype=out_dtype,
    )


@requires_ethosn
@pytest.mark.parametrize("in_dtype", ["int8", "uint8"])
@pytest.mark.parametrize("out_dtype", ["int8", "uint8"])
@pytest.mark.parametrize("shape", [(1, 52, 52, 3)])
def test_requantize(in_dtype, out_dtype, shape):
    """Compare requantize outputs between the NPU and the CPU reference."""
    np.random.seed(0)
    low = 0 if in_dtype == "uint8" else -5
    high = low + 10
    input_zp = (high + low) / 2
    inputs = {
        "a": tvm.nd.array(np.random.randint(low=low, high=high, size=shape, dtype=in_dtype)),
    }

    def _run(npu):
        # Build a fresh model per run so each compilation starts from the same expr.
        model = _get_model(
            shape=shape,
            input_zp=input_zp,
            input_sc=0.002,
            output_zp=10,
            output_sc=0.008,
            in_dtype=in_dtype,
            out_dtype=out_dtype,
        )
        mod = tei.make_module(model, [])
        return tei.build_and_run(mod, inputs, 1, {}, npu=npu)

    outputs = [_run(npu) for npu in (False, True)]
    tei.verify(outputs, out_dtype, 1)


@requires_ethosn
def test_requantize_failure():
    """An output scale below input_scale / 128 must be rejected by the NPU."""
    input_sc = 0.8
    # Just under the smallest output scale the Support Library accepts.
    output_sc = (input_sc / 128) - 0.0001
    requantize = _get_model(
        shape=(1, 52, 52, 3),
        input_zp=0,
        input_sc=input_sc,
        output_zp=0,
        output_sc=output_sc,
        in_dtype="int8",
        out_dtype="int8",
    )
    composite = tei.make_ethosn_composite(requantize, "ethos-n.qnn_requantize")
    partitioned = tei.make_ethosn_partition(composite)
    tei.test_error(partitioned, {}, "Output scale must be bigger than input scale / 128")

0 comments on commit 6525394

Please sign in to comment.