[BYOC][ACL] removed ACL 20.05 limitations #7251

Merged (1 commit) on Jan 20, 2021
39 changes: 6 additions & 33 deletions python/tvm/relay/op/contrib/arm_compute_lib.py
@@ -16,7 +16,6 @@
 # under the License.
 # pylint: disable=invalid-name, unused-argument
 """Arm Compute Library supported operators."""
-import numpy as np
 import tvm
 
 from tvm._ffi import register_func
@@ -382,7 +381,7 @@ def dense(expr):
         return False
     if attrs.out_dtype != "float32" and attrs.out_dtype != "":
         return False
-    return not require_padding([*args, expr.checked_type])
+    return True
 
 
 def qnn_dense(expr):
@@ -396,7 +395,7 @@ def qnn_dense(expr):
         return False
     if attrs.out_dtype != "int32":
         return False
-    return not require_padding([*args, expr.checked_type])
+    return True
 
 
 @tvm.ir.register_op_attr("nn.max_pool2d", "target.arm_compute_lib")
@@ -408,33 +407,7 @@ def max_pool2d(expr):
     typ = args[0].checked_type
     if typ.dtype not in ["float32", "uint8"]:
         return False
-    return not require_padding([*args, expr.checked_type])
-
-
-def require_padding(inputs):
-    """Checks whether supplied data will require padding.
-    Most of the operators ACL up to 20.11 uses padded data.
-    """
-
-    def _check(shape, dtype):
-        """NEON has 128bits/16bytes per vector"""
-        if len(shape) == 0:
-            return False
-        return (shape[-1] * np.dtype(dtype).itemsize) % 16 != 0
-
-    for i in inputs:
-        if isinstance(i, (tvm.relay.expr.Var, tvm.relay.expr.Call)):
-            if _check(i.checked_type.shape, i.checked_type.dtype):
-                return True
-        elif isinstance(i, tvm.relay.expr.Constant):
-            if _check(i.data.shape, i.data.dtype):
-                return True
-        elif isinstance(i, tvm.ir.tensor_type.TensorType):
-            if _check(i.shape, i.dtype):
-                return True
-        else:
-            raise RuntimeException("Not supported input type: %s" % type(i))
-    return False
+    return True
 
 
 @tvm.ir.register_op_attr("nn.avg_pool2d", "target.arm_compute_lib")
@@ -452,7 +425,7 @@ def avg_pool2d(expr, from_quantized_composite=False):
     if attrs.layout != "NHWC":
         return False
 
-    return not require_padding([*args, expr.checked_type])
+    return True
 
 
 @tvm.ir.register_op_attr("nn.global_max_pool2d", "target.arm_compute_lib")
@@ -464,7 +437,7 @@ def global_max_pool2d(expr):
         return False
     if attrs.layout != "NHWC":
         return False
-    return not require_padding([*args, expr.checked_type])
+    return True
 
 
 @tvm.ir.register_op_attr("nn.global_avg_pool2d", "target.arm_compute_lib")
@@ -476,7 +449,7 @@ def global_avg_pool2d(expr):
         return False
     if attrs.layout != "NHWC":
         return False
-    return not require_padding([*args, expr.checked_type])
+    return True
 
 
 @tvm.ir.register_op_attr("maximum", "target.arm_compute_lib")
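The deleted require_padding helper encoded the old limitation: NEON operates on 128-bit (16-byte) vectors, and earlier ACL releases required padded data whenever a tensor's innermost dimension did not fill a whole number of vectors. For reference, here is a minimal standalone sketch of that predicate (needs_padding is a hypothetical name; the shapes come from the dense tests below):

import numpy as np

def needs_padding(shape, dtype):
    """Replica of the removed check: NEON vectors are 128 bits (16 bytes),
    so a row whose byte length is not a multiple of 16 required padding."""
    if len(shape) == 0:
        return False
    return (shape[-1] * np.dtype(dtype).itemsize) % 16 != 0

print(needs_padding((1, 128), "float32"))  # False: 128 * 4 = 512 bytes, aligned
print(needs_padding((11, 2), "float32"))   # True: 2 * 4 = 8 bytes, unaligned

With that restriction gone, each predicate above now returns True for any otherwise-supported dtype and layout, so shapes like (11, 2) are offloaded to ACL instead of falling back to TVM.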
tests/python/contrib/test_arm_compute_lib/infrastructure.py
@@ -275,7 +275,7 @@ def extract_acl_modules(module):
 def verify_codegen(
     module,
     known_good_codegen,
-    num_acl_modules,
+    num_acl_modules=1,
     tvm_ops=0,
     target="llvm -mtriple=aarch64-linux-gnu -mattr=+neon",
 ):
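Since most call sites check exactly one ACL module, the new default lets them drop the trailing arguments. A sketch of the two equivalent invocations (func and exp_codegen stand in for values built by the tests below):

# Before: the expected module count was always passed explicitly.
verify_codegen(func, exp_codegen, num_acl_modules=1)

# After: one ACL module (and tvm_ops=0) is assumed by default.
verify_codegen(func, exp_codegen)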
99 changes: 47 additions & 52 deletions tests/python/contrib/test_arm_compute_lib/test_dense.py
@@ -101,7 +101,7 @@ def _get_qnn_model(
     out = relay.qnn.op.requantize(
         out,
         relay.const(input_sc * kernel_sc, "float32"),  # input scale
-        relay.const(input_zp * kernel_zp, "int32"),  # input zero point
+        relay.const(0, "int32"),  # input zero point
         relay.const(output_sc, "float32"),  # output scale
         relay.const(output_zp, "int32"),  # output zero point
         out_dtype="uint8",
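The corrected constant follows from the requantize relation, which preserves the real value real = (q - zp) * scale when re-expressing a quantized tensor: the int32 accumulator produced by qnn.dense already carries a zero point of 0 at scale input_sc * kernel_sc, so offsetting it by input_zp * kernel_zp was wrong. A rough plain-Python sketch of the mapping (an illustration, not the Relay API):

def requantize(q_acc, in_sc, in_zp, out_sc, out_zp):
    """Re-express q_acc in a new (scale, zero point) pair while
    preserving the real value (q - zp) * scale."""
    real = (q_acc - in_zp) * in_sc
    return round(real / out_sc) + out_zp

# With the accumulator zero point fixed at 0, only the scales and the
# uint8 output zero point determine the result, matching the constants above.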
@@ -182,20 +182,18 @@ def test_dense():
 
     device = Device()
     np.random.seed(0)
-
     dtype = "float32"
     trials = [
-        [(1, 128), (16, 128), 16, True, 1],
-        [(1, 128), (16, 128), 16, False, 1],
-        [(32, 32), (32, 32), 32, True, 1],
-        [(32, 32), (32, 32), 32, False, 1],
-        [(1, 64), (1, 64), 1, True, 0],
-        [(1, 64), (1, 64), 1, False, 0],
-        [(11, 2), (2, 2), 2, True, 0],
-        [(11, 2), (2, 2), 2, False, 0],
+        [(1, 128), (16, 128), 16, True],
+        [(1, 128), (16, 128), 16, False],
+        [(32, 32), (32, 32), 32, True],
+        [(32, 32), (32, 32), 32, False],
+        [(1, 64), (1, 64), 1, True],
+        [(1, 64), (1, 64), 1, False],
+        [(11, 2), (2, 2), 2, True],
+        [(11, 2), (2, 2), 2, False],
     ]
-
-    for shape, weight_shape, units, composite, acl_partitions in trials:
+    for shape, weight_shape, units, composite in trials:
         outputs = []
         inputs = {"a": tvm.nd.array(np.random.uniform(-128, 127, shape).astype(dtype))}
         func, params = _get_model(
@@ -210,11 +208,8 @@ def test_dense():
                     params,
                     device,
                     enable_acl=acl,
-                    tvm_ops=(1 - acl_partitions) * (2 - int(not composite)),
-                    acl_partitions=acl_partitions,
                 )[0]
             )
-
         config = {
             "shape": shape,
             "weight_shape": weight_shape,
Expand All @@ -230,27 +225,25 @@ def test_codegen_dense():
return

np.random.seed(0)

dtype = "float32"
trials = [
[(1, 128), (16, 128), 16, True, 1],
[(1, 128), (16, 128), 16, False, 1],
[(32, 32), (32, 32), 32, True, 1],
[(32, 32), (32, 32), 32, False, 1],
[(1, 64), (1, 64), 1, True, 0],
[(1, 64), (1, 64), 1, False, 0],
[(1, 128), (16, 128), 16, True],
[(1, 128), (16, 128), 16, False],
[(32, 32), (32, 32), 32, True],
[(32, 32), (32, 32), 32, False],
[(1, 64), (1, 64), 1, True],
[(1, 64), (1, 64), 1, False],
[(11, 2), (2, 2), 2, True],
[(11, 2), (2, 2), 2, False],
]

for shape, weight_shape, units, composite, acl_partitions in trials:
for shape, weight_shape, units, composite in trials:
inputs = {"a"}

args = (shape, weight_shape, units, dtype)

func, params = _get_model(*args, var_names=iter(inputs), has_bias=composite)
exp_codegen = _get_expected_codegen(*args, has_bias=composite)
verify_codegen(
func, exp_codegen, acl_partitions, (1 - acl_partitions) * (2 - int(not composite))
)
verify_codegen(func, exp_codegen)


def test_qnn_dense():
@@ -264,19 +257,20 @@ def test_qnn_dense():
 
     dtype = "uint8"
     trials = [
-        [(4, 4), (4, 4), 4, True, 0],
-        [(4, 4), (4, 4), 4, False, 0],
-        [(16, 16), (4, 16), 4, True, 1],
-        [(16, 16), (4, 16), 4, False, 1],
-        [(1, 128), (16, 128), 16, True, 1],
-        [(1, 128), (16, 128), 16, False, 1],
-        [(32, 32), (32, 32), 32, True, 1],
-        [(32, 32), (32, 32), 32, False, 1],
-        [(1, 64), (1, 64), 1, True, 0],
-        [(1, 64), (1, 64), 1, False, 0],
+        [(1, 2), (2, 2), 2, True],
+        [(1, 2), (2, 2), 2, False],
+        [(4, 4), (4, 4), 4, True],
+        [(4, 4), (4, 4), 4, False],
+        [(16, 16), (4, 16), 4, True],
+        [(16, 16), (4, 16), 4, False],
+        [(1, 128), (16, 128), 16, True],
+        [(1, 128), (16, 128), 16, False],
+        [(32, 32), (32, 32), 32, True],
+        [(32, 32), (32, 32), 32, False],
+        [(1, 64), (1, 64), 1, True],
+        [(1, 64), (1, 64), 1, False],
     ]
-
-    for shape, weight_shape, units, composite, acl_partitions in trials:
+    for shape, weight_shape, units, composite in trials:
         outputs = []
         inputs = {"a": tvm.nd.array(np.random.uniform(0, 255, shape).astype(dtype))}
         input_zp = 100
@@ -310,8 +304,6 @@ def test_qnn_dense():
                     1,
                     params,
                     device,
-                    tvm_ops=(1 - acl_partitions) * (3 - int(not composite)),
-                    acl_partitions=acl_partitions,
                     enable_acl=acl,
                 )[0]
             )
@@ -340,15 +332,20 @@ def test_codegen_qnn_dense():
 
     dtype = "uint8"
     trials = [
-        [(1, 128), (16, 128), 16, True, 1],
-        [(1, 128), (16, 128), 16, False, 1],
-        [(32, 32), (32, 32), 32, True, 1],
-        [(32, 32), (32, 32), 32, False, 1],
-        [(1, 64), (1, 64), 1, True, 0],
-        [(1, 64), (1, 64), 1, False, 0],
+        [(1, 2), (2, 2), 2, True],
+        [(1, 2), (2, 2), 2, False],
+        [(4, 4), (4, 4), 4, True],
+        [(4, 4), (4, 4), 4, False],
+        [(16, 16), (4, 16), 4, True],
+        [(16, 16), (4, 16), 4, False],
+        [(1, 128), (16, 128), 16, True],
+        [(1, 128), (16, 128), 16, False],
+        [(32, 32), (32, 32), 32, True],
+        [(32, 32), (32, 32), 32, False],
+        [(1, 64), (1, 64), 1, True],
+        [(1, 64), (1, 64), 1, False],
     ]
-
-    for shape, weight_shape, units, composite, acl_partitions in trials:
+    for shape, weight_shape, units, composite in trials:
         inputs = {"a"}
         args = (shape, weight_shape, units, dtype)
 
@@ -372,9 +369,7 @@ def test_codegen_qnn_dense():
             has_bias=composite,
         )
         exp_codegen = _get_expected_codegen(*args, has_bias=composite)
-        verify_codegen(
-            func, exp_codegen, acl_partitions, (1 - acl_partitions) * (3 - int(not composite))
-        )
+        verify_codegen(func, exp_codegen)
 
 
 if __name__ == "__main__":
2 changes: 1 addition & 1 deletion tests/python/contrib/test_arm_compute_lib/test_network.py
@@ -172,7 +172,7 @@ def get_model():
         return mod, params, inputs
 
     _build_and_run_network(
-        *get_model(), device=device, tvm_ops=10, acl_partitions=30, atol=8, rtol=0
+        *get_model(), device=device, tvm_ops=9, acl_partitions=31, atol=8, rtol=0
     )