From 7a400052038751bade36491b80c89a6b90547c82 Mon Sep 17 00:00:00 2001 From: Dmitriy Smirnov Date: Thu, 27 May 2021 13:56:03 +0100 Subject: [PATCH] [BYOC][ACL] Prevent dilated pooling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added a check preventing avg_pool2d and max_pool2d from being scheduled for execution via the ACL* runtime if a dilation other than (1, 1) is provided, as ACL does not currently support the dilation attribute in its pooling layer. *ACL stands for "Compute Library for the Arm® Architecture" Change-Id: If8f65d3a154e09f880bec73dd756d9f985a20ff2 --- .../tvm/relay/op/contrib/arm_compute_lib.py | 12 ++- .../contrib/arm_compute_lib/acl_runtime.cc | 4 +- .../test_arm_compute_lib/test_pooling.py | 95 ++++++++++++------- 3 files changed, 72 insertions(+), 39 deletions(-) diff --git a/python/tvm/relay/op/contrib/arm_compute_lib.py b/python/tvm/relay/op/contrib/arm_compute_lib.py index 9152b50e76869..310d22b317535 100644 --- a/python/tvm/relay/op/contrib/arm_compute_lib.py +++ b/python/tvm/relay/op/contrib/arm_compute_lib.py @@ -397,6 +397,14 @@ def qnn_dense(expr): return True +def check_dilation(attrs): + """Prevents offloading if dilation other than (1, 1)""" + if not isinstance(attrs, relay.op.op_attrs.GlobalPool2DAttrs): + if not (len(attrs.dilation) == 2 and attrs.dilation[0] == 1 and attrs.dilation[1] == 1): + return False + return True + + @tvm.ir.register_op_attr("nn.max_pool2d", "target.arm_compute_lib") def max_pool2d(expr): """Check if the external ACL codegen for maxpool2d should be used.""" @@ -406,7 +414,7 @@ def max_pool2d(expr): typ = args[0].checked_type if typ.dtype not in ["float32", "uint8"]: return False - return True + return check_dilation(attrs) @tvm.ir.register_op_attr("nn.avg_pool2d", "target.arm_compute_lib") @@ -424,7 +432,7 @@ def avg_pool2d(expr, from_quantized_composite=False): if attrs.layout != "NHWC": return False - return True + return check_dilation(attrs) 
@tvm.ir.register_op_attr("nn.global_max_pool2d", "target.arm_compute_lib") diff --git a/src/runtime/contrib/arm_compute_lib/acl_runtime.cc b/src/runtime/contrib/arm_compute_lib/acl_runtime.cc index 6562d1bfc62dc..5bbc536afaca1 100644 --- a/src/runtime/contrib/arm_compute_lib/acl_runtime.cc +++ b/src/runtime/contrib/arm_compute_lib/acl_runtime.cc @@ -381,9 +381,9 @@ class ACLRuntime : public JSONRuntimeBase { void CreatePoolingLayer(CachedLayer* layer, const JSONGraphNode& node) { std::vector padding = node.GetAttr>("padding"); std::vector strides = node.GetAttr>("strides"); + std::vector dilation = node.GetAttr>("dilation"); bool ceil_mode = std::stoi(node.GetAttr>("ceil_mode")[0]); arm_compute::PadStrideInfo pad_stride_info = MakeACLPadStride(padding, strides, ceil_mode); - auto attr_pool_size = node.GetAttr>("pool_size"); int pool_size_h = std::stoi(attr_pool_size[0]); int pool_size_w = std::stoi(attr_pool_size[1]); @@ -408,6 +408,8 @@ class ACLRuntime : public JSONRuntimeBase { LOG(FATAL) << "Pooling type not supported"; } + ICHECK(dilation.size() == 2 && dilation[0] == "1" && dilation[1] == "1") + << "Dilation other than (1, 1) not supported"; arm_compute::PoolingLayerInfo pool_info = arm_compute::PoolingLayerInfo(pool_type, arm_compute::Size2D(pool_size_h, pool_size_w), arm_compute::DataLayout::NHWC, pad_stride_info, exclude_pad); diff --git a/tests/python/contrib/test_arm_compute_lib/test_pooling.py b/tests/python/contrib/test_arm_compute_lib/test_pooling.py index 137484330db85..9deaa758639e1 100644 --- a/tests/python/contrib/test_arm_compute_lib/test_pooling.py +++ b/tests/python/contrib/test_arm_compute_lib/test_pooling.py @@ -169,34 +169,37 @@ def test_pooling(): fp32_dtype = ("float32", -127, 128, 0.001, 0.001) uint8_dtype = ("uint8", 0, 255, 1, 0) - + # fmt: off trials = [ - ["nn.max_pool2d", fp32_dtype, (3, 3), (2, 2), (0, 0), False, False, (27, 27, 512)], - ["nn.max_pool2d", fp32_dtype, (2, 2), (2, 2), (0, 0), False, True, (16, 16, 16)], - 
["nn.max_pool2d", fp32_dtype, (3, 3), (2, 2), (1, 1), True, True, (15, 15, 16)], - ["nn.max_pool2d", fp32_dtype, (2, 2), (2, 2), (0, 1), False, False, (16, 16, 16)], - ["nn.max_pool2d", uint8_dtype, (3, 3), (2, 2), (0, 1), False, False, (16, 16, 16)], - ["nn.max_pool2d", uint8_dtype, (2, 2), (2, 2), (1, 1), True, True, (15, 15, 16)], - ["nn.avg_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), False, False, (16, 16, 16)], - ["nn.avg_pool2d", fp32_dtype, (2, 2), (2, 2), (0, 0), False, True, (16, 16, 16)], - ["nn.avg_pool2d", fp32_dtype, (3, 3), (2, 2), (0, 1), True, False, (15, 15, 16)], + ["nn.max_pool2d", fp32_dtype, (3, 3), (2, 2), (1, 1), (0, 0), False, False, (27, 27, 512), (0, 1),], + ["nn.max_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (0, 0), False, True, (16, 16, 16), (0, 1),], + ["nn.max_pool2d", fp32_dtype, (3, 3), (2, 2), (1, 1), (1, 1), True, True, (15, 15, 16), (0, 1),], + ["nn.max_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (0, 1), False, False, (16, 16, 16), (0, 1),], + ["nn.max_pool2d", uint8_dtype, (3, 3), (2, 2), (1, 1), (0, 1), False, False, (16, 16, 16), (0, 1),], + ["nn.max_pool2d", uint8_dtype, (2, 2), (2, 2), (1, 1), (1, 1), True, True, (15, 15, 16), (0, 1),], + ["nn.max_pool2d", uint8_dtype, (2, 2), (2, 2), (3, 2), (1, 1), True, True, (15, 15, 16), (1, 0),], + ["nn.avg_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (1, 1), False, False, (16, 16, 16), (0, 1),], + ["nn.avg_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (0, 0), False, True, (16, 16, 16), (0, 1),], + ["nn.avg_pool2d", fp32_dtype, (3, 3), (2, 2), (3, 2), (0, 1), True, False, (15, 15, 16), (1, 0),], # 20.05: "exclude_padding equal false is not supported for AVG Pooling with padding on quantized types" # ["nn.avg_pool2d", uint8_dtype, (2, 2), (2, 2), (1, 1), False, True, (16, 16, 16)], - ["nn.avg_pool2d", uint8_dtype, (3, 3), (2, 2), (0, 1), False, False, (16, 16, 16)], - ["nn.l2_pool2d", fp32_dtype, (2, 2), (2, 2), (0, 1), True, False, (16, 16, 16)], - ["nn.l2_pool2d", fp32_dtype, (3, 
3), (2, 2), (0, 0), False, False, (16, 16, 16)], - ["nn.l2_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), False, True, (15, 15, 16)], + ["nn.avg_pool2d", uint8_dtype, (3, 3), (2, 2), (1, 1), (0, 1), False, False, (16, 16, 16), (0, 1),], + ["nn.l2_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (0, 1), True, False, (16, 16, 16), (0, 1),], + ["nn.l2_pool2d", fp32_dtype, (3, 3), (2, 2), (1, 1), (0, 0), False, False, (16, 16, 16), (0, 1),], + ["nn.l2_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (1, 1), False, True, (15, 15, 16), (0, 1),], ] - + # fmt: on for ( typef, (dtype, low, high, atol, rtol), size, stride, + dilation, pad, ceil_mode, count_include_pad, input_shape, + (tvm_ops, acl_partitions), ) in trials: shape = (1, *input_shape) outputs = [] @@ -205,7 +208,16 @@ def test_pooling(): } func = _get_pooling_model( - shape, dtype, typef, size, stride, pad, ceil_mode, count_include_pad, iter(inputs) + shape, + dtype, + typef, + size, + stride, + dilation, + pad, + ceil_mode, + count_include_pad, + iter(inputs), ) config = { @@ -215,15 +227,25 @@ def test_pooling(): "pooling type": typef, "dtype": dtype, "padding": pad, + "dilation": dilation, "ceil_mode": ceil_mode, "count_include_pad": count_include_pad, "inputs": inputs, } verify_saturation = True if dtype == "uint8" else False - for acl in [False, True]: outputs.append( - build_and_run(func, inputs, 1, None, device, enable_acl=acl, config=config)[0] + build_and_run( + func, + inputs, + 1, + None, + device, + enable_acl=acl, + tvm_ops=tvm_ops, + acl_partitions=acl_partitions, + config=config, + )[0] ) verify(outputs, atol=atol, rtol=rtol, config=config, verify_saturation=verify_saturation) @@ -283,25 +305,25 @@ def test_codegen_pooling(): fp32_dtype = ("float32", -127, 128) uint8_dtype = ("uint8", 0, 255) - + # fmt: off trials = [ - ["nn.max_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (0, 0), False, True, (16, 16, 16)], - ["nn.max_pool2d", fp32_dtype, (3, 3), (2, 2), (1, 1), (1, 1), True, True, (15, 15, 16)], - 
["nn.max_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (0, 1), False, False, (16, 16, 16)], - ["nn.max_pool2d", uint8_dtype, (3, 3), (2, 2), (1, 1), (0, 1), False, False, (16, 16, 16)], - ["nn.max_pool2d", uint8_dtype, (2, 2), (2, 2), (1, 1), (1, 1), True, True, (15, 15, 16)], - ["nn.max_pool2d", uint8_dtype, (2, 2), (2, 2), (3, 2), (1, 1), True, True, (15, 15, 16)], - ["nn.avg_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (1, 1), False, False, (16, 16, 16)], - ["nn.avg_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (1, 1), False, False, (16, 16, 16)], - ["nn.avg_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (0, 0), False, True, (16, 16, 16)], - ["nn.avg_pool2d", fp32_dtype, (3, 3), (2, 2), (3, 2), (0, 1), True, False, (15, 15, 16)], - ["nn.avg_pool2d", uint8_dtype, (2, 2), (2, 2), (1, 1), (1, 1), False, True, (16, 16, 16)], - ["nn.avg_pool2d", uint8_dtype, (3, 3), (2, 2), (1, 1), (0, 1), False, False, (16, 16, 16)], - ["nn.l2_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (0, 1), True, False, (15, 15, 16)], - ["nn.l2_pool2d", fp32_dtype, (3, 3), (2, 2), (1, 1), (0, 0), False, False, (16, 16, 16)], - ["nn.l2_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (1, 1), False, True, (15, 15, 16)], + ["nn.max_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (0, 0), False, True, (16, 16, 16), (0, 1),], + ["nn.max_pool2d", fp32_dtype, (3, 3), (2, 2), (1, 1), (1, 1), True, True, (15, 15, 16), (0, 1),], + ["nn.max_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (0, 1), False, False, (16, 16, 16), (0, 1),], + ["nn.max_pool2d", uint8_dtype, (3, 3), (2, 2), (1, 1), (0, 1), False, False, (16, 16, 16), (0, 1),], + ["nn.max_pool2d", uint8_dtype, (2, 2), (2, 2), (1, 1), (1, 1), True, True, (15, 15, 16), (0, 1),], + ["nn.max_pool2d", uint8_dtype, (2, 2), (2, 2), (3, 2), (1, 1), True, True, (15, 15, 16), (1, 0),], + ["nn.avg_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (1, 1), False, False, (16, 16, 16), (0, 1),], + ["nn.avg_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (1, 1), False, False, (16, 16, 
16), (0, 1),], + ["nn.avg_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (0, 0), False, True, (16, 16, 16), (0, 1),], + ["nn.avg_pool2d", fp32_dtype, (3, 3), (2, 2), (3, 2), (0, 1), True, False, (15, 15, 16), (1, 0),], + ["nn.avg_pool2d", uint8_dtype, (2, 2), (2, 2), (1, 1), (1, 1), False, True, (16, 16, 16), (0, 1),], + ["nn.avg_pool2d", uint8_dtype, (3, 3), (2, 2), (1, 1), (0, 1), False, False, (16, 16, 16), (0, 1),], + ["nn.l2_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (0, 1), True, False, (15, 15, 16), (0, 1),], + ["nn.l2_pool2d", fp32_dtype, (3, 3), (2, 2), (1, 1), (0, 0), False, False, (16, 16, 16), (0, 1),], + ["nn.l2_pool2d", fp32_dtype, (2, 2), (2, 2), (1, 1), (1, 1), False, True, (15, 15, 16), (0, 1),], ] - + # fmt: on for ( typef, (dtype, low, high), @@ -312,6 +334,7 @@ def test_codegen_pooling(): ceil_mode, count_include_pad, input_shape, + (tvm_ops, acl_partitions), ) in trials: shape = (1, *input_shape) inputs = {"a"} @@ -319,7 +342,7 @@ def test_codegen_pooling(): func = _get_pooling_model(*args, iter(inputs)) exp_codegen = _get_expected_pooling_codegen(*args) - verify_codegen(func, exp_codegen, 1) + verify_codegen(func, exp_codegen, acl_partitions, tvm_ops) def test_codegen_global_pooling():