From dc557ae6cb30bf1120132ebc8f66c8a1271a388a Mon Sep 17 00:00:00 2001 From: Anastasia Stulova Date: Mon, 28 Jun 2021 16:28:05 +0100 Subject: [PATCH] [Relay] Fix index order in conv2d computation for Arm CPU. When dilation is larger than 1 in conv2d with NHWC layout, the order of the indices used to access the data array in the convolution computation appears to be incorrect. 'data_vec' is defined as lambda n, oho, owo, kh, kw, ic, ohi, owi: but is accessed as data_vec[n, oho, owo, kh, kw, ohi, owi, ic]. This patch fixes the order of the indices and modifies the test so that it is suitable for running on an AArch64 CPU. --- .../tvm/topi/arm_cpu/conv2d_spatial_pack.py | 2 +- .../topi/python/test_topi_conv2d_nhwc.py | 19 +++++++------------ 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/python/tvm/topi/arm_cpu/conv2d_spatial_pack.py b/python/tvm/topi/arm_cpu/conv2d_spatial_pack.py index f4cd9d899b732..91bff512a0ab6 100644 --- a/python/tvm/topi/arm_cpu/conv2d_spatial_pack.py +++ b/python/tvm/topi/arm_cpu/conv2d_spatial_pack.py @@ -344,7 +344,7 @@ def conv2d_spatial_pack_nhwc(cfg, data, kernel, strides, padding, dilation, out_ conv = te.compute( ovshape, lambda n, oho, owo, oco, ohi, owi, oci: te.sum( - data_vec[n, oho, owo, kh, kw, ohi, owi, ic].astype(out_dtype) + data_vec[n, oho, owo, kh, kw, ic, ohi, owi].astype(out_dtype) * kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype), axis=[ic, kh, kw], ), diff --git a/tests/python/topi/python/test_topi_conv2d_nhwc.py b/tests/python/topi/python/test_topi_conv2d_nhwc.py index cdb7c0e8d4aa4..f2e4456032f7f 100644 --- a/tests/python/topi/python/test_topi_conv2d_nhwc.py +++ b/tests/python/topi/python/test_topi_conv2d_nhwc.py @@ -58,26 +58,21 @@ def get_ref_data(): a_np, w_np, b_np = get_ref_data() - def check_device(device): - if not tvm.testing.device_enabled(device): - print("Skip because %s is not enabled" % device) - return - print("Running on target: %s" % device) - with tvm.target.Target(device): - fcompute, 
fschedule = tvm.topi.testing.dispatch(device, _conv2d_nhwc_implement) + def check_device(target, dev): + print("Running on target: %s" % target) + with tvm.target.Target(target): + fcompute, fschedule = tvm.topi.testing.dispatch(target, _conv2d_nhwc_implement) B = fcompute(A, W, stride, padding, dilation, dtype) s = fschedule([B]) - dev = tvm.device(device, 0) a = tvm.nd.array(a_np, dev) w = tvm.nd.array(w_np, dev) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), dev) - func = tvm.build(s, [A, W, B], device) + func = tvm.build(s, [A, W, B], target) func(a, w, b) tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5) - for device in ["llvm", "cuda"]: - check_device(device) - + for target, dev in tvm.testing.enabled_targets(): + check_device(target, dev) @tvm.testing.uses_gpu def test_conv2d_nhwc():