diff --git a/python/tvm/relay/testing/__init__.py b/python/tvm/relay/testing/__init__.py index 909712511061..2399a474de88 100644 --- a/python/tvm/relay/testing/__init__.py +++ b/python/tvm/relay/testing/__init__.py @@ -82,6 +82,7 @@ def check_grad( mean=0, mode="higher_order", target_devices=None, + executor_kind="debug", ): """Perform numerical gradient checking given a relay function. @@ -146,8 +147,12 @@ def check_grad( for target, dev in target_devices: # Eval the backward and forward functions # TODO(mbs): Evaluate a pair of functions so can share preparation between them. - bwd_func_compiled = relay.create_executor(device=dev, target=target).evaluate(bwd_func) - fwd_func_compiled = relay.create_executor(device=dev, target=target).evaluate(fwd_func) + bwd_func_compiled = relay.create_executor( + executor_kind, device=dev, target=target + ).evaluate(bwd_func) + fwd_func_compiled = relay.create_executor( + executor_kind, device=dev, target=target + ).evaluate(fwd_func) # Get analytic gradients. _, grads = bwd_func_compiled(*inputs) diff --git a/tests/python/relay/dyn/test_dynamic_op_level10.py b/tests/python/relay/dyn/test_dynamic_op_level10.py index d34b80303b29..5a31977b4506 100644 --- a/tests/python/relay/dyn/test_dynamic_op_level10.py +++ b/tests/python/relay/dyn/test_dynamic_op_level10.py @@ -27,9 +27,11 @@ import random import tvm.testing +executor_kind = tvm.testing.parameter("debug", "vm") + @tvm.testing.uses_gpu -def test_broadcast_to(): +def test_broadcast_to(executor_kind): def verify_more_dynamic_broadcast_to(x_shape, out_shape): rank = len(out_shape) dtype = "float32" @@ -45,12 +47,13 @@ def verify_more_dynamic_broadcast_to(x_shape, out_shape): x = np.random.uniform(size=np.prod(x_shape)).astype(dtype) ref_res = np.broadcast_to(np.reshape(x, x_shape), out_shape) for target, dev in tvm.testing.enabled_targets(): - for kind in ["vm", "debug"]: - mod = tvm.ir.IRModule.from_expr(func) - op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate( - func - )(x, np.array(x_shape).astype(shape_type), np.array(out_shape).astype(shape_type)) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) + mod = tvm.ir.IRModule.from_expr(func) + op_res = relay.create_executor( + executor_kind, mod=mod, device=dev, target=target + ).evaluate(func)( + x, np.array(x_shape).astype(shape_type), np.array(out_shape).astype(shape_type) + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) verify_more_dynamic_broadcast_to((4, 3), (3, 4, 3)) @@ -70,12 +73,11 @@ def verify_broadcast_to(x_shape, out_shape): x = np.random.uniform(size=x_shape).astype(dtype) ref_res = np.broadcast_to(x, out_shape) for target, dev in tvm.testing.enabled_targets(): - for kind in ["vm", "debug"]: - mod = tvm.ir.IRModule.from_expr(func) - op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate( - func - )(x, np.array(out_shape).astype(shape_type)) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) + mod = tvm.ir.IRModule.from_expr(func) + op_res = relay.create_executor( + executor_kind, mod=mod, device=dev, target=target + ).evaluate(func)(x, np.array(out_shape).astype(shape_type)) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) verify_broadcast_to((1,), (1, 1, 1)) verify_broadcast_to((1, 1), (4, 1, 1)) @@ -83,7 +85,7 @@ def verify_broadcast_to(x_shape, out_shape): @tvm.testing.uses_gpu -def test_dyn_broadcast_to(): +def test_dyn_broadcast_to(executor_kind): dtype = "uint8" rank = 3 shape_type = "int64" @@ -101,16 
+103,15 @@ def test_dyn_broadcast_to(): dyn_shape = (1,) * rank ref_res = np.broadcast_to(x, dyn_shape) for target, dev in tvm.testing.enabled_targets(): - for kind in ["vm", "debug"]: - mod = tvm.ir.IRModule.from_expr(func) - op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate(func)( - x, np.array(dyn_shape).astype(shape_type) - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) + mod = tvm.ir.IRModule.from_expr(func) + op_res = relay.create_executor(executor_kind, mod=mod, device=dev, target=target).evaluate( + func + )(x, np.array(dyn_shape).astype(shape_type)) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) @tvm.testing.uses_gpu -def test_dyn_one_hot(): +def test_dyn_one_hot(executor_kind): def _get_oshape(indices_shape, depth, axis): oshape = [] true_axis = len(indices_shape) if axis == -1 else axis @@ -135,12 +136,11 @@ def _verify(indices_shape, depth, on_value, off_value, axis, dtype): indices_np = np.random.randint(0, depth, size=indices_shape).astype("int32") out_np = tvm.topi.testing.one_hot(indices_np, on_value, off_value, depth, axis, dtype) for target, dev in tvm.testing.enabled_targets(): - for kind in ["vm", "debug"]: - mod = tvm.ir.IRModule.from_expr(func) - out_relay = relay.create_executor( - kind, mod=mod, device=dev, target=target - ).evaluate()(indices_np, np.array(depth).astype("int32")) - tvm.testing.assert_allclose(out_relay.numpy(), out_np) + mod = tvm.ir.IRModule.from_expr(func) + out_relay = relay.create_executor( + executor_kind, mod=mod, device=dev, target=target + ).evaluate()(indices_np, np.array(depth).astype("int32")) + tvm.testing.assert_allclose(out_relay.numpy(), out_np) _verify((3,), 3, 1, 0, -1, "int32") _verify((3,), 3, 1.0, 0.0, -1, "float32") diff --git a/tests/python/relay/dyn/test_dynamic_op_level2.py b/tests/python/relay/dyn/test_dynamic_op_level2.py index fd7ab7002806..a017762ce35d 100644 --- a/tests/python/relay/dyn/test_dynamic_op_level2.py +++ b/tests/python/relay/dyn/test_dynamic_op_level2.py @@ -27,9 +27,11 @@ import tvm.topi.testing from tvm.relay.testing import run_infer_type +executor_kind = tvm.testing.parameter("debug", "vm") + @tvm.testing.uses_gpu -def test_dyn_upsampling_run(): +def test_dyn_upsampling_run(executor_kind): def verify_upsampling(dshape, scale_h, scale_w, layout, method, align_corners=False): if layout == "NCHW": @@ -58,12 +60,13 @@ def verify_upsampling(dshape, scale_h, scale_w, layout, method, align_corners=Fa func = relay.Function([x, scale_h_var, scale_w_var], z) for target, dev in tvm.testing.enabled_targets(): - for kind in ["vm", "debug"]: - mod = tvm.ir.IRModule.from_expr(func) - op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()( - x_data, np.array(scale_h).astype("float32"), np.array(scale_w).astype("float32") - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4, atol=1e-6) + mod = tvm.ir.IRModule.from_expr(func) + op_res = relay.create_executor( + executor_kind, mod=mod, device=dev, target=target + ).evaluate()( + x_data, np.array(scale_h).astype("float32"), np.array(scale_w).astype("float32") + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4, atol=1e-6) verify_upsampling((1, 16, 32, 32), 3, 2.0, "NCHW", "nearest_neighbor") verify_upsampling((1, 16, 32, 32), 5, 2.0, "NCHW", "bilinear", True) @@ -85,7 +88,7 @@ def test_dyn_upsampling_infer_type_const(): @tvm.testing.uses_gpu -def test_dyn_upsampling3d_run(): +def test_dyn_upsampling3d_run(executor_kind): def 
verify_upsampling3d( dshape, scale_d, scale_h, scale_w, layout, method, coord_trans="asymmetric" ): @@ -124,15 +127,16 @@ def verify_upsampling3d( func = relay.Function([x, scale_d_var, scale_h_var, scale_w_var], z) for target, dev in enabled_targets(): - for kind in ["vm", "debug"]: - mod = tvm.ir.IRModule.from_expr(func) - op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()( - x_data, - np.array(scale_d).astype("float32"), - np.array(scale_h).astype("float32"), - np.array(scale_w).astype("float32"), - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4, atol=1e-6) + mod = tvm.ir.IRModule.from_expr(func) + op_res = relay.create_executor( + executor_kind, mod=mod, device=dev, target=target + ).evaluate()( + x_data, + np.array(scale_d).astype("float32"), + np.array(scale_h).astype("float32"), + np.array(scale_w).astype("float32"), + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4, atol=1e-6) verify_upsampling3d((1, 1, 1, 1, 1), 2, 3, 4, "NCDHW", "nearest_neighbor") verify_upsampling3d((1, 8, 16, 16, 16), 2.0, 3.0, 4.0, "NCDHW", "nearest_neighbor") @@ -163,7 +167,7 @@ def test_dyn_upsampling3d_infer_type_const(): @tvm.testing.uses_gpu -def test_dyn_pad(): +def test_dyn_pad(executor_kind): def verify_pad(dshape, pad_width, pad_val, dtype): x = relay.var("x", relay.TensorType(dshape, dtype)) ndim = len(dshape) @@ -178,7 +182,9 @@ def verify_pad(dshape, pad_width, pad_val, dtype): ref_res = np.pad(data, pad_width, "constant", constant_values=(((pad_val,) * 2),) * ndim) pad_width = np.array(pad_width).astype("int64") - verify_func(func, [data, pad_width, np.array(pad_val).astype(dtype)], ref_res) + verify_func( + executor_kind, func, [data, pad_width, np.array(pad_val).astype(dtype)], ref_res + ) def verify_pad_default_fill(dshape, pad_width, dtype): x = relay.var("x", relay.TensorType(dshape, dtype)) @@ -193,7 +199,7 @@ def verify_pad_default_fill(dshape, pad_width, dtype): ref_res = np.pad(data, pad_width) pad_width = np.array(pad_width).astype("int64") - verify_func(func, [data, pad_width], ref_res) + verify_func(executor_kind, func, [data, pad_width], ref_res) verify_pad((4, 10, 7, 7), ((1, 1), (2, 2), (3, 3), (4, 4)), 2.0, "int32") verify_pad((2, 7), ((1, 4), (2, 2)), 4.0, "float64") diff --git a/tests/python/relay/dyn/test_dynamic_op_level3.py b/tests/python/relay/dyn/test_dynamic_op_level3.py index 0456401e8ad2..0e68cd7246ac 100644 --- a/tests/python/relay/dyn/test_dynamic_op_level3.py +++ b/tests/python/relay/dyn/test_dynamic_op_level3.py @@ -23,24 +23,25 @@ from tvm import relay, te from tvm.relay.testing import check_grad, run_infer_type +executor_kind = tvm.testing.parameter("debug", "vm") -def verify_func(func, data, ref_res, target_device=tvm.testing.enabled_targets()): + +def verify_func(executor_kind, func, data, ref_res, target_device=tvm.testing.enabled_targets()): assert isinstance(data, list) for target, dev in target_device: - for kind in ["vm", "debug"]: - mod = tvm.ir.IRModule.from_expr(func) - op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()( - *data - ) - if isinstance(op_res, tvm.runtime.container.ADT): - assert len(op_res) == len( - ref_res - ), "Outputs from TVM and Python implementation must be equal " - for op_result, ref_result in zip(op_res, ref_res): - tvm.testing.assert_allclose(op_result.numpy(), ref_result, rtol=1e-5) - else: - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) - relay.backend.te_compiler.get().clear() + mod = 
tvm.ir.IRModule.from_expr(func) + op_res = relay.create_executor( + executor_kind, mod=mod, device=dev, target=target + ).evaluate()(*data) + if isinstance(op_res, tvm.runtime.container.ADT): + assert len(op_res) == len( + ref_res + ), "Outputs from TVM and Python implementation must be equal " + for op_result, ref_result in zip(op_res, ref_res): + tvm.testing.assert_allclose(op_result.numpy(), ref_result, rtol=1e-5) + else: + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) + relay.backend.te_compiler.get().clear() def check_on_vm(target, dev, args, expected_result, mod): @@ -53,7 +54,7 @@ def check_on_vm(target, dev, args, expected_result, mod): @tvm.testing.uses_gpu -def test_dyn_reshape(): +def test_dyn_reshape(executor_kind): def verify_reshape(shape, newshape, oshape): x = relay.var("x", relay.TensorType(shape, "float32")) y = relay.var("y", relay.TensorType((len(newshape),), "int64")) @@ -69,7 +70,7 @@ def verify_reshape(shape, newshape, oshape): test_inputs=[x_data], eps=1e-3, ) - verify_func(func, [x_data, np.array(newshape).astype("int64")], ref_res) + verify_func(executor_kind, func, [x_data, np.array(newshape).astype("int64")], ref_res) verify_reshape((2, 3, 4), (8, 3), (8, 3)) verify_reshape((4, 7), (2, 7, 2), (2, 7, 2)) @@ -83,7 +84,7 @@ def verify_reshape(shape, newshape, oshape): @tvm.testing.uses_gpu -def test_dyn_shape_reshape(): +def test_dyn_shape_reshape(executor_kind): def verify_reshape(shape, newshape, oshape): x = relay.var("x", relay.TensorType(shape, "float32")) y = relay.var("y", relay.TensorType(newshape, "float32")) @@ -94,13 +95,13 @@ def verify_reshape(shape, newshape, oshape): y_data = np.random.uniform(low=-1, high=1, size=newshape).astype("float32") ref_res = np.reshape(x_data, oshape) check_grad(run_infer_type(func), inputs=[x_data, y_data], eps=1e-3) - verify_func(func, [x_data, y_data], ref_res) + verify_func(executor_kind, func, [x_data, y_data], ref_res) verify_reshape((2, 3, 4), (8, 3), (8, 3)) verify_reshape((4, 7), (2, 7, 2), (2, 7, 2)) -def test_squeeze(): +def test_squeeze(executor_kind): def verify_squeeze(shape, dtype, axis): x = relay.var("x", relay.TensorType(shape, dtype)) assert axis is not None @@ -110,14 +111,14 @@ def verify_squeeze(shape, dtype, axis): func = relay.Function([x, axis], squeeze) x_data = np.random.random_sample(shape).astype(dtype) ref_res = np.squeeze(x_data, axis=np_axis) - verify_func(func, [x_data, np.array(np_axis).astype("int64")], ref_res) + verify_func(executor_kind, func, [x_data, np.array(np_axis).astype("int64")], ref_res) verify_squeeze((1, 3, 1), "float32", [0]) verify_squeeze((1, 2, 1, 2, 1), "float32", [0, 2]) @tvm.testing.uses_gpu -def test_dyn_expand_dims(): +def test_dyn_expand_dims(executor_kind): def verify_expand_dims( dshape, dtype, oshape, axis, num_newaxis, target_device=tvm.testing.enabled_targets() ): @@ -130,7 +131,7 @@ def verify_expand_dims( data_np = np.random.uniform(size=dshape).astype(dtype) axis_np = np.array(axis).astype("int64") ref_res = data_np.reshape(oshape) - verify_func(func, [data_np, axis_np], ref_res, target_device=target_device) + verify_func(executor_kind, func, [data_np, axis_np], ref_res, target_device=target_device) for dtype in ["float16", "float32"]: verify_expand_dims((2, 2), dtype, (2, 2, 1), 2, 1) @@ -146,7 +147,7 @@ def verify_expand_dims( @tvm.testing.uses_gpu -def test_dyn_tile(): +def test_dyn_tile(executor_kind): def verify_tile(dshape, reps): x = relay.var("x", relay.TensorType(dshape, "float32")) r = relay.var("reps", 
relay.TensorType((len(reps),), "float32")) @@ -156,7 +157,7 @@ def verify_tile(dshape, reps): x_data = np.random.uniform(low=-1, high=1, size=dshape).astype("float32") ref_res = np.tile(x_data, reps=reps) reps_data = np.array(reps).astype("float32") - verify_func(func, [x_data, np.array(reps).astype("float32")], ref_res) + verify_func(executor_kind, func, [x_data, np.array(reps).astype("float32")], ref_res) verify_tile((2, 3, 4), (3, 2, 1)) verify_tile((2, 3, 4), (1, 2)) @@ -164,7 +165,7 @@ def verify_tile(dshape, reps): @tvm.testing.uses_gpu -def test_dyn_zeros_ones(): +def test_dyn_zeros_ones(executor_kind): def verify_zeros_ones(shape, dtype): for op, ref in [(relay.zeros, np.zeros), (relay.ones, np.ones)]: rank = len(shape) @@ -175,14 +176,16 @@ def verify_zeros_ones(shape, dtype): func = relay.Function([dyn_shape], y) ref_res = ref(shape, dtype) - verify_func(func, [np.array(shape).astype("int64")], ref_res.astype("int64")) + verify_func( + executor_kind, func, [np.array(shape).astype("int64")], ref_res.astype("int64") + ) verify_zeros_ones((1, 3), "int64") verify_zeros_ones((8, 9, 1, 2), "float32") @tvm.testing.uses_gpu -def test_dyn_full(): +def test_dyn_full(executor_kind): def verify_full(fill_value, src_shape, dtype): x = relay.var("x", relay.scalar_type(dtype)) rank = len(src_shape) @@ -192,7 +195,10 @@ def verify_full(fill_value, src_shape, dtype): ref_res = np.full(src_shape, fill_value).astype(dtype) verify_func( - func, [np.array(fill_value).astype(dtype), np.array(src_shape).astype("int64")], ref_res + executor_kind, + func, + [np.array(fill_value).astype(dtype), np.array(src_shape).astype("int64")], + ref_res, ) verify_full(4, (1, 3, 4, 4), "int32") @@ -201,7 +207,7 @@ def verify_full(fill_value, src_shape, dtype): @tvm.testing.uses_gpu -def test_dyn_sparse_to_dense(): +def test_dyn_sparse_to_dense(executor_kind): def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_shape, xpected): sparse_indices_data = np.array(sparse_indices) sparse_values_data = np.array(sparse_values) @@ -242,7 +248,7 @@ def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_ output_shape_data, ] - verify_func(func, arguments, xpected) + verify_func(executor_kind, func, arguments, xpected) verify_sparse_to_dense(1, 3, 0, [5], [0, 3, 0, 0, 0]) # scalar verify_sparse_to_dense([0, 1, 4], [3, 3, 3], 0, [5], [3, 3, 0, 0, 3]) # vector @@ -301,7 +307,7 @@ def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_ @pytest.mark.parametrize("dtype", [np.int64, np.int32]) @pytest.mark.parametrize("use_dyn", [True, False]) def test_sparse_fill_empty_rows( - sparse_indices, sparse_values, dense_shape, default_value, dtype, use_dyn + sparse_indices, sparse_values, dense_shape, default_value, dtype, use_dyn, executor_kind ): def ref_sparse_fill_empty_rows( sparse_indices: np.ndarray, @@ -404,6 +410,7 @@ def verify_sparse_fill_empty_rows( assert empty_row_indicator_infer_type.checked_type.dtype == "bool" verify_func( + executor_kind, func, [sparse_indices_np, sparse_values_np, dense_shape_np, default_value_np], ref_res, diff --git a/tests/python/relay/dyn/test_dynamic_op_level5.py b/tests/python/relay/dyn/test_dynamic_op_level5.py index 2eeeb1d828c9..58234929c7bb 100644 --- a/tests/python/relay/dyn/test_dynamic_op_level5.py +++ b/tests/python/relay/dyn/test_dynamic_op_level5.py @@ -26,6 +26,8 @@ import tvm.topi.testing import tvm.testing +executor_kind = tvm.testing.parameter("debug", "vm") + def test_resize2d_infer_type(): n, c, h, w = 
te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") @@ -37,7 +39,7 @@ def test_resize2d_infer_type(): @tvm.testing.uses_gpu -def test_resize2d(): +def test_resize2d(executor_kind): def verify_resize2d(dshape, scale, method, layout): if layout == "NHWC": size = (dshape[1] * scale, dshape[2] * scale) @@ -62,12 +64,11 @@ def verify_resize2d(dshape, scale, method, layout): ) for target, dev in tvm.testing.enabled_targets(): - for kind in ["vm", "debug"]: - mod = tvm.ir.IRModule.from_expr(func) - op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()( - x_data, size - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4, atol=1e-6) + mod = tvm.ir.IRModule.from_expr(func) + op_res = relay.create_executor( + executor_kind, mod=mod, device=dev, target=target + ).evaluate()(x_data, size) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4, atol=1e-6) for method in ["linear", "nearest_neighbor"]: for layout in ["NCHW", "NHWC"]: diff --git a/tests/python/relay/dyn/test_dynamic_op_level6.py b/tests/python/relay/dyn/test_dynamic_op_level6.py index 530c402b2947..ebf9c36263be 100644 --- a/tests/python/relay/dyn/test_dynamic_op_level6.py +++ b/tests/python/relay/dyn/test_dynamic_op_level6.py @@ -22,9 +22,11 @@ from tvm import relay import tvm.testing +executor_kind = tvm.testing.parameter("debug", "vm") + @tvm.testing.uses_gpu -def test_dynamic_topk(): +def test_dynamic_topk(executor_kind): def verify_topk(k, axis, ret_type, is_ascend, dtype): shape = (20, 100) x = relay.var("x", relay.TensorType(shape, "float32")) @@ -53,18 +55,17 @@ def verify_topk(k, axis, ret_type, is_ascend, dtype): np_indices = np_indices.astype(dtype) for target, dev in tvm.testing.enabled_targets(): - for kind in ["vm", "debug"]: - mod = tvm.ir.IRModule.from_expr(func) - op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()( - np_data, np.array([k]).astype("float32") - ) - if ret_type == "both": - tvm.testing.assert_allclose(op_res[0].numpy(), np_values) - tvm.testing.assert_allclose(op_res[1].numpy(), np_indices) - elif ret_type == "values": - tvm.testing.assert_allclose(op_res.numpy(), np_values) - else: - tvm.testing.assert_allclose(op_res.numpy(), np_indices) + mod = tvm.ir.IRModule.from_expr(func) + op_res = relay.create_executor( + executor_kind, mod=mod, device=dev, target=target + ).evaluate()(np_data, np.array([k]).astype("float32")) + if ret_type == "both": + tvm.testing.assert_allclose(op_res[0].numpy(), np_values) + tvm.testing.assert_allclose(op_res[1].numpy(), np_indices) + elif ret_type == "values": + tvm.testing.assert_allclose(op_res.numpy(), np_values) + else: + tvm.testing.assert_allclose(op_res.numpy(), np_indices) np.random.seed(0) for k in [0, 1, 5]: diff --git a/tests/python/relay/test_op_grad_level1.py b/tests/python/relay/test_op_grad_level1.py index a31191a42c48..cb94f297cfa3 100644 --- a/tests/python/relay/test_op_grad_level1.py +++ b/tests/python/relay/test_op_grad_level1.py @@ -26,6 +26,8 @@ from tvm.relay.testing import check_grad, run_infer_type from tvm.relay.transform import gradient +executor_kind = tvm.testing.parameter("debug") + def sigmoid(x): one = np.ones_like(x) @@ -67,7 +69,7 @@ class TestUnaryOp: dtype = tvm.testing.parameter("float32", "float64") shape = tvm.testing.parameter((10, 4)) - def test_op(self, target, dev, relay_op, ref_func, shape, dtype): + def test_op(self, target, dev, executor_kind, relay_op, ref_func, shape, dtype): target = tvm.target.Target(target) if 
target.kind.name == "vulkan": @@ -125,9 +127,9 @@ def test_op(self, target, dev, relay_op, ref_func, shape, dtype): grad_in = np.random.rand(*shape).astype(dtype) ref_grad_out = ref_func(data_in, grad_in) - op_res, (op_grad, _) = relay.create_executor(device=dev, target=target).evaluate(bwd_func)( - data_in, grad_in - ) + op_res, (op_grad, _) = relay.create_executor( + executor_kind, device=dev, target=target + ).evaluate(bwd_func)(data_in, grad_in) np.testing.assert_allclose(op_grad.numpy(), ref_grad_out, rtol=0.01) @@ -143,7 +145,7 @@ class TestBinaryOp: dtype = tvm.testing.parameter("float32", "float64") shape = tvm.testing.parameter((5, 10, 5)) - def test_binary_op(self, target, dev, relay_op, ref_func, shape, dtype): + def test_binary_op(self, target, dev, executor_kind, relay_op, ref_func, shape, dtype): t = relay.TensorType(shape, dtype=dtype) x = relay.var("x", t) y = relay.var("y", t) @@ -156,31 +158,31 @@ def test_binary_op(self, target, dev, relay_op, ref_func, shape, dtype): fwd_func = run_infer_type(fwd_func) bwd_func = run_infer_type(gradient(fwd_func)) - op_res, (op_grad0, op_grad1) = relay.create_executor(device=dev, target=target).evaluate( - bwd_func - )(x_data, y_data) + op_res, (op_grad0, op_grad1) = relay.create_executor( + executor_kind, device=dev, target=target + ).evaluate(bwd_func)(x_data, y_data) np.testing.assert_allclose(op_grad0.numpy(), ref_grad0, rtol=0.01) np.testing.assert_allclose(op_grad1.numpy(), ref_grad1, rtol=0.01) -def test_softmax_grad(target, dev): +def test_softmax_grad(executor_kind, target, dev): target = tvm.target.Target(target) if target.kind.name == "vulkan": pytest.xfail("Known failure on vulkan") data = relay.var("data", relay.TensorType((1, 16), "float64")) fwd_func = relay.Function([data], relay.nn.softmax(data)) - check_grad(fwd_func, scale=1, target_devices=[(target, dev)]) + check_grad(fwd_func, scale=1, target_devices=[(target, dev)], executor_kind=executor_kind) -def test_log_softmax_grad(target, dev): +def test_log_softmax_grad(executor_kind, target, dev): target = tvm.target.Target(target) if target.kind.name == "vulkan": pytest.xfail("Known failure on vulkan") data = relay.var("data", relay.TensorType((2, 16), "float64")) fwd_func = relay.Function([data], relay.nn.log_softmax(data)) - check_grad(fwd_func, scale=1, target_devices=[(target, dev)]) + check_grad(fwd_func, scale=1, target_devices=[(target, dev)], executor_kind=executor_kind) class TestBiasAddGrad: @@ -191,25 +193,25 @@ class TestBiasAddGrad: ((4, 8), (8,), 1), ) - def test_bias_add(self, target, dev, d_shape, b_shape, axis): + def test_bias_add(self, executor_kind, target, dev, d_shape, b_shape, axis): data = relay.var("data", relay.TensorType(d_shape, "float32")) bias = relay.var("bias", relay.TensorType(b_shape, "float32")) fwd_func = relay.Function([data, bias], relay.nn.bias_add(data, bias, axis=axis)) - check_grad(fwd_func, target_devices=[(target, dev)]) + check_grad(fwd_func, target_devices=[(target, dev)], executor_kind=executor_kind) -def test_expand_dims_grad(target, dev): +def test_expand_dims_grad(executor_kind, target, dev): data = relay.var("data", shape=(2, 3), dtype="float64") fwd_func = relay.Function([data], relay.expand_dims(data, axis=1, num_newaxis=2)) - check_grad(fwd_func, target_devices=[(target, dev)]) + check_grad(fwd_func, target_devices=[(target, dev)], executor_kind=executor_kind) -def test_concatenate_grad(target, dev): +def test_concatenate_grad(executor_kind, target, dev): x = relay.var("x", shape=(2, 2, 5)) y = relay.var("y", 
shape=(2, 1, 5)) z = relay.var("z", shape=(2, 4, 5)) fwd_func = relay.Function([x, y, z], relay.concatenate([x, y, z], axis=1)) - check_grad(fwd_func, target_devices=[(target, dev)]) + check_grad(fwd_func, target_devices=[(target, dev)], executor_kind=executor_kind) if __name__ == "__main__": diff --git a/tests/python/relay/test_op_grad_level10.py b/tests/python/relay/test_op_grad_level10.py index 4c2c9082e044..6b2531a4a1f6 100644 --- a/tests/python/relay/test_op_grad_level10.py +++ b/tests/python/relay/test_op_grad_level10.py @@ -28,9 +28,10 @@ index_dtype = tvm.testing.parameter("int32", "int64") val_dtype = tvm.testing.parameter("float32", "float64") +executor_kind = tvm.testing.parameter("debug") -def test_cross_entropy_grad(target, dev, val_dtype): +def test_cross_entropy_grad(executor_kind, target, dev, val_dtype): target = tvm.target.Target(target) if target.kind.name == "vulkan" and val_dtype == "float64": # GLSL.std.450's Log implementation only takes 16/32-bit floats. @@ -44,10 +45,11 @@ def test_cross_entropy_grad(target, dev, val_dtype): scale=0.1, mean=1, target_devices=[(target, dev)], + executor_kind=executor_kind, ) -def test_cross_entropy_with_logits_grad(target, dev, val_dtype): +def test_cross_entropy_with_logits_grad(executor_kind, target, dev, val_dtype): x = relay.var("x", shape=(2, 5), dtype=val_dtype) y = relay.var("y", shape=(2, 5), dtype=val_dtype) check_grad( @@ -56,13 +58,16 @@ def test_cross_entropy_with_logits_grad(target, dev, val_dtype): scale=0.1, mean=1, target_devices=[(target, dev)], + executor_kind=executor_kind, ) -def test_checkpoint(target, dev): +def test_checkpoint(executor_kind, target, dev): inputs = [relay.var("x{}".format(i), shape=(1,)) for i in range(4)] output = relay.multiply(relay.add(inputs[0], inputs[1]), relay.add(inputs[2], inputs[3])) - check_grad(relay.Function(inputs, relay.annotation.checkpoint(output))) + check_grad( + relay.Function(inputs, relay.annotation.checkpoint(output)), executor_kind=executor_kind + ) scope = relay.ScopeBuilder() out_tuple = scope.let( @@ -76,7 +81,11 @@ def test_checkpoint(target, dev): ) ) out_single = scope.get() - check_grad(relay.Function(inputs, out_single), target_devices=[(target, dev)]) + check_grad( + relay.Function(inputs, out_single), + target_devices=[(target, dev)], + executor_kind=executor_kind, + ) class TestBatchMatmulGrad: @@ -87,7 +96,9 @@ class TestBatchMatmulGrad: ((2, 5, 3), (2, 4, 5), True, True), ) - def test_batch_matmul_grad(self, target, dev, a_shape, b_shape, transpose_a, transpose_b): + def test_batch_matmul_grad( + self, executor_kind, target, dev, a_shape, b_shape, transpose_a, transpose_b + ): tensor_a = relay.var("tensor_a", relay.TensorType(a_shape, "float32")) tensor_b = relay.var("tensor_b", relay.TensorType(b_shape, "float32")) check_grad( @@ -98,18 +109,20 @@ def test_batch_matmul_grad(self, target, dev, a_shape, b_shape, transpose_a, tra ), ), target_devices=[(target, dev)], + executor_kind=executor_kind, ) -def test_reverse_reshape_grad(target, dev): +def test_reverse_reshape_grad(executor_kind, target, dev): x = relay.var("x", shape=(3, 4, 5), dtype="float64") check_grad( relay.Function([x], relay.op.reverse_reshape(x, (-1, 0))), target_devices=[(target, dev)], + executor_kind=executor_kind, ) -def test_one_hot_grad(target, dev, index_dtype, val_dtype): +def test_one_hot_grad(executor_kind, target, dev, index_dtype, val_dtype): indices_shape = (3, 4) depth = 5 axis = -1 @@ -127,7 +140,13 @@ def test_one_hot_grad(target, dev, index_dtype, val_dtype): y = 
relay.one_hot(indices, on_val, off_val, depth, axis, val_dtype) f = relay.Function([indices, on_val, off_val], y) - check_grad(f, inputs=inputs, test_inputs=test_inputs, target_devices=[(target, dev)]) + check_grad( + f, + inputs=inputs, + test_inputs=test_inputs, + target_devices=[(target, dev)], + executor_kind=executor_kind, + ) if __name__ == "__main__": diff --git a/tests/python/relay/test_op_grad_level2.py b/tests/python/relay/test_op_grad_level2.py index fcdcfe6accd8..820f724bfc43 100644 --- a/tests/python/relay/test_op_grad_level2.py +++ b/tests/python/relay/test_op_grad_level2.py @@ -25,8 +25,10 @@ from tvm.relay.transform import gradient import tvm.testing +executor_kind = tvm.testing.parameter("debug") -def verify_max_pool2d_grad(x_shape, pool_size, strides, padding, ceil_mode): + +def verify_max_pool2d_grad(executor_kind, x_shape, pool_size, strides, padding, ceil_mode): x = relay.var("x", relay.TensorType(x_shape, "float32")) y = tvm.relay.nn.max_pool2d( x, pool_size=pool_size, strides=strides, padding=padding, ceil_mode=ceil_mode @@ -51,24 +53,41 @@ def verify_max_pool2d_grad(x_shape, pool_size, strides, padding, ceil_mode): ) for target, dev in tvm.testing.enabled_targets(): - op_res, (op_grad,) = relay.create_executor(device=dev, target=target).evaluate(bwd_func)( - data - ) + op_res, (op_grad,) = relay.create_executor( + executor_kind, device=dev, target=target + ).evaluate(bwd_func)(data) np.testing.assert_allclose(op_grad.numpy(), ref_grad, rtol=0.01) @tvm.testing.uses_gpu -def test_max_pool2d_grad(): +def test_max_pool2d_grad(executor_kind): verify_max_pool2d_grad( - (1, 4, 16, 16), pool_size=(2, 2), strides=(2, 2), padding=(0, 0), ceil_mode=False + executor_kind, + (1, 4, 16, 16), + pool_size=(2, 2), + strides=(2, 2), + padding=(0, 0), + ceil_mode=False, ) verify_max_pool2d_grad( - (1, 4, 16, 16), pool_size=(1, 1), strides=(1, 1), padding=(1, 1), ceil_mode=False + executor_kind, + (1, 4, 16, 16), + pool_size=(1, 1), + strides=(1, 1), + padding=(1, 1), + ceil_mode=False, ) def verify_avg_pool2d_grad( - x_shape, pool_size, strides, padding, ceil_mode, count_include_pad, dtype="float32" + x_shape, + pool_size, + strides, + padding, + ceil_mode, + count_include_pad, + executor_kind, + dtype="float32", ): for shape_dtype in ["int32", "int64"]: @@ -101,14 +120,14 @@ def verify_avg_pool2d_grad( ) for target, dev in tvm.testing.enabled_targets(): - op_res, (op_grad,) = relay.create_executor(device=dev, target=target).evaluate( - bwd_func - )(data) + op_res, (op_grad,) = relay.create_executor( + executor_kind, device=dev, target=target + ).evaluate(bwd_func)(data) np.testing.assert_allclose(op_grad.numpy(), ref_grad, rtol=0.01) @tvm.testing.uses_gpu -def test_avg_pool2d_grad(): +def test_avg_pool2d_grad(executor_kind): verify_avg_pool2d_grad( (1, 4, 16, 16), pool_size=(2, 2), @@ -116,6 +135,7 @@ def test_avg_pool2d_grad(): padding=(0, 0), ceil_mode=False, count_include_pad=True, + executor_kind=executor_kind, ) verify_avg_pool2d_grad( (1, 4, 16, 16), @@ -124,6 +144,7 @@ def test_avg_pool2d_grad(): padding=(1, 1), ceil_mode=False, count_include_pad=False, + executor_kind=executor_kind, ) verify_avg_pool2d_grad( (1, 4, 16, 16), @@ -132,11 +153,12 @@ def test_avg_pool2d_grad(): padding=(1, 1), ceil_mode=False, count_include_pad=False, + executor_kind=executor_kind, dtype="int32", ) -def verify_global_avg_pool2d_grad(x_shape): +def verify_global_avg_pool2d_grad(executor_kind, x_shape): x = relay.var("x", relay.TensorType(x_shape, "float32")) y = tvm.relay.nn.global_avg_pool2d(x) @@ 
-158,19 +180,21 @@ def verify_global_avg_pool2d_grad(x_shape): ) for target, dev in tvm.testing.enabled_targets(): - op_res, (op_grad,) = relay.create_executor(device=dev, target=target).evaluate(bwd_func)( - data - ) + op_res, (op_grad,) = relay.create_executor( + executor_kind, device=dev, target=target + ).evaluate(bwd_func)(data) np.testing.assert_allclose(op_grad.numpy(), ref_grad, rtol=0.01) @tvm.testing.uses_gpu -def test_global_avg_pool2d_grad(): - verify_global_avg_pool2d_grad((1, 4, 16, 16)) - verify_global_avg_pool2d_grad((1, 8, 8, 24)) +def test_global_avg_pool2d_grad(executor_kind): + verify_global_avg_pool2d_grad(executor_kind, (1, 4, 16, 16)) + verify_global_avg_pool2d_grad(executor_kind, (1, 8, 8, 24)) -def verify_conv2d_grad(dshape, wshape, strides, padding, dilation, groups=1, mode="higher_order"): +def verify_conv2d_grad( + dshape, wshape, strides, padding, dilation, groups=1, mode="higher_order", executor_kind="vm" +): dtype = "float32" data = relay.var("data", shape=dshape, dtype=dtype) weight = relay.var("weight", shape=wshape, dtype=dtype) @@ -184,59 +208,73 @@ def verify_conv2d_grad(dshape, wshape, strides, padding, dilation, groups=1, mod out_dtype=dtype, ) fwd_func = relay.Function([data, weight], conv) - check_grad(fwd_func, mode=mode) + check_grad(fwd_func, mode=mode, executor_kind=executor_kind) @tvm.testing.uses_gpu -def test_conv2d_grad(): - verify_conv2d_grad((1, 4, 16, 16), (16, 4, 3, 3), [1, 1], [1, 1], [1, 1]) - verify_conv2d_grad((1, 4, 16, 16), (16, 4, 1, 1), [1, 1], [0, 0], [1, 1]) - verify_conv2d_grad((1, 4, 16, 16), (16, 4, 1, 1), [2, 2], [0, 0], [1, 1]) - verify_conv2d_grad((1, 4, 16, 16), (16, 4, 3, 3), [1, 1], [1, 1], [1, 1], mode="first_order") +def test_conv2d_grad(executor_kind): + verify_conv2d_grad( + (1, 4, 16, 16), (16, 4, 3, 3), [1, 1], [1, 1], [1, 1], executor_kind=executor_kind + ) + verify_conv2d_grad( + (1, 4, 16, 16), (16, 4, 1, 1), [1, 1], [0, 0], [1, 1], executor_kind=executor_kind + ) + verify_conv2d_grad( + (1, 4, 16, 16), (16, 4, 1, 1), [2, 2], [0, 0], [1, 1], executor_kind=executor_kind + ) + verify_conv2d_grad( + (1, 4, 16, 16), + (16, 4, 3, 3), + [1, 1], + [1, 1], + [1, 1], + mode="first_order", + executor_kind=executor_kind, + ) -def verify_dense_grad(d_shape, w_shape): +def verify_dense_grad(d_shape, w_shape, executor_kind): data = relay.var("data", relay.TensorType(d_shape, "float32")) weight = relay.var("weight", relay.TensorType(w_shape, "float32")) fwd_func = relay.Function([data, weight], relay.nn.dense(data, weight)) - check_grad(fwd_func) + check_grad(fwd_func, executor_kind=executor_kind) -def test_dense_grad(): - verify_dense_grad((1, 8), (16, 8)) - verify_dense_grad((1, 4), (3, 4)) - verify_dense_grad((5, 4), (3, 4)) +def test_dense_grad(executor_kind): + verify_dense_grad((1, 8), (16, 8), executor_kind) + verify_dense_grad((1, 4), (3, 4), executor_kind) + verify_dense_grad((5, 4), (3, 4), executor_kind) -def verify_matmul_grad(a_shape, b_shape, transpose_a, transpose_b): +def verify_matmul_grad(a_shape, b_shape, transpose_a, transpose_b, executor_kind): tensor_a = relay.var("tensor_a", relay.TensorType(a_shape, "float32")) tensor_b = relay.var("tensor_b", relay.TensorType(b_shape, "float32")) fwd_func = relay.Function( [tensor_a, tensor_b], relay.nn.matmul(tensor_a, tensor_b, transpose_a=transpose_a, transpose_b=transpose_b), ) - check_grad(fwd_func) + check_grad(fwd_func, executor_kind=executor_kind) -def test_matmul_grad(): - verify_matmul_grad((1, 8), (8, 16), False, False) - verify_matmul_grad((4, 1), (4, 3), 
True, False) - verify_matmul_grad((4, 5), (3, 4), True, True) +def test_matmul_grad(executor_kind): + verify_matmul_grad((1, 8), (8, 16), False, False, executor_kind) + verify_matmul_grad((4, 1), (4, 3), True, False, executor_kind) + verify_matmul_grad((4, 5), (3, 4), True, True, executor_kind) -def verify_batch_flatten_grad(d_shape): +def verify_batch_flatten_grad(d_shape, executor_kind): data = relay.var("data", relay.TensorType(d_shape, "float32")) fwd_func = relay.Function([data], relay.nn.batch_flatten(data)) - check_grad(fwd_func) + check_grad(fwd_func, executor_kind=executor_kind) -def test_batch_flatten_grad(): - verify_batch_flatten_grad((1, 2, 3, 4)) - verify_batch_flatten_grad((1, 8)) +def test_batch_flatten_grad(executor_kind): + verify_batch_flatten_grad((1, 2, 3, 4), executor_kind) + verify_batch_flatten_grad((1, 8), executor_kind) def verify_conv2d_backward_weight( - dy_shape, x_shape, kernel_size, stride, padding, groups=1, out_channels=None + executor_kind, dy_shape, x_shape, kernel_size, stride, padding, groups=1, out_channels=None ): dtype = "float32" dy = relay.var("dy", shape=dy_shape, dtype=dtype) @@ -265,7 +303,11 @@ def verify_conv2d_backward_weight( dy_np = np.random.randn(*dy_shape).astype(dtype) x_np = np.random.randn(*x_shape).astype(dtype) - dw_np = relay.create_executor(device=dev, target=target).evaluate(dw)(dy_np, x_np).numpy() + dw_np = ( + relay.create_executor(executor_kind, device=dev, target=target) + .evaluate(dw)(dy_np, x_np) + .numpy() + ) ref_dw_np = tvm.topi.testing.conv2d_backward_weight_python( dy_np, x_np, kernel_size, stride, padding, groups=groups, channels=out_channels ) @@ -273,11 +315,22 @@ def verify_conv2d_backward_weight( np.testing.assert_allclose(dw_np, ref_dw_np, rtol=1e-4, atol=1e-4) -def test_conv2d_backward_weight(): - verify_conv2d_backward_weight((2, 8, 32, 32), (2, 4, 32, 32), (3, 3), (1, 1), (1, 1)) - verify_conv2d_backward_weight((2, 16, 15, 15), (2, 3, 32, 32), (3, 3), (2, 2), (0, 0)) +def test_conv2d_backward_weight(executor_kind): + verify_conv2d_backward_weight( + executor_kind, (2, 8, 32, 32), (2, 4, 32, 32), (3, 3), (1, 1), (1, 1) + ) + verify_conv2d_backward_weight( + executor_kind, (2, 16, 15, 15), (2, 3, 32, 32), (3, 3), (2, 2), (0, 0) + ) verify_conv2d_backward_weight( - (1, 16, 32, 32), (1, 16, 32, 32), (3, 3), (1, 1), (1, 1), groups=16, out_channels=16 + executor_kind, + (1, 16, 32, 32), + (1, 16, 32, 32), + (3, 3), + (1, 1), + (1, 1), + groups=16, + out_channels=16, ) diff --git a/tests/python/relay/test_op_grad_level3.py b/tests/python/relay/test_op_grad_level3.py index 30d849853d87..89b8199b9e22 100644 --- a/tests/python/relay/test_op_grad_level3.py +++ b/tests/python/relay/test_op_grad_level3.py @@ -24,9 +24,11 @@ from tvm.relay.transform import gradient import tvm.testing +executor_kind = tvm.testing.parameter("debug") + @tvm.testing.uses_gpu -def test_clip(): +def test_clip(executor_kind): for dtype in ("float32", "float64"): ref = lambda x: np.where( x > 10.0, np.zeros_like(x), np.where(x < 1.0, np.zeros_like(x), np.ones_like(x)) @@ -41,49 +43,49 @@ def test_clip(): bwd_func = run_infer_type(gradient(fwd_func)) for target, dev in tvm.testing.enabled_targets(): - op_res, (op_grad,) = relay.create_executor(device=dev, target=target).evaluate( - bwd_func - )(data) + op_res, (op_grad,) = relay.create_executor( + executor_kind, device=dev, target=target + ).evaluate(bwd_func)(data) np.testing.assert_allclose(op_grad.numpy(), ref_grad, rtol=0.01) -def verify_transpose_grad(d_shape, axes=None): +def 
verify_transpose_grad(d_shape, axes=None, executor_kind="vm"): data = relay.var("data", relay.TensorType(d_shape, "float32")) fwd_func = relay.Function([data], relay.transpose(data, axes=axes)) - check_grad(fwd_func) + check_grad(fwd_func, executor_kind=executor_kind) -def test_transpose_grad(): - verify_transpose_grad((1, 2, 3, 4)) - verify_transpose_grad((1, 2, 3, 4), axes=(0, 2, 3, 1)) +def test_transpose_grad(executor_kind): + verify_transpose_grad((1, 2, 3, 4), executor_kind=executor_kind) + verify_transpose_grad((1, 2, 3, 4), axes=(0, 2, 3, 1), executor_kind=executor_kind) -def test_negative_grad(): +def test_negative_grad(executor_kind): data = relay.var("data", relay.TensorType((10, 4), "float32")) fwd_func = relay.Function([data], relay.negative(data)) - check_grad(fwd_func) + check_grad(fwd_func, executor_kind=executor_kind) -def test_cast_grad(): +def test_cast_grad(executor_kind): data = relay.var("data", relay.TensorType((10, 4), "float32")) fwd_func = relay.Function([data], relay.cast(data, "float64")) - check_grad(fwd_func) + check_grad(fwd_func, executor_kind=executor_kind) -def test_cast_like_grad(): +def test_cast_like_grad(executor_kind): data = relay.var("data", shape=(10, 4), dtype="float32") like = relay.var("like", shape=(1,), dtype="float64") fwd_func = relay.Function([data, like], relay.cast_like(data, like)) - check_grad(fwd_func) + check_grad(fwd_func, executor_kind=executor_kind) -def test_copy_grad(): +def test_copy_grad(executor_kind): data = relay.var("data", relay.TensorType((10, 4), "float64")) fwd_func = relay.Function([data], relay.copy(data)) - check_grad(fwd_func) + check_grad(fwd_func, executor_kind=executor_kind) -def test_take_grad(): +def test_take_grad(executor_kind): data_dtype = relay.TensorType((3, 4, 5), "float64") data = relay.var("data", data_dtype) indices = relay.var("indices", relay.TensorType((relay.Any(),), "int32")) @@ -92,28 +94,28 @@ def test_take_grad(): # take on axis fwd_func = relay.Function([data, indices], relay.take(data, indices, axis=1)) - check_grad(fwd_func, inputs=inputs, test_inputs=test_inputs) + check_grad(fwd_func, inputs=inputs, test_inputs=test_inputs, executor_kind=executor_kind) # take on flattened fwd_func = relay.Function([data, indices], relay.take(data, indices, axis=None)) - check_grad(fwd_func, inputs=inputs, test_inputs=test_inputs) + check_grad(fwd_func, inputs=inputs, test_inputs=test_inputs, executor_kind=executor_kind) -def test_stack_grad(): +def test_stack_grad(executor_kind): args = [relay.var(c, shape=(2, 3, 4), dtype="float64") for c in "xyz"] fwd_func = relay.Function(args, relay.stack(args, axis=0)) - check_grad(fwd_func) + check_grad(fwd_func, executor_kind=executor_kind) -def test_squeeze_grad(): +def test_squeeze_grad(executor_kind): data = relay.var("data", shape=(2, 1, 1, 3, 4, 1), dtype="float64") fwd_func = relay.Function([data], relay.squeeze(data)) fwd_func_subset = relay.Function([data], relay.squeeze(data, axis=[1, -1])) - check_grad(fwd_func) - check_grad(fwd_func_subset) + check_grad(fwd_func, executor_kind=executor_kind) + check_grad(fwd_func_subset, executor_kind=executor_kind) -def test_arange_grad(): +def test_arange_grad(executor_kind): # TODO: testing arange numerically is strange because two-sided approx can # produce different output shapes dtype = "float64" @@ -122,23 +124,25 @@ def test_arange_grad(): step = relay.var("step", relay.TensorType((), dtype)) values = [np.array(v, dtype=dtype) for v in [2.5, 9.5, 1.8]] fwd_func = relay.Function([start, stop, step], 
relay.arange(start, stop, step, dtype)) - check_grad(fwd_func, inputs=values) + check_grad(fwd_func, inputs=values, executor_kind=executor_kind) -def test_gather_nd_grad(): +def test_gather_nd_grad(executor_kind): data = relay.var("data", relay.TensorType((2, 3), "float64")) indices = relay.var("indices", relay.TensorType((2, 4), "int64")) fwd = relay.Function([data, indices], relay.gather_nd(data, indices)) data_np = np.random.rand(2, 3).astype("float64") indices_np = np.array([[0, 1, 1, 0], [0, 1, 0, 0]], dtype="int64") - check_grad(fwd, inputs=[data_np, indices_np], test_inputs=[data_np]) + check_grad( + fwd, inputs=[data_np, indices_np], test_inputs=[data_np], executor_kind=executor_kind + ) -def test_reshape_like_grad(): +def test_reshape_like_grad(executor_kind): data = relay.var("data", shape=(2, 3, 4), dtype="float32") shape_like = relay.var("shape_like", shape=(6, 2, 2), dtype="float32") fwd_func = relay.Function([data, shape_like], relay.reshape_like(data, shape_like)) - check_grad(fwd_func) + check_grad(fwd_func, executor_kind=executor_kind) def test_zeros_ones_grad_const_ints(): @@ -172,7 +176,7 @@ def test_zeros_ones_grad_const_expr(): tvm.ir.assert_structural_equal(bwd_func.ret_type, expected_ty_dyn) -def test_zeros_ones_grad_dynamic(): +def test_zeros_ones_grad_dynamic(executor_kind): rank = np.random.randint(low=1, high=5, dtype="int32") dyn_shape = np.random.randint(low=1, high=4, size=(rank,), dtype="int32") shape_data = relay.var("shape_data", shape=(rank,), dtype="int32") @@ -182,9 +186,9 @@ def test_zeros_ones_grad_dynamic(): bwd_func = run_infer_type(gradient(run_infer_type(fwd_func))) for target, dev in tvm.testing.enabled_targets(): - res, (grad,) = relay.create_executor(device=dev, target=target).evaluate(bwd_func)( - dyn_shape - ) + res, (grad,) = relay.create_executor(executor_kind, device=dev, target=target).evaluate( + bwd_func + )(dyn_shape) tvm.testing.assert_allclose(res.numpy(), op_ref(dyn_shape, dtype="float32")) tvm.testing.assert_allclose(grad.numpy(), np.zeros((rank,), dtype="int32")) diff --git a/tests/python/relay/test_op_grad_level4.py b/tests/python/relay/test_op_grad_level4.py index 17d30cacac41..9ed2ef262777 100644 --- a/tests/python/relay/test_op_grad_level4.py +++ b/tests/python/relay/test_op_grad_level4.py @@ -16,43 +16,46 @@ # under the License. 
import pytest import numpy as np +import tvm.testing from tvm import relay from tvm.relay.testing import check_grad, _np_randn_from_type +executor_kind = tvm.testing.parameter("debug") -def verify_reduction_grad(red_fn, d_shape, axis=None, keepdims=False, exclude=False): + +def verify_reduction_grad(executor_kind, red_fn, d_shape, axis=None, keepdims=False, exclude=False): data = relay.var("data", relay.TensorType(d_shape, "float32")) fwd_func = relay.Function([data], red_fn(data, axis=axis, keepdims=keepdims, exclude=exclude)) - check_grad(fwd_func) + check_grad(fwd_func, executor_kind=executor_kind) -def test_reduction_grad(): +def test_reduction_grad(executor_kind): def _unbiased_variance(x, axis=None, keepdims=False, exclude=False): return relay.variance(x, axis=axis, keepdims=keepdims, exclude=exclude, unbiased=True) for op in (relay.sum, relay.variance, _unbiased_variance, relay.mean): - verify_reduction_grad(op, (4, 2)) - verify_reduction_grad(op, (4, 2), axis=-1, keepdims=True) - verify_reduction_grad(op, (4, 2, 1), axis=(1, 2), exclude=True) - verify_reduction_grad(op, (4, 2, 1), axis=1) + verify_reduction_grad(executor_kind, op, (4, 2)) + verify_reduction_grad(executor_kind, op, (4, 2), axis=-1, keepdims=True) + verify_reduction_grad(executor_kind, op, (4, 2, 1), axis=(1, 2), exclude=True) + verify_reduction_grad(executor_kind, op, (4, 2, 1), axis=1) -def verify_max_grad(d_shape, axis=None, keepdims=False, exclude=False): +def verify_max_grad(executor_kind, d_shape, axis=None, keepdims=False, exclude=False): data = relay.var("data", relay.TensorType(d_shape, "float32")) fwd_func = relay.Function( [data], relay.max(data, axis=axis, keepdims=keepdims, exclude=exclude) ) - check_grad(fwd_func, scale=1e-3) + check_grad(fwd_func, scale=1e-3, executor_kind=executor_kind) -def test_max_grad(): - verify_max_grad((10, 10), axis=None) - verify_max_grad((10, 10), axis=-1) - verify_max_grad((6, 3, 2), axis=(1, 2), keepdims=True) - verify_max_grad((5, 4, 3), axis=(0, 2), exclude=True) +def test_max_grad(executor_kind): + verify_max_grad(executor_kind, (10, 10), axis=None) + verify_max_grad(executor_kind, (10, 10), axis=-1) + verify_max_grad(executor_kind, (6, 3, 2), axis=(1, 2), keepdims=True) + verify_max_grad(executor_kind, (5, 4, 3), axis=(0, 2), exclude=True) -def test_where_grad(): +def test_where_grad(executor_kind): cond_type = relay.TensorType((2, 3, 4), "int32") lhs_type = relay.TensorType((1, 3, 4), "float32") rhs_type = relay.TensorType((2, 1, 4), "float32") @@ -66,10 +69,10 @@ def test_where_grad(): lhs = relay.var("lhs", type_annotation=lhs_type) rhs = relay.var("rhs", type_annotation=rhs_type) fwd_func = relay.Function([cond, lhs, rhs], relay.where(cond, lhs, rhs)) - check_grad(fwd_func, inputs=inputs, test_inputs=inputs[1:]) + check_grad(fwd_func, inputs=inputs, test_inputs=inputs[1:], executor_kind=executor_kind) -def test_less_equal_grad(): +def test_less_equal_grad(executor_kind): x_type = relay.TensorType((2, 3, 4), "float32") y_type = relay.TensorType((3, 1), "float32") # We need to generate inputs far apart to get correct numerical gradients @@ -83,10 +86,10 @@ def test_less_equal_grad(): x = relay.var("x", type_annotation=x_type) y = relay.var("y", type_annotation=y_type) fwd_func = relay.Function([x, y], relay.less_equal(x, y)) - check_grad(fwd_func, inputs=inputs, test_inputs=inputs, eps=1e-6) + check_grad(fwd_func, inputs=inputs, test_inputs=inputs, eps=1e-6, executor_kind=executor_kind) -def test_not_equal_grad(): +def test_not_equal_grad(executor_kind): x_type = 
relay.TensorType((2, 3, 4), "float32") y_type = relay.TensorType((3, 1), "float32") # We need to generate inputs far apart to get correct numerical gradients @@ -100,17 +103,17 @@ def test_not_equal_grad(): x = relay.var("x", type_annotation=x_type) y = relay.var("y", type_annotation=y_type) fwd_func = relay.Function([x, y], relay.not_equal(x, y)) - check_grad(fwd_func, inputs=inputs, test_inputs=inputs, eps=1e-6) + check_grad(fwd_func, inputs=inputs, test_inputs=inputs, eps=1e-6, executor_kind=executor_kind) -def test_strided_slice_grad(): +def test_strided_slice_grad(executor_kind): def check(sh, dtype, begin, end, strides, slice_mode): x = relay.var("x", shape=sh, dtype=dtype) f = relay.Function( [x], relay.strided_slice(x, begin=begin, end=end, strides=strides, slice_mode=slice_mode), ) - check_grad(f) + check_grad(f, executor_kind=executor_kind) check((2, 3, 4), "float32", (0, 1, 0), (-1, -1, 1), (1, 1, 1), "size") check((2, 3, 4), "float32", (0, 1, 0), (2, 3, 1), (1, 1, 1), "end") diff --git a/tests/python/relay/test_op_level1.py b/tests/python/relay/test_op_level1.py index d4238f81e01b..1b72e5ce5137 100644 --- a/tests/python/relay/test_op_level1.py +++ b/tests/python/relay/test_op_level1.py @@ -26,6 +26,8 @@ from tvm.contrib.nvcc import have_fp16 import tvm.testing +executor_kind = tvm.testing.parameter("graph", "vm") + def sigmoid(x): one = np.ones_like(x) @@ -286,7 +288,7 @@ def test_log_softmax(): @tvm.testing.uses_gpu -def test_concatenate(): +def test_concatenate(executor_kind): for dtype in ["float16", "float32"]: n, t, d = te.size_var("n"), te.size_var("t"), 100 x = relay.var("x", shape=(n, t, d)) @@ -336,17 +338,13 @@ def test_concatenate(): and not have_fp16(tvm.cuda(0).compute_version) ): continue - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)( - x_data, y_data, t_data - ) - tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=0.01) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)( + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( x_data, y_data, t_data ) - tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=0.01) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=0.01) -def test_dropout(): +def test_dropout(executor_kind): for dtype in ["float16", "float32"]: n, t, d = te.size_var("n"), te.size_var("t"), te.size_var("d") input_ty = relay.TensorType((n, t, d), dtype) @@ -361,9 +359,8 @@ def test_dropout(): y = relay.nn.dropout(x, rate=0.5) func = relay.Function([], y) for target, dev in tvm.testing.enabled_targets(): - for backend in ["debug", "graph"]: - op_res = relay.create_executor("debug", device=dev, target=target).evaluate(func)() - tvm.testing.assert_allclose(op_res.numpy(), in_np, rtol=0.01) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)() + tvm.testing.assert_allclose(op_res.numpy(), in_np, rtol=0.01) def test_batch_norm(): @@ -490,7 +487,7 @@ def test_matmul_type_check(): @tvm.testing.uses_gpu -def test_matmul(): +def test_matmul(executor_kind): for dtype in ["float16", "float32"]: # Matmul accuracy for float16 is poor if dtype == "float16": @@ -529,14 +526,10 @@ def test_matmul(): ref_res = np.dot(x_data.transpose(), w_data) for target, dev in tvm.testing.enabled_targets(): - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)( + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( x_data, w_data ) - 
tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)( - x_data, w_data - ) - tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) @pytest.mark.xfail @@ -552,7 +545,7 @@ def test_dense_type_check(): @tvm.testing.uses_gpu -def test_dense(): +def test_dense(executor_kind): for dtype in ["float16", "float32"]: # Dense accuracy for float16 is poor if dtype == "float16": @@ -591,14 +584,10 @@ def test_dense(): ref_res = np.dot(x_data, w_data.T) for target, dev in tvm.testing.enabled_targets(): - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)( - x_data, w_data - ) - tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)( + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( x_data, w_data ) - tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) @tvm.testing.uses_gpu diff --git a/tests/python/relay/test_op_level10.py b/tests/python/relay/test_op_level10.py index 8ee5adbb318d..7e0b8ad89f64 100644 --- a/tests/python/relay/test_op_level10.py +++ b/tests/python/relay/test_op_level10.py @@ -27,9 +27,11 @@ from tvm.relay import transform from tvm.relay.testing import run_infer_type +executor_kind = tvm.testing.parameter("graph", "vm") + @tvm.testing.uses_gpu -def test_checkpoint(): +def test_checkpoint(executor_kind): dtype = "float32" xs = [relay.var("x{}".format(i), dtype) for i in range(4)] f = relay.multiply(relay.add(xs[0], xs[1]), relay.add(xs[2], xs[3])) @@ -41,12 +43,11 @@ def test_checkpoint(): inputs = [np.random.uniform() for _ in range(len(xs))] for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - f_res = relay.create_executor(kind, device=dev, target=target).evaluate(f)(*inputs) - f_checkpoint_res = relay.create_executor(kind, device=dev, target=target).evaluate( - f_checkpoint - )(*inputs) - tvm.testing.assert_allclose(f_res.numpy(), f_checkpoint_res.numpy(), 0, 0) + f_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(f)(*inputs) + f_checkpoint_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate( + f_checkpoint + )(*inputs) + tvm.testing.assert_allclose(f_res.numpy(), f_checkpoint_res.numpy(), 0, 0) def test_checkpoint_alpha_equal(): @@ -171,7 +172,7 @@ def test_checkpoint_alpha_equal_tuple(): @tvm.testing.uses_gpu -def test_collapse_sum_like(): +def test_collapse_sum_like(executor_kind): shape = (3, 4, 5, 6) shape_like = (4, 5, 6) dtype = "float32" @@ -186,13 +187,14 @@ def test_collapse_sum_like(): y = np.random.uniform(size=shape_like).astype(dtype) ref_res = np.sum(x, 0) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(x, y) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x, y + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) @tvm.testing.uses_gpu -def test_collapse_sum_to(): +def test_collapse_sum_to(executor_kind): shape = (3, 4, 5, 6) shape_to = (4, 5, 6) dtype = "float32" @@ -205,13 +207,12 @@ def test_collapse_sum_to(): x = 
np.random.uniform(size=shape).astype(dtype) ref_res = np.sum(x, 0) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(x) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(x) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) @tvm.testing.uses_gpu -def test_broadcast_to(): +def test_broadcast_to(executor_kind): shape = (4, 1, 6) shape_like = (3, 4, 5, 6) dtype = "float32" @@ -224,13 +225,12 @@ def test_broadcast_to(): x = np.random.uniform(size=shape).astype(dtype) ref_res = np.broadcast_to(x, shape_like) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(x) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(x) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) @tvm.testing.uses_gpu -def test_broadcast_to_const_shape_int64(): +def test_broadcast_to_const_shape_int64(executor_kind): shape_like = relay.const(np.array([1, 5]), dtype="int64") x = relay.var("x", shape=(1,), dtype="int64") z = relay.broadcast_to(x, shape=shape_like) @@ -241,13 +241,12 @@ def test_broadcast_to_const_shape_int64(): x = np.random.randint(10, size=(1,), dtype="int64") ref_res = np.broadcast_to(x, (5,)) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(f)(x) - tvm.testing.assert_allclose(op_res.numpy(), ref_res) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(f)(x) + tvm.testing.assert_allclose(op_res.numpy(), ref_res) @tvm.testing.uses_gpu -def test_broadcast_to_like(): +def test_broadcast_to_like(executor_kind): shape = (4, 1, 6) shape_like = (3, 4, 5, 6) dtype = "float32" @@ -264,9 +263,10 @@ def test_broadcast_to_like(): ref_res = np.broadcast_to(x, shape_like) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(x, y) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x, y + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) def np_slice_like(np_data, np_shape_like, axis=None): @@ -288,7 +288,7 @@ def np_slice_like(np_data, np_shape_like, axis=None): return np_result -def verify_slice_like(data, slice_like, axes, output, dtype="float32"): +def verify_slice_like(executor_kind, data, slice_like, axes, output, dtype="float32"): x = relay.var("data", relay.TensorType(data, dtype)) y = relay.var("slice_like", relay.TensorType(slice_like, dtype)) z = relay.slice_like(x, y, axes) @@ -308,31 +308,46 @@ def verify_slice_like(data, slice_like, axes, output, dtype="float32"): ref_res = np_slice_like(x_data, y_data, axes) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - x_data, y_data - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x_data, y_data + ) + 
tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) @tvm.testing.uses_gpu -def test_slice_like(): +def test_slice_like(executor_kind): d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4") - verify_slice_like(data=(d1, d2, d3), slice_like=(1, 2, 3), axes=None, output=(1, 2, 3)) - verify_slice_like(data=(1, 2, 3), slice_like=(d1, d2, d3), axes=None, output=(d1, d2, d3)) - verify_slice_like(data=(d2, d3, d4), slice_like=(d1, d2, d3), axes=(1, 2), output=(d2, d2, d3)) - verify_slice_like(data=(3, 4, 5), slice_like=(1, 2, 3), axes=None, output=(1, 2, 3)) - verify_slice_like(data=(3, 4, 5), slice_like=(1, 2), axes=None, output=(1, 2, 5)) - verify_slice_like(data=(3, 4, 5), slice_like=(1, 2, 3), axes=(1, 2), output=(3, 2, 3)) - verify_slice_like(data=(3, 4, 5), slice_like=(1, 2, 3), axes=(-1, -3), output=(1, 4, 3)) verify_slice_like( - data=(1, 3, 224, 224), slice_like=(1, 3, 112, 112), axes=(2, 3), output=(1, 3, 112, 112) + executor_kind, data=(d1, d2, d3), slice_like=(1, 2, 3), axes=None, output=(1, 2, 3) + ) + verify_slice_like( + executor_kind, data=(1, 2, 3), slice_like=(d1, d2, d3), axes=None, output=(d1, d2, d3) + ) + verify_slice_like( + executor_kind, data=(d2, d3, d4), slice_like=(d1, d2, d3), axes=(1, 2), output=(d2, d2, d3) + ) + verify_slice_like( + executor_kind, data=(3, 4, 5), slice_like=(1, 2, 3), axes=None, output=(1, 2, 3) + ) + verify_slice_like(executor_kind, data=(3, 4, 5), slice_like=(1, 2), axes=None, output=(1, 2, 5)) + verify_slice_like( + executor_kind, data=(3, 4, 5), slice_like=(1, 2, 3), axes=(1, 2), output=(3, 2, 3) + ) + verify_slice_like( + executor_kind, data=(3, 4, 5), slice_like=(1, 2, 3), axes=(-1, -3), output=(1, 4, 3) + ) + verify_slice_like( + executor_kind, + data=(1, 3, 224, 224), + slice_like=(1, 3, 112, 112), + axes=(2, 3), + output=(1, 3, 112, 112), ) @tvm.testing.uses_gpu -def test_reverse_reshape(): - def verify_reverse_reshape(shape, newshape, oshape): +def test_reverse_reshape(executor_kind): + def verify_reverse_reshape(executor_kind, shape, newshape, oshape): x = relay.var("x", relay.TensorType(shape, "float32")) z = relay.reverse_reshape(x, newshape=newshape) zz = run_infer_type(z) @@ -343,21 +358,20 @@ def verify_reverse_reshape(shape, newshape, oshape): x_data = np.random.uniform(low=-1, high=1, size=shape).astype("float32") ref_res = np.reshape(x_data, oshape) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - x_data - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x_data + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) - verify_reverse_reshape((2, 3, 4), (4, 0, 2), (4, 3, 2)) - verify_reverse_reshape((2, 3, 4), (2, 0, 0), (2, 3, 4)) - verify_reverse_reshape((2, 3, 4), (0, -1), (3, 8)) - verify_reverse_reshape((2, 3, 4), (-1, 0), (6, 4)) - verify_reverse_reshape((2, 3, 4), (0, -3), (2, 12)) + verify_reverse_reshape(executor_kind, (2, 3, 4), (4, 0, 2), (4, 3, 2)) + verify_reverse_reshape(executor_kind, (2, 3, 4), (2, 0, 0), (2, 3, 4)) + verify_reverse_reshape(executor_kind, (2, 3, 4), (0, -1), (3, 8)) + verify_reverse_reshape(executor_kind, (2, 3, 4), (-1, 0), (6, 4)) + verify_reverse_reshape(executor_kind, (2, 3, 4), (0, -3), (2, 12)) def verify_batch_matmul_with_inputs( - x, y, x_np, y_np, out_shape, dtype="float32", trans_x=False, trans_y=True + executor_kind, x, 
y, x_np, y_np, out_shape, dtype="float32", trans_x=False, trans_y=True ): z = relay.nn.batch_matmul(x, y, transpose_a=trans_x, transpose_b=trans_y) zz = run_infer_type(z) @@ -368,26 +382,29 @@ def verify_batch_matmul_with_inputs( z_np = tvm.topi.testing.batch_matmul(x_np, y_np, trans_x=trans_x, trans_y=trans_y) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - if len(input_vars) == 2: - z = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - x_np, y_np - ) - else: - z = relay.create_executor(kind, device=dev, target=target).evaluate(func)(x_np) - tvm.testing.assert_allclose(z.numpy(), z_np, rtol=1e-5, atol=1e-5) + if len(input_vars) == 2: + z = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x_np, y_np + ) + else: + z = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(x_np) + tvm.testing.assert_allclose(z.numpy(), z_np, rtol=1e-5, atol=1e-5) -def verify_batch_matmul(x_shape, y_shape, out_shape, dtype="float32", trans_x=False, trans_y=True): +def verify_batch_matmul( + executor_kind, x_shape, y_shape, out_shape, dtype="float32", trans_x=False, trans_y=True +): x = relay.var("x", relay.TensorType(x_shape, dtype)) y = relay.var("y", relay.TensorType(y_shape, dtype)) x_np = np.random.uniform(size=x_shape).astype(dtype) y_np = np.random.uniform(size=y_shape).astype(dtype) - verify_batch_matmul_with_inputs(x, y, x_np, y_np, out_shape, dtype, trans_x, trans_y) + verify_batch_matmul_with_inputs( + executor_kind, x, y, x_np, y_np, out_shape, dtype, trans_x, trans_y + ) @tvm.testing.uses_gpu -def test_batch_matmul(): +def test_batch_matmul(executor_kind): b, m, n, k = te.size_var("b"), te.size_var("m"), te.size_var("n"), te.size_var("k") x = relay.var("x", relay.TensorType((b, m, k), "float32")) y = relay.var("y", relay.TensorType((b, n, k), "float32")) @@ -395,17 +412,31 @@ def test_batch_matmul(): zz = run_infer_type(z) assert zz.checked_type == relay.TensorType((b, m, n), "float32") - verify_batch_matmul((1, 16, 32), (1, 16, 32), (1, 16, 16), trans_x=False, trans_y=True) - verify_batch_matmul((5, 16, 32), (5, 16, 32), (5, 16, 16), trans_x=False, trans_y=True) - verify_batch_matmul((5, 16, 32), (5, 20, 32), (5, 16, 20), trans_x=False, trans_y=True) - verify_batch_matmul((30, 16, 32), (30, 20, 32), (30, 16, 20), trans_x=False, trans_y=True) - verify_batch_matmul((1, 32, 16), (1, 16, 32), (1, 16, 16), trans_x=True, trans_y=True) - verify_batch_matmul((5, 16, 32), (5, 32, 16), (5, 16, 16), trans_x=False, trans_y=False) - verify_batch_matmul((5, 32, 16), (5, 32, 20), (5, 16, 20), trans_x=True, trans_y=False) + verify_batch_matmul( + executor_kind, (1, 16, 32), (1, 16, 32), (1, 16, 16), trans_x=False, trans_y=True + ) + verify_batch_matmul( + executor_kind, (5, 16, 32), (5, 16, 32), (5, 16, 16), trans_x=False, trans_y=True + ) + verify_batch_matmul( + executor_kind, (5, 16, 32), (5, 20, 32), (5, 16, 20), trans_x=False, trans_y=True + ) + verify_batch_matmul( + executor_kind, (30, 16, 32), (30, 20, 32), (30, 16, 20), trans_x=False, trans_y=True + ) + verify_batch_matmul( + executor_kind, (1, 32, 16), (1, 16, 32), (1, 16, 16), trans_x=True, trans_y=True + ) + verify_batch_matmul( + executor_kind, (5, 16, 32), (5, 32, 16), (5, 16, 16), trans_x=False, trans_y=False + ) + verify_batch_matmul( + executor_kind, (5, 32, 16), (5, 32, 20), (5, 16, 20), trans_x=True, trans_y=False + ) x_np = np.random.randn(10, 27, 64).astype("float32") x = relay.var("x", shape=x_np.shape) - 
verify_batch_matmul_with_inputs(x, x, x_np, x_np, (10, 27, 27)) + verify_batch_matmul_with_inputs(executor_kind, x, x, x_np, x_np, (10, 27, 27)) @pytest.mark.skip("Requires cascadelake") @@ -492,13 +523,13 @@ def test_shape_of(): for target, dev in tvm.testing.enabled_targets(): # Because using graph executor, this op will be optimized after # constant folding pass, here we only test with interpreter - for kind in ["debug"]: + for kind in ["vm"]: op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(x_data) tvm.testing.assert_allclose(op_res.numpy(), np.array(shape).astype("int32")) @tvm.testing.uses_gpu -def test_ndarray_size(): +def test_ndarray_size(executor_kind): def verify_ndarray_size(shape): x = relay.var("x", shape=shape) func = relay.Function([x], relay.op.ndarray_size(x)) @@ -507,11 +538,10 @@ def verify_ndarray_size(shape): x_data = np.random.uniform(size=shape).astype("float32") ref_res = np.size(x_data) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - x_data - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x_data + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res) verify_ndarray_size((2, 3, 5)) verify_ndarray_size((2, 3, 5, 7)) @@ -573,7 +603,7 @@ def test_adaptive_pool(): @tvm.testing.uses_gpu -def test_sequence_mask(): +def test_sequence_mask(executor_kind): def _verify(data_shape, mask_value, axis, dtype, itype): max_length = data_shape[axis] nbatch = data_shape[1 - axis] @@ -588,11 +618,10 @@ def _verify(data_shape, mask_value, axis, dtype, itype): gt_out_np = tvm.topi.testing.sequence_mask(data_np, valid_length_np, mask_value, axis) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - out_relay = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - data_np, valid_length_np - ) - tvm.testing.assert_allclose(out_relay.numpy(), gt_out_np) + out_relay = relay.create_executor(executor_kind, device=dev, target=target).evaluate( + func + )(data_np, valid_length_np) + tvm.testing.assert_allclose(out_relay.numpy(), gt_out_np) _verify((5, 10), 0.0, 1, "float32", "int32") _verify((2, 3, 5, 3), 0.0, 0, "float32", "int64") @@ -600,7 +629,7 @@ def _verify(data_shape, mask_value, axis, dtype, itype): @tvm.testing.uses_gpu -def test_one_hot(): +def test_one_hot(executor_kind): def _get_oshape(indices_shape, depth, axis): oshape = [] true_axis = len(indices_shape) if axis == -1 else axis @@ -629,11 +658,10 @@ def _verify(indices_shape, depth, on_value, off_value, axis, dtype): out_np = tvm.topi.testing.one_hot(indices_np, on_value, off_value, depth, axis, dtype) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - out_relay = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - indices_np - ) - tvm.testing.assert_allclose(out_relay.numpy(), out_np) + out_relay = relay.create_executor(executor_kind, device=dev, target=target).evaluate( + func + )(indices_np) + tvm.testing.assert_allclose(out_relay.numpy(), out_np) _verify((3,), 3, 1, 0, -1, "int32") _verify((3,), 3, 1.0, 0.0, -1, "float32") @@ -644,7 +672,7 @@ def _verify(indices_shape, depth, on_value, off_value, axis, dtype): @tvm.testing.uses_gpu -def test_matrix_set_diag(): +def test_matrix_set_diag(executor_kind): def _verify(input_shape, diagonal_shape, dtype, k=0, 
align="RIGHT_LEFT"): input = relay.var("input", relay.TensorType(input_shape, dtype)) diagonal = relay.var("diagonal", relay.TensorType(diagonal_shape, dtype)) @@ -660,11 +688,10 @@ def _verify(input_shape, diagonal_shape, dtype, k=0, align="RIGHT_LEFT"): out_np = tvm.topi.testing.matrix_set_diag(input_np, diagonal_np, k, align) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - out_relay = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - input_np, diagonal_np - ) - tvm.testing.assert_allclose(out_relay.numpy(), out_np) + out_relay = relay.create_executor(executor_kind, device=dev, target=target).evaluate( + func + )(input_np, diagonal_np) + tvm.testing.assert_allclose(out_relay.numpy(), out_np) _verify((2, 2), (2,), "float32") _verify((4, 3, 3), (4, 3), "int32") @@ -675,7 +702,7 @@ def _verify(input_shape, diagonal_shape, dtype, k=0, align="RIGHT_LEFT"): @tvm.testing.parametrize_targets -def test_nll_loss(dev, target): +def test_nll_loss(executor_kind, dev, target): def _get_oshape(target_shape, reduction): if reduction == "none": return target_shape @@ -702,11 +729,10 @@ def _verify(prediction_shape, reduction="mean", ignore_index=-100, dtype="float3 predictions_np, targets_np, weights_np, reduction, ignore_index ) - for kind in ["graph", "debug"]: - out_relay = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - predictions_np, targets_np, weights_np - ) - tvm.testing.assert_allclose(out_relay.numpy(), out_np, rtol=1e-6, atol=1e-6) + out_relay = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + predictions_np, targets_np, weights_np + ) + tvm.testing.assert_allclose(out_relay.numpy(), out_np, rtol=1e-6, atol=1e-6) _verify((10, 5)) _verify((10, 5, 2, 2)) diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py index c644890bbcbe..726ee578da85 100644 --- a/tests/python/relay/test_op_level2.py +++ b/tests/python/relay/test_op_level2.py @@ -30,6 +30,8 @@ from tvm.relay.testing import run_infer_type from tvm.topi.cuda.conv3d_winograd import _infer_tile_size +executor_kind = tvm.testing.parameter("graph", "vm") + @tvm.testing.uses_gpu def test_conv1d_infer_type(): @@ -1301,7 +1303,7 @@ def test_avg_pool2d_no_count_pad(): @tvm.testing.uses_gpu -def test_flatten_infer_type(): +def test_flatten_infer_type(executor_kind): d1, d2, d3, d4 = te.size_var("d1"), te.size_var("d2"), te.size_var("d3"), te.size_var("d4") x = relay.var("x", relay.TensorType((d1, d2, d3, d4), "float32")) y = relay.nn.batch_flatten(x) @@ -1330,10 +1332,10 @@ def test_flatten_infer_type(): ref_res = x_data.flatten().reshape(o_shape) for target, dev in tvm.testing.enabled_targets(): - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(x_data) - tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(x_data) - tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x_data + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) @tvm.testing.uses_gpu @@ -1438,7 +1440,7 @@ def _test_run(dtype): @tvm.testing.uses_gpu @pytest.mark.parametrize("dtype", ["float32", "float16"]) -def test_lrn(dtype): +def test_lrn(executor_kind, dtype): n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", shape=(n, c, h, w), 
dtype=dtype) y = relay.nn.lrn(x, size=10, axis=2, bias=0.5, alpha=0.00001, beta=0.75) @@ -1461,14 +1463,14 @@ def test_lrn(dtype): ref_res = tvm.topi.testing.lrn_python(x_data, size, axis, bias, alpha, beta) for target, dev in tvm.testing.enabled_targets(): - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(x_data) - tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(x_data) - tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x_data + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) @tvm.testing.uses_gpu -def test_l2_normalize(): +def test_l2_normalize(executor_kind): n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", shape=(n, c, h, w)) y = relay.nn.l2_normalize(x, eps=0.001, axis=[1]) @@ -1489,10 +1491,10 @@ def test_l2_normalize(): ref_res = tvm.topi.testing.l2_normalize_python(x_data, eps, axis) for target, dev in tvm.testing.enabled_targets(): - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(x_data) - tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(x_data) - tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x_data + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) def batch_flatten(data): diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index ef4b45ade9aa..9d27839c4703 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -30,7 +30,7 @@ from utils import ref_funcs -executor_kind = tvm.testing.parameter("graph", "debug") +executor_kind = tvm.testing.parameter("graph", "vm") class TestZerosOnes: @@ -644,7 +644,7 @@ def test_full_like_infer_type(): assert yy.checked_type == relay.TensorType((n, c, h, w), "float32") -def test_infer_type_leaky_relu(target, dev): +def test_infer_type_leaky_relu(target, dev, executor_kind): n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", relay.TensorType((n, c, h, w), "float32")) y = relay.nn.leaky_relu(x, alpha=0.1) @@ -663,10 +663,8 @@ def test_infer_type_leaky_relu(target, dev): x_data = np.random.uniform(low=-1, high=1, size=shape).astype(dtype) ref_res = np.where(x_data > 0, x_data, x_data * 0.1) - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(x_data) - tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(x_data) - tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(x_data) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) class TestInferTypePrelu: @@ -684,7 +682,7 @@ class TestInferTypePrelu: ((1, 2, 2, 3), None, 3, (1, 2, 2, 3)), ) - def test_infer_type_prelu(self, target, dev, data, alpha, axis, output, dtype): + def test_infer_type_prelu(self, target, dev, executor_kind, data, alpha, axis, output, dtype): x = relay.var("data", relay.TensorType(data, dtype)) if alpha: y = relay.var("alpha", 
relay.TensorType(alpha, dtype)) @@ -712,14 +710,10 @@ def test_infer_type_prelu(self, target, dev, data, alpha, axis, output, dtype): else: ref_res = (x_data < 0) * (x_data * a_data.reshape(1, 1, 3)) + (x_data >= 0) * x_data - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)( - x_data, a_data - ) - tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)( + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( x_data, a_data ) - tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) class TestArange: @@ -1051,7 +1045,7 @@ class TestDynamicScatter: ((16, 16, 4, 5), (16, 16, 4, 5), 3), ) - @pytest.mark.parametrize("executor_kind", ["vm", "debug"]) + @pytest.mark.parametrize("executor_kind", ["vm"]) def test_dynamic_scatter(self, target, dev, executor_kind, dshape, ishape, axis): d = relay.var("d", relay.TensorType([relay.Any() for i in range(len(dshape))], "float32")) i = relay.var("i", relay.TensorType([relay.Any() for i in range(len(ishape))], "int64")) @@ -2033,31 +2027,30 @@ def verify_unique(n, dtype, is_dyn=False, is_sorted=False, return_counts=False): x_data = np.random.randint(50, size=n).astype(dtype) if is_dyn: - backends = ["vm", "debug"] + backend = "vm" else: - backends = ["graph", "debug"] - - for kind in backends: - mod = tvm.ir.IRModule.from_expr(func) - tvm_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()( - x_data - ) # unique, indices, inverse_indices, num_unique, (counts) - np_res = calc_numpy_unique( - x_data, is_sorted - ) # unique, indices, inverse_indices, num_unique, counts - num_unique = np_res[3][0] - - # num_unique - assert num_unique == tvm_res[3].numpy()[0] - # unique - tvm.testing.assert_allclose(tvm_res[0].numpy()[:num_unique], np_res[0], rtol=1e-5) - # indices - tvm.testing.assert_allclose(tvm_res[1].numpy()[:num_unique], np_res[1], rtol=1e-5) - # inverse_indices - tvm.testing.assert_allclose(tvm_res[2].numpy(), np_res[2], rtol=1e-5) - # counts - if return_counts: - tvm.testing.assert_allclose(tvm_res[4].numpy()[:num_unique], np_res[4], rtol=1e-5) + backend = "graph" + + mod = tvm.ir.IRModule.from_expr(func) + tvm_res = relay.create_executor(backend, mod=mod, device=dev, target=target).evaluate()( + x_data + ) # unique, indices, inverse_indices, num_unique, (counts) + np_res = calc_numpy_unique( + x_data, is_sorted + ) # unique, indices, inverse_indices, num_unique, counts + num_unique = np_res[3][0] + + # num_unique + assert num_unique == tvm_res[3].numpy()[0] + # unique + tvm.testing.assert_allclose(tvm_res[0].numpy()[:num_unique], np_res[0], rtol=1e-5) + # indices + tvm.testing.assert_allclose(tvm_res[1].numpy()[:num_unique], np_res[1], rtol=1e-5) + # inverse_indices + tvm.testing.assert_allclose(tvm_res[2].numpy(), np_res[2], rtol=1e-5) + # counts + if return_counts: + tvm.testing.assert_allclose(tvm_res[4].numpy()[:num_unique], np_res[4], rtol=1e-5) for dtype in ["int32", "int64"]: for i in range(8): diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index b9bbef951555..e46832d570e9 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -26,7 +26,7 @@ from tvm.relay import transform from tvm.relay.testing import run_infer_type -executor_kind = tvm.testing.parameter("graph", "debug") +executor_kind = 
tvm.testing.parameter("graph", "vm") @tvm.testing.uses_gpu @@ -153,14 +153,13 @@ def test_binary_int_broadcast_2(): @tvm.testing.uses_gpu -def test_where(): +def test_where(executor_kind): def run(func, inputs, ref_res): for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - *inputs - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + *inputs + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) def verify(x_np, y_np, cond_np): ref_res = np.where(cond_np, x_np, y_np) @@ -398,7 +397,7 @@ def get_test_case(shape, gt_func, test_argmin=False): assert op_res.numpy().item() == ans -def verify_mean_var_std(funcs, shape, axis, keepdims): +def verify_mean_var_std(executor_kind, funcs, shape, axis, keepdims): test_func = funcs[0] ref_func = funcs[1] dtype = "float32" @@ -411,27 +410,26 @@ def verify_mean_var_std(funcs, shape, axis, keepdims): ref_res = ref_func(x_data, axis=axis, dtype=dtype, keepdims=keepdims) for target, dev in tvm.testing.enabled_targets(): - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(x_data) - tvm.testing.assert_allclose(op_res1[0].numpy(), ref_mean, rtol=1e-5) - tvm.testing.assert_allclose(op_res1[1].numpy(), ref_res, rtol=1e-5) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(x_data) - tvm.testing.assert_allclose(op_res2[0].numpy(), ref_mean, rtol=1e-5) - tvm.testing.assert_allclose(op_res2[1].numpy(), ref_res, rtol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x_data + ) + tvm.testing.assert_allclose(op_res[0].numpy(), ref_mean, rtol=1e-5) + tvm.testing.assert_allclose(op_res[1].numpy(), ref_res, rtol=1e-5) @tvm.testing.uses_gpu -def test_mean_var_std(): +def test_mean_var_std(executor_kind): for func in [[relay.mean_variance, np.var], [relay.mean_std, np.std]]: - verify_mean_var_std(func, (2, 3, 4), 1, True) - verify_mean_var_std(func, (2, 3, 4), (1,), True) - verify_mean_var_std(func, (2, 3, 4), -1, True) - verify_mean_var_std(func, (2, 3, 4), (0, 1, 2), False) - verify_mean_var_std(func, (4, 4, 3), None, False) - verify_mean_var_std(func, (4, 4, 3), (0, 2), False) - verify_mean_var_std(func, (128, 24, 128), (0, 1), False) - verify_mean_var_std(func, (128, 24, 128), (0, 2), False) - verify_mean_var_std(func, (128, 24, 128), (0, 1), True) - verify_mean_var_std(func, (128, 24, 128), (0, 2), True) + verify_mean_var_std(executor_kind, func, (2, 3, 4), 1, True) + verify_mean_var_std(executor_kind, func, (2, 3, 4), (1,), True) + verify_mean_var_std(executor_kind, func, (2, 3, 4), -1, True) + verify_mean_var_std(executor_kind, func, (2, 3, 4), (0, 1, 2), False) + verify_mean_var_std(executor_kind, func, (4, 4, 3), None, False) + verify_mean_var_std(executor_kind, func, (4, 4, 3), (0, 2), False) + verify_mean_var_std(executor_kind, func, (128, 24, 128), (0, 1), False) + verify_mean_var_std(executor_kind, func, (128, 24, 128), (0, 2), False) + verify_mean_var_std(executor_kind, func, (128, 24, 128), (0, 1), True) + verify_mean_var_std(executor_kind, func, (128, 24, 128), (0, 2), True) @tvm.testing.uses_gpu diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py index 10cd91415724..af9c08409c01 100644 --- a/tests/python/relay/test_op_level5.py +++ b/tests/python/relay/test_op_level5.py @@ 
-29,7 +29,7 @@ from tvm import relay, te from tvm.relay.testing import run_infer_type -executor_kind = tvm.testing.parameter("graph", "debug") +executor_kind = tvm.testing.parameter("graph", "vm") def test_resize1d_infer_type(): @@ -279,7 +279,7 @@ def test_crop_and_resize(self, target, dev, executor_kind, layout, interpolate_m @tvm.testing.uses_gpu -def test_multibox_prior(): +def test_multibox_prior(executor_kind): def get_ref_result( dshape, sizes=(1.0,), ratios=(1.0,), steps=(-1.0, -1.0), offsets=(0.5, 0.5), clip=True ): @@ -358,10 +358,10 @@ def verify_multibox_prior( func = relay.Function([x], z) func = run_infer_type(func) for target, dev in tvm.testing.enabled_targets(): - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(data) - tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(data) - tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + data + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) sizes = (0.3, 1.5, 0.7) ratios = (1.3, 2.4) @@ -415,7 +415,7 @@ def verify_get_valid_counts(dshape, score_threshold, id_index, score_index): func = relay.Function([x], z.astuple()) func = run_infer_type(func) for target, dev in tvm.testing.enabled_targets(): - out = relay.create_executor("debug", device=dev, target=target).evaluate(func)(np_data) + out = relay.create_executor("vm", device=dev, target=target).evaluate(func)(np_data) tvm.testing.assert_allclose(out[0].numpy(), np_out1, rtol=1e-3, atol=1e-04) tvm.testing.assert_allclose(out[1].numpy(), np_out2, rtol=1e-3, atol=1e-04) @@ -428,7 +428,7 @@ def verify_get_valid_counts(dshape, score_threshold, id_index, score_index): @tvm.testing.uses_gpu -def test_non_max_suppression(): +def test_non_max_suppression(executor_kind): def verify_nms( x0_data, x1_data, @@ -486,22 +486,14 @@ def verify_nms( func_indices = relay.Function([x0, x1, x2, x3], z_indices) func_indices = run_infer_type(func_indices) for target, dev in tvm.testing.enabled_targets(): - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)( - x0_data, x1_data, x2_data, x3_data - ) - tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)( + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( x0_data, x1_data, x2_data, x3_data ) - tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5) - op_indices_res1 = relay.create_executor("graph", device=dev, target=target).evaluate( - func_indices - )(x0_data, x1_data, x2_data, x3_data) - tvm.testing.assert_allclose(op_indices_res1[0].numpy(), ref_indices_res, rtol=1e-5) - op_indices_res2 = relay.create_executor("debug", device=dev, target=target).evaluate( - func_indices - )(x0_data, x1_data, x2_data, x3_data) - tvm.testing.assert_allclose(op_indices_res2[0].numpy(), ref_indices_res, rtol=1e-5) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) + op_indices_res = relay.create_executor( + executor_kind, device=dev, target=target + ).evaluate(func_indices)(x0_data, x1_data, x2_data, x3_data) + tvm.testing.assert_allclose(op_indices_res[0].numpy(), ref_indices_res, rtol=1e-5) np_data = np.array( [ @@ -633,7 +625,7 @@ def verify_nms( @tvm.testing.uses_gpu -def test_multibox_transform_loc(): +def 
test_multibox_transform_loc(executor_kind): def test_default_value(): num_anchors = 3 num_classes = 3 @@ -683,14 +675,10 @@ def test_default_value(): func = relay.Function([cls_prob, loc_pred, anchors], nms) func = run_infer_type(func) for target, dev in tvm.testing.enabled_targets(): - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)( - np_cls_prob, np_loc_preds, np_anchors - ) - tvm.testing.assert_allclose(op_res1.numpy(), expected_np_out, rtol=1e-5) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)( + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( np_cls_prob, np_loc_preds, np_anchors ) - tvm.testing.assert_allclose(op_res2.numpy(), expected_np_out, rtol=1e-5) + tvm.testing.assert_allclose(op_res.numpy(), expected_np_out, rtol=1e-5) def test_threshold(): num_anchors = 5 @@ -727,7 +715,7 @@ def test_threshold(): @tvm.testing.uses_gpu -def test_roi_align(): +def test_roi_align(executor_kind): def verify_roi_align( data_shape, rois_shape, @@ -778,14 +766,10 @@ def verify_roi_align( mode=mode, ) for target, dev in tvm.testing.enabled_targets(): - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)( + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( np_data, np_rois ) - tvm.testing.assert_allclose(op_res1.numpy(), ref_res, atol=1e-6, rtol=1e-3) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)( - np_data, np_rois - ) - tvm.testing.assert_allclose(op_res2.numpy(), ref_res, atol=1e-6, rtol=1e-3) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, atol=1e-6, rtol=1e-3) def verify_roi_align_nchw( data_shape, rois_shape, pooled_size, spatial_scale, sample_ratio, mode @@ -848,7 +832,7 @@ def verify_roi_align_nhwc( @tvm.testing.uses_gpu -def test_roi_pool(): +def test_roi_pool(executor_kind): def verify_roi_pool(data_shape, rois_shape, pooled_size, spatial_scale): data = relay.var("data", relay.ty.TensorType(data_shape, "float32")) rois = relay.var("rois", relay.ty.TensorType(rois_shape, "float32")) @@ -875,21 +859,17 @@ def verify_roi_pool(data_shape, rois_shape, pooled_size, spatial_scale): np_data, np_rois, pooled_size=pooled_size, spatial_scale=spatial_scale ) for target, dev in tvm.testing.enabled_targets(): - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)( - np_data, np_rois - ) - tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-4) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)( + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( np_data, np_rois ) - tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-4) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4) verify_roi_pool((1, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=1.0) verify_roi_pool((4, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=0.5) @tvm.testing.uses_gpu -def test_proposal(): +def test_proposal(executor_kind): def verify_proposal(np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs): cls_prob = relay.var("cls_prob", relay.ty.TensorType(np_cls_prob.shape, "float32")) bbox_pred = relay.var("bbox_pred", relay.ty.TensorType(np_bbox_pred.shape, "float32")) @@ -905,14 +885,10 @@ def verify_proposal(np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs): print("Skip test because %s is not enabled." 
% target) continue dev = tvm.device(target, 0) - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)( + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( np_cls_prob, np_bbox_pred, np_im_info ) - tvm.testing.assert_allclose(op_res1.numpy(), np_out, rtol=1e-4) - op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)( - np_cls_prob, np_bbox_pred, np_im_info - ) - tvm.testing.assert_allclose(op_res2.numpy(), np_out, rtol=1e-4) + tvm.testing.assert_allclose(op_res.numpy(), np_out, rtol=1e-4) attrs = { "scales": (0.5,), @@ -986,7 +962,7 @@ def verify_yolo_reorg(shape, stride, out_shape): @tvm.testing.uses_gpu -def test_yolo_reorg(): +def test_yolo_reorg(executor_kind): def verify_yolo_reorg(shape, stride): x_data = np.random.uniform(low=-1, high=1, size=shape).astype("float32") ref_res = tvm.topi.testing.reorg_python(x_data, stride) @@ -1000,11 +976,10 @@ def verify_yolo_reorg(shape, stride): func = relay.Function([x], z) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - x_data - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x_data + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) verify_yolo_reorg((1, 100, 20, 20), 10) verify_yolo_reorg((1, 4, 6, 6), 2) @@ -1155,7 +1130,7 @@ def test_run( @tvm.testing.uses_gpu -def test_depth_to_space(): +def test_depth_to_space(executor_kind): def verify_depth_to_space(dshape, block_size, layout, mode): if layout == "NHWC": out_shape = [ @@ -1188,11 +1163,10 @@ def verify_depth_to_space(dshape, block_size, layout, mode): func = relay.Function([x], z) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - x_data - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x_data + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4) for layout in ["NHWC", "NCHW"]: for mode in ["DCR", "CDR"]: @@ -1200,7 +1174,7 @@ def verify_depth_to_space(dshape, block_size, layout, mode): @tvm.testing.uses_gpu -def test_space_to_depth(): +def test_space_to_depth(executor_kind): def verify_space_to_depth(dshape, block_size, layout): if layout == "NHWC": out_shape = [ @@ -1233,11 +1207,10 @@ def verify_space_to_depth(dshape, block_size, layout): func = relay.Function([x], z) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - x_data - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x_data + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4) for layout in ["NHWC", "NCHW"]: verify_space_to_depth((1, 4, 4, 4), 2, layout) @@ -1369,7 +1342,7 @@ def test_dilation2d( @tvm.testing.uses_gpu -def test_affine_grid(): +def test_affine_grid(executor_kind): def verify_affine_grid(num_batch, target_shape): dtype = "float32" data_shape = (num_batch, 2, 3) @@ -1385,18 +1358,17 @@ def verify_affine_grid(num_batch, target_shape): ref_res = tvm.topi.testing.affine_grid_python(data_np, 
target_shape) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res1 = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - data_np - ) - tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5, atol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + data_np + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5, atol=1e-5) verify_affine_grid(1, (16, 32)) verify_affine_grid(4, (16, 32)) @tvm.testing.uses_gpu -def test_grid_sample(): +def test_grid_sample(executor_kind): def verify_grid_sample( data_shape, grid_shape, method="bilinear", padding_mode="zeros", align_corners=True ): @@ -1436,11 +1408,10 @@ def verify_grid_sample( ) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res1 = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - data_np, grid_np - ) - tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5, atol=1e-5) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + data_np, grid_np + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5, atol=1e-5) methods = ["nearest", "bilinear", "bicubic"] padding_modes = ["zeros", "border", "reflection"] @@ -1462,7 +1433,7 @@ def verify_grid_sample( @tvm.testing.uses_gpu -def test_space_to_batch_nd(): +def test_space_to_batch_nd(executor_kind): def verify_space_to_batch_nd(dshape, block_shape, paddings): x_data = np.random.uniform(size=dshape).astype("float32") pad_before, pad_after = map(list, zip(*paddings)) @@ -1479,18 +1450,17 @@ def verify_space_to_batch_nd(dshape, block_shape, paddings): func = relay.Function([x], z) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - x_data - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x_data + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4) verify_space_to_batch_nd([3, 3, 2, 1], [3], [[0, 0]]) verify_space_to_batch_nd([2, 2, 4, 1], [2, 2], [[0, 0], [2, 0]]) @tvm.testing.uses_gpu -def test_batch_to_space_nd(): +def test_batch_to_space_nd(executor_kind): def verify_batch_to_space_nd(dshape, block_shape, crops): x_data = np.random.uniform(size=dshape).astype("float32") crop_begin_list, crop_end_list = map(list, zip(*crops)) @@ -1507,18 +1477,17 @@ def verify_batch_to_space_nd(dshape, block_shape, crops): func = relay.Function([x], z) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - x_data - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + x_data + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4) verify_batch_to_space_nd([4, 1, 1, 3], [2, 2], [[0, 0], [0, 0]]) verify_batch_to_space_nd([8, 1, 3, 1], [2, 2], [[0, 0], [2, 0]]) @tvm.testing.uses_gpu -def test_all_class_non_max_suppression(): +def test_all_class_non_max_suppression(executor_kind): def verify_all_class_non_max_suppression( boxes_np, scores_np, @@ -1542,12 +1511,11 @@ def verify_all_class_non_max_suppression( func = run_infer_type(func) for target, dev in tvm.testing.enabled_targets(): - for kind in 
["graph", "debug"]: - selected_indices, num_detections = relay.create_executor( - kind, device=dev, target=target - ).evaluate(func)(boxes_np, scores_np) - tvm_res = selected_indices.numpy()[: num_detections.numpy()[0]] - np.testing.assert_equal(tvm_res, expected_indices) + selected_indices, num_detections = relay.create_executor( + executor_kind, device=dev, target=target + ).evaluate(func)(boxes_np, scores_np) + tvm_res = selected_indices.numpy()[: num_detections.numpy()[0]] + np.testing.assert_equal(tvm_res, expected_indices) boxes = np.array( [ diff --git a/tests/python/relay/test_op_level6.py b/tests/python/relay/test_op_level6.py index 48c58dc2dc33..78db5b87385d 100644 --- a/tests/python/relay/test_op_level6.py +++ b/tests/python/relay/test_op_level6.py @@ -23,6 +23,8 @@ from tvm.topi.testing import searchsorted_ref import tvm.testing +executor_kind = tvm.testing.parameter("graph", "vm") + @tvm.testing.uses_gpu def test_sort(): @@ -40,16 +42,15 @@ def verify_sort(shape, axis, is_ascend, is_dyn=False, in_dtype="float32"): ref_res = -np.sort(-x_data, axis=axis) if is_dyn: - backends = ["vm", "debug"] + backend = "vm" else: - backends = ["graph", "debug"] + backend = "graph" for target, dev in tvm.testing.enabled_targets(): - for kind in backends: - mod = tvm.ir.IRModule.from_expr(func) - op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()( - x_data - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) + mod = tvm.ir.IRModule.from_expr(func) + op_res = relay.create_executor(backend, mod=mod, device=dev, target=target).evaluate()( + x_data + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) for is_dyn in [False, True]: verify_sort((2, 3, 4), axis=0, is_ascend=False, is_dyn=is_dyn) @@ -76,16 +77,15 @@ def verify_argsort(shape, axis, is_ascend, dtype, is_dyn=False, in_dtype="float3 ref_res = np.argsort(-x_data, axis=axis, kind="stable") if is_dyn: - backends = ["vm", "debug"] + backend = "vm" else: - backends = ["graph", "debug"] + backend = "graph" for target, dev in tvm.testing.enabled_targets(): - for kind in backends: - mod = tvm.ir.IRModule.from_expr(func) - op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()( - x_data - ) - tvm.testing.assert_allclose(op_res.numpy(), ref_res.astype(dtype), rtol=1e-5) + mod = tvm.ir.IRModule.from_expr(func) + op_res = relay.create_executor(backend, mod=mod, device=dev, target=target).evaluate()( + x_data + ) + tvm.testing.assert_allclose(op_res.numpy(), ref_res.astype(dtype), rtol=1e-5) for is_dyn in [False, True]: for dtype in ["int32", "int64", "float32", "float64"]: @@ -102,7 +102,7 @@ def verify_argsort(shape, axis, is_ascend, dtype, is_dyn=False, in_dtype="float3 @tvm.testing.uses_gpu -def test_topk(): +def test_topk(executor_kind): def verify_topk(k, axis, ret_type, is_ascend, dtype, in_dtype="float32"): shape = (20, 100) x = relay.var("x", relay.TensorType(shape, in_dtype)) @@ -129,17 +129,16 @@ def verify_topk(k, axis, ret_type, is_ascend, dtype, in_dtype="float32"): np_indices = np_indices.astype(dtype) for target, dev in tvm.testing.enabled_targets(): - for kind in ["graph", "debug"]: - op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)( - np_data - ) - if ret_type == "both": - tvm.testing.assert_allclose(op_res[0].numpy(), np_values) - tvm.testing.assert_allclose(op_res[1].numpy(), np_indices) - elif ret_type == "values": - tvm.testing.assert_allclose(op_res.numpy(), np_values) - else: - 
tvm.testing.assert_allclose(op_res.numpy(), np_indices) + op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)( + np_data + ) + if ret_type == "both": + tvm.testing.assert_allclose(op_res[0].numpy(), np_values) + tvm.testing.assert_allclose(op_res[1].numpy(), np_indices) + elif ret_type == "values": + tvm.testing.assert_allclose(op_res.numpy(), np_values) + else: + tvm.testing.assert_allclose(op_res.numpy(), np_indices) np.random.seed(0) for k in [0, 1, 5]:
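
Note on the conversion pattern: every hunk above makes the same change — the per-test "for kind in [...]" loop is deleted and the executor kind instead comes from a module-level tvm.testing.parameter fixture, so pytest generates one test case per executor. A minimal sketch of the converted shape (the op and test name here, test_example / relay.abs, are illustrative only and not part of the patch):

import numpy as np
import tvm
import tvm.testing
from tvm import relay

# Module-level pytest fixture: any test that accepts an `executor_kind`
# argument runs once per value listed here.
executor_kind = tvm.testing.parameter("graph", "vm")


@tvm.testing.uses_gpu
def test_example(executor_kind):
    x = relay.var("x", shape=(2, 3), dtype="float32")
    func = relay.Function([x], relay.abs(x))

    x_data = np.random.uniform(low=-1, high=1, size=(2, 3)).astype("float32")
    ref_res = np.abs(x_data)

    for target, dev in tvm.testing.enabled_targets():
        # The executor kind is no longer hard-coded; it comes from the fixture.
        op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
            x_data
        )
        tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)

Where a single test needs a different set of kinds, the fixture is overridden locally, as TestDynamicScatter does with @pytest.mark.parametrize("executor_kind", ["vm"]); direct parametrization takes precedence over the module-level parameter.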
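
The verify_* helpers (verify_slice_like, verify_batch_matmul, verify_mean_var_std, and similar) are plain functions rather than tests, so they cannot receive pytest fixtures directly; the patch therefore threads executor_kind through them as an explicit first argument. A rough sketch of that shape, using a hypothetical verify_identity helper:

import numpy as np
import tvm
import tvm.testing
from tvm import relay

executor_kind = tvm.testing.parameter("graph", "vm")


def verify_identity(executor_kind, shape, dtype="float32"):
    # Hypothetical helper: build a trivial function and compare its output against
    # the input on every enabled target, using whichever executor the test passed in.
    x = relay.var("x", relay.TensorType(shape, dtype))
    func = relay.Function([x], relay.copy(x))

    x_data = np.random.uniform(size=shape).astype(dtype)
    for target, dev in tvm.testing.enabled_targets():
        op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
            x_data
        )
        tvm.testing.assert_allclose(op_res.numpy(), x_data, rtol=1e-5)


@tvm.testing.uses_gpu
def test_identity(executor_kind):
    # The fixture value arrives here once and is forwarded explicitly.
    verify_identity(executor_kind, (2, 3, 4))
    verify_identity(executor_kind, (1, 8))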
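
Tests that exercise dynamic shapes (test_sort / test_argsort with is_dyn=True, verify_unique, TestDynamicScatter — and test_shape_of, which avoids the graph executor for a different reason: constant folding would optimize the op away) are handled differently. Instead of looping over ["vm", "debug"] or ["graph", "debug"], they now pick a single backend: "vm" for the dynamic case, "graph" otherwise, since the graph executor requires static shapes while the VM can run functions with relay.Any() dimensions. A sketch of that selection, assuming a hypothetical verify_sum helper:

import numpy as np
import tvm
import tvm.testing
from tvm import relay


def verify_sum(shape, is_dyn=False):
    dtype = "float32"
    if is_dyn:
        # Dynamic dimensions can only be compiled for the VM executor.
        x = relay.var("x", relay.TensorType([relay.Any()] * len(shape), dtype))
        backend = "vm"
    else:
        x = relay.var("x", relay.TensorType(shape, dtype))
        backend = "graph"
    func = relay.Function([x], relay.sum(x))

    x_data = np.random.uniform(size=shape).astype(dtype)
    ref_res = np.sum(x_data)

    for target, dev in tvm.testing.enabled_targets():
        mod = tvm.ir.IRModule.from_expr(func)
        op_res = relay.create_executor(backend, mod=mod, device=dev, target=target).evaluate()(
            x_data
        )
        tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)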