From 00448aa17456b1365579efcbb4566b12f179a421 Mon Sep 17 00:00:00 2001
From: Egor Churaev
Date: Thu, 16 Sep 2021 17:29:50 +0300
Subject: [PATCH 1/4] [Auto-Schedule][Fix] Fix hang while tune model through rpc

---
 python/tvm/auto_scheduler/measure.py | 48 +++++++---------------------
 1 file changed, 11 insertions(+), 37 deletions(-)

diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py
index c58aeea57d14..db5e8a7f62d1 100644
--- a/python/tvm/auto_scheduler/measure.py
+++ b/python/tvm/auto_scheduler/measure.py
@@ -818,7 +818,7 @@ def prepare_input_map(args):
     return tensor_input_map
 
 
-def prepare_runner_args(inp, build_res):
+def prepare_runner_args(inp, build_res, dev):
     """This function prepares the pre-defined arguments in `TASK_INPUT_BUFFER_TABLE` for local/rpc
     runner in main process
 
@@ -840,6 +840,9 @@ def prepare_runner_args(inp, build_res):
     # pylint: disable=import-outside-toplevel
     from .search_task import get_task_input_buffer  # lazily import to avoid recursive dependency
 
+    random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
+    assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake"
+
     task_input_names = inp.task.task_input_names
     tensor_input_map = prepare_input_map(build_res.args)
     if not task_input_names:
@@ -852,7 +855,7 @@ def prepare_runner_args(inp, build_res):
             if tensor_name in task_input_names:
                 task_input_buffer = get_task_input_buffer(inp.task.workload_key, tensor_name)
                 # convert tvm.NDArray to picklable numpy.ndarray
-                args.append(task_input_buffer.numpy())
+                args.append(ndarray.array(task_input_buffer), dev)
                 task_inputs_count += 1
             else:
                 raise ValueError(
@@ -860,7 +863,9 @@ def prepare_runner_args(inp, build_res):
                     + "should provide with `SearchTask(..., task_inputs={...})`"
                 )
         else:
-            args.append(None)
+            empty_array = ndarray.empty(get_const_tuple(arg.shape), arg.dtype, dev)
+            random_fill(empty_array)
+            args.append(empty_array)
     if task_inputs_count != len(task_input_names):
         raise RuntimeError("task_inputs not fully matched, check if there's any unexpected error")
     return args
@@ -869,7 +874,6 @@ def prepare_runner_args(inp, build_res):
 def _timed_eval_func(
     inp_serialized,
     build_res,
-    args,
     number,
     repeat,
     min_repeat_ms,
@@ -906,20 +910,8 @@ def _timed_eval_func(
 
     if error_no == 0:
         try:
-            random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
-            assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake"
+            args = prepare_runner_args(inp, build_res, dev)
             assert len(args) == len(build_res.args)
-            # pylint: disable=consider-using-enumerate
-            for idx in range(len(args)):
-                if args[idx] is None:
-                    build_res_arg = build_res.args[idx]
-                    empty_array = ndarray.empty(
-                        get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
-                    )
-                    random_fill(empty_array)
-                    args[idx] = empty_array
-                else:
-                    args[idx] = ndarray.array(args[idx], dev)
             dev.sync()
             costs = time_f(*args).results
         # pylint: disable=broad-except
@@ -1010,7 +1002,6 @@ def local_run(
                 time.time(),
             )
         else:
-            args = prepare_runner_args(inp, build_res)
             res = call_func_with_timeout(
                 worker,
                 timeout,
@@ -1018,7 +1009,6 @@ def local_run(
                 args=(
                     inp.serialize(),
                     build_res,
-                    args,
                     number,
                     repeat,
                     min_repeat_ms,
@@ -1059,7 +1049,6 @@ def local_run(
 def _rpc_run(
     inp_serialized,
     build_res,
-    args,
     key,
     host,
     port,
@@ -1106,23 +1095,9 @@ def _rpc_run(
         try:
             stream = dev.create_raw_stream()
             dev.set_raw_stream(stream)
-            random_fill = remote.get_function("tvm.contrib.random.random_fill")
-            assert (
-                random_fill
-            ), "Please make sure USE_RANDOM is ON in the config.cmake on the remote devices"
+            args = prepare_runner_args(inp, build_res, dev)
 
             assert len(args) == len(build_res.args)
-            # pylint: disable=consider-using-enumerate
-            for idx in range(len(args)):
-                if args[idx] is None:
-                    build_res_arg = build_res.args[idx]
-                    empty_array = ndarray.empty(
-                        get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
-                    )
-                    random_fill(empty_array)
-                    args[idx] = empty_array
-                else:
-                    args[idx] = ndarray.array(args[idx], dev)
             dev.sync()
 
             # First run for check that the kernel is correct
@@ -1169,7 +1144,7 @@ def _rpc_run_worker(args):
     res : MeasureResult
         The measure result of this Runner thread.
     """
-    _, build_res, _, _, _, _, _, timeout, _, _, _, _, _, verbose = args
+    _, build_res, _, _, _, _, timeout, _, _, _, _, _, verbose = args
     if build_res.error_no != MeasureErrorNo.NO_ERROR:
         return (
             (MAX_FLOAT,),
@@ -1275,7 +1250,6 @@ def rpc_runner_run(
                 (
                     inp.serialize(),
                     build_res,
-                    prepare_runner_args(inp, build_res),
                     key,
                     host,
                     port,

From 23f9bc9551e4c8ebcd2be0c27192c64f353491b7 Mon Sep 17 00:00:00 2001
From: Egor Churaev
Date: Fri, 17 Sep 2021 09:49:00 +0300
Subject: [PATCH 2/4] Fix problem with hang by using deep copy

---
 python/tvm/auto_scheduler/measure.py | 57 +++++++++++++++++++++-------
 1 file changed, 43 insertions(+), 14 deletions(-)

diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py
index db5e8a7f62d1..10823e518ba7 100644
--- a/python/tvm/auto_scheduler/measure.py
+++ b/python/tvm/auto_scheduler/measure.py
@@ -37,6 +37,7 @@
 import tempfile
 import multiprocessing
 import logging
+import copy
 
 import tvm._ffi
 from tvm.runtime import Object, module, ndarray
@@ -818,7 +819,7 @@ def prepare_input_map(args):
     return tensor_input_map
 
 
-def prepare_runner_args(inp, build_res, dev):
+def prepare_runner_args(inp, build_res):
     """This function prepares the pre-defined arguments in `TASK_INPUT_BUFFER_TABLE` for local/rpc
     runner in main process
 
@@ -840,9 +841,6 @@ def prepare_runner_args(inp, build_res, dev):
     # pylint: disable=import-outside-toplevel
     from .search_task import get_task_input_buffer  # lazily import to avoid recursive dependency
 
-    random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
-    assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake"
-
     task_input_names = inp.task.task_input_names
     tensor_input_map = prepare_input_map(build_res.args)
     if not task_input_names:
@@ -855,7 +853,7 @@ def prepare_runner_args(inp, build_res, dev):
             if tensor_name in task_input_names:
                 task_input_buffer = get_task_input_buffer(inp.task.workload_key, tensor_name)
                 # convert tvm.NDArray to picklable numpy.ndarray
-                args.append(ndarray.array(task_input_buffer), dev)
+                args.append(task_input_buffer.numpy())
                 task_inputs_count += 1
             else:
                 raise ValueError(
@@ -863,9 +861,7 @@ def prepare_runner_args(inp, build_res, dev):
                     + "should provide with `SearchTask(..., task_inputs={...})`"
                 )
         else:
-            empty_array = ndarray.empty(get_const_tuple(arg.shape), arg.dtype, dev)
-            random_fill(empty_array)
-            args.append(empty_array)
+            args.append(None)
     if task_inputs_count != len(task_input_names):
         raise RuntimeError("task_inputs not fully matched, check if there's any unexpected error")
     return args
@@ -874,6 +870,7 @@ def prepare_runner_args(inp, build_res, dev):
 def _timed_eval_func(
     inp_serialized,
     build_res,
+    args,
     number,
     repeat,
     min_repeat_ms,
@@ -910,10 +907,23 @@ def _timed_eval_func(
 
     if error_no == 0:
         try:
-            args = prepare_runner_args(inp, build_res, dev)
+            random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
+            assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake"
             assert len(args) == len(build_res.args)
+            loc_args = copy.deepcopy(args)
+            # pylint: disable=consider-using-enumerate
+            for idx in range(len(loc_args)):
+                if loc_args[idx] is None:
+                    build_res_arg = build_res.args[idx]
+                    empty_array = ndarray.empty(
+                        get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
+                    )
+                    random_fill(empty_array)
+                    loc_args[idx] = empty_array
+                else:
+                    loc_args[idx] = ndarray.array(loc_args[idx], dev)
             dev.sync()
-            costs = time_f(*args).results
+            costs = time_f(*loc_args).results
         # pylint: disable=broad-except
         except Exception:
             costs = (MAX_FLOAT,)
@@ -1002,6 +1012,7 @@ def local_run(
                 time.time(),
             )
         else:
+            args = prepare_runner_args(inp, build_res)
             res = call_func_with_timeout(
                 worker,
                 timeout,
@@ -1009,6 +1020,7 @@ def local_run(
                 args=(
                     inp.serialize(),
                     build_res,
+                    args,
                     number,
                     repeat,
                     min_repeat_ms,
@@ -1049,6 +1061,7 @@ def local_run(
 def _rpc_run(
     inp_serialized,
     build_res,
+    args,
     key,
     host,
     port,
@@ -1095,16 +1108,31 @@ def _rpc_run(
         try:
             stream = dev.create_raw_stream()
             dev.set_raw_stream(stream)
+            random_fill = remote.get_function("tvm.contrib.random.random_fill")
+            assert (
+                random_fill
+            ), "Please make sure USE_RANDOM is ON in the config.cmake on the remote devices"
-            args = prepare_runner_args(inp, build_res, dev)
 
             assert len(args) == len(build_res.args)
+            loc_args = copy.deepcopy(args)
+            # pylint: disable=consider-using-enumerate
+            for idx in range(len(loc_args)):
+                if loc_args[idx] is None:
+                    build_res_arg = build_res.args[idx]
+                    empty_array = ndarray.empty(
+                        get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
+                    )
+                    random_fill(empty_array)
+                    loc_args[idx] = empty_array
+                else:
+                    loc_args[idx] = ndarray.array(loc_args[idx], dev)
             dev.sync()
 
             # First run for check that the kernel is correct
-            func.entry_func(*args)
+            func.entry_func(*loc_args)
             dev.sync()
 
-            costs = time_f(*args).results
+            costs = time_f(*loc_args).results
 
             # clean up remote files
             remote.remove(build_res.filename)
@@ -1144,7 +1172,7 @@ def _rpc_run_worker(args):
     res : MeasureResult
         The measure result of this Runner thread.
     """
-    _, build_res, _, _, _, _, timeout, _, _, _, _, _, verbose = args
+    _, build_res, _, _, _, _, _, timeout, _, _, _, _, _, verbose = args
     if build_res.error_no != MeasureErrorNo.NO_ERROR:
         return (
             (MAX_FLOAT,),
@@ -1250,6 +1278,7 @@ def rpc_runner_run(
                 (
                     inp.serialize(),
                     build_res,
+                    prepare_runner_args(inp, build_res),
                     key,
                     host,
                     port,

From 00a2aa2ce6e3b9915b3e9dd3495fa66b65283a20 Mon Sep 17 00:00:00 2001
From: Egor Churaev
Date: Mon, 20 Sep 2021 08:40:59 +0300
Subject: [PATCH 3/4] Fix with local args

---
 python/tvm/auto_scheduler/measure.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py
index 10823e518ba7..42c3b2a60768 100644
--- a/python/tvm/auto_scheduler/measure.py
+++ b/python/tvm/auto_scheduler/measure.py
@@ -37,7 +37,6 @@
 import tempfile
 import multiprocessing
 import logging
-import copy
 
 import tvm._ffi
 from tvm.runtime import Object, module, ndarray
@@ -910,18 +909,18 @@ def _timed_eval_func(
             random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
             assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake"
             assert len(args) == len(build_res.args)
-            loc_args = copy.deepcopy(args)
+            loc_args = []
             # pylint: disable=consider-using-enumerate
-            for idx in range(len(loc_args)):
-                if loc_args[idx] is None:
+            for idx in range(len(args)):
+                if args[idx] is None:
                     build_res_arg = build_res.args[idx]
                     empty_array = ndarray.empty(
                         get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
                     )
                     random_fill(empty_array)
-                    loc_args[idx] = empty_array
+                    loc_args.append(empty_array)
                 else:
-                    loc_args[idx] = ndarray.array(loc_args[idx], dev)
+                    loc_args.append(ndarray.array(arg))
             dev.sync()
             costs = time_f(*loc_args).results
         # pylint: disable=broad-except
@@ -1114,18 +1113,18 @@ def _rpc_run(
             ), "Please make sure USE_RANDOM is ON in the config.cmake on the remote devices"
 
             assert len(args) == len(build_res.args)
-            loc_args = copy.deepcopy(args)
+            loc_args = []
             # pylint: disable=consider-using-enumerate
-            for idx in range(len(loc_args)):
-                if loc_args[idx] is None:
+            for idx in range(len(args)):
+                if args[idx] is None:
                     build_res_arg = build_res.args[idx]
                     empty_array = ndarray.empty(
                         get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
                     )
                     random_fill(empty_array)
-                    loc_args[idx] = empty_array
+                    loc_args.append(empty_array)
                 else:
-                    loc_args[idx] = ndarray.array(loc_args[idx], dev)
+                    loc_args.append(ndarray.array(args[idx], dev))
             dev.sync()
 
             # First run for check that the kernel is correct

From c53badd502eadb696fab481b351784a498abb854 Mon Sep 17 00:00:00 2001
From: Wuwei Lin
Date: Wed, 22 Sep 2021 13:13:40 -0400
Subject: [PATCH 4/4] Update python/tvm/auto_scheduler/measure.py

---
 python/tvm/auto_scheduler/measure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py
index 42c3b2a60768..8c6fd5f1a949 100644
--- a/python/tvm/auto_scheduler/measure.py
+++ b/python/tvm/auto_scheduler/measure.py
@@ -920,7 +920,7 @@ def _timed_eval_func(
                     random_fill(empty_array)
                     loc_args.append(empty_array)
                 else:
-                    loc_args.append(ndarray.array(arg))
+                    loc_args.append(ndarray.array(args[idx], dev))
             dev.sync()
             costs = time_f(*loc_args).results
         # pylint: disable=broad-except