diff --git a/python/tvm/auto_scheduler/testing/__init__.py b/python/tvm/auto_scheduler/testing/__init__.py
new file mode 100644
index 000000000000..2bbcf8317de3
--- /dev/null
+++ b/python/tvm/auto_scheduler/testing/__init__.py
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=unused-import, redefined-builtin
+"""Testing utilities in auto scheduler."""
+
+# NOTE: Do not import any module here by default
diff --git a/python/tvm/meta_schedule/testing/tune_onnx_auto_scheduler.py b/python/tvm/auto_scheduler/testing/tune_onnx.py
similarity index 93%
rename from python/tvm/meta_schedule/testing/tune_onnx_auto_scheduler.py
rename to python/tvm/auto_scheduler/testing/tune_onnx.py
index e916f5ace339..2e6b9e5924e6 100644
--- a/python/tvm/meta_schedule/testing/tune_onnx_auto_scheduler.py
+++ b/python/tvm/auto_scheduler/testing/tune_onnx.py
@@ -22,11 +22,11 @@
 import numpy as np  # type: ignore
 import onnx  # type: ignore
 import tvm
-from tvm.relay.frontend import from_onnx
 from tvm import auto_scheduler
 from tvm import meta_schedule as ms
 from tvm import relay
 from tvm.meta_schedule.testing.custom_builder_runner import run_module_via_rpc
+from tvm.relay.frontend import from_onnx
 
 
 def _parse_args():
@@ -82,6 +82,26 @@ def _parse_args():
         type=str,
         required=True,
     )
+    args.add_argument(
+        "--number",
+        type=int,
+        default=3,
+    )
+    args.add_argument(
+        "--repeat",
+        type=int,
+        default=1,
+    )
+    args.add_argument(
+        "--min-repeat-ms",
+        type=int,
+        default=100,
+    )
+    args.add_argument(
+        "--cpu-flush",
+        type=lambda s: bool(("false", "true").index(s.lower())),  # bool("False") is True
+        required=True,
+    )
     parsed = args.parse_args()
     parsed.target = tvm.target.Target(parsed.target)
     parsed.input_shape = json.loads(parsed.input_shape)
@@ -105,10 +125,10 @@ def main():
         host=ARGS.rpc_host,
         port=ARGS.rpc_port,
         n_parallel=ARGS.rpc_workers,
-        number=3,
-        repeat=1,
-        min_repeat_ms=100,  # TODO
-        enable_cpu_cache_flush=False,  # TODO
+        number=ARGS.number,
+        repeat=ARGS.repeat,
+        min_repeat_ms=ARGS.min_repeat_ms,
+        enable_cpu_cache_flush=ARGS.cpu_flush,
     )
 
     if ARGS.target.kind.name == "llvm":
diff --git a/python/tvm/meta_schedule/testing/tune_relay_auto_scheduler.py b/python/tvm/auto_scheduler/testing/tune_relay.py
similarity index 93%
rename from python/tvm/meta_schedule/testing/tune_relay_auto_scheduler.py
rename to python/tvm/auto_scheduler/testing/tune_relay.py
index ff4f9313470c..48ed44ef19b7 100644
--- a/python/tvm/meta_schedule/testing/tune_relay_auto_scheduler.py
+++ b/python/tvm/auto_scheduler/testing/tune_relay.py
@@ -80,6 +80,26 @@ def _parse_args():
         type=str,
         default=None,
     )
+    args.add_argument(
+        "--number",
+        type=int,
+        default=3,
+    )
+    args.add_argument(
+        "--repeat",
+        type=int,
+        default=1,
+    )
+    args.add_argument(
+        "--min-repeat-ms",
+        type=int,
+        default=100,
+    )
+    args.add_argument(
+        "--cpu-flush",
+        type=lambda s: bool(("false", "true").index(s.lower())),  # bool("False") is True
+        required=True,
+    )
     parsed = args.parse_args()
     parsed.target = tvm.target.Target(parsed.target)
     parsed.input_shape = json.loads(parsed.input_shape)
@@ -103,10 +123,10 @@ def main():
         host=ARGS.rpc_host,
         port=ARGS.rpc_port,
         n_parallel=ARGS.rpc_workers,
-        number=3,
-        repeat=1,
-        min_repeat_ms=100,  # TODO
-        enable_cpu_cache_flush=False,  # TODO
+        number=ARGS.number,
+        repeat=ARGS.repeat,
+        min_repeat_ms=ARGS.min_repeat_ms,
+        enable_cpu_cache_flush=ARGS.cpu_flush,
     )
 
     if ARGS.target.kind.name == "llvm":
diff --git a/python/tvm/meta_schedule/testing/tune_te_auto_scheduler.py b/python/tvm/auto_scheduler/testing/tune_te.py
similarity index 85%
rename from python/tvm/meta_schedule/testing/tune_te_auto_scheduler.py
rename to python/tvm/auto_scheduler/testing/tune_te.py
index 00edb7d48d04..b02a6059e23d 100644
--- a/python/tvm/meta_schedule/testing/tune_te_auto_scheduler.py
+++ b/python/tvm/auto_scheduler/testing/tune_te.py
@@ -12,7 +12,7 @@
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitatios
+# specific language governing permissions and limitations
 # under the License.
 # pylint: disable=missing-docstring
 import argparse
@@ -61,10 +61,30 @@ def _parse_args():
         required=True,
     )
     args.add_argument(
-        "--log-dir",
+        "--work-dir",
         type=str,
         required=True,
     )
+    args.add_argument(
+        "--number",
+        type=int,
+        default=3,
+    )
+    args.add_argument(
+        "--repeat",
+        type=int,
+        default=1,
+    )
+    args.add_argument(
+        "--min-repeat-ms",
+        type=int,
+        default=100,
+    )
+    args.add_argument(
+        "--cpu-flush",
+        type=lambda s: bool(("false", "true").index(s.lower())),  # bool("False") is True
+        required=True,
+    )
     parsed = args.parse_args()
     parsed.target = tvm.target.Target(parsed.target)
     return parsed
@@ -74,7 +94,7 @@ def _parse_args():
 
 
 def main():
-    log_file = os.path.join(ARGS.log_dir, f"{ARGS.workload}.json")
+    log_file = os.path.join(ARGS.work_dir, f"{ARGS.workload}.json")
     workload_func, params = CONFIGS[ARGS.workload]
     params = params[0]  # type: ignore
     workload_func = auto_scheduler.register_workload(workload_func)
@@ -110,10 +130,10 @@ def main():
         host=ARGS.rpc_host,
         port=ARGS.rpc_port,
         n_parallel=ARGS.rpc_workers,
-        number=3,
-        repeat=1,
-        min_repeat_ms=100,
-        enable_cpu_cache_flush=False,
+        number=ARGS.number,
+        repeat=ARGS.repeat,
+        min_repeat_ms=ARGS.min_repeat_ms,
+        enable_cpu_cache_flush=ARGS.cpu_flush,
     )
 
     # Inspect the computational graph
diff --git a/python/tvm/meta_schedule/testing/__init__.py b/python/tvm/meta_schedule/testing/__init__.py
index 5d6081fa81e4..b742191e16bf 100644
--- a/python/tvm/meta_schedule/testing/__init__.py
+++ b/python/tvm/meta_schedule/testing/__init__.py
@@ -15,3 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 """Testing utilities in meta schedule"""
+
+# NOTE: Do not import any module here by default
diff --git a/python/tvm/meta_schedule/testing/tune_onnx_meta_schedule.py b/python/tvm/meta_schedule/testing/tune_onnx.py
similarity index 84%
rename from python/tvm/meta_schedule/testing/tune_onnx_meta_schedule.py
rename to python/tvm/meta_schedule/testing/tune_onnx.py
index f5c7d1cde80b..3a1b4cd5fe20 100644
--- a/python/tvm/meta_schedule/testing/tune_onnx_meta_schedule.py
+++ b/python/tvm/meta_schedule/testing/tune_onnx.py
@@ -18,12 +18,13 @@
 import argparse
 import json
 import logging
+
 import numpy as np  # type: ignore
 import onnx  # type: ignore
 import tvm
-from tvm.relay.frontend import from_onnx
 from tvm import meta_schedule as ms
 from tvm.meta_schedule.testing.custom_builder_runner import run_module_via_rpc
+from tvm.relay.frontend import from_onnx
 
 
 def _parse_args():
@@ -79,6 +80,26 @@ def _parse_args():
         type=str,
         required=True,
     )
+    args.add_argument(
+        "--number",
+        type=int,
+        default=3,
+    )
+    args.add_argument(
+        "--repeat",
+        type=int,
+        default=1,
+    )
+    args.add_argument(
+        "--min-repeat-ms",
+        type=int,
+        default=100,
+    )
+    args.add_argument(
+        "--cpu-flush",
+        type=lambda s: bool(("false", "true").index(s.lower())),  # bool("False") is True
+        required=True,
+    )
     parsed = args.parse_args()
     parsed.target = tvm.target.Target(parsed.target)
     parsed.input_shape = json.loads(parsed.input_shape)
@@ -108,31 +129,33 @@ def main():
         print(f"  input_dtype: {item['dtype']}")
         shape_dict[item["name"]] = item["shape"]
     mod, params = from_onnx(onnx_model, shape_dict, freeze_params=True)
-    alloc_repeat = 1
     runner = ms.runner.RPCRunner(
         rpc_config=ARGS.rpc_config,
         evaluator_config=ms.runner.EvaluatorConfig(
-            number=3,
-            repeat=1,
-            min_repeat_ms=100,
-            enable_cpu_cache_flush=False,
+            number=ARGS.number,
+            repeat=ARGS.repeat,
+            min_repeat_ms=ARGS.min_repeat_ms,
+            enable_cpu_cache_flush=ARGS.cpu_flush,
         ),
-        alloc_repeat=alloc_repeat,
+        alloc_repeat=1,
         max_workers=ARGS.rpc_workers,
     )
-    lib = ms.tune_relay(
-        mod=mod,
-        target=ARGS.target,
-        config=ms.TuneConfig(
-            strategy="evolutionary",
-            num_trials_per_iter=64,
-            max_trials_per_task=ARGS.num_trials,
-            max_trials_global=ARGS.num_trials,
-        ),
-        runner=runner,  # type: ignore
-        work_dir=ARGS.work_dir,
-        params=params,
-    )
+    with ms.Profiler() as profiler:
+        lib = ms.tune_relay(
+            mod=mod,
+            target=ARGS.target,
+            config=ms.TuneConfig(
+                strategy="evolutionary",
+                num_trials_per_iter=64,
+                max_trials_per_task=ARGS.num_trials,
+                max_trials_global=ARGS.num_trials,
+            ),
+            runner=runner,  # type: ignore
+            work_dir=ARGS.work_dir,
+            params=params,
+        )
+    print("Tuning Time:")
+    print(profiler.table())
     graph, rt_mod, params = lib.graph_json, lib.lib, lib.params
     input_data = {}
     for item in ARGS.input_shape:
diff --git a/python/tvm/meta_schedule/testing/tune_relay_meta_schedule.py b/python/tvm/meta_schedule/testing/tune_relay.py
similarity index 91%
rename from python/tvm/meta_schedule/testing/tune_relay_meta_schedule.py
rename to python/tvm/meta_schedule/testing/tune_relay.py
index ee26b6303da0..8663eb460c4a 100644
--- a/python/tvm/meta_schedule/testing/tune_relay_meta_schedule.py
+++ b/python/tvm/meta_schedule/testing/tune_relay.py
@@ -78,6 +78,26 @@ def _parse_args():
         type=str,
         default=None,
     )
+    args.add_argument(
+        "--number",
+        type=int,
+        default=3,
+    )
+    args.add_argument(
+        "--repeat",
+        type=int,
+        default=1,
+    )
+    args.add_argument(
+        "--min-repeat-ms",
+        type=int,
+        default=100,
+    )
+    args.add_argument(
+        "--cpu-flush",
+        type=lambda s: bool(("false", "true").index(s.lower())),  # bool("False") is True
+        required=True,
+    )
     parsed = args.parse_args()
     parsed.target = tvm.target.Target(parsed.target)
     parsed.input_shape = json.loads(parsed.input_shape)
@@ -110,16 +130,15 @@ def main():
         print(f"  input_name: {input_name}")
         print(f"  input_shape: {input_shape}")
         print(f"  input_dtype: {input_dtype}")
-    alloc_repeat = 1
     runner = ms.runner.RPCRunner(
         rpc_config=ARGS.rpc_config,
         evaluator_config=ms.runner.EvaluatorConfig(
-            number=3,
-            repeat=1,
-            min_repeat_ms=100,
-            enable_cpu_cache_flush=False,
+            number=ARGS.number,
+            repeat=ARGS.repeat,
+            min_repeat_ms=ARGS.min_repeat_ms,
+            enable_cpu_cache_flush=ARGS.cpu_flush,
         ),
-        alloc_repeat=alloc_repeat,
+        alloc_repeat=1,
         max_workers=ARGS.rpc_workers,
     )
     with ms.Profiler() as profiler:
diff --git a/python/tvm/meta_schedule/testing/tune_te_meta_schedule.py b/python/tvm/meta_schedule/testing/tune_te.py
similarity index 69%
rename from python/tvm/meta_schedule/testing/tune_te_meta_schedule.py
rename to python/tvm/meta_schedule/testing/tune_te.py
index b65761ba4fe5..b2649564bfa9 100644
--- a/python/tvm/meta_schedule/testing/tune_te_meta_schedule.py
+++ b/python/tvm/meta_schedule/testing/tune_te.py
@@ -68,6 +68,26 @@ def _parse_args():
         type=str,
         required=True,
     )
+    args.add_argument(
+        "--number",
+        type=int,
+        default=3,
+    )
+    args.add_argument(
+        "--repeat",
+        type=int,
+        default=1,
+    )
+    args.add_argument(
+        "--min-repeat-ms",
+        type=int,
+        default=100,
+    )
+    args.add_argument(
+        "--cpu-flush",
+        type=lambda s: bool(("false", "true").index(s.lower())),  # bool("False") is True
+        required=True,
+    )
     parsed = args.parse_args()
     parsed.target = tvm.target.Target(parsed.target)
     parsed.rpc_config = ms.runner.RPCConfig(
@@ -87,32 +107,34 @@ def _parse_args():
 
 
 def main():
-    alloc_repeat = 1
     runner = ms.runner.RPCRunner(
         rpc_config=ARGS.rpc_config,
         evaluator_config=ms.runner.EvaluatorConfig(
-            number=3,
-            repeat=1,
-            min_repeat_ms=100,
-            enable_cpu_cache_flush=False,
+            number=ARGS.number,
+            repeat=ARGS.repeat,
+            min_repeat_ms=ARGS.min_repeat_ms,
+            enable_cpu_cache_flush=ARGS.cpu_flush,
         ),
-        alloc_repeat=alloc_repeat,
+        alloc_repeat=1,
         max_workers=ARGS.rpc_workers,
     )
-    sch: Optional[tir.Schedule] = ms.tune_tir(
-        mod=create_te_workload(ARGS.workload, 0),
-        target=ARGS.target,
-        config=ms.TuneConfig(
-            strategy="evolutionary",
-            num_trials_per_iter=64,
-            max_trials_per_task=ARGS.num_trials,
-            max_trials_global=ARGS.num_trials,
-        ),
-        runner=runner,  # type: ignore
-        task_name=ARGS.workload,
-        work_dir=ARGS.work_dir,
-        num_threads=cpu_count(),
-    )
+    with ms.Profiler() as profiler:
+        sch: Optional[tir.Schedule] = ms.tune_tir(
+            mod=create_te_workload(ARGS.workload, 0),
+            target=ARGS.target,
+            config=ms.TuneConfig(
+                strategy="evolutionary",
+                num_trials_per_iter=64,
+                max_trials_per_task=ARGS.num_trials,
+                max_trials_global=ARGS.num_trials,
+            ),
+            runner=runner,  # type: ignore
+            task_name=ARGS.workload,
+            work_dir=ARGS.work_dir,
+            num_threads=cpu_count(),
+        )
+    print("Tuning Time:")
+    print(profiler.table())
     if sch is None:
         print("No valid schedule found!")
     else:
diff --git a/python/tvm/meta_schedule/tune.py b/python/tvm/meta_schedule/tune.py
index fd31760c1174..d3c09b41292c 100644
--- a/python/tvm/meta_schedule/tune.py
+++ b/python/tvm/meta_schedule/tune.py
@@ -430,15 +430,13 @@ def tune_tir(
         mutator_probs=mutator_probs,
         num_threads=num_threads,
     )
-    bests: List[TuningRecord] = database.get_top_k(
-        database.commit_workload(mod),
-        top_k=1,
-    )
-    if not bests:
-        return None
-    assert len(bests) == 1
-    sch = Schedule(mod)
-    bests[0].trace.apply_to_schedule(sch, remove_postproc=False)
+    with Profiler.timeit("ApplyHistoryBest"):
+        bests: List[TuningRecord] = database.get_top_k(database.commit_workload(mod), top_k=1)
+        if not bests:
+            return None
+        assert len(bests) == 1
+        sch = Schedule(mod)
+        bests[0].trace.apply_to_schedule(sch, remove_postproc=False)
     return sch
 
 
@@ -488,8 +486,10 @@ def tune_te(
     sch : Optional[Schedule]
         The tuned schedule.
     """
+    with Profiler.timeit("CreatePrimFunc"):
+        func = create_prim_func(tensors)
     return tune_tir(
-        mod=create_prim_func(tensors),
+        mod=func,
         target=target,
         config=config,
         work_dir=work_dir,