[Core][Build] Move build module transformations and utilities to C++ #9103

Merged Oct 14, 2021 · 39 commits (changes shown from 30 commits)

Commits
c8eace9
Initial investigation
electriclilies Sep 10, 2021
8d6b228
More progress!
electriclilies Sep 15, 2021
ba8836e
More progress / notes
electriclilies Sep 15, 2021
b5bb9e8
rewrite build_for_device mostly in c++
electriclilies Sep 17, 2021
f9372db
More progress
electriclilies Sep 17, 2021
322f9f1
Initial split of transformations applied to device and host as post s…
mikepapadim Sep 20, 2021
0c4a01d
Combine duplicate passes after spliting mod on aot and vm flows
mikepapadim Sep 20, 2021
73640e8
Minor cleanup
mikepapadim Sep 20, 2021
6904123
Fix merge conflicts
mikepapadim Sep 21, 2021
d0ba8b8
Move target mangling to driver_api.cc
mikepapadim Sep 22, 2021
01b4ce3
Move more build utlities to cpp driver api
mikepapadim Sep 22, 2021
6176155
[Build][WIP] Moving build utilities to C++ from Python
mikepapadim Sep 24, 2021
df0c75d
Merge branch 'main' of https://github.com/apache/tvm into build_incre…
mikepapadim Sep 24, 2021
09aaf88
[Build] Remove comments
mikepapadim Sep 24, 2021
311632b
[lint] Pass black
mikepapadim Sep 24, 2021
0c28839
More formating
mikepapadim Sep 24, 2021
5008b75
Move more build functionality into cpp
mikepapadim Sep 24, 2021
f73791a
Remove comments
mikepapadim Sep 24, 2021
ba98e6f
Remove unused defs and imports
mikepapadim Sep 24, 2021
f515c6f
Address PR comments
mikepapadim Sep 25, 2021
6b366c3
More PR comments
mikepapadim Sep 25, 2021
1e24b25
More comments
mikepapadim Sep 25, 2021
af8c8e3
More comments
mikepapadim Sep 25, 2021
57b8039
Add comments on the new split function
mikepapadim Sep 26, 2021
0c4bf6d
Fix PR comments on clarity
mikepapadim Oct 4, 2021
41cf6f3
Test CI
mikepapadim Oct 7, 2021
6e4f751
Merge branch 'main' of https://github.com/apache/tvm into build_incre…
mikepapadim Oct 7, 2021
b7f27d0
Fix format
mikepapadim Oct 7, 2021
e1658b5
Refactor build
mikepapadim Oct 8, 2021
a71a0af
Expose splitted composite passes to python
mikepapadim Oct 8, 2021
0ca60e9
Format files
mikepapadim Oct 8, 2021
49cbe53
Test fix
mikepapadim Oct 12, 2021
cd33d95
Fix merge conflicts
mikepapadim Oct 12, 2021
6e1203e
Fix for annotating entry funcs on code targeting CPU
mikepapadim Oct 12, 2021
2c0305b
Prevent entry funcs to be annotated when compiling for CPU with C run…
mikepapadim Oct 12, 2021
b955799
Guard for aot executor entry
mikepapadim Oct 12, 2021
aed0765
Sphix format
mikepapadim Oct 12, 2021
c3505fa
Sanity fix
mikepapadim Oct 13, 2021
2162c85
Sphinx fix
mikepapadim Oct 13, 2021
30 changes: 30 additions & 0 deletions include/tvm/driver/driver_api.h
@@ -30,6 +30,7 @@
#define TVM_DRIVER_DRIVER_API_H_

#include <tvm/ir/module.h>
#include <tvm/ir/transform.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/support/with.h>
#include <tvm/target/target.h>
@@ -43,6 +44,34 @@
#include <vector>

namespace tvm {
using tvm::transform::Pass;

/*!
* \brief Configures and returns the composite Pass for the fused module (pre split) that contains
* device and host code.
* \param mixed_mod The original mixed module.
* \param target The device Target.
* \return The composite Pass for the fused module.
*/
TVM_DLL transform::Sequential MixedModulePassManager(IRModule mixed_mod, Target target);

/*!
* \brief Configures and returns the composite Pass for the device Target after the device/host
* split of the mixed module.
* \param mixed_mod The optimized mixed module.
* \param target The device Target.
* \return The composite Pass for the device module.
*/
TVM_DLL transform::Sequential DeviceModulePassManager(IRModule mixed_mod, Target target);

/*!
* \brief Configures and returns the composite Pass for the host Target after the device/host
* split of the mixed module.
* \param mixed_mod The optimized mixed module.
* \param target_host The host Target.
* \return The composite Pass for the host module.
*/
TVM_DLL transform::Sequential HostModulePassManager(IRModule mixed_mod, Target target_host);

/*!
* \brief Lower an IRModule (optimize it with the pass list defined in CreatePassList)
@@ -136,6 +165,7 @@ TVM_DLL runtime::Module build(const Map<Target, IRModule>& input, const Target&
* \return The built module that contains code for different processors.
*/
TVM_DLL runtime::Module build(const Map<String, IRModule>& input, const Target& target_host);

} // namespace tvm

#endif // TVM_DRIVER_DRIVER_API_H_
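For orientation, the three pass managers declared above mirror the pipeline that the removed Python helper _build_for_device (see the build_module.py diff below) used to assemble by hand. The following is a condensed, illustrative Python sketch of that three-way composition using the same TIR passes; the function name split_mixed_module_sketch and the exact pass selection are simplifications for illustration, not the literal C++ implementation.

import tvm
from tvm.ir import CallingConv


def split_mixed_module_sketch(mixed_mod, target, target_host):
    """Illustrative only: mirrors MixedModule/DeviceModule/HostModulePassManager.

    `target` and `target_host` are expected to be tvm.target.Target objects.
    """
    # Mixed phase: passes that must see host and device code together,
    # ending with the host/device split.
    mixed = tvm.transform.Sequential(
        [
            tvm.tir.transform.Apply(lambda f: f.with_attr("target", target)),
            tvm.tir.transform.VerifyMemory(),
            tvm.tir.transform.ThreadSync("shared"),
            tvm.tir.transform.MakePackedAPI(),
            tvm.tir.transform.SplitHostDevice(),
        ]
    )(mixed_mod)

    # Device phase: keep only kernel-launch functions and lower device-side constructs.
    device_mod = tvm.transform.Sequential(
        [
            tvm.tir.transform.Filter(
                lambda f: "calling_conv" in f.attrs
                and f.attrs["calling_conv"].value == CallingConv.DEVICE_KERNEL_LAUNCH
            ),
            tvm.tir.transform.LowerWarpMemory(),
            tvm.tir.transform.LowerIntrin(),
        ]
    )(mixed)

    # Host phase: everything else, retargeted and lowered for the host.
    host_mod = tvm.transform.Sequential(
        [
            tvm.tir.transform.Filter(
                lambda f: "calling_conv" not in f.attrs
                or f.attrs["calling_conv"].value != CallingConv.DEVICE_KERNEL_LAUNCH
            ),
            tvm.tir.transform.Apply(lambda f: f.with_attr("target", target_host)),
            tvm.tir.transform.LowerTVMBuiltin(),
            tvm.tir.transform.LowerIntrin(),
        ]
    )(mixed)

    return host_mod, device_mod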
138 changes: 11 additions & 127 deletions python/tvm/driver/build_module.py
@@ -20,23 +20,20 @@
"""

from typing import Union, Optional, List, Mapping
import warnings

import tvm.tir

from tvm.runtime import Module
from tvm.runtime import ndarray
from tvm.ir import container
from tvm.ir import CallingConv
from tvm.tir import PrimFunc
from tvm.ir.module import IRModule
from tvm.ir.transform import PassContext
from tvm.target import codegen
from tvm.te import tensor
from tvm.te import schedule
from tvm.target import Target
from tvm.tir.buffer import Buffer
from tvm.tir.expr import Var
from tvm.driver import _ffi_api as _driver_ffi

from . import _ffi_api as ffi

@@ -123,6 +120,7 @@ def lower(
m : IRModule
The result IRModule
"""
# TODO(@mikepapadim) introduce ffi.relay.lower_te_pass()
if isinstance(inp, IRModule):
return ffi.lower_module(inp, simple_mode)
if isinstance(inp, PrimFunc):
@@ -132,98 +130,6 @@ def lower(
raise ValueError("Expected input to be an IRModule, PrimFunc or Schedule, but got, ", type(inp))


def _build_for_device(input_mod, target, target_host):
"""Build the lowered functions for a device with the given compilation
target.

Parameters
----------
input_mod : IRModule
The schedule to be built.

target : str or :any:`tvm.target.Target`
The target and option of the compilation.

target_host : str or :any:`tvm.target.Target`
The host compilation target.

Returns
-------
fhost : IRModule
The host IRModule.

mdev : tvm.module
A module that contains device code.
"""
target, target_host = Target.check_and_update_host_consist(target, target_host)
device_type = ndarray.device(target.kind.name, 0).device_type

mod_mixed = input_mod
mod_mixed = tvm.tir.transform.Apply(lambda f: f.with_attr("target", target))(mod_mixed)

opt_mixed = [
tvm.tir.transform.VerifyMemory(),
tvm.tir.transform.MergeDynamicSharedMemoryAllocations(),
]
if len(mod_mixed.functions) == 1:
opt_mixed += [tvm.tir.transform.Apply(lambda f: f.with_attr("tir.is_entry_func", True))]

if PassContext.current().config.get("tir.detect_global_barrier", False):
opt_mixed += [tvm.tir.transform.ThreadSync("global")]
opt_mixed += [
tvm.tir.transform.ThreadSync("shared"),
tvm.tir.transform.ThreadSync("warp"),
tvm.tir.transform.InferFragment(),
tvm.tir.transform.LowerThreadAllreduce(),
tvm.tir.transform.MakePackedAPI(),
tvm.tir.transform.SplitHostDevice(),
]
mod_mixed = tvm.transform.Sequential(opt_mixed)(mod_mixed)

# device optimizations
opt_device = tvm.transform.Sequential(
[
tvm.tir.transform.Filter(
lambda f: "calling_conv" in f.attrs
and f.attrs["calling_conv"].value == CallingConv.DEVICE_KERNEL_LAUNCH
),
tvm.tir.transform.LowerWarpMemory(),
tvm.tir.transform.Simplify(),
tvm.tir.transform.LowerDeviceStorageAccessInfo(),
tvm.tir.transform.LowerCustomDatatypes(),
tvm.tir.transform.LowerIntrin(),
]
)
mod_dev = opt_device(mod_mixed)

# host optimizations
opt_host = tvm.transform.Sequential(
[
tvm.tir.transform.Filter(
lambda f: "calling_conv" not in f.attrs
or f.attrs["calling_conv"].value != CallingConv.DEVICE_KERNEL_LAUNCH
),
tvm.tir.transform.Apply(lambda f: f.with_attr("target", target_host)),
tvm.tir.transform.LowerTVMBuiltin(),
tvm.tir.transform.LowerDeviceStorageAccessInfo(),
tvm.tir.transform.LowerCustomDatatypes(),
tvm.tir.transform.LowerIntrin(),
tvm.tir.transform.CombineContextCall(),
]
)
mod_host = opt_host(mod_mixed)

if device_type == ndarray.cpu(0).device_type and target_host == target:
assert len(mod_dev.functions) == 0
if "gpu" in target.keys and len(mod_dev.functions) == 0:
warnings.warn(
"Specified target %s, but cannot find device code, did you do " "bind?" % target
)

rt_mod_dev = codegen.build_module(mod_dev, target) if len(mod_dev.functions) != 0 else None
return mod_host, rt_mod_dev


def build(
inputs: Union[schedule.Schedule, PrimFunc, IRModule, Mapping[str, IRModule]],
args: Optional[List[Union[Buffer, tensor.Tensor, Var]]] = None,
@@ -234,59 +140,47 @@
):
"""Build a function with arguments as signature. Code will be generated
for devices coupled with target information.

Parameters
----------
inputs : Union[tvm.te.schedule.Schedule, tvm.tir.PrimFunc, IRModule, Mapping[str, IRModule]]
inputs : Union[tvm.te.schedule.Schedule,
tvm.tir.PrimFunc, IRModule, Mapping[str, IRModule]]
The input to be built

args : Optional[List[Union[tvm.tir.Buffer, tensor.Tensor, Var]]]
The argument lists to the function.

target : Optional[Union[str, Target]]
The target and option of the compilation.

target_host : Optional[Union[str, Target]]
Host compilation target, if target is device.
When TVM compiles device specific program such as CUDA,
we also need host(CPU) side code to interact with the driver
setup the dimensions and parameters correctly.
target_host is used to specify the host side codegen target.
By default, llvm is used if it is enabled,
otherwise a stackvm intepreter is used.

otherwise a stackvm interpreter is used.
name : Optional[str]
The name of result function.

binds : Optional[Mapping[tensor.Tensor, tvm.tir.Buffer]]
Dictionary that maps the binding of symbolic buffer to Tensor.
By default, a new buffer is created for each tensor in the argument.

Returns
-------
ret : tvm.module
A module that combines both host and device code.

Examples
________
There are two typical example uses of this function depending on the type
of the argument `inputs`:
1. it is an IRModule.

.. code-block:: python

n = 2
A = te.placeholder((n,), name='A')
B = te.placeholder((n,), name='B')
C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
s = tvm.te.create_schedule(C.op)
m = tvm.lower(s, [A, B, C], name="test_add")
rt_mod = tvm.build(m, target="llvm")

2. it is a dict of compilation target to IRModule.

.. code-block:: python

n = 2
A = te.placeholder((n,), name='A')
B = te.placeholder((n,), name='B')
@@ -297,11 +191,11 @@ def build(
m1 = tvm.lower(s1, [A, B, C], name="test_add1")
m2 = tvm.lower(s2, [A, B, C], name="test_add2")
rt_mod = tvm.build({"llvm": m1, "cuda": m2}, target_host="llvm")

Note
----
See the note on :any:`tvm.target` on target string format.
"""

if isinstance(inputs, schedule.Schedule):
if args is None:
raise ValueError("args must be given for build from schedule")
@@ -318,7 +212,7 @@
f"Inputs must be Schedule, IRModule or dict of target to IRModule, "
f"but got {type(inputs)}."
)

# starts here
Reviewer comment (Contributor): nit: maybe sweep for leftover comments

if not isinstance(inputs, (dict, container.Map)):
target = Target.current() if target is None else target
target = target if target else "llvm"
@@ -350,21 +244,11 @@ def build(
target_input_mod, target_host
)

mod_host_all = tvm.IRModule({})

device_modules = []
for tar, input_mod in target_input_mod.items():
mod_host, mdev = _build_for_device(input_mod, tar, target_host)
mod_host_all.update(mod_host)
device_modules.append(mdev)
rt_mod_host = _driver_ffi.finalize_module(target_input_mod, target_host)

# Generate a unified host module.
rt_mod_host = codegen.build_module(mod_host_all, target_host)

# Import all modules.
for mdev in device_modules:
if mdev:
rt_mod_host.import_module(mdev)
target_input_mod, target_host = Target.check_and_update_host_consist(
target_input_mod, target_host
)

if not isinstance(target_host, Target):
target_host = Target(target_host)
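With _build_for_device gone, the host/device split, unified host codegen, and device-module import now happen behind _driver_ffi.finalize_module, as the diff above shows. A minimal end-to-end check of the user-facing path, assuming an LLVM-enabled TVM build; the kernel name vector_add is illustrative:

import tvm
from tvm import te

n = 1024
A = te.placeholder((n,), name="A")
B = te.placeholder((n,), name="B")
C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")
s = te.create_schedule(C.op)

# tvm.build now hands the lowered module to the C++ driver API for the
# host/device split and codegen instead of the removed Python helper.
rt_mod = tvm.build(s, [A, B, C], target="llvm", name="vector_add")
print(type(rt_mod))  # tvm.runtime.Module combining host (and any device) code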
6 changes: 3 additions & 3 deletions python/tvm/relay/build_module.py
@@ -123,7 +123,7 @@ def build(
to setup the dimensions and parameters correctly.
target_host is used to specify the host side codegen target.
By default, llvm is used if it is enabled,
otherwise a stackvm intepreter is used.
otherwise a stackvm interpreter is used.

params : dict of str to NDArray
Input parameters to the graph that do not change
@@ -303,7 +303,7 @@ def build(ir_mod, target=None, target_host=None, params=None, mod_name="default"
setup the dimensions and parameters correctly.
target_host is used to specify the host side codegen target.
By default, llvm is used if it is enabled,
otherwise a stackvm intepreter is used.
otherwise a stackvm interpreter is used.

params : dict of str to NDArray
Input parameters to the graph that do not change
@@ -452,7 +452,7 @@ def bind_params_by_name(func, params):
class GraphExecutor(_interpreter.Executor):
"""Wrapper around Executor interface.

This executor is used for debug and testing purpoes.
This executor is used for debug and testing purposes.

Parameters
----------
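The docstrings above describe the target / target_host split. A short, hedged example of how a user typically passes both to relay.build, assuming CUDA- and LLVM-enabled builds and a Relay module mod with a parameter dict params defined elsewhere (both are placeholders here):

import tvm
from tvm import relay

# Device kernels are generated for CUDA, while the host-side glue that sets up
# launch dimensions and parameters is generated for LLVM.
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="cuda", target_host="llvm", params=params)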
2 changes: 1 addition & 1 deletion python/tvm/target/codegen.py
@@ -36,7 +36,7 @@ def build_module(mod, target):
The corresponding module.
"""
target = Target(target) if isinstance(target, str) else target
return _ffi_api.Build(mod, target)
return _ffi_api.Codegen(mod, target)
Reviewer comment (Contributor): should we also rename the containing function, which is just a wrapper? That would mean renaming it to codegen and adding a backwards-compatible build_module that warns about deprecation (there are some examples using the warnings module, I believe).
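A minimal sketch of what that suggestion could look like in python/tvm/target/codegen.py; the codegen name and the deprecation message are illustrative and not part of this PR, which only switches the FFI call as shown above:

import warnings

from tvm.target import Target, _ffi_api


def codegen(mod, target):
    """Lower an IRModule to a runtime module via the C++ ``Codegen`` FFI entry."""
    target = Target(target) if isinstance(target, str) else target
    return _ffi_api.Codegen(mod, target)


def build_module(mod, target):
    """Deprecated alias kept for backwards compatibility."""
    warnings.warn(
        "build_module is deprecated, use codegen instead",
        DeprecationWarning,
        stacklevel=2,
    )
    return codegen(mod, target)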



def llvm_lookup_intrinsic_id(name):