Improve docstring

IntelPython · Mar 21, 2024 · 6deac11 · 6deac11
1 parent 611a597
commit 6deac11
Showing 1 changed file with 111 additions and 12 deletions.
diff --git a/numba_dpex/experimental/decorators.py b/numba_dpex/experimental/decorators.py
@@ -43,16 +43,116 @@ def _parse_func_or_sig(signature_or_function):
     return pyfunc, sigs
 
 
-def kernel(func_or_sig=None, **options):
-    """A decorator to define a kernel function.
+def kernel(function_or_signature=None, **options):
+    """A decorator to compile a function written using :py:mod:`numba_dpex.kernel_api`.
+
+    The ``kernel`` decorator triggers the compilation of a function written
+    using the data-parallel kernel programming API exposed by
+    :py:mod:`numba_dpex.kernel_api`. Such a function is conceptually
+    equivalent to a kernel function written in the C++ SYCL eDSL. The
+    decorator will compile the function based on the types of the arguments
+    to a SPIR-V binary that can be executed on either OpenCL CPU and GPU
+    devices or Intel Level Zero GPU devices.
+
+    To be compilable using the kernel decorator, a function should
+    adhere to the following semantic rules:
+
+    - The first argument to the function should be either an instance of the
+      :class:`numba_dpex.kernel_api.Item` class or an instance of the
+      :class:`numba_dpex.kernel_api.NdItem` class.
+
+    - The function should not return any value.
+
+    - The function should have at least one array type argument that can
+      either be an instance of ``dpnp.ndarray`` or an instance of
+      ``dpctl.tensor.usm_ndarray``.
+
+
+    Args:
+        function_or_signature (optional): An optional signature or list of
+            signatures for which a function is to be compiled. Passing in a
+            signature "specializes" the decorated function and no other versions of
+            the function will be compiled. A function can also be
+            directly passed instead of a signature and the signature will get
+            inferred from the function. The actual compilation happens on every
+            invocation of the :func:`numba_dpex.experimental.call_kernel` function
+            where the decorated function is passed in as an argument along with the
+            argument values for the decorated function.
+        options (optional):
+            - **debug** (bool): Whether the compilation should happen in debug
+              mode. *(Default = False)*
+            - **inline_threshold** (int): Specifies the level of inlining that
+              the compiler should attempt. *(Default = 2)*
+    Returns:
+        An instance of
+        :class:`numba_dpex.kernel_api_impl.spirv.dispatcher.KernelDispatcher`.
+        The ``KernelDispatcher`` object compiles the decorated function when
+        passed in to :func:`numba_dpex.experimental.call_kernel`.
+
+    Examples:
+
+    1. Decorate a function and pass it to ``call_kernel`` for compilation and
+       execution.
+
+    .. code-block:: python
+
+        import dpnp
+        import numba_dpex as dpex
+        from numba_dpex import kernel_api as kapi
+
+
+        # Data parallel kernel implementing vector sum
+        @dpex.kernel
+        def vecadd(item: kapi.Item, a, b, c):
+            i = item.get_id(0)
+            c[i] = a[i] + b[i]
+
+
+        N = 1024
+        a = dpnp.ones(N)
+        b = dpnp.ones_like(a)
+        c = dpnp.zeros_like(a)
+        dpex.call_kernel(vecadd, kapi.Range(N), a, b, c)
+
+    2. Specialize a kernel and then compile it directly before executing it
+       via ``call_kernel``. The kernel is specialized to expect a 1-D
+       ``dpnp.ndarray`` with either ``float32`` type elements or ``int64`` type
+       elements.
 
-    A kernel function is conceptually equivalent to a SYCL kernel function, and
-    gets compiled into either an OpenCL or a LevelZero SPIR-V binary kernel.
-    A kernel decorated Python function has the following restrictions:
+    .. code-block:: python
+
+        import dpnp
+        import numba_dpex as dpex
+        from numba_dpex import kernel_api as kapi
+        from numba_dpex import DpnpNdArray, float32, int64
+        from numba_dpex.core.types.kernel_api.index_space_ids import ItemType
+
+        i64arrty = DpnpNdArray(ndim=1, dtype=int64, layout="C")
+        f32arrty = DpnpNdArray(ndim=1, dtype=float32, layout="C")
+        item_ty = ItemType(ndim=1)
+
+        specialized_kernel = dpex.kernel(
+            [
+                (item_ty, i64arrty, i64arrty, i64arrty),
+                (item_ty, f32arrty, f32arrty, f32arrty),
+            ]
+        )
+
+
+        def vecadd(item: kapi.Item, a, b, c):
+            i = item.get_id(0)
+            c[i] = a[i] + b[i]
+
+
+        # Compile all specializations for vecadd
+        precompiled_kernels = specialized_kernel(vecadd)
+        N = 1024
+        a = dpnp.ones(N, dtype=dpnp.int64)
+        b = dpnp.ones_like(a)
+        c = dpnp.zeros_like(a)
+        # Call a specific pre-compiled version of vecadd
+        dpex.call_kernel(precompiled_kernels, kapi.Range(N), a, b, c)
 
-        * The function can not return any value.
-        * All array arguments passed to a kernel should adhere to compute
-          follows data programming model.
     """
 
     # dispatcher is a type:
@@ -67,10 +167,10 @@ def kernel(func_or_sig=None, **options):
         )
     options["_compilation_mode"] = CompilationMode.KERNEL
 
-    # FIXME: The options need to be evaluated and checked here like it is
+    # TODO: The options need to be evaluated and checked here like it is
     # done in numba.core.decorators.jit
 
-    func, sigs = _parse_func_or_sig(func_or_sig)
+    func, sigs = _parse_func_or_sig(function_or_signature)
     for sig in sigs:
         if isinstance(sig, str):
             raise NotImplementedError(
@@ -105,8 +205,7 @@ def _kernel_dispatcher(pyfunc):
 
 
 def device_func(func_or_sig=None, **options):
-    """Generates a function with a device-only calling convention, e.g.,
-    spir_func for SPIR-V based devices.
+    """Compiles a function into a "device-only" function.
 
     The decorator is used to compile overloads in the DpexKernelTarget and
     users should use the decorator to define functions that are only callable