Merge pull request #1341 from IntelPython/docs/overview

[Documentation] Update to overview section of documentation
IntelPython · Feb 28, 2024 · 4b1c8a9 · 4b1c8a9
2 parents 3d8c8fe + 219d4ed
commit 4b1c8a9
Show file tree

Hide file tree

Showing 24 changed files with 410 additions and 777 deletions.
diff --git a/docs/_templates/autoapi/index.rst b/docs/_templates/autoapi/index.rst
@@ -4,9 +4,11 @@ API Reference
 This page contains auto-generated API reference documentation [#f1]_.
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 1
 
    numba_dpex/kernel_api/index
+   numba_dpex/experimental/decorators/index
+   numba_dpex/experimental/launcher/index
 
    {% for page in pages %}
    {% if page.top_level_object and page.display %}

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -27,7 +27,6 @@
     "sphinx.ext.extlinks",
     "sphinx.ext.githubpages",
     "sphinx.ext.napoleon",
-    "sphinx.ext.autosectionlabel",
     "sphinxcontrib.programoutput",
     "sphinxcontrib.googleanalytics",
     "myst_parser",
@@ -114,19 +113,10 @@
 
 # -- Auto API configurations ---------------------------------------------------
 
-
-# def skip_util_classes(app, what, name, obj, skip, options):
-#     if what == "module" and "experimental" in name:
-#         if what == "module" and "decorators" not in name:
-#             skip = True
-#     return skip
-
-
-# def setup(sphinx):
-#     sphinx.connect("autoapi-skip-member", skip_util_classes)
-
-
-autoapi_dirs = ["../../numba_dpex/kernel_api"]
+autoapi_dirs = [
+    "../../numba_dpex/kernel_api",
+    "../../numba_dpex/experimental",
+]
 autoapi_type = "python"
 
 autoapi_template_dir = "_templates/autoapi"
@@ -160,3 +150,14 @@ def prepare_jinja_env(jinja_env) -> None:
 
 
 autoapi_prepare_jinja_env = prepare_jinja_env
+
+
+def skip_member(app, what, name, obj, skip, options):
+    # skip submodules
+    if what == "module":
+        skip = True
+    return skip
+
+
+def setup(sphinx):
+    sphinx.connect("autoapi-skip-member", skip_member)
diff --git a/docs/source/experimental/index.rst b/docs/source/experimental/index.rst
@@ -0,0 +1,126 @@
+.. _index:
+.. include:: ./../ext_links.txt
+
+Experimental Features
+=====================
+
+Numba-dpex includes various experimental features that are not yet suitable for
+everyday production usage, but are included as an engineering preview.
+The most prominent experimental features currently included in numba-dpex are
+listed in this section.
+
+
+Compiling and Offloading ``dpnp`` statements
+--------------------------------------------
+
+Data Parallel Extension for NumPy* (`dpnp`_) is a drop-in NumPy* replacement
+library built using the oneAPI software stack including `oneMKL`_, `oneDPL`_ and
+`SYCL*`_. numba-dpex has experimental support for compiling a subset of dpnp
+functions. The feature is enabled by the :py:func:`numba_dpex.dpjit` decorator.
+
+An example of a supported usage of dpnp in numba-dpex is provided in the
+following code snippet:
+
+.. code-block:: python
+
+    import dpnp
+    from numba_dpex import dpjit
+
+
+    @dpjit
+    def foo():
+        a = dpnp.ones(1024, device="gpu")
+        return dpnp.sqrt(a)
+
+
+    a = foo()
+    print(a)
+    print(type(a))
+
+
+Offloading ``prange`` loops
+---------------------------
+
+numba-dpex supports using ``numba.prange`` statements with
+``dpnp.ndarray`` objects. All such ``prange`` loops are offloaded as kernels and
+executed on a device inferred using the compute-follows-data programming model.
+The next example shows how to use a ``prange`` loop.
+
+
+.. code-block:: python
+
+    import dpnp
+    from numba_dpex import dpjit, prange
+
+
+    @dpjit
+    def foo():
+        x = dpnp.ones(1024, device="gpu")
+        o = dpnp.empty_like(x)
+        for i in prange(x.shape[0]):
+            o[i] = x[i] * x[i]
+        return o
+
+
+    c = foo()
+    print(c)
+    print(type(c))
+
+
+``prange`` loop statements can also be used to write reduction loops as
+demonstrated by the following naive pairwise distance computation.
+
+.. code-block:: python
+
+  from numba_dpex import dpjit, prange
+  import dpnp
+  import dpctl
+
+
+  @dpjit
+  def pairwise_distance(X1, X2, D):
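+      """Naïve pairwise distance implementation - takes two arrays of points
+      in N dimensions and writes the matrix of Euclidean distances between
+      every point of X1 and every point of X2 into D.
+
+      Args:
+          X1 : First set of points, one point per row
+          X2 : Second set of points, one point per row
+          D  : Output distance matrix of shape (X1 rows, X2 rows)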
+      """
+      # Size of inputs
+      X1_rows = X1.shape[0]
+      X2_rows = X2.shape[0]
+      X1_cols = X1.shape[1]
+
+      float0 = X1.dtype.type(0.0)
+
+      # Outermost parallel loop over the matrix X1
+      for i in prange(X1_rows):
+          # Loop over the matrix X2
+          for j in range(X2_rows):
+              d = float0
+              # Compute Euclidean distance
+              for k in range(X1_cols):
+                  tmp = X1[i, k] - X2[j, k]
+                  d += tmp * tmp
+              # Write computed distance to distance matrix
+              D[i, j] = dpnp.sqrt(d)
+
+
+  q = dpctl.SyclQueue()
+  X1 = dpnp.ones((10, 2), sycl_queue=q)
+  X2 = dpnp.zeros((10, 2), sycl_queue=q)
+  D = dpnp.empty((10, 10), sycl_queue=q)
+
+  pairwise_distance(X1, X2, D)
+  print(D)
+
+
+Kernel fusion
+-------------
+
+.. ``numba-dpex`` can identify each NumPy* (or ``dpnp``) array expression as a
+.. data-parallel kernel and fuse them together to generate a single SYCL kernel.
+.. The kernel is automatically offloaded to the specified device where the fusion
+.. operation is invoked. Here is a simple example of a Black-Scholes formula
+.. computation where kernel fusion occurs at different ``dpnp`` math functions:
diff --git a/docs/source/ext_links.txt b/docs/source/ext_links.txt
@@ -25,3 +25,6 @@
 .. _Intel VTune Profiler: https://www.intel.com/content/www/us/en/developer/tools/oneapi/vtune-profiler.html
 .. _Intel Advisor: https://www.intel.com/content/www/us/en/developer/tools/oneapi/advisor.html
 .. _oneMKL: https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/2023-2/intel-oneapi-math-kernel-library-onemkl.html
+.. _oneDPL: https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-library.html#gs.5izf63
+.. _UXL: https://uxlfoundation.org/
+.. _oneAPI GPU optimization guide: https://www.intel.com/content/www/us/en/docs/oneapi/optimization-guide-gpu/2024-0/general-purpose-computing-on-gpu.html
diff --git a/docs/source/glossary.rst b/docs/source/glossary.rst
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -41,26 +41,26 @@ Data Parallel Extension for Numba*
 .. module:: numba_dpex
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 1
 
    overview
    getting_started
    programming_model
    user_guide/index
    autoapi/index
+   experimental/index
    useful_links
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 1
    :caption: Development
 
    contribution_guide
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 1
    :caption: Misc Notes
 
    examples
-   glossary
    license
    release-notes