docs: Change to Google Doc Style #712

Merged · 4 commits · Aug 29, 2023
7 changes: 7 additions & 0 deletions python/docs/source/conf.py
@@ -20,6 +20,7 @@
# "myst_parser",
"myst_nb",
"sphinx_copybutton",
"sphinx_autodoc_typehints", # must be after napoleon
"_extensions.gallery_directive",
]
autodoc_typehints = "description"
@@ -110,12 +111,18 @@
# Automatically extract typehints when specified and place them in
# descriptions of the relevant function/method.
autodoc_typehints = "description"
autodoc_type_aliases = {"kaskada.Arg": "kaskada.Arg"}

# Don't show class signature with the class' name.
autodoc_class_signature = "separated"

autosummary_generate = True

napoleon_preprocess_types = True
napoleon_attr_annotations = True
napoleon_use_rtype = False
typehints_use_rtype = False
typehints_document_rtype = False
typehints_defaults = 'comma'

suppress_warnings = ["mystnb.unknown_mime_type"]
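The settings above let `napoleon` parse Google-style sections while `sphinx_autodoc_typehints` pulls parameter and return types from the annotations, with `typehints_defaults = 'comma'` appending each default to its parameter line. A minimal sketch of the docstring shape this configuration targets — the function itself is hypothetical, not part of the kaskada API:

```python
def scale(values: list[float], factor: float = 2.0) -> list[float]:
    """Scale each value by a constant factor.

    Note there are no types in the docstring: napoleon parses the
    Google-style sections, and sphinx_autodoc_typehints injects the
    types (and the ``factor=2.0`` default) from the signature.

    Args:
        values: The values to scale.
        factor: The multiplier applied to each value.

    Returns:
        A new list with each value multiplied by ``factor``.
    """
    return [v * factor for v in values]
```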
6 changes: 5 additions & 1 deletion python/docs/source/index.md
@@ -1,7 +1,11 @@
---
hide-toc: true
html_theme.sidebar_secondary.remove: true
sd_hide_title: true
---

# Real-Time AI without the fuss.

<div class="px-4 py-5 my-5 text-center">
<img class="d-block mx-auto mb-4 only-light" src="_static/kaskada-positive.svg" alt="" width="50%">
<img class="d-block mx-auto mb-4 only-dark" src="_static/kaskada-negative.svg" alt="" width="50%">
@@ -12,7 +16,7 @@ html_theme.sidebar_secondary.remove: true
</div>
</div>

# Kaskada completes the Real-Time AI stack, providing...
## Kaskada completes the Real-Time AI stack, providing...

```{gallery-grid}
:grid-columns: 1 2 2 3
16 changes: 10 additions & 6 deletions python/docs/source/reference/timestream/aggregation.md
@@ -2,12 +2,16 @@

Timestream aggregations are:

Cumulative:
They reflect all values up to and including the current time.
Grouped:
They reflect the values for each entity separately.
Windowed:
They reflect the values within a specific [window](../windows.md).
Cumulative
: They reflect all values up to and including the current time.

Grouped
: They reflect the values for each entity separately.

Windowed
: They reflect the values within a specific [window](../windows.md).

## Aggregation Methods

```{eval-rst}
.. currentmodule:: kaskada
2 changes: 2 additions & 0 deletions python/docs/source/reference/timestream/arithmetic.md
@@ -8,6 +8,8 @@ For instance, `a.add(b)` may be written as `a + b`.
See the notes on the specific functions for more information.
```

## Arithmetic Methods

```{eval-rst}
.. currentmodule:: kaskada

4 changes: 3 additions & 1 deletion python/docs/source/reference/timestream/collection.md
@@ -1,7 +1,9 @@
# Arithmetic
# Collection

Timestreams allow each point to contain a collection -- a `list` or `map` -- of elements.

## Collection Methods

```{eval-rst}
.. currentmodule:: kaskada

2 changes: 2 additions & 0 deletions python/docs/source/reference/timestream/comparison.md
@@ -10,6 +10,8 @@ See the notes on the specific functions for more information.
To respect the semantics of `__eq__` and `__ne__`, `a == b` and `a != b` are *not* overloaded.
```

## Comparison Methods

```{eval-rst}
.. currentmodule:: kaskada

1 change: 1 addition & 0 deletions python/docs/source/reference/timestream/index.md
@@ -12,6 +12,7 @@ html_theme.sidebar_secondary.remove:
:exclude-members: __init__

.. autoproperty:: data_type
.. autoclass:: kaskada.Arg
```

```{toctree}
1 change: 1 addition & 0 deletions python/docs/source/reference/timestream/misc.md
@@ -13,4 +13,5 @@
Timestream.if_
Timestream.lag
Timestream.null_if
Timestream.pipe
```
3 changes: 2 additions & 1 deletion python/docs/source/reference/timestream/records.md
@@ -1,7 +1,8 @@
# Records

Record operations create, extract or manipulate Timestreams of records.
Comparison operations produce boolean Timestreams.

## Record Methods

```{eval-rst}
.. currentmodule:: kaskada
1 change: 1 addition & 0 deletions python/docs/source/reference/windows.md
@@ -6,6 +6,7 @@
.. autosummary::
:toctree: apidocs/windows/

Window
Since
Sliding
Trailing
21 changes: 20 additions & 1 deletion python/poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions python/pyproject.toml
@@ -81,6 +81,7 @@ optional = true
[tool.poetry.group.docs.dependencies]
sphinx = ">=6.0.0"
sphinx-autobuild = ">=2021.3.14"
sphinx-autodoc-typehints = ">=1.23.0"
sphinx-book-theme = "^1.0.1"
sphinx-copybutton = "^0.5.2"
sphinx-design = "^0.5.0"
2 changes: 2 additions & 0 deletions python/pysrc/kaskada/__init__.py
@@ -7,13 +7,15 @@
from ._execution import ExecutionOptions
from ._result import Result
from ._session import init_session
from ._timestream import Arg
from ._timestream import Literal
from ._timestream import Timestream
from ._timestream import record
from .udf import udf


__all__ = [
"Arg",
"ExecutionOptions",
"init_session",
"Literal",
17 changes: 5 additions & 12 deletions python/pysrc/kaskada/_execution.py
@@ -6,18 +6,11 @@
class ExecutionOptions:
"""Execution options for a query.

Attributes
----------
row_limit : Optional[int]
The maximum number of rows to return.
If not specified, all rows are returned.

max_batch_size : Optional[int]
The maximum batch size to use when returning results.
If not specified, the default batch size will be used.

materialize : bool
If true, the query will be a continuous materialization.
Attributes:
row_limit: The maximum number of rows to return. If not specified, all rows are returned.
max_batch_size: The maximum batch size to use when returning results.
If not specified, the default batch size will be used.
materialize: If true, the query will be a continuous materialization.
"""

row_limit: Optional[int] = None
82 changes: 16 additions & 66 deletions python/pysrc/kaskada/_result.py
@@ -17,34 +17,26 @@ def __init__(self, ffi_execution: _ffi.Execution) -> None:
self._ffi_execution = ffi_execution

def to_pandas(self) -> pd.DataFrame:
"""
Convert the result to a Pandas DataFrame.
"""Convert the result to a Pandas DataFrame.

Returns
-------
pd.DataFrame
Returns:
The result as a Pandas DataFrame.

Warnings
--------
This method will block on the complete results of the query and collect
all results into memory. If this is not desired, use `iter_pandas` instead.
Warnings:
This method will block on the complete results of the query and collect
all results into memory. If this is not desired, use `iter_pandas` instead.
"""
return self.to_pyarrow().to_pandas()

def to_pyarrow(self) -> pa.Table:
"""
Convert the result to a PyArrow Table.
"""Convert the result to a PyArrow Table.

Returns
-------
pa.Table
Returns:
The result as a PyArrow Table.

Warnings
--------
This method will block on the complete results of the query and collect
all results into memory. If this is not desired, use `iter_pyarrow` instead.
Warnings:
This method will block on the complete results of the query and collect
all results into memory. If this is not desired, use `iter_pyarrow` instead.
"""
batches = self._ffi_execution.collect_pyarrow()
if len(batches) == 0:
@@ -55,14 +47,7 @@ def to_pyarrow(self) -> pa.Table:
return table

def iter_pyarrow(self) -> Iterator[pa.RecordBatch]:
"""
Iterate over the results as PyArrow RecordBatches.

Yields
------
pa.RecordBatch
The next RecordBatch.
"""
"""Yield the results as PyArrow RecordBatches."""
next_batch = self._ffi_execution.next_pyarrow()
while next_batch is not None:
# Annoyingly, PyArrow doesn't support `drop_columns` on batches.
@@ -75,39 +60,18 @@ def iter_pyarrow(self) -> Iterator[pa.RecordBatch]:
next_batch = self._ffi_execution.next_pyarrow()

def iter_pandas(self) -> Iterator[pd.DataFrame]:
"""
Iterate over the results as Pandas DataFrames.

Yields
------
pd.DataFrame
The next Pandas DataFrame.
"""
"""Yield the resulting Pandas DataFrames."""
for batch in self.iter_pyarrow():
yield batch.to_pandas()

def iter_rows(self) -> Iterator[dict]:
"""
Iterate over the results as row dictionaries.

Yields
------
dict
The next row as a dictionary.
"""
"""Yield the resulting rows as dictionaries."""
for batch in self.iter_pyarrow():
for row in batch.to_pylist():
yield row

async def iter_pyarrow_async(self) -> AsyncIterator[pa.RecordBatch]:
"""
Asynchronously iterate over the results as PyArrow RecordBatches.

Yields
------
pa.RecordBatch
The next RecordBatch.
"""
"""Yield the resulting PyArrow RecordBatches asynchronously."""
next_batch = await self._ffi_execution.next_pyarrow_async()
while next_batch is not None:
# Annoyingly, PyArrow doesn't support `drop_columns` on batches.
@@ -120,26 +84,12 @@ async def iter_pyarrow_async(self) -> AsyncIterator[pa.RecordBatch]:
next_batch = await self._ffi_execution.next_pyarrow_async()

async def iter_pandas_async(self) -> AsyncIterator[pd.DataFrame]:
"""
Asynchronously iterate over the results as Pandas DataFrames.

Yields
------
pd.DataFrame
The next Pandas DataFrame.
"""
"""Yield the resulting Pandas DataFrames asynchronously."""
async for batch in self.iter_pyarrow_async():
yield batch.to_pandas()

async def iter_rows_async(self) -> AsyncIterator[dict]:
"""
Asycnchronously iterate over the results as row dictionaries.

Yields
------
dict
The next row as a dictionary.
"""
"""Yield the resulting row dictionaries asynchronously."""
async for batch in self.iter_pyarrow_async():
for row in batch.to_pylist():
yield row
20 changes: 6 additions & 14 deletions python/pysrc/kaskada/_session.py
@@ -9,16 +9,13 @@


def init_session() -> None:
"""
Initialize the Kaskada session for this Python process.
"""Initialize the Kaskada session for this Python process.

This must only be called once per session. It must be called before
any other Kaskada functions are called.

Raises
------
RuntimeError
If the session has already been initialized.
Raises:
RuntimeError: If the session has already been initialized.
"""
global _SESSION
if _SESSION is not None:
@@ -30,15 +27,10 @@ def _get_session() -> _ffi.Session:
"""
Assert that the session has been initialized.

Returns
-------
_ffi.Session
The FFI session handle.
Returns: The FFI session handle.

Raises
------
AssertionError
If the session has not been initialized.
Raises:
AssertionError: If the session has not been initialized.
"""
global _SESSION
assert _SESSION is not None, "Session has not been initialized"