From 71af3d2cfd7019fa2e14f9aeedd4755cbdb8b28f Mon Sep 17 00:00:00 2001 From: Merel Theisen <49397448+merelcht@users.noreply.github.com> Date: Tue, 15 Aug 2023 10:40:14 +0100 Subject: [PATCH] ci: Add docs rtd check on `kedro-datasets` (#299) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Try adding docs rtd check on kedro datasets Signed-off-by: Merel Theisen * Add Read the Docs configuration for kedro-datasets Signed-off-by: Juan Luis Cano Rodríguez Signed-off-by: Flavien Lambert --- .github/workflows/check-plugin.yml | 28 ++ Makefile | 3 + kedro-datasets/.readthedocs.yaml | 30 ++ .../source/_templates/autosummary/base.rst | 5 + .../source/_templates/autosummary/class.rst | 32 ++ .../source/_templates/autosummary/module.rst | 56 +++ .../docs/source/_templates/breadcrumbs.html | 94 ++++ .../docs/source/_templates/layout.html | 8 + kedro-datasets/docs/source/conf.py | 440 ++++++++++++++++++ kedro-datasets/docs/source/index.rst | 22 + kedro-datasets/docs/source/kedro_datasets.rst | 57 +++ 11 files changed, 775 insertions(+) create mode 100644 kedro-datasets/.readthedocs.yaml create mode 100644 kedro-datasets/docs/source/_templates/autosummary/base.rst create mode 100644 kedro-datasets/docs/source/_templates/autosummary/class.rst create mode 100644 kedro-datasets/docs/source/_templates/autosummary/module.rst create mode 100644 kedro-datasets/docs/source/_templates/breadcrumbs.html create mode 100644 kedro-datasets/docs/source/_templates/layout.html create mode 100644 kedro-datasets/docs/source/conf.py create mode 100644 kedro-datasets/docs/source/index.rst create mode 100644 kedro-datasets/docs/source/kedro_datasets.rst diff --git a/.github/workflows/check-plugin.yml b/.github/workflows/check-plugin.yml index 045c36c13..00bc8a083 100644 --- a/.github/workflows/check-plugin.yml +++ b/.github/workflows/check-plugin.yml @@ -93,6 +93,34 @@ jobs: - name: Run linter run: make plugin=${{ inputs.plugin }} lint + RTD-build: + if: 
inputs.plugin == 'kedro-datasets' + defaults: + run: + shell: bash + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + - name: Set up Python 3.8 + uses: actions/setup-python@v3 + with: + python-version: "3.8" + - name: Cache python packages + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{inputs.plugin}}-ubuntu-latest-python-"3.8" + restore-keys: ${{inputs.plugin}} + - name: Install dependencies + run: | + cd ${{ inputs.plugin }} + pip install ".[docs]" + pip install ".[test]" + - name: RTD build for kedro-datasets + run: | + make rtd + e2e-tests: if: inputs.plugin != 'kedro-datasets' defaults: diff --git a/Makefile b/Makefile index 074095ba7..1c6c7e478 100644 --- a/Makefile +++ b/Makefile @@ -60,3 +60,6 @@ test-no-spark-sequential: # kedro-datasets/snowflake tests skipped from default scope test-snowflake-only: cd kedro-datasets && pytest tests --no-cov --numprocesses 1 --dist loadfile -m snowflake + +rtd: + cd kedro-datasets && python -m sphinx -WETan -j auto -D language=en -b linkcheck -d _build/doctrees docs/source _build/linkcheck diff --git a/kedro-datasets/.readthedocs.yaml b/kedro-datasets/.readthedocs.yaml new file mode 100644 index 000000000..ca40fa54c --- /dev/null +++ b/kedro-datasets/.readthedocs.yaml @@ -0,0 +1,30 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.8" + jobs: + pre_build: + - pip freeze + - python -m sphinx -WETan -j auto -D language=en -b linkcheck -d kedro-datasets/_build/doctrees kedro-datasets/docs/source kedro-datasets/_build/linkcheck + +# Build documentation in the docs/ directory with Sphinx +sphinx: + builder: html + configuration: kedro-datasets/docs/source/conf.py + fail_on_warning: true + +# Optionally set the version of Python and requirements required to build your docs +python: + install: + - method: pip + path: 
kedro-datasets + extra_requirements: + - docs + - test diff --git a/kedro-datasets/docs/source/_templates/autosummary/base.rst b/kedro-datasets/docs/source/_templates/autosummary/base.rst new file mode 100644 index 000000000..b7556ebf7 --- /dev/null +++ b/kedro-datasets/docs/source/_templates/autosummary/base.rst @@ -0,0 +1,5 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. auto{{ objtype }}:: {{ objname }} diff --git a/kedro-datasets/docs/source/_templates/autosummary/class.rst b/kedro-datasets/docs/source/_templates/autosummary/class.rst new file mode 100644 index 000000000..10c8ff8be --- /dev/null +++ b/kedro-datasets/docs/source/_templates/autosummary/class.rst @@ -0,0 +1,32 @@ +{{ fullname | escape | underline }} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + :members: + :undoc-members: + :inherited-members: + + {% block attributes %} + {% if attributes %} + .. rubric:: Attributes + + .. autosummary:: + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block methods %} + {% if methods %} + .. rubric:: Methods + + .. autosummary:: + {% for item in all_methods %} + {%- if not item.startswith('_') %} + ~{{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + {% endif %} + {% endblock %} diff --git a/kedro-datasets/docs/source/_templates/autosummary/module.rst b/kedro-datasets/docs/source/_templates/autosummary/module.rst new file mode 100644 index 000000000..a496ca3f5 --- /dev/null +++ b/kedro-datasets/docs/source/_templates/autosummary/module.rst @@ -0,0 +1,56 @@ +{{ fullname | escape | underline }} + +.. rubric:: Description + +.. automodule:: {{ fullname }} + + {% block functions %} + {% if functions %} + .. rubric:: Functions + + .. autosummary:: + :toctree: + {% for item in functions %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block classes %} + {% if classes %} + .. rubric:: Classes + + .. 
autosummary:: + :toctree: + :template: autosummary/class.rst + {% for item in classes %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block exceptions %} + {% if exceptions %} + .. rubric:: Exceptions + + .. autosummary:: + :toctree: + :template: autosummary/class.rst + {% for item in exceptions %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + +{% block modules %} +{% if modules %} +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: +{% for item in modules %} + {{ item }} +{%- endfor %} +{% endif %} +{% endblock %} diff --git a/kedro-datasets/docs/source/_templates/breadcrumbs.html b/kedro-datasets/docs/source/_templates/breadcrumbs.html new file mode 100644 index 000000000..49fa4779f --- /dev/null +++ b/kedro-datasets/docs/source/_templates/breadcrumbs.html @@ -0,0 +1,94 @@ +{# Support for Sphinx 1.3+ page_source_suffix, but don't break old builds. #} + +{% if page_source_suffix %} +{% set suffix = page_source_suffix %} +{% else %} +{% set suffix = source_suffix %} +{% endif %} + +{# modification to enable custom github_url #} + +{% if meta is not defined or meta is none %} + {% set meta = {} %} +{% endif %} + +{% if github_url is defined %} + {% set _dummy = meta.update({'github_url': github_url}) %} +{% endif %} + +{# // modification to enable custom github_url #} + +{% if meta is defined and meta is not none %} +{% set check_meta = True %} +{% else %} +{% set check_meta = False %} +{% endif %} + +{% if check_meta and 'github_url' in meta %} +{% set display_github = True %} +{% endif %} + +{% if check_meta and 'bitbucket_url' in meta %} +{% set display_bitbucket = True %} +{% endif %} + +{% if check_meta and 'gitlab_url' in meta %} +{% set display_gitlab = True %} +{% endif %} + +
+ + + + {% if (theme_prev_next_buttons_location == 'top' or theme_prev_next_buttons_location == 'both') and (next or prev) %} + + {% endif %} +
+
diff --git a/kedro-datasets/docs/source/_templates/layout.html b/kedro-datasets/docs/source/_templates/layout.html new file mode 100644 index 000000000..ecdde06f1 --- /dev/null +++ b/kedro-datasets/docs/source/_templates/layout.html @@ -0,0 +1,8 @@ +{% extends "!layout.html" %} + +{%- block extrahead %} + +{% endblock %} diff --git a/kedro-datasets/docs/source/conf.py b/kedro-datasets/docs/source/conf.py new file mode 100644 index 000000000..4b231efe9 --- /dev/null +++ b/kedro-datasets/docs/source/conf.py @@ -0,0 +1,440 @@ +#!/usr/bin/env python3 +# +# Kedro documentation build configuration file, created by +# sphinx-quickstart on Mon Dec 18 11:31:24 2017. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. +from __future__ import annotations + +import importlib +import os +import re +import sys +from inspect import getmembers, isclass, isfunction +from pathlib import Path + +from click import secho, style +from kedro import __version__ as release + +# -- Project information ----------------------------------------------------- + +project = "kedro-datasets" +author = "kedro" + +# The short X.Y version. +version = re.match(r"^([0-9]+\.[0-9]+).*", release).group(1) + + +# -- General configuration --------------------------------------------------- +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.napoleon", + "sphinx_autodoc_typehints", + "sphinx.ext.doctest", + "sphinx.ext.ifconfig", + "sphinx.ext.viewcode", + "sphinx_copybutton", + "myst_parser", + "notfound.extension", +] + +# enable autosummary plugin (table of contents for modules/classes/class +# methods) +autosummary_generate = True +autosummary_generate_overwrite = False +napoleon_include_init_with_doc = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +source_suffix = {".rst": "restructuredtext", ".md": "markdown"} + +# The master toctree document. +master_doc = "index" + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = "en" + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path . 
+exclude_patterns = [ + "**.ipynb_checkpoints", + "_templates", + "modules.rst", + "source", + "kedro_docs_style_guide.md", +] + + +type_targets = { + "py:class": ( + "kedro.io.core.AbstractDataSet", + "kedro.io.AbstractDataSet", + "kedro.io.core.Version", + "requests.auth.AuthBase", + "google.oauth2.credentials.Credentials", + "deltalake.table.Metadata", + "DataCatalog" + ), + "py:data": ( + "typing.Any", + "typing.Union", + "typing.Optional", + "typing.Tuple", + ), + "py:exc": ( + "DataSetError", + "DatasetError", + ), +} +# https://stackoverflow.com/questions/61770698/sphinx-nit-picky-mode-but-only-for-links-i-explicitly-wrote +nitpick_ignore = [(key, value) for key in type_targets for value in type_targets[key]] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "sphinx" + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" +here = Path(__file__).parent.absolute() + +# Theme options are theme-specific and customise the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +html_theme_options = {"collapse_navigation": False, "style_external_links": True} + +# Removes, from all docs, the copyright footer. +html_show_copyright = False + +# retry before render a link broken (fix for "too many requests") +linkcheck_retries = 5 +linkcheck_rate_limit_timeout = 2.0 + +html_context = { + "display_github": True, + "github_url": "https://github.com/kedro-org/kedro-plugins/tree/main/kedro-datasets/docs/source", +} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+# html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +# html_sidebars = {} + +html_show_sourcelink = False + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = "Kedrodoc" + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [(master_doc, "Kedro.tex", "Kedro Documentation", "Kedro", "manual")] + +# -- Options for manual page output ------------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [(master_doc, "kedro", "Kedro Documentation", [author], 1)] + +# -- Options for Texinfo output ---------------------------------------------- + +# Grouping the document tree into Texinfo files. 
List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ( + master_doc, + "Kedro", + "Kedro Documentation", + author, + "Kedro", + "Kedro is a Python framework for creating reproducible, maintainable and modular data science code.", + "Data-Science", + ) +] + +# -- Options for todo extension ---------------------------------------------- + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + +# -- Kedro specific configuration ----------------------------------------- +KEDRO_MODULES = [ + "kedro_datasets", +] + + +def get_classes(module): + importlib.import_module(module) + return [obj[0] for obj in getmembers(sys.modules[module], lambda obj: isclass(obj))] + + +def get_functions(module): + importlib.import_module(module) + return [ + obj[0] for obj in getmembers(sys.modules[module], lambda obj: isfunction(obj)) + ] + + +def remove_arrows_in_examples(lines): + for i, line in enumerate(lines): + lines[i] = line.replace(">>>", "") + + +def autolink_replacements(what: str) -> list[tuple[str, str, str]]: + """ + Create a list containing replacement tuples of the form: + (``regex``, ``replacement``, ``obj``) for all classes and methods which are + imported in ``KEDRO_MODULES`` ``__init__.py`` files. The ``replacement`` + is a reStructuredText link to their documentation. + + For example, if the docstring reads: + This LambdaDataSet loads and saves ... + + Then the word ``LambdaDataSet``, will be replaced by + :class:`~kedro.io.LambdaDataSet` + + Works for plural as well, e.g: + These ``LambdaDataSet``s load and save + + Will convert to: + These :class:`kedro.io.LambdaDataSet` load and save + + Args: + what: The objects to create replacement tuples for. Possible values + ["class", "func"]. + + Returns: + A list of tuples: (regex, replacement, obj), for all "what" objects + imported in __init__.py files of ``KEDRO_MODULES``. 
+ + """ + replacements = [] + suggestions = [] + for module in KEDRO_MODULES: + if what == "class": + objects = get_classes(module) + elif what == "func": + objects = get_functions(module) + + # Look for recognised class names/function names which are + # surrounded by double back-ticks + if what == "class": + # first do plural only for classes + replacements += [ + ( + rf"``{obj}``s", + f":{what}:`~{module}.{obj}`\\\\s", + obj, + ) + for obj in objects + ] + + # singular + replacements += [ + (rf"``{obj}``", f":{what}:`~{module}.{obj}`", obj) for obj in objects + ] + + # Look for recognised class names/function names which are NOT + # surrounded by double back-ticks, so that we can log these in the + # terminal + if what == "class": + # first do plural only for classes + suggestions += [ + (rf"(?<!\w|`){obj}s(?!\w|`{{2}})", f"``{obj}``s", obj) + for obj in objects + ] + + # then singular + suggestions += [ + (rf"(?<!\w|`){obj}(?!\w|`{{2}})", f"``{obj}``", obj) for obj in objects + ] + + return replacements, suggestions + + +def log_suggestions(lines: list[str], name: str): + """Use the ``suggestions`` list to log in the terminal places where the + developer has forgotten to surround with double back-ticks class + name/function name references. + + Args: + lines: The docstring lines. + name: The name of the object whose docstring is contained in lines. + """ + title_printed = False + + for i in range(len(lines)): + if ">>>" in lines[i]: + continue + + for existing, replacement, obj in suggestions: + new = re.sub(existing, rf"{replacement}", lines[i]) + if new == lines[i]: + continue + if ":rtype:" in lines[i] or ":type " in lines[i]: + continue + + if not title_printed: + secho("-" * 50 + "\n" + name + ":\n" + "-" * 50, fg="blue") + title_printed = True + + print( + "[" + + str(i) + + "] " + + re.sub(existing, r"{}".format(style(obj, fg="magenta")), lines[i]) + ) + print( + "[" + + str(i) + + "] " + + re.sub(existing, r"``{}``".format(style(obj, fg="green")), lines[i]) + ) + + if title_printed: + print("\n") + + +def autolink_classes_and_methods(lines): + for i in range(len(lines)): + if ">>>" in lines[i]: + continue + + for existing, replacement, obj in replacements: + lines[i] = re.sub(existing, rf"{replacement}", lines[i]) + + +def autodoc_process_docstring(app, what, name, obj, options, lines): + try: + # guarded method to make sure build never fails + log_suggestions(lines, name) + autolink_classes_and_methods(lines) + except Exception as e: + print( + style( + "Failed to check for class name mentions that can be " + "converted to reStructuredText links in docstring of {}. 
" + "Error is: \n{}".format(name, str(e)), + fg="red", + ) + ) + + remove_arrows_in_examples(lines) + + +def env_override(default_appid): + build_version = os.getenv("READTHEDOCS_VERSION") + + if build_version == "latest": + return os.environ["HEAP_APPID_QA"] + if build_version == "stable": + return os.environ["HEAP_APPID_PROD"] + + return default_appid # default to Development for local builds + + +def _add_jinja_filters(app): + # https://github.com/crate/crate/issues/10833 + from sphinx.builders.latex import LaTeXBuilder + from sphinx.builders.linkcheck import CheckExternalLinksBuilder + + # LaTeXBuilder is used in the PDF docs build, + # and it doesn't have attribute 'templates' + if not ( + isinstance(app.builder, (LaTeXBuilder,CheckExternalLinksBuilder)) + ): + app.builder.templates.environment.filters["env_override"] = env_override + + +def _override_permalinks_icon(app): + # https://github.com/readthedocs/sphinx_rtd_theme/issues/98#issuecomment-1503211439 + app.config.html_permalinks_icon = "¶" + + +def setup(app): + app.connect("builder-inited", _add_jinja_filters) + app.connect("builder-inited", _override_permalinks_icon) + app.connect("autodoc-process-docstring", autodoc_process_docstring) + +# (regex, restructuredText link replacement, object) list +replacements = [] + +# (regex, class/function name surrounded with back-ticks, object) list +suggestions = [] + +try: + # guarded code to make sure build never fails + replacements_f, suggestions_f = autolink_replacements("func") + replacements_c, suggestions_c = autolink_replacements("class") + replacements = replacements_f + replacements_c + suggestions = suggestions_f + suggestions_c +except Exception as e: + print( + style( + "Failed to create list of (regex, reStructuredText link " + "replacement) for class names and method names in docstrings. 
" + "Error is: \n{}".format(str(e)), + fg="red", + ) + ) + +user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0" + +myst_heading_anchors = 5 diff --git a/kedro-datasets/docs/source/index.rst b/kedro-datasets/docs/source/index.rst new file mode 100644 index 000000000..84decee2a --- /dev/null +++ b/kedro-datasets/docs/source/index.rst @@ -0,0 +1,22 @@ +.. Kedro documentation master file, created by + sphinx-quickstart on Mon Dec 18 11:31:24 2017. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + + +API documentation +================= + +.. autosummary:: + :toctree: + :caption: API documentation + :template: autosummary/module.rst + :recursive: + + kedro_datasets + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` diff --git a/kedro-datasets/docs/source/kedro_datasets.rst b/kedro-datasets/docs/source/kedro_datasets.rst new file mode 100644 index 000000000..18bff8808 --- /dev/null +++ b/kedro-datasets/docs/source/kedro_datasets.rst @@ -0,0 +1,57 @@ +kedro_datasets +============== + +.. rubric:: Description + +.. automodule:: kedro_datasets + +.. rubric:: Classes + +.. 
autosummary:: + :toctree: + :template: autosummary/class.rst + + kedro_datasets.api.APIDataSet + kedro_datasets.biosequence.BioSequenceDataSet + kedro_datasets.dask.ParquetDataSet + kedro_datasets.databricks.ManagedTableDataSet + kedro_datasets.email.EmailMessageDataSet + kedro_datasets.geopandas.GeoJSONDataSet + kedro_datasets.holoviews.HoloviewsWriter + kedro_datasets.json.JSONDataSet + kedro_datasets.matplotlib.MatplotlibWriter + kedro_datasets.networkx.GMLDataSet + kedro_datasets.networkx.GraphMLDataSet + kedro_datasets.networkx.JSONDataSet + kedro_datasets.pandas.CSVDataSet + kedro_datasets.pandas.DeltaTableDataSet + kedro_datasets.pandas.ExcelDataSet + kedro_datasets.pandas.FeatherDataSet + kedro_datasets.pandas.GBQQueryDataSet + kedro_datasets.pandas.GBQTableDataSet + kedro_datasets.pandas.GenericDataSet + kedro_datasets.pandas.HDFDataSet + kedro_datasets.pandas.JSONDataSet + kedro_datasets.pandas.ParquetDataSet + kedro_datasets.pandas.SQLQueryDataSet + kedro_datasets.pandas.SQLTableDataSet + kedro_datasets.pandas.XMLDataSet + kedro_datasets.pickle.PickleDataSet + kedro_datasets.pillow.ImageDataSet + kedro_datasets.plotly.JSONDataSet + kedro_datasets.plotly.PlotlyDataSet + kedro_datasets.polars.CSVDataSet + kedro_datasets.redis.PickleDataSet + kedro_datasets.snowflake.SnowparkTableDataSet + kedro_datasets.spark.DeltaTableDataSet + kedro_datasets.spark.SparkDataSet + kedro_datasets.spark.SparkHiveDataSet + kedro_datasets.spark.SparkJDBCDataSet + kedro_datasets.spark.SparkStreamingDataSet + kedro_datasets.svmlight.SVMLightDataSet + kedro_datasets.tensorflow.TensorFlowModelDataSet + kedro_datasets.text.TextDataSet + kedro_datasets.tracking.JSONDataSet + kedro_datasets.tracking.MetricsDataSet + kedro_datasets.video.VideoDataSet + kedro_datasets.yaml.YAMLDataSet