Adding source files for docs.delta.io

tdas · Dec 1, 2023 · 4fbe32e · 4fbe32e
1 parent ca82bef
commit 4fbe32e
Show file tree

Hide file tree

Showing 114 changed files with 18,066 additions and 1 deletion.
diff --git a/docs/api-docs.css → docs/apis/api-docs.css b/docs/api-docs.css → docs/apis/api-docs.css
diff --git a/docs/api-docs.js → docs/apis/api-docs.js b/docs/api-docs.js → docs/apis/api-docs.js
diff --git a/docs/api-javadocs.css → docs/apis/api-javadocs.css b/docs/api-javadocs.css → docs/apis/api-javadocs.css
diff --git a/docs/api-javadocs.js → docs/apis/api-javadocs.js b/docs/api-javadocs.js → docs/apis/api-javadocs.js
diff --git a/docs/generate_api_docs.py → docs/apis/generate_api_docs.py b/docs/generate_api_docs.py → docs/apis/generate_api_docs.py
@@ -27,7 +27,7 @@ def main():
     verbose = args.verbose
 
     # Set up the directories
-    docs_root_dir = os.path.dirname(os.path.realpath(__file__))
+    docs_root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
     repo_root_dir = os.path.dirname(docs_root_dir)
 
     # --- dirs where docs are generated

diff --git a/docs/python/Makefile → docs/apis/python/Makefile b/docs/python/Makefile → docs/apis/python/Makefile
diff --git a/docs/python/conf.py → docs/apis/python/conf.py b/docs/python/conf.py → docs/apis/python/conf.py
diff --git a/docs/python/index.rst → docs/apis/python/index.rst b/docs/python/index.rst → docs/apis/python/index.rst
diff --git a/docs/generate_docs.py b/docs/generate_docs.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import subprocess
+import random
+import shutil
+import string
+import tempfile
+
+
+def main():
+    """Script to manage the deployment of Delta Lake docs to the hosting bucket.
+       To build the docs:
+       $ generate_docs --livehtml
+
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--livehtml",
+        action="store_true",
+        help="Build and serve a local build of docs")
+    parser.add_argument(
+        "--api-docs",
+        action="store_true",
+        help="Generate the API docs")
+
+    args = parser.parse_args()
+
+    docs_root_dir = os.path.dirname(os.path.realpath(__file__))
+    api_docs_root_dir = os.path.join(docs_root_dir, "apis")
+
+    with WorkingDirectory(docs_root_dir):
+        html_output = os.path.join(docs_root_dir, '_site', 'html')
+        html_source = os.path.join(docs_root_dir, 'source')
+        print("Building content")
+        env = { "TARGET_CLOUD": "delta-oss-only" }
+
+        sphinx_cmd = "sphinx-build"
+        if args.livehtml:
+            sphinx_cmd = "sphinx-autobuild"
+        build_docs_args = "%s -b html -d /tmp/build/doctrees %s %s" % (
+            sphinx_cmd, html_source, html_output)
+        if args.api_docs:
+            copy_api_docs(args.api_docs_location, html_output)
+        run_cmd(build_docs_args, env=env, shell=True, stream_output=True)
+
+
+def copy_api_docs(apl_docs_dir, target_loc):
+    print("Building API docs")
+    with WorkingDirectory(apl_docs_dir):
+        run_cmd(["python generate_api_docs.py"], shell=True, stream_output=True)
+        assert os.path.exists(os.path.join(apl_docs_dir, "_site", "api")), \
+            "Doc generation didn't create the expected api directory"
+        shutil.copytree(os.path.join(apl_docs_dir, "_site", "api"), os.path.join(target_loc, "api"))
+
+
+class WorkingDirectory(object):
+    def __init__(self, working_directory):
+        self.working_directory = working_directory
+        self.old_workdir = os.getcwd()
+
+    def __enter__(self):
+        os.chdir(self.working_directory)
+
+    def __exit__(self, type, value, traceback):
+        os.chdir(self.old_workdir)
+
+
+def run_cmd(cmd, throw_on_error=True, env=None, stream_output=False, **kwargs):
+    """Runs a command as a child process.
+
+    A convenience wrapper for running a command from a Python script.
+    Keyword arguments:
+    cmd -- the command to run, as a list of strings
+    throw_on_error -- if true, raises an Exception if the exit code of the program is nonzero
+    env -- additional environment variables to be defined when running the child process
+    stream_output -- if true, does not capture standard output and error; if false, captures these
+      streams and returns them
+
+    Note on the return value: If stream_output is true, then only the exit code is returned. If
+    stream_output is false, then a tuple of the exit code, standard output and standard error is
+    returned.
+    """
+    cmd_env = os.environ.copy()
+    if env:
+        cmd_env.update(env)
+
+    if stream_output:
+        child = subprocess.Popen(cmd, env=cmd_env, **kwargs)
+        exit_code = child.wait()
+        if throw_on_error and exit_code != 0:
+            raise Exception("Non-zero exitcode: %s" % exit_code)
+        return exit_code
+    else:
+        child = subprocess.Popen(
+            cmd,
+            env=cmd_env,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            **kwargs)
+        (stdout, stderr) = child.communicate()
+        exit_code = child.wait()
+        if throw_on_error and exit_code != 0:
+            raise Exception(
+                "Non-zero exitcode: %s\n\nSTDOUT:\n%s\n\nSTDERR:%s" %
+                (exit_code, stdout, stderr))
+        return exit_code, stdout.decode("utf-8"), stderr.decode("utf-8")
+
+
+# Run the docs build only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
diff --git a/docs/remove_edge.py b/docs/remove_edge.py
@@ -0,0 +1,72 @@
+import sys
+import shutil
+
+
+
+def remove_edge(input_filename, output_filename):
+    with open(input_filename, 'r') as input_file, open(output_filename, 'w') as output_file:
+        line_index = 0
+        is_in_section = False
+        section_header = None
+        section_header_indentation_count = -1
+        section_indentation_count = -1
+        section_buffer = []
+        print(f"\nRemoving edge sections")
+
+        for line in input_file:
+            line_index = line_index + 1
+
+            line_without_indentation = line.lstrip(' ')
+            indentation_count = len(line) - len(line_without_indentation)
+
+            if is_in_section:
+                if len(line.lstrip('\t')) < len(line):
+                    raise Exception("file has tabs, line: '%s'" % line)
+                if len(line.strip()) > 0:
+                    if section_indentation_count < 0 and indentation_count > 0:
+                        section_indentation_count = indentation_count
+                        # print("  section indent = " + str(section_indentation_count))
+
+                    if indentation_count < section_indentation_count or indentation_count <= section_header_indentation_count:
+                        is_in_section = False
+                        if section_header.startswith(".. delta-oss-only::"):
+                            for buf_line in section_buffer:
+                                if len(buf_line.strip()) > 0:
+                                    output_file.write(buf_line[section_indentation_count:])
+                                else:
+                                    output_file.write(buf_line.lstrip(' '))
+                        section_buffer.clear()
+            if is_in_section:
+                section_buffer.append(line)
+            else:
+                if line_without_indentation.startswith(".. delta-oss-only::") or \
+                        line_without_indentation.startswith(".. delta-edge-only::") or \
+                        line_without_indentation.startswith(".. aws::") or \
+                        line_without_indentation.startswith(".. gcp::") or \
+                        line_without_indentation.startswith(".. azure::") or \
+                        line_without_indentation.startswith(".. aws-gcp::") or \
+                        line_without_indentation.startswith(".. azure-gcp::") or \
+                        line_without_indentation.startswith(".. azure-aws::"):
+                    is_in_section = True
+                    section_header = line_without_indentation.strip()
+                    section_header_indentation_count = indentation_count
+                    section_indentation_count = -1
+                    # print("new section '%s' at line %s" % (section_header, str(line_index)))
+                else:
+                    output_file.write(line)
+    print(f"\nRemoved edge sections, and file successfully written to {output_filename}")
+
+
+def copy_file(source_path, destination_path):
+    shutil.copyfile(source_path, destination_path)
+    print(f"File '{source_path}' copied to '{destination_path}'.")
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: python remove_edge.py <input_filename>")
+    else:
+        input_filename = sys.argv[1]
+        output_filename = f"{input_filename}.removed"
+        copy_file(input_filename, f"{input_filename}.original")
+        remove_edge(input_filename, output_filename)
+        copy_file(output_filename, input_filename)
diff --git a/docs/shared/extensions/code_language_tabs/README.md b/docs/shared/extensions/code_language_tabs/README.md
@@ -0,0 +1,81 @@
+# Code language tabs
+
+The module adds a new directive, `code-language-tabs` to [Sphinx](http://www.sphinx-doc.org/).
+
+Tested with Sphinx 1.8.5.
+
+## Usage
+
+### conf.py
+
+```python
+extensions = ['code_language_tabs']
+```
+
+### docs source
+
+#### short form
+
+```markdown
+.. code-language-tabs::
+
+    ```python
+    python code
+    ```
+
+    ```java
+    java code
+    ```
+
+    ```dotnet
+    dotnet code
+    ```
+
+```
+
+#### mixed form
+
+``` markdown
+.. code-language-tabs::
+
+    .. lang:: python
+
+        some arbitrary python related content
+
+    .. lang:: java
+
+        some arbitrary java related content
+
+    .. lang:: dotnet
+
+        some arbitrary .net related content
+```
+
+## Notes
+
+* In the `short form` the directive may contain only `literal blocks`. If anything else is found inside the body, an exception is thrown.
+* In the `mixed form` the directive may contain only `lang` children, which represent particular pages. The children themselves may contain any arbitrary content.
+* You cannot mix the `short form` and the `mixed form` in a single directive body.
+* The order of the blocks in the final HTML is the same as in the sources. The directive does not reorder blocks.
+
+## Output mode
+
+The extension may need to behave differently depending on the output being built, so there is a config setting for changing the extension's output mode.
+
+### code-language-tabs-mode
+
+Type: `str`
+
+Default: `html`
+
+Options:
+
+* `html` - produce an output for further handling
+* `markdown` - produce an output compatible with Microsoft docs
+* `off` - outputs the directive content directly, as if no directive were used
+
+To pass a config value through the command line, see the [command-line options](https://www.sphinx-doc.org/en/1.5/man/sphinx-build.html):
+
+```bash
+sphinx-build -D code-language-tabs-mode=markdown <other_options>
+```
diff --git a/docs/shared/extensions/code_language_tabs/__init__.py b/docs/shared/extensions/code_language_tabs/__init__.py
@@ -0,0 +1,13 @@
+"""Code language tabs initialization."""
+from sphinx.application import Sphinx
+from .directives import CodeLanguageTabs, Lang
+
+
+def setup(app: Sphinx):
+    """Set up the required directives and configs.
+
+    :param Sphinx app: The current sphinx app.
+    """
+    app.add_directive('code-language-tabs', CodeLanguageTabs)
+    app.add_directive('lang', Lang)
+    app.add_config_value('code-language-tabs-mode', 'html', True)