googleapis · dandhlee · Aug 23, 2021 · Aug 21, 2021 · Aug 21, 2021 · Aug 21, 2021
@@ -33,7 +33,7 @@ python3 -m pip install --user gcp-docuploader
 python3 -m pip install -e .
 python3 -m pip install recommonmark
 # Required for some client libraries:
-python3 -m pip install --user django==2.2
+python3 -m pip install --user django==2.2 ipython
 
 # Retrieve unique repositories to regenerate the YAML with.
 for bucket_item in $(gsutil ls 'gs://docs-staging-v2/docfx-python*' | sort -u -t- -k5,5); do
@@ -74,7 +74,7 @@ for bucket_item in $(gsutil ls 'gs://docs-staging-v2/docfx-python*' | sort -u -t
   fi
 
   for tag in ${GITHUB_TAGS}; do
-    git checkout ${tag}
+    #git checkout ${tag}
 
     # Use the latest noxfile for all tags.
     cp ../"noxfile.py" .
@@ -100,7 +100,7 @@ for bucket_item in $(gsutil ls 'gs://docs-staging-v2/docfx-python*' | sort -u -t
     # Test running with the plugin version locally.
     if [[ "${TEST_PLUGIN}" == "true" ]]; then
       # --no-use-pep517 is required for django-spanner install issue: see https://github.com/pypa/pip/issues/7953
-      python3 -m pip install --user --no-use-pep517 -e .
+      python3 -m pip install --user --no-use-pep517 -e .[all]
       sphinx-build -T -N -D extensions=sphinx.ext.autodoc,sphinx.ext.autosummary,docfx_yaml.extension,sphinx.ext.intersphinx,sphinx.ext.coverage,sphinx.ext.napoleon,sphinx.ext.todo,sphinx.ext.viewcode,recommonmark -b html -d docs/_build/doctrees/ docs/ docs/_build/html/
       continue
     fi

@@ -22,6 +22,8 @@
 import inspect
 import re
 import copy
+import shutil
+from pathlib import Path
 from functools import partial
 from itertools import zip_longest
 
@@ -45,6 +47,8 @@
 from .directives import RemarksDirective, TodoDirective
 from .nodes import remarks
 
+import subprocess
+from docuploader import shell
 
 class Bcolors:
     HEADER = '\033[95m'
@@ -80,7 +84,31 @@ class Bcolors:
 PROPERTY = 'property'
 
 
+# Run sphinx-build with Markdown builder in the plugin.
+def run_sphinx_markdown():
+    cwd = os.getcwd()
+    # Skip running sphinx-build for Markdown for some unit tests.
+    # Not required other than to output DocFX YAML.
+    if "docs" in cwd:
+        return
+
+    return shell.run(
+        [
+            "sphinx-build",
+            "-M",
+            "markdown",
+            "docs/",
+            "docs/_build",
+        ],
+        hide_output=False
+    )
+
+
 def build_init(app):
+    print("Running sphinx-build with Markdown first...")
+    run_sphinx_markdown()
+    print("Completed running sphinx-build with Markdown files.")
+
     """
     Set up environment data
     """
@@ -103,6 +131,8 @@ def build_init(app):
     app.env.docfx_uid_names = {}
     # This stores file path for class when inspect cannot retrieve file path
     app.env.docfx_class_paths = {}
+    # This stores the name and href of the markdown pages.
+    app.env.markdown_pages = []
 
     app.env.docfx_xrefs = {}
 
@@ -930,11 +960,11 @@ def find_unique_name(package_name, entries):
 # Used to disambiguate names that have same entries.
 # Returns a dictionary of names that are disambiguated in the form of:
 # {uidname: disambiguated_name}
-def disambiguate_toc_name(toc_yaml):
+def disambiguate_toc_name(pkg_toc_yaml):
     name_entries = {}
     disambiguated_names = {}
 
-    for module in toc_yaml:
+    for module in pkg_toc_yaml:
         module_name = module['name']
         if module_name not in name_entries:
             name_entries[module_name] = {}
@@ -955,7 +985,7 @@ def disambiguate_toc_name(toc_yaml):
             # Update the dictionary of dismabiguated names
             disambiguated_names.update(disambiguate_toc_name(module['items']))
 
-    for module in toc_yaml:
+    for module in pkg_toc_yaml:
         module_name = module['name']
         # Check if there are multiple entires of module['name'], disambiguate if needed.
         if name_entries[module_name][module_name] > 1:
@@ -965,11 +995,11 @@ def disambiguate_toc_name(toc_yaml):
     return disambiguated_names
 
 
-# Combines toc_yaml entries with similar version headers.
-def group_by_package(toc_yaml):
-    new_toc_yaml = []
+# Combines pkg_toc_yaml entries with similar version headers.
+def group_by_package(pkg_toc_yaml):
+    new_pkg_toc_yaml = []
     package_groups = {}
-    for module in toc_yaml:
+    for module in pkg_toc_yaml:
         package_group = find_package_group(module['uidname'])
         if package_group not in package_groups:
             package_name = pretty_package_name(package_group)
@@ -981,9 +1011,9 @@ def group_by_package(toc_yaml):
         package_groups[package_group]['items'].append(module)
 
     for package_group in package_groups:
-        new_toc_yaml.append(package_groups[package_group])
+        new_pkg_toc_yaml.append(package_groups[package_group])
 
-    return new_toc_yaml
+    return new_pkg_toc_yaml
 
 
 # Given the full uid, return the package group including its prefix.
@@ -1002,20 +1032,66 @@ def pretty_package_name(package_group):
     return " ".join(capitalized_name)
 
 
+# For a given markdown file, extract its header line.
+def extract_header_from_markdown(mdfile_iterator):
+    for header_line in mdfile_iterator:
+        # Ignore licenses and other non-headers prior to the header.
+        if "#" in header_line:
+            break
+
+    if header_line.count("#") != 1:
+        raise ValueError(f"The first header of {mdfile_iterator.name} is not a h1 header: {header_line}")
+
+    # Extract the header name.
+    return header_line.strip("#").strip()
+
+
+# Given generated markdown files, incorporate them into the docfx_yaml output.
+# The markdown file metadata will be added to top level of the TOC.
+def find_markdown_pages(app, outdir):
+    # Use this to ignore markdown files that are unnecessary.
+    files_to_ignore = [
+        "index.md",     # merge index.md and README.md and index.yaml later
+        "reference.md", # Reference docs overlap with Overview. Will try and incorporate this in later.
+        "readme.md",    # README does not seem to work in cloud site
+        "upgrading.md", # Currently the formatting breaks, will need to come back to it.
+    ]
+
+    markdown_dir = Path(app.builder.outdir).parent / "markdown"
+    if not markdown_dir.exists():
+        print("There's no markdown file to move.")
+        return
+
+    # For each file, if it is a markdown file move to the top level pages.
+    for mdfile in markdown_dir.iterdir():
+        if mdfile.is_file() and mdfile.name.lower() not in files_to_ignore:
+            shutil.copy(mdfile, f"{outdir}/{mdfile.name.lower()}")
+
+            # Extract the header name for TOC.
+            with open(mdfile) as mdfile_iterator:
+                name = extract_header_from_markdown(mdfile_iterator)
+
+            # Add the file to the TOC later.
+            app.env.markdown_pages.append({
+                'name': name,
+                'href': mdfile.name.lower(),
+            })
+
+
 def build_finished(app, exception):
     """
     Output YAML on the file system.
     """
 
     # Used to get rid of the uidname field for cleaner toc file.
-    def sanitize_uidname_field(toc_yaml):
-        for module in toc_yaml:
+    def sanitize_uidname_field(pkg_toc_yaml):
+        for module in pkg_toc_yaml:
             if 'items' in module:
                 sanitize_uidname_field(module['items'])
             module.pop('uidname')
 
-    def find_node_in_toc_tree(toc_yaml, to_add_node):
-        for module in toc_yaml:
+    def find_node_in_toc_tree(pkg_toc_yaml, to_add_node):
+        for module in pkg_toc_yaml:
             if module['uidname'] == to_add_node:
                 return module
 
@@ -1048,7 +1124,10 @@ def convert_module_to_package_if_needed(obj):
     ))
     ensuredir(normalized_outdir)
 
-    toc_yaml = []
+    # Add markdown pages to the configured output directory.
+    find_markdown_pages(app, normalized_outdir)
+
+    pkg_toc_yaml = []
     # Used to record filenames dumped to avoid confliction
     # caused by Windows case insensitive file system
     file_name_set = set()
@@ -1197,7 +1276,7 @@ def convert_module_to_package_if_needed(obj):
             # Build nested TOC
             if uid.count('.') >= 1:
                 parent_level = '.'.join(uid.split('.')[:-1])
-                found_node = find_node_in_toc_tree(toc_yaml, parent_level)
+                found_node = find_node_in_toc_tree(pkg_toc_yaml, parent_level)
 
                 if found_node:
                     found_node.pop('uid', 'No uid found')
@@ -1222,40 +1301,41 @@ def convert_module_to_package_if_needed(obj):
                         file_name = obj['source']['path'].split("/")[-1][:-3]
                     except AttributeError:
                         file_name = node_name
-                    toc_yaml.append({
+                    pkg_toc_yaml.append({
                       'name': file_name, 
                       'uidname': uid, 
                       'uid': uid
                     })
 
             else:
-                toc_yaml.append({
+                pkg_toc_yaml.append({
                   'name': node_name, 
                   'uidname': uid, 
                   'uid': uid
                 })
 
-    if len(toc_yaml) == 0:
+    # Exit if there are no generated YAML pages or Markdown pages.
+    if len(pkg_toc_yaml) == 0 and len(app.env.markdown_pages) == 0:
         raise RuntimeError("No documentation for this module.")
 
     # Perform additional disambiguation of the name
-    disambiguated_names = disambiguate_toc_name(toc_yaml)
+    disambiguated_names = disambiguate_toc_name(pkg_toc_yaml)
 
-    toc_yaml = group_by_package(toc_yaml)
+    pkg_toc_yaml = group_by_package(pkg_toc_yaml)
 
     # Keeping uidname field carrys over onto the toc.yaml files, we need to
     # be keep using them but don't need them in the actual file
-    toc_yaml_with_uid = copy.deepcopy(toc_yaml)
+    pkg_toc_yaml_with_uid = copy.deepcopy(pkg_toc_yaml)
 
-    sanitize_uidname_field(toc_yaml)
+    sanitize_uidname_field(pkg_toc_yaml)
 
     toc_file = os.path.join(normalized_outdir, 'toc.yml')
     with open(toc_file, 'w') as writable:
         writable.write(
             dump(
                 [{
                     'name': app.config.project,
-                    'items': [{'name': 'Overview', 'uid': 'project-' + app.config.project}] + toc_yaml
+                    'items': [{'name': 'Overview', 'uid': 'project-' + app.config.project}] + app.env.markdown_pages + pkg_toc_yaml
                 }],
                 default_flow_style=False,
             )
@@ -1306,7 +1386,7 @@ def convert_module_to_package_if_needed(obj):
     index_file = os.path.join(normalized_outdir, 'index.yml')
     index_children = []
     index_references = []
-    for package in toc_yaml_with_uid:
+    for package in pkg_toc_yaml_with_uid:
         for item in package.get("items"):
             index_children.append(item.get('uidname', ''))
             index_references.append({

@@ -19,8 +19,10 @@
 description = 'Sphinx Python Domain to DocFX YAML Generator'
 version = '0.5.2'
 dependencies = [
+    'gcp-docuploader',
     'PyYAML',
     'sphinx',
+    'sphinx-markdown-builder',
     'sphinxcontrib.napoleon',
     'unidecode',
     'wheel>=0.24.0'

@@ -0,0 +1,4 @@
+#Test header for a simple markdown file.  
+
+##Content header
+This is a simple line followed by an h2 header.
@@ -0,0 +1,4 @@
+##Test header for a simple markdown file.  
+
+##Content header
+This is a simple line followed by an h2 header.
@@ -0,0 +1,18 @@
+<!--
+Copyright 2021 Google LLC
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    https://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+
+#Test header for a simple markdown file.  
+
+##Content header
+This is a simple line followed by an h2 header.
@@ -0,0 +1,5 @@
+This is a simple markdown file with no header.
+
+When running the test on this file to extract its header,
+
+it should throw an exception.
@@ -7,6 +7,7 @@
 from docfx_yaml.extension import find_package_group
 from docfx_yaml.extension import pretty_package_name
 from docfx_yaml.extension import group_by_package
+from docfx_yaml.extension import extract_header_from_markdown
 
 import unittest
 
@@ -396,7 +397,6 @@ def test_extract_docstring_info_with_xref(self):
         }
 
         top_summary_got = _extract_docstring_info(summary_info_got, summary, "")
-        self.maxDiff = None
         # Same as the top summary from previous example, compare with that
         self.assertEqual(top_summary_got, self.top_summary1_want)
         self.assertDictEqual(summary_info_got, summary_info_want)
@@ -538,5 +538,42 @@ def test_group_by_package(self):
         self.assertCountEqual(toc_yaml_got, toc_yaml_want)
 
 
+    def test_extract_header_from_markdown(self):
+        # Check the header for a normal markdown file.
+        header_line_want = "Test header for a simple markdown file."
+
+        mdfile = open('tests/markdown_example.md', 'r')
+        header_line_got = extract_header_from_markdown(mdfile)
+
+        self.assertEqual(header_line_got, header_line_want)
+        mdfile.close()
+
+        # The header should be the same even with the license header.
+        header_line_with_license_want = header_line_want
+
+        mdfile = open('tests/markdown_example_header.md', 'r')
+        header_line_with_license_got = extract_header_from_markdown(mdfile)
+
+        self.assertEqual(header_line_with_license_got, header_line_with_license_want)
+        mdfile.close()
+
+
+    def test_extract_header_from_markdown_check_error(self):
+        # Check an exception is thrown for markdown files that's not well
+        # formatted.
+        mdfile = open('tests/markdown_example_bad.md', 'r')
+        with self.assertRaises(ValueError):
+            header_line = extract_header_from_markdown(mdfile)
+
+        mdfile.close()
+
+        mdfile = open('tests/markdown_example_noheader.md', 'r')
+        with self.assertRaises(ValueError):
+            header_line = extract_header_from_markdown(mdfile)
+
+        mdfile.close()
+
+
+
 if __name__ == '__main__':
     unittest.main()