Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: add markdown page support #102

Merged
merged 13 commits into from
Aug 23, 2021
6 changes: 3 additions & 3 deletions .kokoro/generate-docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ python3 -m pip install --user gcp-docuploader
python3 -m pip install -e .
python3 -m pip install recommonmark
# Required for some client libraries:
python3 -m pip install --user django==2.2
python3 -m pip install --user django==2.2 ipython

# Retrieve unique repositories to regenerate the YAML with.
for bucket_item in $(gsutil ls 'gs://docs-staging-v2/docfx-python*' | sort -u -t- -k5,5); do
Expand Down Expand Up @@ -74,7 +74,7 @@ for bucket_item in $(gsutil ls 'gs://docs-staging-v2/docfx-python*' | sort -u -t
fi

for tag in ${GITHUB_TAGS}; do
git checkout ${tag}
#git checkout ${tag}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems unrelated?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, it was to test only against HEAD and not the latest release for some library version issues. I'll have that reverted.


# Use the latest noxfile for all tags.
cp ../"noxfile.py" .
Expand All @@ -100,7 +100,7 @@ for bucket_item in $(gsutil ls 'gs://docs-staging-v2/docfx-python*' | sort -u -t
# Test running with the plugin version locally.
if [[ "${TEST_PLUGIN}" == "true" ]]; then
# --no-use-pep517 is required for django-spanner install issue: see https://github.com/pypa/pip/issues/7953
python3 -m pip install --user --no-use-pep517 -e .
python3 -m pip install --user --no-use-pep517 -e .[all]
sphinx-build -T -N -D extensions=sphinx.ext.autodoc,sphinx.ext.autosummary,docfx_yaml.extension,sphinx.ext.intersphinx,sphinx.ext.coverage,sphinx.ext.napoleon,sphinx.ext.todo,sphinx.ext.viewcode,recommonmark -b html -d docs/_build/doctrees/ docs/ docs/_build/html/
continue
fi
Expand Down
128 changes: 104 additions & 24 deletions docfx_yaml/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import inspect
import re
import copy
import shutil
from pathlib import Path
from functools import partial
from itertools import zip_longest

Expand All @@ -45,6 +47,8 @@
from .directives import RemarksDirective, TodoDirective
from .nodes import remarks

import subprocess
from docuploader import shell

class Bcolors:
HEADER = '\033[95m'
Expand Down Expand Up @@ -80,7 +84,31 @@ class Bcolors:
PROPERTY = 'property'


# Run sphinx-build with Markdown builder in the plugin.
def run_sphinx_markdown():
cwd = os.getcwd()
# Skip running sphinx-build for Markdown for some unit tests.
# Not required other than to output DocFX YAML.
if "docs" in cwd:
return

return shell.run(
[
"sphinx-build",
"-M",
"markdown",
"docs/",
"docs/_build",
],
hide_output=False
)


def build_init(app):
print("Running sphinx-build with Markdown first...")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add another log message after this to say we're done?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

run_sphinx_markdown()
print("Completed running sphinx-build with Markdown files.")

"""
Set up environment data
"""
Expand All @@ -103,6 +131,8 @@ def build_init(app):
app.env.docfx_uid_names = {}
# This stores file path for class when inspect cannot retrieve file path
app.env.docfx_class_paths = {}
# This stores the name and href of the markdown pages.
app.env.markdown_pages = []

app.env.docfx_xrefs = {}

Expand Down Expand Up @@ -930,11 +960,11 @@ def find_unique_name(package_name, entries):
# Used to disambiguate names that have same entries.
# Returns a dictionary of names that are disambiguated in the form of:
# {uidname: disambiguated_name}
def disambiguate_toc_name(toc_yaml):
def disambiguate_toc_name(pkg_toc_yaml):
name_entries = {}
disambiguated_names = {}

for module in toc_yaml:
for module in pkg_toc_yaml:
module_name = module['name']
if module_name not in name_entries:
name_entries[module_name] = {}
Expand All @@ -955,7 +985,7 @@ def disambiguate_toc_name(toc_yaml):
# Update the dictionary of dismabiguated names
disambiguated_names.update(disambiguate_toc_name(module['items']))

for module in toc_yaml:
for module in pkg_toc_yaml:
module_name = module['name']
# Check if there are multiple entires of module['name'], disambiguate if needed.
if name_entries[module_name][module_name] > 1:
Expand All @@ -965,11 +995,11 @@ def disambiguate_toc_name(toc_yaml):
return disambiguated_names


# Combines toc_yaml entries with similar version headers.
def group_by_package(toc_yaml):
new_toc_yaml = []
# Combines pkg_toc_yaml entries with similar version headers.
def group_by_package(pkg_toc_yaml):
new_pkg_toc_yaml = []
package_groups = {}
for module in toc_yaml:
for module in pkg_toc_yaml:
package_group = find_package_group(module['uidname'])
if package_group not in package_groups:
package_name = pretty_package_name(package_group)
Expand All @@ -981,9 +1011,9 @@ def group_by_package(toc_yaml):
package_groups[package_group]['items'].append(module)

for package_group in package_groups:
new_toc_yaml.append(package_groups[package_group])
new_pkg_toc_yaml.append(package_groups[package_group])

return new_toc_yaml
return new_pkg_toc_yaml


# Given the full uid, return the package group including its prefix.
Expand All @@ -1002,20 +1032,66 @@ def pretty_package_name(package_group):
return " ".join(capitalized_name)


# For a given markdown file, extract its header line.
def extract_header_from_markdown(mdfile_iterator):
for header_line in mdfile_iterator:
# Ignore licenses and other non-headers prior to the header.
if "#" in header_line:
break

if header_line.count("#") != 1:
raise ValueError(f"The first header of {mdfile_iterator.name} is not a h1 header: {header_line}")

# Extract the header name.
return header_line.strip("#").strip()


# Given generated markdown files, incorporate them into the docfx_yaml output.
# The markdown file metadata will be added to top level of the TOC.
def find_markdown_pages(app, outdir):
# Use this to ignore markdown files that are unnecessary.
files_to_ignore = [
"index.md", # merge index.md and README.md and index.yaml later
"reference.md", # Reference docs overlap with Overview. Will try and incorporate this in later.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

File an issue and reference it here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean within the code, or in this PR? If it's the latter, done.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both.

"readme.md", # README does not seem to work in cloud site
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. The file would need to have a different name.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha, I'll look into what to name this. Filing an issue.

"upgrading.md", # Currently the formatting breaks, will need to come back to it.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

File issue and reference it here? Possibly the same issue as reference.md?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Different issue, upgrading.md file already comes as markdown format in docs/ directory, whereas other files come in reStructuredText format (.rst). Processing .rst->.md with doc-pipeline seems to work well, but processing .md -> .md(?) -> doc-pipeline seems to break. Will need to mitigate this in the middle and use markdown from the source directly, or to make this work on doc-pipeline.

]

markdown_dir = Path(app.builder.outdir).parent / "markdown"
if not markdown_dir.exists():
print("There's no markdown file to move.")
return

# For each file, if it is a markdown file move to the top level pages.
for mdfile in markdown_dir.iterdir():
if mdfile.is_file() and mdfile.name.lower() not in files_to_ignore:
shutil.copy(mdfile, f"{outdir}/{mdfile.name.lower()}")

# Extract the header name for TOC.
with open(mdfile) as mdfile_iterator:
name = extract_header_from_markdown(mdfile_iterator)

# Add the file to the TOC later.
app.env.markdown_pages.append({
'name': name,
'href': mdfile.name.lower(),
})


def build_finished(app, exception):
"""
Output YAML on the file system.
"""

# Used to get rid of the uidname field for cleaner toc file.
def sanitize_uidname_field(toc_yaml):
for module in toc_yaml:
def sanitize_uidname_field(pkg_toc_yaml):
for module in pkg_toc_yaml:
if 'items' in module:
sanitize_uidname_field(module['items'])
module.pop('uidname')

def find_node_in_toc_tree(toc_yaml, to_add_node):
for module in toc_yaml:
def find_node_in_toc_tree(pkg_toc_yaml, to_add_node):
for module in pkg_toc_yaml:
if module['uidname'] == to_add_node:
return module

Expand Down Expand Up @@ -1048,7 +1124,10 @@ def convert_module_to_package_if_needed(obj):
))
ensuredir(normalized_outdir)

toc_yaml = []
# Add markdown pages to the configured output directory.
find_markdown_pages(app, normalized_outdir)

pkg_toc_yaml = []
# Used to record filenames dumped to avoid confliction
# caused by Windows case insensitive file system
file_name_set = set()
Expand Down Expand Up @@ -1197,7 +1276,7 @@ def convert_module_to_package_if_needed(obj):
# Build nested TOC
if uid.count('.') >= 1:
parent_level = '.'.join(uid.split('.')[:-1])
found_node = find_node_in_toc_tree(toc_yaml, parent_level)
found_node = find_node_in_toc_tree(pkg_toc_yaml, parent_level)

if found_node:
found_node.pop('uid', 'No uid found')
Expand All @@ -1222,40 +1301,41 @@ def convert_module_to_package_if_needed(obj):
file_name = obj['source']['path'].split("/")[-1][:-3]
except AttributeError:
file_name = node_name
toc_yaml.append({
pkg_toc_yaml.append({
'name': file_name,
'uidname': uid,
'uid': uid
})

else:
toc_yaml.append({
pkg_toc_yaml.append({
'name': node_name,
'uidname': uid,
'uid': uid
})

if len(toc_yaml) == 0:
# Exit if there are no generated YAML pages or Markdown pages.
if len(pkg_toc_yaml) == 0 and len(app.env.markdown_pages) == 0:
raise RuntimeError("No documentation for this module.")

# Perform additional disambiguation of the name
disambiguated_names = disambiguate_toc_name(toc_yaml)
disambiguated_names = disambiguate_toc_name(pkg_toc_yaml)

toc_yaml = group_by_package(toc_yaml)
pkg_toc_yaml = group_by_package(pkg_toc_yaml)

# Keeping uidname field carrys over onto the toc.yaml files, we need to
# be keep using them but don't need them in the actual file
toc_yaml_with_uid = copy.deepcopy(toc_yaml)
pkg_toc_yaml_with_uid = copy.deepcopy(pkg_toc_yaml)

sanitize_uidname_field(toc_yaml)
sanitize_uidname_field(pkg_toc_yaml)

toc_file = os.path.join(normalized_outdir, 'toc.yml')
with open(toc_file, 'w') as writable:
writable.write(
dump(
[{
'name': app.config.project,
'items': [{'name': 'Overview', 'uid': 'project-' + app.config.project}] + toc_yaml
'items': [{'name': 'Overview', 'uid': 'project-' + app.config.project}] + app.env.markdown_pages + pkg_toc_yaml
}],
default_flow_style=False,
)
Expand Down Expand Up @@ -1306,7 +1386,7 @@ def convert_module_to_package_if_needed(obj):
index_file = os.path.join(normalized_outdir, 'index.yml')
index_children = []
index_references = []
for package in toc_yaml_with_uid:
for package in pkg_toc_yaml_with_uid:
for item in package.get("items"):
index_children.append(item.get('uidname', ''))
index_references.append({
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@
description = 'Sphinx Python Domain to DocFX YAML Generator'
version = '0.5.2'
dependencies = [
'gcp-docuploader',
'PyYAML',
'sphinx',
'sphinx-markdown-builder',
'sphinxcontrib.napoleon',
'unidecode',
'wheel>=0.24.0'
Expand Down
4 changes: 4 additions & 0 deletions tests/markdown_example.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#Test header for a simple markdown file.

##Content header
This is a simple line followed by an h2 header.
4 changes: 4 additions & 0 deletions tests/markdown_example_bad.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
##Test header for a simple markdown file.

##Content header
This is a simple line followed by an h2 header.
18 changes: 18 additions & 0 deletions tests/markdown_example_header.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<!--
Copyright 2021 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->


#Test header for a simple markdown file.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please also test a space after the #.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it.


##Content header
This is a simple line followed by an h2 header.
5 changes: 5 additions & 0 deletions tests/markdown_example_noheader.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
This is a simple markdown file with no header.

When running the test on this file to extract its header,

it should throw an exception.
39 changes: 38 additions & 1 deletion tests/test_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from docfx_yaml.extension import find_package_group
from docfx_yaml.extension import pretty_package_name
from docfx_yaml.extension import group_by_package
from docfx_yaml.extension import extract_header_from_markdown

import unittest

Expand Down Expand Up @@ -396,7 +397,6 @@ def test_extract_docstring_info_with_xref(self):
}

top_summary_got = _extract_docstring_info(summary_info_got, summary, "")
self.maxDiff = None
# Same as the top summary from previous example, compare with that
self.assertEqual(top_summary_got, self.top_summary1_want)
self.assertDictEqual(summary_info_got, summary_info_want)
Expand Down Expand Up @@ -538,5 +538,42 @@ def test_group_by_package(self):
self.assertCountEqual(toc_yaml_got, toc_yaml_want)


def test_extract_header_from_markdown(self):
# Check the header for a normal markdown file.
header_line_want = "Test header for a simple markdown file."

mdfile = open('tests/markdown_example.md', 'r')
header_line_got = extract_header_from_markdown(mdfile)

self.assertEqual(header_line_got, header_line_want)
mdfile.close()

# The header should be the same even with the license header.
header_line_with_license_want = header_line_want

mdfile = open('tests/markdown_example_header.md', 'r')
header_line_with_license_got = extract_header_from_markdown(mdfile)

self.assertEqual(header_line_with_license_got, header_line_with_license_want)
mdfile.close()


def test_extract_header_from_markdown_check_error(self):
# Check an exception is thrown for markdown files that's not well
# formatted.
mdfile = open('tests/markdown_example_bad.md', 'r')
with self.assertRaises(ValueError):
header_line = extract_header_from_markdown(mdfile)

mdfile.close()

mdfile = open('tests/markdown_example_noheader.md', 'r')
with self.assertRaises(ValueError):
header_line = extract_header_from_markdown(mdfile)

mdfile.close()



if __name__ == '__main__':
unittest.main()