Skip to content

Commit

Permalink
feat!: retrieve all markdown pages (#266)
Browse files Browse the repository at this point in the history
* feat!: retrieve all markdown files in directories

* test: add unit test coverage

* feat!: require python3.9 and above

* feat: cover all package versions

* test: update goldens

* test: update unittest

* feat: address review comments

* feat: address review comment

* feat: address review comment

* test: update unit test
  • Loading branch information
dandhlee authored Nov 15, 2022
1 parent 43751cf commit 1cee1ed
Show file tree
Hide file tree
Showing 11 changed files with 5,822 additions and 15 deletions.
94 changes: 91 additions & 3 deletions docfx_yaml/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
import black
import logging

from collections import defaultdict
from collections.abc import MutableSet
from pathlib import Path
from functools import partial
from itertools import zip_longest
Expand All @@ -39,6 +41,7 @@

from yaml import safe_dump as dump

import sphinx.application
from sphinx.util.console import darkgreen, bold
from sphinx.util import ensuredir
from sphinx.errors import ExtensionError
Expand Down Expand Up @@ -160,8 +163,11 @@ def build_init(app):
app.env.docfx_uid_names = {}
# This stores file path for class when inspect cannot retrieve file path
app.env.docfx_class_paths = {}
# This stores the name and href of the markdown pages.
app.env.markdown_pages = []
# This stores the name and href of the nested markdown pages.
app.env.markdown_pages = defaultdict(list)
# This stores all the markdown pages moved from the plugin. It is compared
# against the pages added to the table of contents to delete unused pages.
app.env.moved_markdown_pages = set()

app.env.docfx_xrefs = {}

Expand Down Expand Up @@ -1527,6 +1533,75 @@ def search_cross_references(obj, current_object_name: str, known_uids: List[str]
markdown_utils.reformat_markdown_to_html(attribute_type))


# Imported next to the alias so the annotation refers to the real `Any` type
# rather than the builtin `any()` function.
from typing import Any

# Type alias used for toc_yaml entries.
_toc_yaml_type_alias = dict[str, Any]


def merge_markdown_and_package_toc(
    pkg_toc_yaml: list[_toc_yaml_type_alias],
    markdown_toc_yaml: _toc_yaml_type_alias,
    known_uids: set[str],
) -> tuple[MutableSet[str], list[_toc_yaml_type_alias]]:
    """Merges the markdown and package table of contents.

    Markdown pages are looked up by the lowercased name of each package TOC
    entry and are placed in front of that entry's existing children. Pages
    filed under the '/' key are treated as top level pages and are placed in
    front of the package entries, but only when the first of them is
    'index.md'.

    Args:
        pkg_toc_yaml: table of contents for package files. Matching entries
            are updated in place.
        markdown_toc_yaml: table of contents for markdown files, mapping a
            lowercased entry name (or '/' for top level pages) to a list of
            pages that each have a 'name' and an 'href'.
        known_uids: names that already have generated pages. A markdown page
            whose href, up to the first '.', is in this set is not added.

    Returns:
        A set of hrefs of the markdown pages that have been added, and the
        merged table of contents as a flat list of entries, with files in the
        correct position.
    """
    def _flatten_toc(
        toc_yaml_entry: list[_toc_yaml_type_alias],
    ) -> list[_toc_yaml_type_alias]:
        """Flattens and retrieves all children within pkg_toc_yaml."""
        entries = list(toc_yaml_entry)
        for entry in toc_yaml_entry:
            if (children := entry.get('items')):
                entries.extend(_flatten_toc(children))
        return entries

    added_pages = set()

    # The TOC is flattened before any entry is modified, so the markdown
    # pages inserted below are never visited as package entries themselves.
    for entry in _flatten_toc(pkg_toc_yaml):
        entry_name = entry['name'].lower()
        if entry_name not in markdown_toc_yaml:
            continue

        markdown_pages_to_add = sorted(
            (
                {'name': page['name'], 'href': page['href']}
                for page in markdown_toc_yaml[entry_name]
                if page['href'].split('.')[0] not in known_uids
                and page['href'] not in added_pages
            ),
            key=lambda page: page['href'],
        )

        if not markdown_pages_to_add:
            continue

        # Leaf entries may not carry an 'items' key (or may carry an empty
        # value); treat that as having no children instead of failing.
        entry['items'] = markdown_pages_to_add + (entry.get('items') or [])
        added_pages.update(page['href'] for page in markdown_pages_to_add)

    top_level_pages = markdown_toc_yaml.get('/')
    if top_level_pages is None or (
            top_level_pages and top_level_pages[0]['href'] != 'index.md'):
        # Return the package TOC as-is. It must stay a flat list of entries,
        # the same shape as the merged result below, because the caller uses
        # it directly as the 'items' of the root TOC entry.
        return added_pages, pkg_toc_yaml

    added_pages.update(page['href'] for page in top_level_pages)

    return added_pages, top_level_pages + pkg_toc_yaml


def build_finished(app, exception):
"""
Output YAML on the file system.
Expand Down Expand Up @@ -1767,13 +1842,26 @@ def convert_module_to_package_if_needed(obj):

sanitize_uidname_field(pkg_toc_yaml)

known_uids = {
uid.split('.')[-1]
for uid in app.env.docfx_uid_names
}

added_pages, pkg_toc_yaml = merge_markdown_and_package_toc(
pkg_toc_yaml, app.env.markdown_pages, known_uids)

# Remove unused pages after merging the table of contents.
if added_pages:
markdown_utils.remove_unused_pages(
added_pages, app.env.moved_markdown_pages, normalized_outdir)

toc_file = os.path.join(normalized_outdir, 'toc.yml')
with open(toc_file, 'w') as writable:
writable.write(
dump(
[{
'name': app.config.project,
'items': app.env.markdown_pages + pkg_toc_yaml
'items': pkg_toc_yaml
}],
default_flow_style=False,
)
Expand Down
80 changes: 70 additions & 10 deletions docfx_yaml/markdown_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
"""Markdown related utilities for Sphinx DocFX YAML extension."""


from collections.abc import MutableSet
import os
from pathlib import Path
import re
import shutil
from typing import Iterable
from typing import Iterable, List, Optional

from docuploader import shell
import sphinx.application
Expand Down Expand Up @@ -231,7 +232,11 @@ def _prepend_markdown_header(filename: str, mdfile: Iterable[str]) -> None:
mdfile.write(file_content)


def move_markdown_pages(app: sphinx.application, outdir: Path) -> None:
def move_markdown_pages(
app: sphinx.application,
outdir: Path,
cwd: Optional[List[str]] = [],
) -> None:
"""Moves markdown pages to be added to the generated reference documentation.
Markdown pages may be hand written or auto generated. They're processed
Expand All @@ -254,7 +259,14 @@ def move_markdown_pages(app: sphinx.application, outdir: Path) -> None:
'readme.md': 'index.md',
}

markdown_dir = Path(app.builder.outdir).parent / "markdown"
base_markdown_dir = Path(app.builder.outdir).parent / "markdown"

markdown_dir = (
base_markdown_dir.joinpath(*cwd)
if cwd
else base_markdown_dir
)

if not markdown_dir.exists():
print("There's no markdown file to move.")
return
Expand All @@ -264,6 +276,12 @@ def move_markdown_pages(app: sphinx.application, outdir: Path) -> None:

# For each file, if it is a markdown file move to the top level pages.
for mdfile in markdown_dir.iterdir():
if mdfile.is_dir():
cwd.append(mdfile.name)
move_markdown_pages(app, outdir, cwd)
# Restore the original cwd after finish working on the directory.
cwd.pop()

if mdfile.is_file() and mdfile.name.lower() not in files_to_ignore:
mdfile_name = ""

Expand All @@ -288,6 +306,7 @@ def move_markdown_pages(app: sphinx.application, outdir: Path) -> None:
mdfile_outdir = f"{outdir}/{mdfile_name_to_use}"

shutil.copy(mdfile, mdfile_outdir)
app.env.moved_markdown_pages.add(mdfile_name_to_use)

_highlight_md_codeblocks(mdfile_outdir)
_clean_image_links(mdfile_outdir)
Expand All @@ -297,33 +316,74 @@ def move_markdown_pages(app: sphinx.application, outdir: Path) -> None:
# Save the index page entry.
index_page_entry = {
'name': 'Overview',
'href': 'index.md'
'href': 'index.md',
}
continue

if not cwd:
# Use '/' to reserve for top level pages.
app.env.markdown_pages['/'].append({
'name': name,
'href': mdfile_name_to_use,
})
continue

# Add the file to the TOC later.
app.env.markdown_pages.append({
app.env.markdown_pages[cwd[-1]].append({
'name': name,
'href': mdfile_name_to_use,
})

if app.env.markdown_pages:
# Sort the TOC alphabetically based on href entry.
app.env.markdown_pages = sorted(
app.env.markdown_pages,
if app.env.markdown_pages.get('/'):
# Sort the top level pages. Other pages will be sorted when they're
# added to package level files accordingly.
app.env.markdown_pages['/'] = sorted(
app.env.markdown_pages['/'],
key=lambda entry: entry['href'],
)

if index_page_entry is None:
return

# Place the Overview page at the top of the list.
app.env.markdown_pages.insert(
app.env.markdown_pages['/'].insert(
0,
index_page_entry,
)


def remove_unused_pages(
    added_pages: MutableSet[str],
    all_pages: MutableSet[str],
    outdir: Path,
) -> None:
    """Deletes the markdown pages that did not make it into the merged TOC.

    Every page in `all_pages` that is not also in `added_pages` is removed
    from `outdir`. A page that is already missing on disk is reported with a
    printed message and does not stop the remaining deletions.

    Args:
        added_pages: markdown pages that have been added to the merged
            table of contents.
        all_pages: set of all markdown pages generated.
        outdir: output directory containing the markdown pages.
    """
    # Collect into a set first so each unused page is handled only once.
    unused_pages = set()
    for candidate in all_pages:
        if candidate in added_pages:
            continue
        unused_pages.add(candidate)

    for unused_page in unused_pages:
        page_path = f"{outdir}/{unused_page}"
        try:
            os.remove(page_path)
        except FileNotFoundError:
            # Not expected, but a missing file should not prevent the other
            # unused pages from being deleted.
            print(f"Could not delete {unused_page}.")


def run_sphinx_markdown() -> None:
"""Runs sphinx-build with Markdown builder in the plugin."""
cwd = os.getcwd()
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
package_dir={'': '.'},
packages=packages,
install_requires=dependencies,
python_requires=">=3.7",
python_requires=">=3.9",
include_package_data=True,
zip_safe=False,
**extra_setup
Expand Down
32 changes: 31 additions & 1 deletion tests/test_markdown.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from docfx_yaml import markdown_utils


import unittest
from unittest.mock import patch
from parameterized import parameterized
import pathlib

import os
from yaml import load, Loader

import pytest
import tempfile

class TestGenerate(unittest.TestCase):
Expand Down Expand Up @@ -248,5 +251,32 @@ def test_extract_header_from_markdown_bad_headers(self, markdown_filename):
self.assertFalse(header_line_got)


def test_remove_unused_pages(self):
    # Only the page that is absent from the added pages should be deleted.
    kept_pages = ['safe.md']
    moved_pages = ['to_delete.md', 'safe.md']
    outdir = pathlib.Path('output_path')

    # os.remove is patched so that no real file is touched.
    with patch('os.remove') as remove_mock:
        markdown_utils.remove_unused_pages(kept_pages, moved_pages, outdir)

    remove_mock.assert_called_once_with(f"{outdir}/to_delete.md")


def test_remove_unused_pages_with_exception(self):
    # A page that does not exist on disk must not make the cleanup raise.
    kept_pages = ['safe.md']
    moved_pages = ['does_not_exist.md', 'safe.md']
    outdir = pathlib.Path('output_path')
    missing_file = outdir / 'does_not_exist.md'

    # Precondition: the file to delete really is absent.
    self.assertFalse(os.path.isfile(missing_file))

    try:
        markdown_utils.remove_unused_pages(kept_pages, moved_pages, outdir)
    except FileNotFoundError:
        pytest.fail('Should not have thrown an exception.')


if __name__ == '__main__':
unittest.main()
Loading

0 comments on commit 1cee1ed

Please sign in to comment.