Skip to content

Commit

Permalink
fix: add markdown header if it is missing (#203)
Browse files Browse the repository at this point in the history
* fix: add a Markdown header level 1 if it is missing

* test: update unit test

* fix: update name from prepend_markdown_title to prepend_markdown_header

* test: update unit test

* fix: update comments based on code review

* test: update unit test with comments and parameterized tests

* test: update unit test

* fix: update docstring with types

* fix: update type hint for Iterables
  • Loading branch information
dandhlee authored Apr 12, 2022
1 parent 9ffe7e0 commit ccd53bd
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 60 deletions.
46 changes: 38 additions & 8 deletions docfx_yaml/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from pathlib import Path
from functools import partial
from itertools import zip_longest
from typing import List
from typing import List, Iterable
from black import InvalidInput

try:
Expand Down Expand Up @@ -1286,12 +1286,18 @@ def parse_markdown_header(header_line, prev_line):
return ""


# For a given markdown file, extract its header line.
def extract_header_from_markdown(mdfile_iterator):
mdfile_name = mdfile_iterator.name.split("/")[-1].split(".")[0].capitalize()
def extract_header_from_markdown(mdfile: Iterable[str]) -> str:
"""For a given markdown file, extract its header line.
Args:
mdfile: iterator to the markdown file.
Returns:
A string for header or empty string if header is not found.
"""
prev_line = ""

for header_line in mdfile_iterator:
for header_line in mdfile:

# Ignore licenses and other non-headers prior to the header.
header = parse_markdown_header(header_line, prev_line)
Expand All @@ -1301,8 +1307,7 @@ def extract_header_from_markdown(mdfile_iterator):

prev_line = header_line

print(f"Could not find a title for {mdfile_iterator.name}. Using {mdfile_name} as the title instead.")
return mdfile_name
return ""


# For a given markdown file, adds syntax highlighting to code blocks.
Expand Down Expand Up @@ -1351,6 +1356,20 @@ def highlight_md_codeblocks(mdfile):
mdfile_iterator.write(new_content)


def prepend_markdown_header(filename: str, mdfile: Iterable[str]) -> None:
    """Prepends the filename as a Markdown header.

    The file is rewritten in place as ``# {filename}``, a blank line, and
    then the complete original content.

    Args:
        filename: the name of the markdown file to prepend.
        mdfile: iterator to the markdown file that is both readable
            and writable. It must also support ``seek``.
    """
    # Rewind before reading so the whole content is preserved even when the
    # caller has already consumed part of the file; otherwise everything
    # before the current position would be overwritten and lost.
    mdfile.seek(0)
    file_content = f'# {filename}\n\n' + mdfile.read()
    # Reset file position to the beginning to write
    mdfile.seek(0)
    mdfile.write(file_content)


# Given generated markdown files, incorporate them into the docfx_yaml output.
# The markdown file metadata will be added to top level of the TOC.
def find_markdown_pages(app, outdir):
Expand All @@ -1374,13 +1393,24 @@ def find_markdown_pages(app, outdir):
# For each file, if it is a markdown file move to the top level pages.
for mdfile in markdown_dir.iterdir():
if mdfile.is_file() and mdfile.name.lower() not in files_to_ignore:
mdfile_name = ""
highlight_md_codeblocks(markdown_dir / mdfile.name)
shutil.copy(mdfile, f"{outdir}/{mdfile.name.lower()}")

# Extract the header name for TOC.
with open(mdfile) as mdfile_iterator:
name = extract_header_from_markdown(mdfile_iterator)

if not name:
with open(mdfile, 'r+') as mdfile_iterator:
mdfile_name = mdfile_iterator.name.split("/")[-1].split(".")[0].capitalize()

print(f"Could not find a title for {mdfile_iterator.name}. Using {mdfile_name} as the title instead.")
name = mdfile_name

prepend_markdown_header(name, mdfile_iterator)

shutil.copy(mdfile, f"{outdir}/{mdfile.name.lower()}")

# Add the file to the TOC later.
app.env.markdown_pages.append({
'name': name,
Expand Down
8 changes: 8 additions & 0 deletions tests/markdown_example_alternate_bad_want.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Markdown_example_alternate_bad

==============

There should be a header line before the divider.

##Content header
This is a simple line followed by an h2 header.
6 changes: 6 additions & 0 deletions tests/markdown_example_bad_header_want.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Markdown_example_bad_header

#Test header for a bad formatted markdown file.

##Content header
This is a simple line followed by an h2 header.
6 changes: 6 additions & 0 deletions tests/markdown_example_h2_want.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Markdown_example_h2

## Test header for a simple markdown file.

##Content header
This is a simple line followed by an h2 header.
36 changes: 36 additions & 0 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from docfx_yaml.extension import format_code
from docfx_yaml.extension import extract_product_name
from docfx_yaml.extension import highlight_md_codeblocks
from docfx_yaml.extension import prepend_markdown_header

import unittest
from parameterized import parameterized
Expand Down Expand Up @@ -260,5 +261,40 @@ def test_highlight_md_codeblocks(self, base_filename, want_filename):
self.assertEqual(test_file.read(), mdfile_want.read())


# Filenames to test prepending Markdown title.
test_markdown_filenames = [
[
"tests/markdown_example_bad_header.md",
"tests/markdown_example_bad_header_want.md"
],
[
"tests/markdown_example_h2.md",
"tests/markdown_example_h2_want.md"
],
[
"tests/markdown_example_alternate_bad.md",
"tests/markdown_example_alternate_bad_want.md"
],
]
@parameterized.expand(test_markdown_filenames)
def test_prepend_markdown_header(self, base_filename, want_filename):
# Ensure markdown titles are correctly prepended.

# Copy the base file we'll need to test.
with tempfile.NamedTemporaryFile(mode='r+', delete=False) as test_file:
with open(base_filename) as base_file:
# Use same file name extraction as original code.
file_name = base_file.name.split("/")[-1].split(".")[0].capitalize()
test_file.write(base_file.read())
test_file.flush()
test_file.seek(0)

prepend_markdown_header(file_name, test_file)
test_file.seek(0)

with open(want_filename) as mdfile_want:
self.assertEqual(test_file.read(), mdfile_want.read())


if __name__ == '__main__':
unittest.main()
101 changes: 49 additions & 52 deletions tests/test_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from docfx_yaml.extension import parse_markdown_header

import unittest
from parameterized import parameterized

from yaml import load, Loader

Expand Down Expand Up @@ -667,74 +668,70 @@ def test_parse_markdown_header_alternate(self):
self.assertEqual(header_line_got, header_line_want)


def test_extract_header_from_markdown(self):
# Check the header for a normal markdown file.
test_markdown_filenames = [
[
# Check the header for a normal markdown file.
"tests/markdown_example.md"
],
[
# The header should be the same even with the license header.
"tests/markdown_example_header.md"
],
]
@parameterized.expand(test_markdown_filenames)
def test_extract_header_from_markdown(self, markdown_filename):
# Check the header for markdown files.
header_line_want = "Test header for a simple markdown file."

with open('tests/markdown_example.md', 'r') as mdfile:
with open(markdown_filename, 'r') as mdfile:
header_line_got = extract_header_from_markdown(mdfile)

self.assertEqual(header_line_got, header_line_want)

# The header should be the same even with the license header.
header_line_with_license_want = header_line_want

with open('tests/markdown_example_header.md', 'r') as mdfile_license:
header_line_with_license_got = extract_header_from_markdown(mdfile_license)

self.assertEqual(header_line_with_license_got, header_line_with_license_want)


def test_extract_header_from_markdown_alternate_header(self):
# Check the header for an alternate header style.
header_line_want = "This is a simple alternate header"

with open('tests/markdown_example_alternate.md', 'r') as mdfile:
header_line_got = extract_header_from_markdown(mdfile)

self.assertEqual(header_line_got, header_line_want)

# The header should be the same even with the license header.
header_line_with_license_want = header_line_want

with open('tests/markdown_example_alternate_header.md', 'r') as mdfile:
header_line_with_license_got = extract_header_from_markdown(mdfile)

self.assertEqual(header_line_with_license_got, header_line_with_license_want)

# Check the header for an alternate header style.
test_markdown_filenames = [
[
# Check the header for an alternate header style.
"tests/markdown_example_alternate.md"
],
[
# The header should be the same even with the license header.
"tests/markdown_example_alternate_header.md"
],
[
# Check the header for an alternate header style.
"tests/markdown_example_alternate_less.md"
],
]
@parameterized.expand(test_markdown_filenames)
def test_extract_header_from_markdown_alternate_header(self, markdown_filename):
# Check the header for different accepted styles.
header_line_want = "This is a simple alternate header"

with open('tests/markdown_example_alternate_less.md', 'r') as mdfile:
with open(markdown_filename, 'r') as mdfile:
header_line_got = extract_header_from_markdown(mdfile)

self.assertEqual(header_line_got, header_line_want)


def test_extract_header_from_markdown_bad_headers(self):
# Check that the filename is used as header if no valid header is found.
header_line_want = "Markdown_example_bad_header"

with open('tests/markdown_example_bad_header.md', 'r') as mdfile:
header_line_got = extract_header_from_markdown(mdfile)

self.assertEqual(header_line_want, header_line_got)

# Check that only h1 headers are parsed.
header_line_want = "Markdown_example_h2"

with open('tests/markdown_example_h2.md', 'r') as mdfile:
header_line_got = extract_header_from_markdown(mdfile)

self.assertEqual(header_line_want, header_line_got)

# Check that there must be a line before the h1 header breaker.
header_line_want = "Markdown_example_alternate_bad"

with open('tests/markdown_example_alternate_bad.md', 'r') as mdfile:
test_markdown_filenames = [
[
"tests/markdown_example_bad_header.md"
],
[
"tests/markdown_example_h2.md"
],
[
"tests/markdown_example_alternate_bad.md"
],
]
@parameterized.expand(test_markdown_filenames)
def test_extract_header_from_markdown_bad_headers(self, markdown_filename):
# Check that empty string is returned if no valid header is found.
with open(markdown_filename, 'r') as mdfile:
header_line_got = extract_header_from_markdown(mdfile)

self.assertEqual(header_line_want, header_line_got)
self.assertFalse(header_line_got)


def test_parse_docstring_summary(self):
Expand Down

0 comments on commit ccd53bd

Please sign in to comment.