# For a given markdown file, adds syntax highlighting to code blocks.
#
# Every opening code fence that is immediately followed by a newline (i.e. it
# has no language indicator) is rewritten in place to "```python". Fences that
# already carry a language indicator are left untouched. If the file contains
# an odd number of fences, a message is printed and the file is not modified.
# The function always returns None.
def highlight_md_codeblocks(mdfile):
    fence = '```'
    fence_with_python = '```python'

    # Markdown is read and written as UTF-8 explicitly so the result does not
    # depend on the platform's default locale encoding.
    with open(mdfile, encoding='utf-8') as mdfile_iterator:
        file_content = mdfile_iterator.read()

    # Splitting on the fence yields one more segment than there are fences,
    # so an even number of segments means an odd (unbalanced) number of
    # fences. In that case, do not syntax highlight.
    segments = file_content.split(fence)
    if len(segments) % 2 == 0:
        print(f'{mdfile} contains wrong format of code blocks. Skipping syntax highlighting.')
        return

    # Stitch the segments back together. The fence that precedes an
    # odd-numbered segment opens a code block; the fence that precedes an
    # even-numbered segment (other than the first) closes one.
    new_lines = []
    found_untagged_block = False
    for index, segment in enumerate(segments):
        if index > 0:
            opens_block = index % 2 == 1
            # An opening fence directly followed by a newline comes without
            # a language indicator.
            if opens_block and segment.startswith('\n'):
                new_lines.append(fence_with_python)
                found_untagged_block = True
            else:
                new_lines.append(fence)
        new_lines.append(segment)

    # Nothing to highlight: leave the file untouched instead of rewriting
    # identical content.
    if not found_untagged_block:
        return

    # Overwrite with newly parsed content.
    with open(mdfile, 'w', encoding='utf-8') as mdfile_iterator:
        mdfile_iterator.write(''.join(new_lines))
def find_markdown_pages(app, outdir): @@ -1294,6 +1340,7 @@ def find_markdown_pages(app, outdir): # For each file, if it is a markdown file move to the top level pages. for mdfile in markdown_dir.iterdir(): if mdfile.is_file() and mdfile.name.lower() not in files_to_ignore: + highlight_md_codeblocks(markdown_dir / mdfile.name) shutil.copy(mdfile, f"{outdir}/{mdfile.name.lower()}") # Extract the header name for TOC. diff --git a/tests/markdown_mixed_highlight.md b/tests/markdown_mixed_highlight.md new file mode 100644 index 00000000..1b9c6cd7 --- /dev/null +++ b/tests/markdown_mixed_highlight.md @@ -0,0 +1,15 @@ +```python +These code blocks should not be highlighted. +``` + +```py +As these come with a language indicator. +``` + +```java +Shouldn't matter which langauge indicator is used. +``` + +``` +But this block should get highlighted. +``` diff --git a/tests/markdown_mixed_highlight_want.md b/tests/markdown_mixed_highlight_want.md new file mode 100644 index 00000000..6e19859e --- /dev/null +++ b/tests/markdown_mixed_highlight_want.md @@ -0,0 +1,15 @@ +```python +These code blocks should not be highlighted. +``` + +```py +As these come with a language indicator. +``` + +```java +Shouldn't matter which langauge indicator is used. +``` + +```python +But this block should get highlighted. 
+``` diff --git a/tests/markdown_no_highlight.md b/tests/markdown_no_highlight.md new file mode 100644 index 00000000..fc4a03fe --- /dev/null +++ b/tests/markdown_no_highlight.md @@ -0,0 +1,7 @@ +``` +File with missing codeblock +``` + +``` +with no closing bracket + diff --git a/tests/markdown_no_highlight_want.md b/tests/markdown_no_highlight_want.md new file mode 100644 index 00000000..fc4a03fe --- /dev/null +++ b/tests/markdown_no_highlight_want.md @@ -0,0 +1,7 @@ +``` +File with missing codeblock +``` + +``` +with no closing bracket + diff --git a/tests/markdown_syntax_highlight.md b/tests/markdown_syntax_highlight.md new file mode 100644 index 00000000..d3daf654 --- /dev/null +++ b/tests/markdown_syntax_highlight.md @@ -0,0 +1,9 @@ +``` +test markdown file for +highlighing markdown codeblocks +``` + +``` +all code blocks +should be highlighted +``` diff --git a/tests/markdown_syntax_highlight_want.md b/tests/markdown_syntax_highlight_want.md new file mode 100644 index 00000000..5fa792e4 --- /dev/null +++ b/tests/markdown_syntax_highlight_want.md @@ -0,0 +1,9 @@ +```python +test markdown file for +highlighing markdown codeblocks +``` + +```python +all code blocks +should be highlighted +``` diff --git a/tests/test_helpers.py b/tests/test_helpers.py index b2b4f51e..77e4317c 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -4,12 +4,15 @@ from docfx_yaml.extension import search_cross_references from docfx_yaml.extension import format_code from docfx_yaml.extension import extract_product_name +from docfx_yaml.extension import highlight_md_codeblocks import unittest from parameterized import parameterized from yaml import load, Loader +import tempfile + class TestGenerate(unittest.TestCase): def test_indent_code_left(self): # Check that the code indents to left based on first line. @@ -190,5 +193,38 @@ def test_extract_product_name(self): self.assertEqual(short_name_want, short_product_name) + + # Filenames to test markdown syntax highlight with. 
# Each entry is [input fixture, expected-output fixture].
test_markdown_filenames = [
    [
        "tests/markdown_syntax_highlight.md",
        "tests/markdown_syntax_highlight_want.md",
    ],
    [
        "tests/markdown_no_highlight.md",
        "tests/markdown_no_highlight_want.md",
    ],
    [
        "tests/markdown_mixed_highlight.md",
        "tests/markdown_mixed_highlight_want.md",
    ],
]


@parameterized.expand(test_markdown_filenames)
def test_highlight_md_codeblocks(self, base_filename, want_filename):
    """Check that codeblocks in markdown files are correctly highlighted.

    Args:
        base_filename: Fixture that is copied and passed to
            highlight_md_codeblocks.
        want_filename: Fixture holding the content expected afterwards.
    """
    with open(base_filename) as base_file:
        base_content = base_file.read()
    with open(want_filename) as mdfile_want:
        want_content = mdfile_want.read()

    # highlight_md_codeblocks edits its argument in place, so run it on a
    # scratch copy. The temporary directory (and the copy inside it) is
    # removed when the block exits, even if the call raises.
    with tempfile.TemporaryDirectory() as scratch_dir:
        scratch_path = f"{scratch_dir}/markdown_under_test.md"
        with open(scratch_path, 'w') as scratch_file:
            scratch_file.write(base_content)

        highlight_md_codeblocks(scratch_path)

        # Reopen the file so the content is read fresh from disk.
        with open(scratch_path) as scratch_file:
            got_content = scratch_file.read()

    self.assertEqual(got_content, want_content)


if __name__ == '__main__':
    unittest.main()