-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add syntax highlighting support for Markdown pages #170
Changes from 6 commits
50e187f
5c540da
02c7180
fcaea70
39fc824
442a2f7
1a7f3e4
ce3e97c
33e2cb2
c8caae9
492ec10
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -1268,6 +1268,55 @@ def extract_header_from_markdown(mdfile_iterator): | |||||
return mdfile_name | ||||||
|
||||||
|
||||||
# For a given markdown file, adds syntax highlighting to code blocks. | ||||||
def highlight_md_codeblocks(mdfile): | ||||||
fence = '```' | ||||||
fence_with_python = '```python' | ||||||
new_lines = [] | ||||||
|
||||||
with open(mdfile) as mdfile_iterator: | ||||||
file_content = mdfile_iterator.read() | ||||||
# If there is an odd number of code block annotations, do not syntax | ||||||
# highlight. | ||||||
if file_content.count(fence) % 2 != 0: | ||||||
print(f'{mdfile_iterator.name} contains mixed or wrong format of code blocks. Skipping syntax highlighting.') | ||||||
return | ||||||
# Retrieve code block positions to replace | ||||||
codeblocks = [[m.start(), m.end()] for m in re.finditer( | ||||||
fence, | ||||||
file_content)] | ||||||
|
||||||
# This is equivalent to grabbing every odd index item. | ||||||
codeblocks = codeblocks[::2] | ||||||
# Used to store code blocks that comes without language indicators. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggestion applied. |
||||||
blocks_without_indicators = [] | ||||||
|
||||||
# Check if the fence comes with a language indicator. If so, skip this. | ||||||
for start, end in codeblocks: | ||||||
newline_index = file_content.find('\n', start) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we just check what the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Checking one character after the end of the fence makes much more sense. Updated! |
||||||
# There should be exactly 3 characters between the start of the | ||||||
# fence and the newline, otherwise there probably is a language | ||||||
# indicator. | ||||||
if newline_index - start == 3: | ||||||
blocks_without_indicators.append([start, end]) | ||||||
|
||||||
# Stitch content that does not need to be parsed, and replace with | ||||||
# `replace_string` for parsed portions. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated reference to old variable name. |
||||||
prev_start = prev_end = 0 | ||||||
for start, end in blocks_without_indicators: | ||||||
new_lines.append(file_content[prev_end:start]) | ||||||
new_lines.append(fence_with_python) | ||||||
prev_start, prev_end = start, end | ||||||
|
||||||
# Include rest of the content | ||||||
new_lines.append(file_content[prev_end:]) | ||||||
|
||||||
# Overwrite with newly parsed content. | ||||||
with open(mdfile, 'w') as mdfile_iterator: | ||||||
new_content = ''.join(new_lines) | ||||||
mdfile_iterator.write(new_content) | ||||||
|
||||||
|
||||||
# Given generated markdown files, incorporate them into the docfx_yaml output. | ||||||
# The markdown file metadata will be added to top level of the TOC. | ||||||
def find_markdown_pages(app, outdir): | ||||||
|
@@ -1294,6 +1343,7 @@ def find_markdown_pages(app, outdir): | |||||
# For each file, if it is a markdown file move to the top level pages. | ||||||
for mdfile in markdown_dir.iterdir(): | ||||||
if mdfile.is_file() and mdfile.name.lower() not in files_to_ignore: | ||||||
highlight_md_codeblocks(f"{markdown_dir}/{mdfile.name}") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why the string/name conversion? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're right, no need. I was going back and forth myself between passing a file or just the file path or the string, and forgot to clean it up here. |
||||||
shutil.copy(mdfile, f"{outdir}/{mdfile.name.lower()}") | ||||||
|
||||||
# Extract the header name for TOC. | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,15 @@ | ||||||
```python | ||||||
These code block should not be highlighted. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggestion applied. |
||||||
``` | ||||||
|
||||||
```py | ||||||
As these comes with a language indicator. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggestion applied. |
||||||
``` | ||||||
|
||||||
```java | ||||||
Shouldn't matter which langauge indicator is used. | ||||||
``` | ||||||
|
||||||
``` | ||||||
But this should be highlighted. | ||||||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
```python | ||
These code block should not be highlighted. | ||
``` | ||
|
||
```py | ||
As these comes with a language indicator. | ||
``` | ||
|
||
```java | ||
Shouldn't matter which langauge indicator is used. | ||
``` | ||
|
||
```python | ||
But this should be highlighted. | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
``` | ||
File with missing codeblock | ||
``` | ||
|
||
``` | ||
with no closing bracket | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
``` | ||
File with missing codeblock | ||
``` | ||
|
||
``` | ||
with no closing bracket | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
``` | ||
test markdown file for | ||
highlighing markdown codeblocks | ||
``` | ||
|
||
``` | ||
all code blocks | ||
should be highlighted | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
```python | ||
test markdown file for | ||
highlighing markdown codeblocks | ||
``` | ||
|
||
```python | ||
all code blocks | ||
should be highlighted | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,12 +4,16 @@ | |
from docfx_yaml.extension import search_cross_references | ||
from docfx_yaml.extension import format_code | ||
from docfx_yaml.extension import extract_product_name | ||
from docfx_yaml.extension import highlight_md_codeblocks | ||
|
||
import unittest | ||
from parameterized import parameterized | ||
|
||
from yaml import load, Loader | ||
|
||
import shutil | ||
import os | ||
|
||
class TestGenerate(unittest.TestCase): | ||
def test_indent_code_left(self): | ||
# Check that the code indents to left based on first line. | ||
|
@@ -190,5 +194,41 @@ def test_extract_product_name(self): | |
|
||
self.assertEqual(short_name_want, short_product_name) | ||
|
||
|
||
# Filenames to test markdown syntax highlight with. | ||
test_markdown_filenames = [ | ||
[ | ||
"tests/markdown_syntax_highlight.md", | ||
"tests/markdown_syntax_highlight_got.md", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should either use a tmp file, clean these up every time (not after the assert), or include them in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated to use |
||
"tests/markdown_syntax_highlight_want.md" | ||
], | ||
[ | ||
"tests/markdown_no_highlight.md", | ||
"tests/markdown_no_highlight_got.md", | ||
"tests/markdown_no_highlight_want.md" | ||
], | ||
[ | ||
"tests/markdown_mixed_highlight.md", | ||
"tests/markdown_mixed_highlight_got.md", | ||
"tests/markdown_mixed_highlight_want.md" | ||
], | ||
] | ||
@parameterized.expand(test_markdown_filenames) | ||
def test_highlight_md_codeblocks(self, base_filename, test_filename, want_filename): | ||
# Test to ensure codeblocks in markdown files are correctly highlighted. | ||
|
||
# Copy the base file we'll need to test. | ||
shutil.copyfile(base_filename, test_filename) | ||
|
||
highlight_md_codeblocks(test_filename) | ||
|
||
mdfile_got = open(test_filename) | ||
mdfile_want = open(want_filename) | ||
|
||
self.assertEqual(mdfile_got.read(), mdfile_want.read()) | ||
# Clean up test file. | ||
os.remove(test_filename) | ||
|
||
|
||
if __name__ == '__main__': | ||
unittest.main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm a little concerned about this because we're kind of faking markdown parsing. A code fence can also use four backticks sometimes... So, I think this alright for now, but I'd prefer a solution on the rendering side of the Markdown.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The background of this issue is that the Markdown plugin that's generating all of the markdown pages, is simply not adding the language indicators to the code fences. If there are four backticks in the code fence, it will not be generating from the Markdown plugin but directly inserted into the file which we can simply fix it in the source (or ask to include it with a language indicator).
I'd prefer this be solved at the Markdown plugin level as well :( Happy to file an issue on this to track for future improvement.