googleapis · dandhlee · Jan 28, 2022 · Jan 21, 2022 · Jan 21, 2022 · Jan 21, 2022
@@ -1268,6 +1268,55 @@ def extract_header_from_markdown(mdfile_iterator):
     return mdfile_name
 
 
+# For a given markdown file, adds syntax highlighting to code blocks.
+def highlight_md_codeblocks(mdfile):
+    fence = '```'
+    fence_with_python = '```python'
+    new_lines = []
+
+    with open(mdfile) as mdfile_iterator:
+        file_content = mdfile_iterator.read()
+        # If there is an odd number of code block annotations, do not syntax
+        # highlight.
+        if file_content.count(fence) % 2 != 0:
+            print(f'{mdfile_iterator.name} contains mixed or wrong format of code blocks. Skipping syntax highlighting.')
+            return
+        # Retrieve code block positions to replace
+        codeblocks = [[m.start(), m.end()] for m in re.finditer(
+                                                      fence,
+                                                      file_content)]
+
+        # This is equivalent to grabbing every odd index item.
+        codeblocks = codeblocks[::2]
+        # Used to store code blocks that comes without language indicators.
-        # Used to store code blocks that comes without language indicators.
+        # Used to store code blocks that come without language indicators.
-        # Used to store code blocks that comes without language indicators.
+        # Used to store code blocks that come without language indicators.
+        blocks_without_indicators = []
+
+        # Check if the fence comes with a language indicator. If so, skip this.
+        for start, end in codeblocks:
+            newline_index = file_content.find('\n', start)
+            # There should be exactly 3 characters between the start of the
+            # fence and the newline, otherwise there probably is a language
+            # indicator.
+            if newline_index - start == 3:
+                blocks_without_indicators.append([start, end])
+
+        # Stitch content that does not need to be parsed, and replace with
+        # `replace_string` for parsed portions.
+        prev_start = prev_end = 0
+        for start, end in blocks_without_indicators:
+            new_lines.append(file_content[prev_end:start])
+            new_lines.append(fence_with_python)
+            prev_start, prev_end = start, end
+
+        # Include rest of the content
+        new_lines.append(file_content[prev_end:])
+
+    # Overwrite with newly parsed content.
+    with open(mdfile, 'w') as mdfile_iterator:
+        new_content = ''.join(new_lines)
+        mdfile_iterator.write(new_content)
+
+
 # Given generated markdown files, incorporate them into the docfx_yaml output.
 # The markdown file metadata will be added to top level of the TOC.
 def find_markdown_pages(app, outdir):
@@ -1294,6 +1343,7 @@ def find_markdown_pages(app, outdir):
     # For each file, if it is a markdown file move to the top level pages.
     for mdfile in markdown_dir.iterdir():
         if mdfile.is_file() and mdfile.name.lower() not in files_to_ignore:
+            highlight_md_codeblocks(f"{markdown_dir}/{mdfile.name}")
             shutil.copy(mdfile, f"{outdir}/{mdfile.name.lower()}")
 
             # Extract the header name for TOC.

@@ -0,0 +1,15 @@
+```python
+These code block should not be highlighted.
-These code block should not be highlighted.
+These code blocks should not be highlighted.
-These code block should not be highlighted.
+These code blocks should not be highlighted.
+```
+
+```py
+As these comes with a language indicator.
-As these comes with a language indicator.
+As these come with a language indicator.
-As these comes with a language indicator.
+As these come with a language indicator.
+```
+
+```java
+Shouldn't matter which langauge indicator is used.
+```
+
+```
+But this should be highlighted.
+```
@@ -0,0 +1,15 @@
+```python
+These code block should not be highlighted.
+```
+
+```py
+As these comes with a language indicator.
+```
+
+```java
+Shouldn't matter which langauge indicator is used.
+```
+
+```python
+But this should be highlighted.
+```
@@ -0,0 +1,7 @@
+```
+File with missing codeblock
+```
+
+```
+with no closing bracket
+
@@ -0,0 +1,7 @@
+```
+File with missing codeblock
+```
+
+```
+with no closing bracket
+
@@ -0,0 +1,9 @@
+```
+test markdown file for
+highlighing markdown codeblocks
+```
+
+```
+all code blocks
+should be highlighted
+```
@@ -0,0 +1,9 @@
+```python
+test markdown file for
+highlighing markdown codeblocks
+```
+
+```python
+all code blocks
+should be highlighted
+```
@@ -4,12 +4,16 @@
 from docfx_yaml.extension import search_cross_references
 from docfx_yaml.extension import format_code
 from docfx_yaml.extension import extract_product_name
+from docfx_yaml.extension import highlight_md_codeblocks
 
 import unittest
 from parameterized import parameterized
 
 from yaml import load, Loader
 
+import shutil
+import os
+
 class TestGenerate(unittest.TestCase):
     def test_indent_code_left(self):
         # Check that the code indents to left based on first line.
@@ -190,5 +194,41 @@ def test_extract_product_name(self):
 
         self.assertEqual(short_name_want, short_product_name)
 
+
+    # Filenames to test markdown syntax highlight with.
+    test_markdown_filenames = [
+        [
+            "tests/markdown_syntax_highlight.md",
+            "tests/markdown_syntax_highlight_got.md",
+            "tests/markdown_syntax_highlight_want.md"
+        ],
+        [
+            "tests/markdown_no_highlight.md",
+            "tests/markdown_no_highlight_got.md",
+            "tests/markdown_no_highlight_want.md"
+        ],
+        [
+            "tests/markdown_mixed_highlight.md",
+            "tests/markdown_mixed_highlight_got.md",
+            "tests/markdown_mixed_highlight_want.md"
+        ],
+    ]
+    @parameterized.expand(test_markdown_filenames)
+    def test_highlight_md_codeblocks(self, base_filename, test_filename, want_filename):
+        # Test to ensure codeblocks in markdown files are correctly highlighted.
+
+        # Copy the base file we'll need to test.
+        shutil.copyfile(base_filename, test_filename)
+
+        highlight_md_codeblocks(test_filename)
+
+        mdfile_got = open(test_filename)
+        mdfile_want = open(want_filename)
+
+        self.assertEqual(mdfile_got.read(), mdfile_want.read())
+        # Clean up test file.
+        os.remove(test_filename)
+
+
 if __name__ == '__main__':
     unittest.main()