fix: properly handle Raises section for GoogleDocstring (#56)

* fix: add additional handler for GoogleDocstring * test: add unittest * fix: simplify function call and references in test * fix: update test to include xref version * fix: update variable name to lowercase
googleapis · Jun 24, 2021 · 793dd48 · 793dd48
1 parent d1e18c7
commit 793dd48
Show file tree

Hide file tree

Showing 2 changed files with 194 additions and 10 deletions.
diff --git a/docfx_yaml/extension.py b/docfx_yaml/extension.py
@@ -72,7 +72,10 @@ class Bcolors:
 REFMETHOD = 'meth'
 REFFUNCTION = 'func'
 INITPY = '__init__.py'
+# Regular expression matching role references such as ":class:`~package_v1.module`"
 REF_PATTERN = ':(py:)?(func|class|meth|mod|ref|attr|exc):`~?[a-zA-Z0-9_\.<> ]*?`'
+# Regular expression matching bare tilde references such as "~package_v1.subpackage.module"
+REF_PATTERN_LAST = '~(([a-zA-Z0-9_<>]*\.)*[a-zA-Z0-9_<>]*)'
 
 PROPERTY = 'property'
 
@@ -184,23 +187,31 @@ def _refact_example_in_module_summary(lines):
     return new_lines
 
 
-def _resolve_reference_in_module_summary(lines):
+def _resolve_reference_in_module_summary(pattern, lines):
     new_lines = []
     for line in lines:
-        matched_objs = list(re.finditer(REF_PATTERN, line))
+        matched_objs = list(re.finditer(pattern, line))
         new_line = line
         for matched_obj in matched_objs:
             start = matched_obj.start()
             end = matched_obj.end()
             matched_str = line[start:end]
-            if '<' in matched_str and '>' in matched_str:
-                # match string like ':func:`***<***>`'
-                index = matched_str.index('<')
-                ref_name = matched_str[index+1:-2]
+            if pattern == REF_PATTERN:
+                if '<' in matched_str and '>' in matched_str:
+                    # match string like ':func:`***<***>`'
+                    index = matched_str.index('<')
+                    ref_name = matched_str[index+1:-2]
+                else:
+                    # match string like ':func:`~***`' or ':func:`***`'
+                    index = matched_str.index('~') if '~' in matched_str else matched_str.index('`')
+                    ref_name = matched_str[index+1:-1]
             else:
-                # match string like ':func:`~***`' or ':func:`***`'
-                index = matched_str.index('~') if '~' in matched_str else matched_str.index('`')
-                ref_name = matched_str[index+1:-1]
+                index = matched_str.rfind('.') + 1
+                if index == 0:
+                    # If there is no dot, push index to not include tilde
+                    index = 1
+                # Find the last component of the target. "~Queue.get" only returns <xref:get>
+                ref_name = matched_str[index:]
             new_line = new_line.replace(matched_str, '<xref:{}>'.format(ref_name))
         new_lines.append(new_line)
     return new_lines
@@ -283,11 +294,59 @@ def _extract_docstring_info(summary_info, summary, name):
         ':raises': 'exceptions',
         ':raises:': 'exceptions'
     }
+
+    initial_index = -1
+
+    # Prevent GoogleDocstring crashing on custom types and parse all xrefs to normal
+    if '~' in summary or '<xref:' in summary:
+        type_pairs = []
+        # Find first character after one of the three combination
+        initial_index = min(
+          max(0, summary.find('~')), 
+          max(0, summary.find('<xref'))
+        )
+
+        summary_part = summary[initial_index:]
+
+        # Remove all occurrences of "~xref" and "<xref:type>"
+        while '~' in summary_part or "<xref:" in summary_part:
+
+            # Expecting format of "~xref"
+            if '~' in summary_part:
+                initial_index += summary_part.find('~')
+                original_type = summary[initial_index:initial_index+(summary[initial_index:].find(':'))]
+                initial_index += len(original_type)
+                original_type = " ".join(filter(None, re.split(r'\n|  |\|\s|\t', original_type)))
+                safe_type = original_type[1:]
+
+            # Expecting format of "<xref:type>:"
+            elif "<xref:" in summary_part:
+                initial_index += summary_part.find("<xref")
+                original_type = summary[initial_index:initial_index+(summary[initial_index:].find('>'))+1]
+                initial_index += len(original_type)
+                original_type = " ".join(filter(None, re.split(r'\n|  |\|\s|\t', original_type)))
+                safe_type = original_type[6:-1]
+            else:
+                raise ValueError("Encountered unexpected type in Exception docstring.")
+
+            type_pairs.append([original_type, safe_type])
+            summary_part = summary[initial_index:]
+
+        # Replace all the found occurrences
+        for pairs in type_pairs:
+            original_type, safe_type = pairs[0], pairs[1]
+            summary = summary.replace(original_type, safe_type)
 
     # Clean the string by cleaning newlines and backlashes, then split by white space.
     config = Config(napoleon_use_param=True, napoleon_use_rtype=True)
     # Convert Google style to reStructuredText
     parsed_text = str(GoogleDocstring(summary, config))
+
+    # Revert back to original type
+    if initial_index > -1:
+        for pairs in type_pairs:
+            original_type, safe_type = pairs[0], pairs[1]
+            parsed_text = parsed_text.replace(safe_type, original_type)
 
     # Trim the top summary but maintain its formatting.
     indexes = []
@@ -513,7 +572,11 @@ def _update_friendly_package_name(path):
 
     # Add extracted summary
     if lines != []:
-        lines = _resolve_reference_in_module_summary(lines)
+        # Resolve references for xrefs in two different formats.
+        # REF_PATTERN checks for patterns like ":class:`~google.package.module`"
+        lines = _resolve_reference_in_module_summary(REF_PATTERN, lines)
+        # REF_PATTERN_LAST checks for patterns like "~package.module"
+        lines = _resolve_reference_in_module_summary(REF_PATTERN_LAST, lines)
         summary = app.docfx_transform_string('\n'.join(_refact_example_in_module_summary(lines)))
 
         # Extract summary info into respective sections.

diff --git a/tests/test_unit.py b/tests/test_unit.py
@@ -1,5 +1,8 @@
 from docfx_yaml.extension import find_unique_name
 from docfx_yaml.extension import disambiguate_toc_name
+from docfx_yaml.extension import _resolve_reference_in_module_summary
+from docfx_yaml.extension import REF_PATTERN
+from docfx_yaml.extension import REF_PATTERN_LAST
 from docfx_yaml.extension import _extract_docstring_info
 
 import unittest
@@ -45,6 +48,69 @@ def test_disambiguate_toc_name(self):
 
         self.assertEqual(yaml_want, yaml_got)
 
+
+    def test_reference_in_summary(self):
+        lines_got = """
+If a ``stream`` is attached to this download, then the downloaded
+resource will be written to the stream.
+
+Args:
+    transport (~requests.Session): A ``requests`` object which can
+        make authenticated requests.
+
+    timeout (Optional[Union[float, Tuple[float, float]]]):
+        The number of seconds to wait for the server response.
+        Depending on the retry strategy, a request may be repeated
+        several times using the same timeout each time.
+
+        Can also be passed as a tuple (connect_timeout, read_timeout).
+        See :meth:`requests.Session.request` documentation for details.
+
+Returns:
+    ~requests.Response: The HTTP response returned by ``transport``.
+
+Raises:
+    ~google.resumable_media.common.DataCorruption: If the download's
+        checksum doesn't agree with server-computed checksum.
+    ValueError: If the current :class:`Download` has already
+        finished.
+"""
+        lines_got = lines_got.split("\n")
+
+        # Resolve over different regular expressions for different types of reference patterns.
+        lines_got = _resolve_reference_in_module_summary(REF_PATTERN, lines_got)
+        lines_got = _resolve_reference_in_module_summary(REF_PATTERN_LAST, lines_got)
+
+        lines_want = """
+If a ``stream`` is attached to this download, then the downloaded
+resource will be written to the stream.
+
+Args:
+    transport (<xref:Session>): A ``requests`` object which can
+        make authenticated requests.
+
+    timeout (Optional[Union[float, Tuple[float, float]]]):
+        The number of seconds to wait for the server response.
+        Depending on the retry strategy, a request may be repeated
+        several times using the same timeout each time.
+
+        Can also be passed as a tuple (connect_timeout, read_timeout).
+        See <xref:requests.Session.request> documentation for details.
+
+Returns:
+    <xref:Response>: The HTTP response returned by ``transport``.
+
+Raises:
+    <xref:DataCorruption>: If the download's
+        checksum doesn't agree with server-computed checksum.
+    ValueError: If the current <xref:Download> has already
+        finished.
+"""
+        lines_want = lines_want.split("\n")
+
+        self.assertEqual(lines_got, lines_want)
+
+
     # Variables used for testing _extract_docstring_info
     top_summary1_want = "\nSimple test for docstring.\n\n"
     summary_info1_want = {
@@ -162,6 +228,7 @@ def test_extract_docstring_info_check_parser(self):
         self.assertEqual(top_summary3_got, top_summary3_want)
         self.assertEqual(summary_info3_got, summary_info3_want)
 
+
     def test_extract_docstring_info_check_error(self):
         ## Test for incorrectly formmatted docstring raising error
         summary4 = """
@@ -172,5 +239,59 @@ def test_extract_docstring_info_check_error(self):
         with self.assertRaises(ValueError):
             _extract_docstring_info({}, summary4, "error string")
 
+
+    def test_extract_docstring_info_with_xref(self):
+        ## Test with xref included in the summary, ensure they're processed as-is
+        summary_info_want = {
+            'variables': {
+                'arg1': {
+                    'var_type': '<xref:google.spanner_v1.type.Type>',
+                    'description': 'simple description.'
+                },
+                'arg2': {
+                    'var_type': '~google.spanner_v1.type.dict',
+                    'description': 'simple description for `arg2`.'
+                }
+            },
+            'returns': [
+                {
+                    'var_type': '<xref:Pair>', 
+                    'description': 'simple description for return value.'
+                }
+            ],
+            'exceptions': [
+                {
+                    'var_type': '<xref:SpannerException>', 
+                    'description': 'if `condition x`.'
+                }
+            ]
+        }
+
+        summary = """
+Simple test for docstring.
+
+:type arg1: <xref:google.spanner_v1.type.Type>
+:param arg1: simple description.
+:param arg2: simple description for `arg2`.
+:type arg2: ~google.spanner_v1.type.dict
+
+:rtype: <xref:Pair>
+:returns: simple description for return value.
+
+:raises <xref:SpannerException>: if `condition x`. 
+"""
+
+        summary_info_got = {
+            'variables': {},
+            'returns': [],
+            'exceptions': []
+        }
+
+        top_summary_got = _extract_docstring_info(summary_info_got, summary, "")
+
+        # Same as the top summary from previous example, compare with that
+        self.assertEqual(top_summary_got, self.top_summary1_want)
+        self.assertEqual(summary_info_got, summary_info_want)
+
 if __name__ == '__main__':
     unittest.main()