feat: process xrefs properly (#78)

* accept change from upstream * feat: add full support for xrefs * test: update test case * fix: expand xref format for proper processing * test: update unit test * fix: handle xrefs for other products in the future
googleapis · Jul 28, 2021 · fcc1989 · fcc1989
1 parent da98240
commit fcc1989
Show file tree

Hide file tree

Showing 2 changed files with 73 additions and 47 deletions.
diff --git a/docfx_yaml/extension.py b/docfx_yaml/extension.py
@@ -102,6 +102,8 @@ def build_init(app):
     # This stores uidnames of docstrings already parsed
     app.env.docfx_uid_names = {}
 
+    app.env.docfx_xrefs = {}
+
     remote = getoutput('git remote -v')
 
     try:
@@ -190,7 +192,7 @@ def _refact_example_in_module_summary(lines):
 
 
 def _resolve_reference_in_module_summary(pattern, lines):
-    new_lines = []
+    new_lines, xrefs = [], []
     for line in lines:
         matched_objs = list(re.finditer(pattern, line))
         new_line = line
@@ -207,16 +209,35 @@ def _resolve_reference_in_module_summary(pattern, lines):
                     # match string like ':func:`~***`' or ':func:`***`'
                     index = matched_str.index('~') if '~' in matched_str else matched_str.index('`')
                     ref_name = matched_str[index+1:-1]
+
+                index = ref_name.rfind('.') + 1
+                # Keep only the last component of the target, e.g. "~Queue.get" yields "get".
+                ref_name = ref_name[index:]
+
             else:
                 index = matched_str.rfind('.') + 1
                 if index == 0:
                     # If there is no dot, push index to not include tilde
                     index = 1
-                # Find the last component of the target. "~Queue.get" only returns <xref:get>
                 ref_name = matched_str[index:]
-            new_line = new_line.replace(matched_str, '<xref:{}>'.format(ref_name))
+
+            # Find the uid to add for xref
+            index = matched_str.find("google.cloud")
+            if index > -1:
+                xref = matched_str[index:]
+                while not xref[-1].isalnum():
+                    xref = xref[:-1]
+                xrefs.append(xref)
+
+            # Check to see if we should create an xref for it.
+            if 'google.cloud' in matched_str:
+                new_line = new_line.replace(matched_str, '<xref uid=\"{}\">{}</xref>'.format(xref, ref_name))
+            # If it is not a Cloud library, don't create an xref for it.
+            else:
+                new_line = new_line.replace(matched_str, '`{}`'.format(ref_name))
+
         new_lines.append(new_line)
-    return new_lines
+    return new_lines, xrefs
 
 
 def enumerate_extract_signature(doc, max_args=20):
@@ -286,6 +307,8 @@ def _extract_signature(obj_sig):
 # Given documentation docstring, parse them into summary_info.
 def _extract_docstring_info(summary_info, summary, name):
     top_summary = ""
+    # Return clean summary if returning early.
+    parsed_text = summary
 
     # Initialize known types needing further processing.
     var_types = {
@@ -300,56 +323,38 @@ def _extract_docstring_info(summary_info, summary, name):
     initial_index = -1
 
     # Prevent GoogleDocstring crashing on custom types and parse all xrefs to normal
-    if '~' in summary or '<xref:' in summary:
+    if '<xref:' in parsed_text:
         type_pairs = []
-        # Find first character after one of the three combination
-        initial_index = min(
-          max(0, summary.find('~')), 
-          max(0, summary.find('<xref'))
-        )
+        initial_index = max(0, parsed_text.find('<xref'))
 
-        summary_part = summary[initial_index:]
+        summary_part = parsed_text[initial_index:]
 
-        # Remove all occurrences of "~xref" and "<xref:type>"
-        while '~' in summary_part or "<xref:" in summary_part:
-
-            # Expecting format of "~xref"
-            if '~' in summary_part:
-                initial_index += summary_part.find('~')
-                original_type = summary[initial_index:initial_index+(summary[initial_index:].find(':'))]
-                initial_index += len(original_type)
-                original_type = " ".join(filter(None, re.split(r'\n|  |\|\s|\t', original_type)))
-                safe_type = original_type[1:]
+        # Remove all occurrences of "<xref:type>"
+        while "<xref:" in summary_part:
 
             # Expecting format of "<xref:type>:"
-            elif "<xref:" in summary_part:
+            if "<xref:" in summary_part:
                 initial_index += summary_part.find("<xref")
-                original_type = summary[initial_index:initial_index+(summary[initial_index:].find('>'))+1]
+                original_type = parsed_text[initial_index:initial_index+(parsed_text[initial_index:].find('>'))+1]
                 initial_index += len(original_type)
                 original_type = " ".join(filter(None, re.split(r'\n|  |\|\s|\t', original_type)))
-                safe_type = original_type[6:-1]
+                safe_type = 'xref_' + original_type[6:-1]
             else:
                 raise ValueError("Encountered unexpected type in Exception docstring.")
 
             type_pairs.append([original_type, safe_type])
-            summary_part = summary[initial_index:]
+            summary_part = parsed_text[initial_index:]
 
         # Replace all the found occurrences
         for pairs in type_pairs:
             original_type, safe_type = pairs[0], pairs[1]
-            summary = summary.replace(original_type, safe_type)
+            parsed_text = parsed_text.replace(original_type, safe_type)
 
     # Clean the string by cleaning newlines and backslashes, then split by white space.
     config = Config(napoleon_use_param=True, napoleon_use_rtype=True)
     # Convert Google style to reStructuredText
-    parsed_text = str(GoogleDocstring(summary, config))
+    parsed_text = str(GoogleDocstring(parsed_text, config))
 
-    # Revert back to original type
-    if initial_index > -1:
-        for pairs in type_pairs:
-            original_type, safe_type = pairs[0], pairs[1]
-            parsed_text = parsed_text.replace(safe_type, original_type)
-
     # Trim the top summary but maintain its formatting.
     indexes = []
     for types in var_types:
@@ -378,6 +383,12 @@ def _extract_docstring_info(summary_info, summary, name):
     top_summary = parsed_text[:index]
     parsed_text = parsed_text[index:]
 
+    # Revert back to original type
+    if initial_index > -1:
+        for pairs in type_pairs:
+            original_type, safe_type = pairs[0], pairs[1]
+            parsed_text = parsed_text.replace(safe_type, original_type)
+
     # Clean up whitespace and other characters
     parsed_text = " ".join(filter(None, re.split(r'\|\s', parsed_text))).split()
 
@@ -576,9 +587,17 @@ def _update_friendly_package_name(path):
     if lines != []:
         # Resolve references for xrefs in two different formats.
         # REF_PATTERN checks for patterns like ":class:`~google.package.module`"
-        lines = _resolve_reference_in_module_summary(REF_PATTERN, lines)
+        lines, xrefs = _resolve_reference_in_module_summary(REF_PATTERN, lines)
+        for xref in xrefs:
+            if xref not in app.env.docfx_xrefs:
+                app.env.docfx_xrefs[xref] = ''
+
         # REF_PATTERN_LAST checks for patterns like "~package.module"
-        lines = _resolve_reference_in_module_summary(REF_PATTERN_LAST, lines)
+        lines, xrefs = _resolve_reference_in_module_summary(REF_PATTERN_LAST, lines)
+        for xref in xrefs:
+            if xref not in app.env.docfx_xrefs:
+                app.env.docfx_xrefs[xref] = ''
+
         summary = app.docfx_transform_string('\n'.join(_refact_example_in_module_summary(lines)))
 
         # Extract summary info into respective sections.
@@ -1175,6 +1194,14 @@ def convert_module_to_package_if_needed(obj):
 
         file_name_set.add(filename)
 
+    '''
+    # TODO: handle xref for other products.
+    xref_file = os.path.join(normalized_outdir, 'xrefs.yml')
+    with open(xref_file, 'w') as xref_file_obj:
+        for xref in app.env.docfx_xrefs:
+            xref_file_obj.write(f'{xref}\n')
+    '''
+
 def missing_reference(app, env, node, contnode):
     reftarget = ''
     refdoc = ''

diff --git a/tests/test_unit.py b/tests/test_unit.py
@@ -81,7 +81,7 @@ def test_reference_in_summary(self):
 resource will be written to the stream.
 
 Args:
-    transport (~requests.Session): A ``requests`` object which can
+    transport (~google.cloud.requests.Session): A ``requests`` object which can
         make authenticated requests.
 
     timeout (Optional[Union[float, Tuple[float, float]]]):
@@ -90,29 +90,28 @@ def test_reference_in_summary(self):
         several times using the same timeout each time.
 
         Can also be passed as a tuple (connect_timeout, read_timeout).
-        See :meth:`requests.Session.request` documentation for details.
+        See :meth:`google.cloud.requests.Session.request` documentation for details.
 
 Returns:
-    ~requests.Response: The HTTP response returned by ``transport``.
+    ~google.cloud.requests.Response: The HTTP response returned by ``transport``.
 
 Raises:
-    ~google.resumable_media.common.DataCorruption: If the download's
+    ~google.cloud.resumable_media.common.DataCorruption: If the download's
         checksum doesn't agree with server-computed checksum.
     ValueError: If the current :class:`Download` has already
         finished.
 """
         lines_got = lines_got.split("\n")
-
         # Resolve over different regular expressions for different types of reference patterns.
-        lines_got = _resolve_reference_in_module_summary(REF_PATTERN, lines_got)
-        lines_got = _resolve_reference_in_module_summary(REF_PATTERN_LAST, lines_got)
+        lines_got, xrefs = _resolve_reference_in_module_summary(REF_PATTERN, lines_got)
+        lines_got, xrefs = _resolve_reference_in_module_summary(REF_PATTERN_LAST, lines_got)
 
         lines_want = """
 If a ``stream`` is attached to this download, then the downloaded
 resource will be written to the stream.
 
 Args:
-    transport (<xref:Session>): A ``requests`` object which can
+    transport (<xref uid="google.cloud.requests.Session">Session</xref>): A ``requests`` object which can
         make authenticated requests.
 
     timeout (Optional[Union[float, Tuple[float, float]]]):
@@ -121,15 +120,15 @@ def test_reference_in_summary(self):
         several times using the same timeout each time.
 
         Can also be passed as a tuple (connect_timeout, read_timeout).
-        See <xref:requests.Session.request> documentation for details.
+        See <xref uid="google.cloud.requests.Session.request">request</xref> documentation for details.
 
 Returns:
-    <xref:Response>: The HTTP response returned by ``transport``.
+    <xref uid="google.cloud.requests.Response">Response</xref>: The HTTP response returned by ``transport``.
 
 Raises:
-    <xref:DataCorruption>: If the download's
+    <xref uid="google.cloud.resumable_media.common.DataCorruption">DataCorruption</xref>: If the download's
         checksum doesn't agree with server-computed checksum.
-    ValueError: If the current <xref:Download> has already
+    ValueError: If the current `Download` has already
         finished.
 """
         lines_want = lines_want.split("\n")