From 22485e82b1e4c3be2f7589434d53c75b28921266 Mon Sep 17 00:00:00 2001
From: Dan Lee <71398022+dandhlee@users.noreply.github.com>
Date: Fri, 30 Jul 2021 11:41:40 -0400
Subject: [PATCH] fix: parse xrefs differently with new xref format (#90)

* fix: parse xrefs differently with new xref format

* fix: update Docstring parser

* test: update unit test

* fix: move error catching to another block

* test: update unittest to include error scenarios

* fix: clean up messy formatting
---
 docfx_yaml/extension.py | 49 ++++++++++++++++++++++++++++++-----------
 tests/test_unit.py      | 25 ++++++++++++++-------
 2 files changed, 53 insertions(+), 21 deletions(-)
diff --git a/docfx_yaml/extension.py b/docfx_yaml/extension.py
index d0e8316c..6b9b6496 100644
--- a/docfx_yaml/extension.py
+++ b/docfx_yaml/extension.py
@@ -327,24 +327,37 @@ def _extract_docstring_info(summary_info, summary, name):
     }
     
     initial_index = -1
+    front_tag = '<xref'
+    end_tag = '/xref>'
+    end_len = len(end_tag)
         
     # Prevent GoogleDocstring crashing on custom types and parse all xrefs to normal
-    if '<xref:' in parsed_text:
+    if front_tag in parsed_text:
         type_pairs = []
-        initial_index = max(0, parsed_text.find('<xref'))
+        # Start at the first xref tag (end_len above is the closing tag's constant length)
+        initial_index = max(0, parsed_text.find(front_tag))
 
         summary_part = parsed_text[initial_index:]
        
-        # Remove all occurrences of "<xref:type>"
-        while "<xref:" in summary_part:
+        # Remove all occurrences of "<xref uid="uid">text</xref>"
+        while front_tag in summary_part:
 
-            # Expecting format of "<xref:type>:"
-            if "<xref:" in summary_part:
-                initial_index += summary_part.find("<xref")
-                original_type = parsed_text[initial_index:initial_index+(parsed_text[initial_index:].find('>'))+1]
+            # Expecting format of "<xref uid="uid">text</xref>"
+            if front_tag in summary_part:
+                # Retrieve the index for starting position of xref tag
+                initial_index += summary_part.find(front_tag)
+
+                # Find the index just past the end of the xref tag, as an absolute position in parsed_text
+                end_tag_index = initial_index + parsed_text[initial_index:].find(end_tag) + end_len
+
+                # Retrieve the entire xref tag
+                original_type = parsed_text[initial_index:end_tag_index]
                 initial_index += len(original_type)
                 original_type = " ".join(filter(None, re.split(r'\n|  |\|\s|\t', original_type)))
-                safe_type = 'xref_' + original_type[6:-1]
+
+                # Extract text from "<xref uid="uid">text</xref>"
+                index = original_type.find(">")
+                safe_type = 'xref_' + original_type[index+1:index+(original_type[index:].find("<"))]
             else:
                 raise ValueError("Encountered unexpected type in Exception docstring.")
 
@@ -451,10 +464,20 @@ def _extract_docstring_info(summary_info, summary, name):
             cur_type = word
             if cur_type in [':type', ':param', ':raises', ':raises:']:
                 index += 1
-                arg_name = parsed_text[index][:-1]
-                # Initialize empty dictionary if it doesn't exist already
-                if arg_name not in summary_info[var_types[cur_type]] and ':raises' not in cur_type:
-                    summary_info[var_types[cur_type]][arg_name] = {}
+                # Exception that's not xref should be treated same as other names
+                if ':raises' not in cur_type or 'xref' not in parsed_text[index]:
+                    arg_name = parsed_text[index][:-1]
+                # An xref spans two words here, so join this word with the next one
+                elif ':raises' in cur_type and 'xref' in parsed_text[index]:
+                    arg_name = f'{parsed_text[index]} {parsed_text[index+1][:-1]}'
+                    index += 1
+
+                try:
+                    # Initialize empty dictionary if it doesn't exist already
+                    if arg_name not in summary_info[var_types[cur_type]] and ':raises' not in cur_type:
+                        summary_info[var_types[cur_type]][arg_name] = {}
+                except KeyError:
+                    raise KeyError(f"Encountered wrong formatting, please check docstring for {name}")
 
             # Empty target string
             words = []
diff --git a/tests/test_unit.py b/tests/test_unit.py
index 1353f56f..7d11bdde 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -335,13 +335,22 @@ def test_extract_docstring_info_check_error(self):
         with self.assertRaises(ValueError):
             _extract_docstring_info({}, summary4, "error string")
 
+        summary5 = """
+Description of malformed docstring.
+
+Raises:
+    Error that should fail: if condition `x`.
+"""
+        with self.assertRaises(KeyError):
+            _extract_docstring_info({}, summary5, "malformed docstring")
+
 
     def test_extract_docstring_info_with_xref(self):
         ## Test with xref included in the summary, ensure they're processed as-is
         summary_info_want = {
             'variables': {
                 'arg1': {
-                    'var_type': '<xref:google.spanner_v1.type.Type>',
+                    'var_type': '<xref uid="google.spanner_v1.type.Type">Type</xref>',
                     'description': 'simple description.'
                 },
                 'arg2': {
@@ -351,13 +360,13 @@ def test_extract_docstring_info_with_xref(self):
             },
             'returns': [
                 {
-                    'var_type': '<xref:Pair>', 
+                    'var_type': '<xref uid="Pair">Pair</xref>', 
                     'description': 'simple description for return value.'
                 }
             ],
             'exceptions': [
                 {
-                    'var_type': '<xref:SpannerException>', 
+                    'var_type': '<xref uid="SpannerException">SpannerException</xref>', 
                     'description': 'if `condition x`.'
                 }
             ]
@@ -366,15 +375,15 @@ def test_extract_docstring_info_with_xref(self):
         summary = """
 Simple test for docstring.
 
-:type arg1: <xref:google.spanner_v1.type.Type>
+:type arg1: <xref uid="google.spanner_v1.type.Type">Type</xref>
 :param arg1: simple description.
 :param arg2: simple description for `arg2`.
 :type arg2: ~google.spanner_v1.type.dict
 
-:rtype: <xref:Pair>
+:rtype: <xref uid="Pair">Pair</xref>
 :returns: simple description for return value.
 
-:raises <xref:SpannerException>: if `condition x`. 
+:raises <xref uid="SpannerException">SpannerException</xref>: if `condition x`. 
 """
 
         summary_info_got = {
@@ -384,10 +393,10 @@ def test_extract_docstring_info_with_xref(self):
         }
 
         top_summary_got = _extract_docstring_info(summary_info_got, summary, "")
-
+        self.maxDiff = None
         # Same as the top summary from previous example, compare with that
         self.assertEqual(top_summary_got, self.top_summary1_want)
-        self.assertEqual(summary_info_got, summary_info_want)
+        self.assertDictEqual(summary_info_got, summary_info_want)
 
 if __name__ == '__main__':
     unittest.main()