Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: parse xrefs differently with new xref format #90

Merged
merged 6 commits into from
Jul 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 36 additions & 13 deletions docfx_yaml/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,24 +327,37 @@ def _extract_docstring_info(summary_info, summary, name):
}

initial_index = -1
front_tag = '<xref'
end_tag = '/xref>'
end_len = len(end_tag)

# Prevent GoogleDocstring crashing on custom types and parse all xrefs to normal
if '<xref:' in parsed_text:
if front_tag in parsed_text:
type_pairs = []
initial_index = max(0, parsed_text.find('<xref'))
# Constant length for end of xref tag
initial_index = max(0, parsed_text.find(front_tag))

summary_part = parsed_text[initial_index:]

# Remove all occurrences of "<xref:type>"
while "<xref:" in summary_part:
# Remove all occurrences of "<xref uid="uid">text</xref>"
while front_tag in summary_part:

# Expecting format of "<xref:type>:"
if "<xref:" in summary_part:
initial_index += summary_part.find("<xref")
original_type = parsed_text[initial_index:initial_index+(parsed_text[initial_index:].find('>'))+1]
# Expecting format of "<xref uid="uid">text</xref>"
if front_tag in summary_part:
# Retrieve the index for starting position of xref tag
initial_index += summary_part.find(front_tag)

# Find the index of the end of xref tag, relative to the start of xref tag
end_tag_index = initial_index + parsed_text[initial_index:].find(end_tag) + end_len

# Retrieve the entire xref tag
original_type = parsed_text[initial_index:end_tag_index]
initial_index += len(original_type)
original_type = " ".join(filter(None, re.split(r'\n| |\|\s|\t', original_type)))
safe_type = 'xref_' + original_type[6:-1]

# Extract text from "<xref uid="uid">text</xref>"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a bit late now, but we could also add this UID to the references section with however we want it to show up. Then, we wouldn't need to do any HTML parsing.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll keep that in mind perhaps for non-Cloud or third party xrefs.

index = original_type.find(">")
safe_type = 'xref_' + original_type[index+1:index+(original_type[index:].find("<"))]
else:
raise ValueError("Encountered unexpected type in Exception docstring.")

Expand Down Expand Up @@ -451,10 +464,20 @@ def _extract_docstring_info(summary_info, summary, name):
cur_type = word
if cur_type in [':type', ':param', ':raises', ':raises:']:
index += 1
arg_name = parsed_text[index][:-1]
# Initialize empty dictionary if it doesn't exist already
if arg_name not in summary_info[var_types[cur_type]] and ':raises' not in cur_type:
summary_info[var_types[cur_type]][arg_name] = {}
# Exception that's not xref should be treated same as other names
if ':raises' not in cur_type or 'xref' not in parsed_text[index]:
arg_name = parsed_text[index][:-1]
# xrefs are treated by taking its second half and combining the two
elif ':raises' in cur_type and 'xref' in parsed_text[index]:
arg_name = f'{parsed_text[index]} {parsed_text[index+1][:-1]}'
index += 1

try:
# Initialize empty dictionary if it doesn't exist already
if arg_name not in summary_info[var_types[cur_type]] and ':raises' not in cur_type:
summary_info[var_types[cur_type]][arg_name] = {}
except KeyError:
raise KeyError(f"Encountered wrong formatting, please check docstring for {name}")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure this will work. What if there is a release with bad docs? We still need to generate docs for it. So, we should make our best guess and move on.

Copy link
Collaborator Author

@dandhlee dandhlee Jul 30, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Repos should have a docs-presubmit check for PRs, which runs the docs job to see whether it successfully runs this plugin on the repo for sphinx-build or not. For example see googleapis/python-firestore#407

While it's usually not set to be a required check, this lets contributors know that docs job would fail or pass before triggering a release.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. Might need to update this depending on how back filling old libraries goes.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do have the Kokoro job set up that could try and run it for all versions, I'll try testing on that for older versions.


# Empty target string
words = []
Expand Down
25 changes: 17 additions & 8 deletions tests/test_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,13 +335,22 @@ def test_extract_docstring_info_check_error(self):
with self.assertRaises(ValueError):
_extract_docstring_info({}, summary4, "error string")

summary5 = """
Description of malformed docstring.

Raises:
Error that should fail: if condition `x`.
"""
with self.assertRaises(KeyError):
_extract_docstring_info({}, summary5, "malformed docstring")


def test_extract_docstring_info_with_xref(self):
## Test with xref included in the summary, ensure they're processed as-is
summary_info_want = {
'variables': {
'arg1': {
'var_type': '<xref:google.spanner_v1.type.Type>',
'var_type': '<xref uid="google.spanner_v1.type.Type">Type</xref>',
'description': 'simple description.'
},
'arg2': {
Expand All @@ -351,13 +360,13 @@ def test_extract_docstring_info_with_xref(self):
},
'returns': [
{
'var_type': '<xref:Pair>',
'var_type': '<xref uid="Pair">Pair</xref>',
'description': 'simple description for return value.'
}
],
'exceptions': [
{
'var_type': '<xref:SpannerException>',
'var_type': '<xref uid="SpannerException">SpannerException</xref>',
'description': 'if `condition x`.'
}
]
Expand All @@ -366,15 +375,15 @@ def test_extract_docstring_info_with_xref(self):
summary = """
Simple test for docstring.

:type arg1: <xref:google.spanner_v1.type.Type>
:type arg1: <xref uid="google.spanner_v1.type.Type">Type</xref>
:param arg1: simple description.
:param arg2: simple description for `arg2`.
:type arg2: ~google.spanner_v1.type.dict

:rtype: <xref:Pair>
:rtype: <xref uid="Pair">Pair</xref>
:returns: simple description for return value.

:raises <xref:SpannerException>: if `condition x`.
:raises <xref uid="SpannerException">SpannerException</xref>: if `condition x`.
"""

summary_info_got = {
Expand All @@ -384,10 +393,10 @@ def test_extract_docstring_info_with_xref(self):
}

top_summary_got = _extract_docstring_info(summary_info_got, summary, "")

self.maxDiff = None
# Same as the top summary from previous example, compare with that
self.assertEqual(top_summary_got, self.top_summary1_want)
self.assertEqual(summary_info_got, summary_info_want)
self.assertDictEqual(summary_info_got, summary_info_want)

if __name__ == '__main__':
unittest.main()