diff --git a/docfx_yaml/extension.py b/docfx_yaml/extension.py index 9e373432..b7ad6743 100644 --- a/docfx_yaml/extension.py +++ b/docfx_yaml/extension.py @@ -270,6 +270,124 @@ def _extract_signature(obj_sig): return signature, parameters +# Given documentation docstring, parse them into summary_info. +def _extract_docstring_info(summary_info, summary, name): + top_summary = "" + + # Initialize known types needing further processing. + var_types = { + ':rtype:': 'returns', + ':returns:': 'returns', + ':type': 'variables', + ':param': 'variables', + ':raises': 'exceptions', + ':raises:': 'exceptions' + } + + # Clean the string by cleaning newlines and backlashes, then split by white space. + config = Config(napoleon_use_param=True, napoleon_use_rtype=True) + # Convert Google style to reStructuredText + parsed_text = str(GoogleDocstring(summary, config)) + + # Trim the top summary but maintain its formatting. + indexes = [] + for types in var_types: + # Ensure that we look for exactly the string we want. + # Adding the extra space for non-colon ending types + # helps determine if we simply ran into desired occurrence + # or if we ran into a similar looking syntax but shouldn't + # parse upon it. + types += ' ' if types[-1] != ':' else '' + if types in parsed_text: + index = parsed_text.find(types) + if index > -1: + # For now, skip on parsing custom fields like attribute + if types == ':type ' and 'attribute::' in parsed_text: + continue + indexes.append(index) + + # If we found types needing further processing, locate its index, + # if we found empty array for indexes, stop processing further. + index = min(indexes) if indexes else 0 + + # Store the top summary separately. + if index == 0: + return summary + + top_summary = parsed_text[:index] + parsed_text = parsed_text[index:] + + # Clean up whitespace and other characters + parsed_text = " ".join(filter(None, re.split(r'\|\s', parsed_text))).split() + + cur_type = '' + words = [] + arg_name = '' + index = 0 + # Used to track return type and description + r_type, r_descr = '', '' + + # Using counter iteration to easily extract names rather than + # coming up with more complicated stopping logic for each tags. + while index <= len(parsed_text): + word = parsed_text[index] if index < len(parsed_text) else "" + # Check if we encountered specific words. + if word in var_types or index == len(parsed_text): + # Finish processing previous section. + if cur_type: + if cur_type == ':type': + summary_info[var_types[cur_type]][arg_name]['var_type'] = " ".join(words) + elif cur_type == ':param': + summary_info[var_types[cur_type]][arg_name]['description'] = " ".join(words) + elif ":raises" in cur_type: + summary_info[var_types[cur_type]].append({ + 'var_type': arg_name, + 'description': " ".join(words) + }) + else: + if cur_type == ':rtype:': + r_type = " ".join(words) + else: + r_descr = " ".join(words) + if r_type and r_descr: + summary_info[var_types[cur_type]].append({ + 'var_type': r_type, + 'description': r_descr + }) + r_type, r_descr = '', '' + + else: + + # If after we processed the top summary and get in this state, + # likely we encountered a type that's not covered above or the docstring + # was formatted badly. This will likely break docfx job later on, should not + # process further. + if word not in var_types: + raise ValueError(f"Encountered wrong formatting, please check docstring for {name}") + + # Reached end of string, break after finishing processing + if index == len(parsed_text): + break + + # Start processing for new section + cur_type = word + if cur_type in [':type', ':param', ':raises', ':raises:']: + index += 1 + arg_name = parsed_text[index][:-1] + # Initialize empty dictionary if it doesn't exist already + if arg_name not in summary_info[var_types[cur_type]] and ':raises' not in cur_type: + summary_info[var_types[cur_type]][arg_name] = {} + + # Empty target string + words = [] + else: + words.append(word) + + index += 1 + + return top_summary + + def _create_datam(app, cls, module, name, _type, obj, lines=None): """ Build the data structure for an autodoc class @@ -291,108 +409,6 @@ def _update_friendly_package_name(path): return path - def _extract_docstring_info(summary_info, summary): - top_summary = "" - - # Initialize known types needing further processing. - var_types = { - ':rtype:': 'returns', - ':returns:': 'returns', - ':type': 'variables', - ':param': 'variables', - ':raises': 'exceptions', - ':raises:': 'exceptions' - } - - # Clean the string by cleaning newlines and backlashes, then split by white space. - config = Config(napoleon_use_param=True, napoleon_use_rtype=True) - # Convert Google style to reStructuredText - parsed_text = str(GoogleDocstring(summary, config)) - - # Trim the top summary but maintain its formatting. - indexes = [] - for types in var_types: - index = parsed_text.find(types) - if index > -1: - # For now, skip on parsing custom fields like attribute - if types == ':type' and 'attribute::' in parsed_text: - continue - indexes.append(index) - - # If we found types needing further processing, locate its index, - # if we found empty array for indexes, stop processing further. - index = min(indexes) if indexes else 0 - - # Store the top summary separately. - if index == 0: - top_summary = summary - else: - top_summary = parsed_text[:index] - parsed_text = parsed_text[index:] - - # Clean up whitespace and other characters - parsed_text = " ".join(filter(None, re.split(r'\n| |\|\s', parsed_text))).split(" ") - - cur_type = '' - words = [] - arg_name = '' - index = 0 - - # Using counter iteration to easily extract names rather than - # coming up with more complicated stopping logic for each tags. - while index <= len(parsed_text): - word = parsed_text[index] if index < len(parsed_text) else "" - # Check if we encountered specific words. - if word in var_types or index == len(parsed_text): - # Finish processing previous section. - if cur_type: - if cur_type == ':type': - summary_info[var_types[cur_type]][arg_name]['var_type'] = " ".join(words) - elif cur_type == ':param': - summary_info[var_types[cur_type]][arg_name]['description'] = " ".join(words) - elif ":raises" in cur_type: - summary_info[var_types[cur_type]].append({ - 'var_type': arg_name, - 'description': " ".join(words) - }) - elif cur_type == ':rtype:': - arg_name = " ".join(words) - else: - summary_info[var_types[cur_type]].append({ - 'var_type': arg_name, - 'description': " ".join(words) - }) - else: - # If after we processed the top summary and get in this state, - # likely we encountered a type that's not covered above or the docstring - # was formatted badly. This will likely break docfx job later on, should not - # process further. - if word not in var_types: - raise ValueError("Encountered wrong formatting, please check docstrings") - - # Reached end of string, break after finishing processing - if index == len(parsed_text): - break - - # Start processing for new section - cur_type = word - if cur_type in [':type', ':param', ':raises', ':raises:']: - index += 1 - arg_name = parsed_text[index][:-1] - # Initialize empty dictionary if it doesn't exist already - if arg_name not in summary_info[var_types[cur_type]] and ':raises' not in cur_type: - summary_info[var_types[cur_type]][arg_name] = {} - - # Empty target string - words = [] - else: - words.append(word) - - index += 1 - - return top_summary - - if lines is None: lines = [] short_name = name.split('.')[-1] @@ -421,7 +437,6 @@ def _extract_docstring_info(summary_info, summary): lines = lines.split("\n") if lines else [] except TypeError as e: print("couldn't getdoc from method, function: {}".format(e)) - elif _type in [PROPERTY]: lines = inspect.getdoc(obj) @@ -503,7 +518,7 @@ def _extract_docstring_info(summary_info, summary): # Extract summary info into respective sections. if summary: - top_summary = _extract_docstring_info(summary_info, summary) + top_summary = _extract_docstring_info(summary_info, summary, name) datam['summary'] = top_summary if args or sig or summary_info: diff --git a/tests/test_unit.py b/tests/test_unit.py index 5a605607..984257a2 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -1,5 +1,6 @@ from docfx_yaml.extension import find_unique_name from docfx_yaml.extension import disambiguate_toc_name +from docfx_yaml.extension import _extract_docstring_info import unittest @@ -44,5 +45,132 @@ def test_disambiguate_toc_name(self): self.assertEqual(yaml_want, yaml_got) + # Variables used for testing _extract_docstring_info + top_summary1_want = "\nSimple test for docstring.\n\n" + summary_info1_want = { + 'variables': { + 'arg1': { + 'var_type': 'int', + 'description': 'simple description.' + }, + 'arg2': { + 'var_type': 'str', + 'description': 'simple description for `arg2`.' + } + }, + 'returns': [ + { + 'var_type': 'str', + 'description': 'simple description for return value.' + } + ], + 'exceptions': [ + { + 'var_type': 'AttributeError', + 'description': 'if `condition x`.' + } + ] + } + + + def test_extract_docstring_info_normal_input(self): + + ## Test for normal input + summary_info1_got = { + 'variables': {}, + 'returns': [], + 'exceptions': [] + } + + summary1 = """ +Simple test for docstring. + +Args: + arg1(int): simple description. + arg2(str): simple description for `arg2`. + +Returns: + str: simple description for return value. + +Raises: + AttributeError: if `condition x`. +""" + + top_summary1_got = _extract_docstring_info(summary_info1_got, summary1, "") + + self.assertEqual(top_summary1_got, self.top_summary1_want) + self.assertEqual(summary_info1_got, self.summary_info1_want) + + + def test_extract_docstring_info_mixed_format(self): + ## Test for input coming in mixed format. + summary2 = """ +Simple test for docstring. + +:type arg1: int +:param arg1: simple description. +:param arg2: simple description for `arg2`. +:type arg2: str + +:rtype: str +:returns: simple description for return value. + +:raises AttributeError: if `condition x`. +""" + + summary_info2_got = { + 'variables': {}, + 'returns': [], + 'exceptions': [] + } + + top_summary2_got = _extract_docstring_info(summary_info2_got, summary2, "") + + # Output should be same as test 1 with normal input. + self.assertEqual(top_summary2_got, self.top_summary1_want) + self.assertEqual(summary_info2_got, self.summary_info1_want) + + + def test_extract_docstring_info_check_parser(self): + ## Test for parser to correctly scan docstring tokens and not custom fields + summary_info3_want = { + 'variables': {}, + 'returns': [], + 'exceptions': [] + } + + summary3 = """ +Union[int, None]: Expiration time in milliseconds for a partition. + +If :attr:`partition_expiration` is set and is +not set, :attr:`type_` will default to +:attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`. +It could return :param: with :returns as well. +""" + + summary_info3_got = { + 'variables': {}, + 'returns': [], + 'exceptions': [] + } + + # Nothing should change + top_summary3_want = summary3 + + top_summary3_got = _extract_docstring_info(summary_info3_got, summary3, "") + + self.assertEqual(top_summary3_got, top_summary3_want) + self.assertEqual(summary_info3_got, summary_info3_want) + + def test_extract_docstring_info_check_error(self): + ## Test for incorrectly formmatted docstring raising error + summary4 = """ +Description of docstring which should fail. + +:returns:param: +""" + with self.assertRaises(ValueError): + _extract_docstring_info({}, summary4, "error string") + if __name__ == '__main__': unittest.main()