From 6dbb43462336f341f74be3a6ec0fdec7cd40530b Mon Sep 17 00:00:00 2001
From: gnikit
Date: Tue, 15 Mar 2022 18:17:08 +0000
Subject: [PATCH] Rewrites the documentation parsing. This should make it
 easier to extend the documentation parsing functionality for FORD and Doxygen
 in the future

---
 fortls/objects.py       |   2 +-
 fortls/parse_fortran.py | 163 ++++++++++++++++++++++++++++------------
 2 files changed, 114 insertions(+), 51 deletions(-)

diff --git a/fortls/objects.py b/fortls/objects.py
index e90b15bc..7ace630b 100644
--- a/fortls/objects.py
+++ b/fortls/objects.py
@@ -1859,7 +1859,7 @@ def __init__(self, file_obj=None):
         self.END_SCOPE_REGEX: Pattern = None
         self.enc_scope_name: str = None
         self.last_obj = None
-        self.pending_doc = None
+        self.pending_doc: str = None
 
     def create_none_scope(self):
         """Create empty scope to hold non-module contained items"""
diff --git a/fortls/parse_fortran.py b/fortls/parse_fortran.py
index 2c9665da..950d3cdb 100644
--- a/fortls/parse_fortran.py
+++ b/fortls/parse_fortran.py
@@ -1238,7 +1238,7 @@ def parse(
 
         line_number = 0
         block_id_stack = []
-        doc_string: str = None
+        docs: list[str] = []  # list used to temporarily store docstrings
         counters = Counter(
             do=0,
             ifs=0,
@@ -1263,17 +1263,13 @@ def parse(
 
             if line == "":
                 continue  # Skip empty lines
-            # Parse Documentation comments and skip all other comments
-            # this function should also nullify doc_string
-            idx = self._parse_docs(line, line_number, file_ast, doc_string)
+
+            # Parse documentation strings to AST nodes; this implicitly operates
+            # on docs, i.e. it appends to the list or empties it
+            idx = self.parse_docs(line, line_number, file_ast, docs)
             if idx:
-                line_number = idx[0]
-                doc_string = idx[1]
+                line_number = idx
                 continue
-            if doc_string:
-                file_ast.add_doc("!! " + doc_string)
-                self.parser_debug("Doc", doc_string, line_number)
-                doc_string = None
             # Handle preprocessing regions
             do_skip = False
             for pp_reg in pp_skips:
@@ -1298,12 +1294,8 @@ def parse(
             comm_ind = line_stripped.find("!")
             if comm_ind >= 0:
                 line_no_comment = line[:comm_ind]
-                line_post_comment = line[comm_ind:]
                 line_stripped = line_stripped[:comm_ind]
-                # Look for trailing doc string
-                doc_match = FRegex.FREE_DOC.match(line_post_comment)
-                if doc_match:
-                    doc_string = line_post_comment[doc_match.end(0) :].strip()
+                docs = self.get_single_line_docstring(line[comm_ind:])
             else:
                 line_no_comment = line
             # Split lines with semicolons, place the multiple lines into a stack
@@ -1729,48 +1721,119 @@ def _parse_contains(self, line: str, ln: int, file_ast: fortran_ast):
         self.parser_debug("CONTAINS", self.line, ln)
         return True
 
-    def _parse_docs(
-        self,
-        line: str,
-        ln: int,
-        file_ast: fortran_ast,
-        doc_string: str,
-    ):
-        match = self.COMMENT_LINE_MATCH.match(line)
-        if not match:
+    def parse_docs(self, line: str, ln: int, file_ast: fortran_ast, docs: list[str]):
+        """Parse documentation strings of Doxygen or FORD style.
+        Multiline docstrings are detected if the first comment starts with `!>`;
+        docstring continuations are detected with either `!>`, `!<` or `!!`.
+
+        Parameters
+        ----------
+        line : str
+            Document line
+        ln : int
+            Line number
+        file_ast : fortran_ast
+            AST object
+        docs : list[str]
+            Docstrings that are pending processing, e.g. single-line docstrings
+        """
+
+        def format(docs: list[str]) -> str:
+            if len(docs) == 1:
+                return f"!! {docs[0]}"
+            return "!! " + "\n!! ".join(docs)
".join(docs) + + def add_line_comment(file_ast: fortran_ast, docs: list[str]): + # Handle dangling comments from previous line + if docs: + file_ast.add_doc(format(docs)) + log.debug(f"{format(docs)} !!! Doc string({ln})") + docs[:] = [] # empty the documentation stack + + # Check for comments in line + if not self.COMMENT_LINE_MATCH.match(line): + add_line_comment(file_ast, docs) return False # Check for documentation doc_match = self.DOC_COMMENT_MATCH.match(line) if not doc_match: - return ln, doc_string - doc_lines = [line[doc_match.end(0) :].strip()] - if doc_match.group(1) == ">": - doc_forward = True - else: - if doc_string: - doc_lines = [doc_string] + doc_lines - doc_string = None - doc_forward = False + add_line_comment(file_ast, docs) + return False + _ln = ln - if ln < self.nLines: - for i in range(ln, self.nLines): - # @note this gets the next line, index is 0-based - next_line = self.get_line(i, pp_content=True) - match = self.DOC_COMMENT_MATCH.match(next_line) - if not match: - ln = i # move the line number at the end of the docstring - break - doc_lines.append(next_line[match.end(0) :].strip()) + ln, docs[:], predocmark = self.get_docstring(ln, line, doc_match, docs) - # Count the total length of all the stings in doc_lines + # Count the total length of all the stings in docs # most efficient implementation, see: shorturl.at/dfmyV - if len("".join(doc_lines)) > 0: - file_ast.add_doc("!! " + "\n!! ".join(doc_lines), forward=doc_forward) - # if debug: - for (i, doc_line) in enumerate(doc_lines): + if len("".join(docs)) > 0: + file_ast.add_doc(format(docs), forward=predocmark) + for (i, doc_line) in enumerate(docs): log.debug(f"{doc_line} !!! Doc string({_ln + i})") - # self.parser_debug("Doc", doc_line, line_number + i) - return ln, doc_string + docs[:] = [] + return ln + + def get_docstring( + self, ln: int, line: str, match: Pattern, docs: list[str] + ) -> tuple[int, list[str], bool]: + """Extract entire documentation strings from the current file position + + Parameters + ---------- + ln : int + Line number + line : str + Document line, not necessarily produced by `get_line()` + match : Pattern + Regular expression DOC match + docs : list[str] + Docstrings that are pending processing e.g. single line docstrings + + Returns + ------- + tuple[int, list[str], bool] + The new line number at the end of the docstring, the docstring and + a boolean flag indicating whether the docstring precedes the AST node + (Doxygen style) or succeeds it (traditional FORD style) + """ + docstring: list[str] = docs + docstring.append(line[match.end(0) :].strip()) + predocmark = True if match.group(1) == ">" else False + + if ln >= self.nLines: + return ln, docstring, predocmark + + # @note line index is 0-based + # Start from the current line until EOF and check for docs + for i in range(ln, self.nLines): + next_line = self.get_line(i, pp_content=True) + match = self.DOC_COMMENT_MATCH.match(next_line) + if not match: + ln = i + break + docstring.append(next_line[match.end(0) :].strip()) + return ln, docstring, predocmark + + def get_single_line_docstring(self, line: str) -> list[str]: + """Get a docstring of a single line. This is the same for both Legacy + and Modern Fortran + + Parameters + ---------- + line : str + Line of code + + Returns + ------- + list[str] + A list containing the docstring. 
+        """
+        match = FRegex.FREE_DOC.match(line)
+        if not match:
+            return []
+        # return an empty list if the extracted docstring is empty
+        doc = line[match.end(0) :].strip()
+        return [doc] if doc else []
 
     @staticmethod
     def parser_debug(msg: str, line: str, ln: int, scope: bool = False):
@@ -1779,7 +1842,7 @@ def parser_debug(msg: str, line: str, ln: int, scope: bool = False):
         else:
             log.debug(f"{line.strip()} !!! {msg} statement({ln})")
 
-    def get_comment_regexs(self):
+    def get_comment_regexs(self) -> tuple[Pattern, Pattern]:
         if self.fixed:
             return FRegex.FIXED_COMMENT, FRegex.FIXED_DOC
        return FRegex.FREE_COMMENT, FRegex.FREE_DOC
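
The nested format() helper in parse_docs() is what normalises the accumulated doc
lines before add_doc() is called: every stored line is prefixed with "!! " so
downstream consumers see one uniform docstring. A standalone sketch of that
behaviour (the name join_docstring is illustrative, not part of fortls):

def join_docstring(docs: list[str]) -> str:
    # Mirrors the nested format() helper in parse_docs(): prefix every
    # accumulated line with "!! " before it is attached to the AST node.
    if len(docs) == 1:
        return f"!! {docs[0]}"
    return "!! " + "\n!! ".join(docs)


print(join_docstring(["Solves Ax = b", "using LU decomposition"]))
# !! Solves Ax = b
# !! using LU decomposition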
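
get_docstring() walks forward from the current position, collecting `!>`, `!<` or
`!!` continuations until the first non-documentation line, and reports whether the
block is a Doxygen-style predoc (a leading `!>` documents the node that follows).
A toy version over an in-memory list of lines; DOC_COMMENT_LIKE below is an assumed
stand-in for the real DOC_COMMENT_MATCH pattern, which is not shown in this patch:

import re

DOC_COMMENT_LIKE = re.compile(r"^\s*!([<>!])")  # assumed, not the fortls regex


def collect_docstring(lines: list[str], ln: int) -> tuple[int, list[str], bool]:
    # lines is the whole file; ln is the 1-based number of the first doc line
    match = DOC_COMMENT_LIKE.match(lines[ln - 1])
    docs = [lines[ln - 1][match.end(0) :].strip()]
    predoc = match.group(1) == ">"  # "!>" documents the *following* AST node
    for i in range(ln, len(lines)):  # 0-based scan of the continuation lines
        cont = DOC_COMMENT_LIKE.match(lines[i])
        if not cont:
            return i, docs, predoc  # the parser resumes at this line
        docs.append(lines[i][cont.end(0) :].strip())
    return len(lines), docs, predoc


src = [
    "!> Reads the mesh file",
    "!! and allocates the grid",
    "subroutine read_mesh()",
]
print(collect_docstring(src, 1))
# (2, ['Reads the mesh file', 'and allocates the grid'], True)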
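
get_single_line_docstring() covers the trailing-comment case: parse() slices the
source line at the `!` index and passes only the comment part, so a docstring is
returned only when the comment itself starts with a doc marker. A sketch using the
same assumed regex shape (FRegex.FREE_DOC is defined elsewhere in fortls):

import re

FREE_DOC_LIKE = re.compile(r"^\s*!([<>!])")  # assumed, not the fortls regex


def single_line_docstring(line: str) -> list[str]:
    # Same logic as get_single_line_docstring(), with the stand-in regex
    match = FREE_DOC_LIKE.match(line)
    if not match:
        return []
    doc = line[match.end(0) :].strip()
    return [doc] if doc else []


# e.g. for the source line `real :: dt  !! time step size` the caller passes
# only the slice starting at the comment character:
print(single_line_docstring("!! time step size"))  # ['time step size']
print(single_line_docstring("! plain comment"))    # []
print(single_line_docstring("!!"))                 # []  (empty docstring)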