Skip to content

Commit

Permalink
Rewrites the documentation parsing.
Browse files Browse the repository at this point in the history
This should make it easier to extend the parsing documentation
functionality of FORD and Doxygen in the future
  • Loading branch information
gnikit committed Mar 15, 2022
1 parent 6aac856 commit 6dbb434
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 51 deletions.
2 changes: 1 addition & 1 deletion fortls/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -1859,7 +1859,7 @@ def __init__(self, file_obj=None):
self.END_SCOPE_REGEX: Pattern = None
self.enc_scope_name: str = None
self.last_obj = None
self.pending_doc = None
self.pending_doc: str = None

def create_none_scope(self):
"""Create empty scope to hold non-module contained items"""
Expand Down
163 changes: 113 additions & 50 deletions fortls/parse_fortran.py
Original file line number Diff line number Diff line change
Expand Up @@ -1238,7 +1238,7 @@ def parse(

line_number = 0
block_id_stack = []
doc_string: str = None
docs: list[str] = [] # list used to temporarily store docstrings
counters = Counter(
do=0,
ifs=0,
Expand All @@ -1263,17 +1263,13 @@ def parse(

if line == "":
continue # Skip empty lines
# Parse Documentation comments and skip all other comments
# this function should also nullify doc_string
idx = self._parse_docs(line, line_number, file_ast, doc_string)

# Parse documentation strings to AST nodes, this implicitly operates
# on docs, i.e. appends or nullifies it
idx = self.parse_docs(line, line_number, file_ast, docs)
if idx:
line_number = idx[0]
doc_string = idx[1]
line_number = idx
continue
if doc_string:
file_ast.add_doc("!! " + doc_string)
self.parser_debug("Doc", doc_string, line_number)
doc_string = None
# Handle preprocessing regions
do_skip = False
for pp_reg in pp_skips:
Expand All @@ -1298,12 +1294,8 @@ def parse(
comm_ind = line_stripped.find("!")
if comm_ind >= 0:
line_no_comment = line[:comm_ind]
line_post_comment = line[comm_ind:]
line_stripped = line_stripped[:comm_ind]
# Look for trailing doc string
doc_match = FRegex.FREE_DOC.match(line_post_comment)
if doc_match:
doc_string = line_post_comment[doc_match.end(0) :].strip()
docs = self.get_single_line_docstring(line[comm_ind:])
else:
line_no_comment = line
# Split lines with semicolons, place the multiple lines into a stack
Expand Down Expand Up @@ -1729,48 +1721,119 @@ def _parse_contains(self, line: str, ln: int, file_ast: fortran_ast):
self.parser_debug("CONTAINS", self.line, ln)
return True

def _parse_docs(
self,
line: str,
ln: int,
file_ast: fortran_ast,
doc_string: str,
):
match = self.COMMENT_LINE_MATCH.match(line)
if not match:
def parse_docs(self, line: str, ln: int, file_ast: fortran_ast, docs: list[str]):
"""Parse documentation stings of style Doxygen or FORD.
Multiline docstrings are detected if the first comment starts with `!>`
docstring continuations are detected with either `!>`, `!<` or `!!`
Parameters
----------
line : str
Document line
ln : int
Line number
file_ast : fortran_ast
AST object
docs : list[str]
Docstrings that are pending processing e.g. single line docstrings
"""

def format(docs: list[str]) -> str:
if len(docs) == 1:
return f"!! {docs[0]}"
return "!! " + "\n!! ".join(docs)

def add_line_comment(file_ast: fortran_ast, docs: list[str]):
# Handle dangling comments from previous line
if docs:
file_ast.add_doc(format(docs))
log.debug(f"{format(docs)} !!! Doc string({ln})")
docs[:] = [] # empty the documentation stack

# Check for comments in line
if not self.COMMENT_LINE_MATCH.match(line):
add_line_comment(file_ast, docs)
return False
# Check for documentation
doc_match = self.DOC_COMMENT_MATCH.match(line)
if not doc_match:
return ln, doc_string
doc_lines = [line[doc_match.end(0) :].strip()]
if doc_match.group(1) == ">":
doc_forward = True
else:
if doc_string:
doc_lines = [doc_string] + doc_lines
doc_string = None
doc_forward = False
add_line_comment(file_ast, docs)
return False

_ln = ln
if ln < self.nLines:
for i in range(ln, self.nLines):
# @note this gets the next line, index is 0-based
next_line = self.get_line(i, pp_content=True)
match = self.DOC_COMMENT_MATCH.match(next_line)
if not match:
ln = i # move the line number at the end of the docstring
break
doc_lines.append(next_line[match.end(0) :].strip())
ln, docs[:], predocmark = self.get_docstring(ln, line, doc_match, docs)

# Count the total length of all the stings in doc_lines
# Count the total length of all the stings in docs
# most efficient implementation, see: shorturl.at/dfmyV
if len("".join(doc_lines)) > 0:
file_ast.add_doc("!! " + "\n!! ".join(doc_lines), forward=doc_forward)
# if debug:
for (i, doc_line) in enumerate(doc_lines):
if len("".join(docs)) > 0:
file_ast.add_doc(format(docs), forward=predocmark)
for (i, doc_line) in enumerate(docs):
log.debug(f"{doc_line} !!! Doc string({_ln + i})")
# self.parser_debug("Doc", doc_line, line_number + i)
return ln, doc_string
docs[:] = []
return ln

def get_docstring(
self, ln: int, line: str, match: Pattern, docs: list[str]
) -> tuple[int, list[str], bool]:
"""Extract entire documentation strings from the current file position
Parameters
----------
ln : int
Line number
line : str
Document line, not necessarily produced by `get_line()`
match : Pattern
Regular expression DOC match
docs : list[str]
Docstrings that are pending processing e.g. single line docstrings
Returns
-------
tuple[int, list[str], bool]
The new line number at the end of the docstring, the docstring and
a boolean flag indicating whether the docstring precedes the AST node
(Doxygen style) or succeeds it (traditional FORD style)
"""
docstring: list[str] = docs
docstring.append(line[match.end(0) :].strip())
predocmark = True if match.group(1) == ">" else False

if ln >= self.nLines:
return ln, docstring, predocmark

# @note line index is 0-based
# Start from the current line until EOF and check for docs
for i in range(ln, self.nLines):
next_line = self.get_line(i, pp_content=True)
match = self.DOC_COMMENT_MATCH.match(next_line)
if not match:
ln = i
break
docstring.append(next_line[match.end(0) :].strip())
return ln, docstring, predocmark

def get_single_line_docstring(self, line: str) -> list[str]:
"""Get a docstring of a single line. This is the same for both Legacy
and Modern Fortran
Parameters
----------
line : str
Line of code
Returns
-------
list[str]
A list containing the docstring. List will be empty if there is no
match or the match is an empty string itself
"""
match = FRegex.FREE_DOC.match(line)
if not match:
return []
# if the string is empty return an empty list instead
doc = line[match.end(0) :].strip()
return [doc] if doc else []

@staticmethod
def parser_debug(msg: str, line: str, ln: int, scope: bool = False):
Expand All @@ -1779,7 +1842,7 @@ def parser_debug(msg: str, line: str, ln: int, scope: bool = False):
else:
log.debug(f"{line.strip()} !!! {msg} statement({ln})")

def get_comment_regexs(self):
def get_comment_regexs(self) -> tuple[Pattern, Pattern]:
if self.fixed:
return FRegex.FIXED_COMMENT, FRegex.FIXED_DOC
return FRegex.FREE_COMMENT, FRegex.FREE_DOC
Expand Down

0 comments on commit 6dbb434

Please sign in to comment.