Skip to content

Commit

Permalink
Extend custom post-lexer to yield newlines after dedents
Browse files Browse the repository at this point in the history
  • Loading branch information
Scony committed Apr 30, 2024
1 parent 48dd006 commit 594405d
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 11 deletions.
2 changes: 1 addition & 1 deletion gdtoolkit/parser/gdscript.lark
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func_args: "(" [func_arg ("," func_arg)* [trailing_comma]] ")"
func_arg_regular: NAME ["=" expr]
func_arg_inf: NAME ":" "=" expr
func_arg_typed: NAME ":" TYPE_HINT ["=" expr]
_func_suite: _func_body
_func_suite: _func_body _NL
| _func_stmt
_func_body: _NL _INDENT (_func_stmt+ | _func_stmt* (_simple_func_stmt | annotation+)) _DEDENT
_func_stmt: _simple_func_stmt _NL
Expand Down
63 changes: 63 additions & 0 deletions gdtoolkit/parser/gdscript_indenter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from typing import Iterator

from lark import Token, indenter


class GDScriptIndenter(indenter.Indenter):
    """Post-lexer turning GDScript newline tokens into indent/dedent tokens.

    On top of the usual ``_INDENT``/``_DEDENT`` bookkeeping, every
    ``_DEDENT`` produced while handling a newline is followed by a repeat of
    that newline token, so the grammar can require a ``_NL`` right after an
    indented body.
    """

    NL_type = "_NL"
    OPEN_PAREN_types = ["LPAR", "LSQB", "LBRACE"]
    CLOSE_PAREN_types = ["RPAR", "RSQB", "RBRACE"]
    INDENT_type = "_INDENT"
    DEDENT_type = "_DEDENT"
    # TODO: guess tab length
    tab_len = 4

    def handle_NL(self, token: Token) -> Iterator[Token]:
        """Yield the tokens that replace a single newline token.

        Args:
            token: the newline token; the text after its last ``"\\n"`` is
                the indentation of the following line.

        Yields:
            Nothing while inside brackets. Otherwise the newline itself,
            followed by either one ``_INDENT`` or, for each closed
            indentation level, a ``_DEDENT`` plus a repeated newline.

        Raises:
            indenter.DedentError: when the new indentation is smaller than
                the current level but matches no enclosing level.
        """
        if self.paren_level > 0:
            return  # TODO: special handling for lambdas

        yield token

        indent_str = token.rsplit("\n", 1)[1]  # Tabs and spaces
        indent = indent_str.count(" ") + indent_str.count("\t") * self.tab_len

        if indent > self.indent_level[-1]:
            self.indent_level.append(indent)
            yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
        else:
            while indent < self.indent_level[-1]:
                self.indent_level.pop()
                yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)
                # produce extra newline after dedent to simplify grammar:
                yield token

            if indent != self.indent_level[-1]:
                # DedentError lives in lark's indenter module; referencing it
                # through the imported module keeps the name resolvable here.
                raise indenter.DedentError(
                    "Unexpected dedent to column %s. Expected dedent to %s"
                    % (indent, self.indent_level[-1])
                )

    def _process(self, stream):
        """Run the token stream through newline handling and bracket tracking.

        Newline tokens are expanded by ``handle_NL``; every other token is
        passed through unchanged. Opening/closing bracket tokens adjust
        ``paren_level`` so that newlines inside brackets are dropped. Once
        the stream is exhausted, one ``_DEDENT`` is emitted for each
        indentation level that is still open.
        """
        for token in stream:
            if token.type == self.NL_type:
                yield from self.handle_NL(token)
            else:
                yield token

            if token.type in self.OPEN_PAREN_types:
                self.paren_level += 1
            elif token.type in self.CLOSE_PAREN_types:
                self.paren_level -= 1
                assert self.paren_level >= 0

        # Close whatever indentation is still open at end of input.
        while len(self.indent_level) > 1:
            self.indent_level.pop()
            yield Token(self.DEDENT_type, "")

        assert self.indent_level == [0], self.indent_level
12 changes: 2 additions & 10 deletions gdtoolkit/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,7 @@

from lark import Lark, Tree, indenter


class Indenter(indenter.Indenter):
    """Indentation post-lexer configuration handed to Lark via ``postlex``."""

    # Token type that marks a newline.
    NL_type = "_NL"
    # Token types that open / close a bracketed region.
    OPEN_PAREN_types = ["LPAR", "LSQB", "LBRACE"]
    CLOSE_PAREN_types = ["RPAR", "RSQB", "RBRACE"]
    # Token types emitted for an indentation increase / decrease.
    INDENT_type = "_INDENT"
    DEDENT_type = "_DEDENT"
    # TODO: guess tab length
    # Presumably the number of columns one tab counts for when measuring
    # indentation — confirm against lark's Indenter.
    tab_len = 4
from .gdscript_indenter import GDScriptIndenter


# TODO: when upgrading to Python 3.8, replace with functools.cached_property
Expand Down Expand Up @@ -92,7 +84,7 @@ def _get_parser(
grammar_filepath,
parser="lalr",
start="start",
postlex=Indenter(), # type: ignore
postlex=GDScriptIndenter(), # type: ignore
propagate_positions=add_metadata,
maybe_placeholders=False,
cache=cache_filepath,
Expand Down

0 comments on commit 594405d

Please sign in to comment.