Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename lexer_state -> lexer_thread, and make a few adjustments for the benefit of Lark-Cython (#1118)

Merged
merged 1 commit into from
Feb 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lark/lark.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,7 @@ def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]:
lexer = self._build_lexer(dont_ignore)
else:
lexer = self.lexer
lexer_thread = LexerThread(lexer, text)
lexer_thread = LexerThread.from_text(lexer, text)
stream = lexer_thread.lex(None)
if self.options.postlex:
return self.options.postlex.process(stream)
Expand Down
15 changes: 9 additions & 6 deletions lark/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,18 +352,21 @@ class LexerThread:
"""A thread that ties a lexer instance and a lexer state, to be used by the parser
"""

def __init__(self, lexer, text):
def __init__(self, lexer: 'Lexer', lexer_state: LexerState):
self.lexer = lexer
self.state = LexerState(text)
self.state = lexer_state

@classmethod
def from_text(cls, lexer: 'Lexer', text: str):
return cls(lexer, LexerState(text))

def lex(self, parser_state):
return self.lexer.lex(self.state, parser_state)

def __copy__(self):
copied = object.__new__(LexerThread)
copied.lexer = self.lexer
copied.state = copy(self.state)
return copied
return type(self)(self.lexer, copy(self.state))

_Token = Token


_Callback = Callable[[Token], Token]
Expand Down
2 changes: 1 addition & 1 deletion lark/parser_frontends.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def _verify_start(self, start=None):

def _make_lexer_thread(self, text):
cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread
return text if self.skip_lexer else cls(self.lexer, text)
return text if self.skip_lexer else cls.from_text(self.lexer, text)

def parse(self, text, start=None, on_error=None):
chosen_start = self._verify_start(start)
Expand Down
28 changes: 17 additions & 11 deletions lark/parsers/lalr_interactive_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,28 @@

from typing import Iterator, List
from copy import copy
import warnings

from lark.exceptions import UnexpectedToken
from lark.lexer import Token
from lark.lexer import Token, LexerThread


class InteractiveParser:
"""InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.

For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
"""
def __init__(self, parser, parser_state, lexer_state):
def __init__(self, parser, parser_state, lexer_thread: LexerThread):
self.parser = parser
self.parser_state = parser_state
self.lexer_state = lexer_state
self.lexer_thread = lexer_thread
self.result = None

@property
def lexer_state(self) -> LexerThread:
warnings.warn("lexer_state will be removed in subsequent releases. Use lexer_thread instead.", DeprecationWarning)
return self.lexer_thread

def feed_token(self, token: Token):
"""Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.

Expand All @@ -33,7 +39,7 @@ def iter_parse(self) -> Iterator[Token]:

When the parse is over, the resulting tree can be found in ``InteractiveParser.result``.
"""
for token in self.lexer_state.lex(self.parser_state):
for token in self.lexer_thread.lex(self.parser_state):
yield token
self.result = self.feed_token(token)

Expand All @@ -47,7 +53,7 @@ def exhaust_lexer(self) -> List[Token]:

def feed_eof(self, last_token=None):
"""Feed a '$END' Token. Borrows from 'last_token' if given."""
eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else Token('$END', '', 0, 1, 1)
eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else self.lexer_thread._Token('$END', '', 0, 1, 1)
return self.feed_token(eof)


Expand All @@ -59,7 +65,7 @@ def __copy__(self):
return type(self)(
self.parser,
copy(self.parser_state),
copy(self.lexer_state),
copy(self.lexer_thread),
)

def copy(self):
Expand All @@ -69,12 +75,12 @@ def __eq__(self, other):
if not isinstance(other, InteractiveParser):
return False

return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state
return self.parser_state == other.parser_state and self.lexer_thread == other.lexer_thread

def as_immutable(self):
"""Convert to an ``ImmutableInteractiveParser``."""
p = copy(self)
return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state)
return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_thread)

def pretty(self):
"""Print the output of ``choices()`` in a way that's easier to read."""
Expand All @@ -100,7 +106,7 @@ def accepts(self):
if t.isupper(): # is terminal?
new_cursor = copy(self)
try:
new_cursor.feed_token(Token(t, ''))
new_cursor.feed_token(self.lexer_thread._Token(t, ''))
except UnexpectedToken:
pass
else:
Expand All @@ -121,7 +127,7 @@ class ImmutableInteractiveParser(InteractiveParser):
result = None

def __hash__(self):
return hash((self.parser_state, self.lexer_state))
return hash((self.parser_state, self.lexer_thread))

def feed_token(self, token):
c = copy(self)
Expand All @@ -139,5 +145,5 @@ def exhaust_lexer(self):
def as_mutable(self):
"""Convert to an ``InteractiveParser``."""
p = copy(self)
return InteractiveParser(p.parser, p.parser_state, p.lexer_state)
return InteractiveParser(p.parser, p.parser_state, p.lexer_thread)

2 changes: 1 addition & 1 deletion lark/parsers/lalr_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def parse(self, lexer, start, on_error=None):

while True:
if isinstance(e, UnexpectedCharacters):
s = e.interactive_parser.lexer_state.state
s = e.interactive_parser.lexer_thread.state
p = s.line_ctr.char_pos

if not on_error(e):
Expand Down
8 changes: 4 additions & 4 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2518,12 +2518,12 @@ def test_parser_interactive_parser(self):

ip_copy = ip.copy()
self.assertEqual(ip_copy.parser_state, ip.parser_state)
self.assertEqual(ip_copy.lexer_state.state, ip.lexer_state.state)
self.assertEqual(ip_copy.lexer_thread.state, ip.lexer_thread.state)
self.assertIsNot(ip_copy.parser_state, ip.parser_state)
self.assertIsNot(ip_copy.lexer_state.state, ip.lexer_state.state)
self.assertIsNot(ip_copy.lexer_state.state.line_ctr, ip.lexer_state.state.line_ctr)
self.assertIsNot(ip_copy.lexer_thread.state, ip.lexer_thread.state)
self.assertIsNot(ip_copy.lexer_thread.state.line_ctr, ip.lexer_thread.state.line_ctr)

res = ip.feed_eof(ip.lexer_state.state.last_token)
res = ip.feed_eof(ip.lexer_thread.state.last_token)
self.assertEqual(res, Tree('start', ['a', 'b']))
self.assertRaises(UnexpectedToken ,ip.feed_eof)

Expand Down