Skip to content

Commit

Permalink
WIP: Optional feature: More verbose failed expression reporting
Browse files Browse the repository at this point in the history
An option is added to the Parser class. This option is disabled by default,
and the existing behavior is fully preserved.

When the option is enabled, the final "Expected ..." message is extended with
extra information about the previously "weakly failed" rules. This way, the
message shows not only the last failed NoMatch exception and its failing rules,
but also all the rules that failed to match during the whole parsing process.
  • Loading branch information
stanislaw committed Apr 22, 2023
1 parent 0b47dc0 commit 66f4b3a
Show file tree
Hide file tree
Showing 5 changed files with 405 additions and 36 deletions.
195 changes: 182 additions & 13 deletions arpeggio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,16 @@
###############################################################################

from __future__ import print_function, unicode_literals

import collections
import sys
from collections import OrderedDict
import codecs
import re
import bisect
from enum import Enum
from typing import Tuple, List, Deque

from arpeggio.utils import isstr
import types

Expand Down Expand Up @@ -78,6 +83,15 @@ def eval_attrs(self):
"""
Call this to evaluate `message`, `context`, `line` and `col`. Called by __str__.
"""

# We reach this branch if a failed NoMatch exception is created from
# an unmatched Not rule.
if self.rules is None or len(self.rules) == 0:
self.context = self.parser.context(position=self.position)
self.line, self.col = self.parser.pos_to_linecol(self.position)
self.message = f"Not expected input at position ({self.line}, {self.col})"
return

def rule_to_exp_str(rule):
if hasattr(rule, '_exp_str'):
# Rule may override expected report string
Expand All @@ -90,24 +104,150 @@ def rule_to_exp_str(rule):
else:
return rule.name

if not self.rules:
self.message = "Not expected input"
flattened_pos_rules: List[Tuple] = list(
self.parser.weakly_failed_errors
)
rules_set = set(map(lambda pr: pr[1], flattened_pos_rules))

def enumerate_child_nodes(node):
# FIXME: How do we end up with repeating nodes in the tree?
visited = set()
queue = list(node.nodes)
while len(queue) > 0:
current = queue.pop(0)
if current in visited:
continue
visited.add(current)
yield current
queue.extend(current.nodes)

if not self.parser.verbose2:
# Mark all nodes as relevant or irrelevant for the printed error message.
for _, rule in flattened_pos_rules:
# "Not" nodes do not contribute to the reporting of weakly failed
# rules.
assert not isinstance(rule, Not)
if not isinstance(rule, Match):
rule.good_node = NodeMarker.BAD
# We find if all nodes have parents.
for node in enumerate_child_nodes(rule):
if not isinstance(node, Match):
node.good_node = NodeMarker.BAD
continue

# Node is part of the final failed expression.
if node in self.rules:
node.good_node = NodeMarker.GOOD
# Node has a failing parent. It is a good node.
elif node in rules_set:
node.good_node = NodeMarker.GOOD
# Node is orphan. **Nothing was unsuccessful** with this node.
else:
node.good_node = NodeMarker.BAD
else:
rule.good_node = (
NodeMarker.GOOD if rule in self.rules else NodeMarker.BAD
)
flattened_pos_rules = list(
filter(
lambda pr: pr[1].good_node == NodeMarker.GOOD, flattened_pos_rules
)
)
else:
what_is_expected = OrderedDict.fromkeys(
["{}".format(rule_to_exp_str(r)) for r in self.rules])
what_str = " or ".join(what_is_expected)
self.message = "Expected {}".format(what_str)
flattened_pos_rules = list(
filter(
lambda pos_and_rule: isinstance(pos_and_rule[1], Match), flattened_pos_rules
)
)

positions = {}
for position, rule in flattened_pos_rules:
if rule not in positions:
positions[rule] = position
else:
if positions[rule] < position:
positions[rule] = position

flattened_pos_rules = [(positions[k], k) for k in positions]


several_positions = False
current_failed_position = None
for pos_rule in flattened_pos_rules:
if current_failed_position is None:
current_failed_position = pos_rule[0]
continue
if current_failed_position != pos_rule[0]:
several_positions = True
if current_failed_position > pos_rule[0]:
current_failed_position = pos_rule[0]

if current_failed_position is None:
current_failed_position = self.position
if len(flattened_pos_rules) == 0:
flattened_pos_rules = [(self.position, rule) for rule in self.rules]
flattened_pos_rules.sort(key=lambda pos_rule_: pos_rule_[0])

self.context = self.parser.context(position=self.position)
self.line, self.col = self.parser.pos_to_linecol(self.position)

if not several_positions:
what_is_expected = OrderedDict.fromkeys(
["{}".format(rule_to_exp_str(r[1])) for r in flattened_pos_rules])
what_str = " or ".join(what_is_expected)
what_str += f" at position ({self.line}, {self.col})"
self.message = "Expected {}".format(what_str)
elif self.parser.verbose2:
descriptions = []
current_position = flattened_pos_rules[0][0]
current_rules = []
for pos, rule in flattened_pos_rules:
if current_position == pos:
current_rules.append(rule_to_exp_str(rule))
else:
joined_rules = " or ".join(current_rules)
line, col = self.parser.pos_to_linecol(current_position)
descriptions.append(
f"{line}:{col}: {joined_rules}"
)
current_position = pos
current_rules = [rule_to_exp_str(rule)]
joined_rules = " or ".join(current_rules)
line, col = self.parser.pos_to_linecol(current_position)
descriptions.append(
f"{line}:{col}: {joined_rules}"
)

what_str = "\n".join(descriptions)
self.message = "Expected:\n{}\n".format(what_str)
else:
descriptions = []
current_position = flattened_pos_rules[0][0]
current_rules = []
for pos, rule in flattened_pos_rules:
if current_position == pos:
current_rules.append(rule_to_exp_str(rule))
else:
joined_rules = " or ".join(current_rules)
descriptions.append(
f"{joined_rules} at position {self.parser.pos_to_linecol(current_position)}"
)
current_position = pos
current_rules = [rule_to_exp_str(rule)]
joined_rules = " or ".join(current_rules)
descriptions.append(
f"{joined_rules} at position {self.parser.pos_to_linecol(current_position)}"
)

what_str = " or ".join(descriptions)
self.message = "Expected {}".format(what_str)

def __str__(self):
self.eval_attrs()
return "{} at position {}{} => '{}'."\
.format(self.message,
"{}:".format(self.parser.file_name)
return "{}{} => '{}'."\
.format("{}: ".format(self.parser.file_name)
if self.parser.file_name else "",
(self.line, self.col),
self.message,
self.context)

def __unicode__(self):
Expand Down Expand Up @@ -161,6 +301,11 @@ def dprint(self, message, indent_change=0):
# ---------------------------------------------------------
# Parser Model (PEG Abstract Semantic Graph) elements

class NodeMarker(str, Enum):
    """
    Relevance marker stored in the `good_node` attribute of a parsing
    expression.

    `NoMatch.eval_attrs` assigns these markers to the weakly failed rules
    and then keeps only the rules marked `GOOD` when it builds the
    non-verbose2 error message.
    """
    # Initial value given to `good_node` when a parsing expression is
    # constructed; no relevance decision has been made yet.
    UNKNOWN = "UNKNOWN"
    # The rule is kept in the reported "Expected ..." message.
    GOOD = "GOOD"
    # The rule is filtered out of the reported message.
    BAD = "BAD"


class ParsingExpression(object):
"""
Expand Down Expand Up @@ -195,7 +340,7 @@ def __init__(self, *elements, **kwargs):
if not hasattr(nodes, '__iter__'):
nodes = [nodes]
self.nodes = nodes

self.good_node = NodeMarker.UNKNOWN
if 'suppress' in kwargs:
self.suppress = kwargs['suppress']

Expand Down Expand Up @@ -430,6 +575,11 @@ def _parse(self, parser):
if not match:
parser._nm_raise(self, c_pos, parser)

if parser.verbose2 and not parser.in_not:
for node in self.nodes:
if isinstance(node, Match):
parser.weakly_failed_errors.append((c_pos, node))

return result


Expand Down Expand Up @@ -1413,7 +1563,7 @@ class Parser(DebugPrinter):
FIRST_NOT = Not()

def __init__(self, skipws=True, ws=None, reduce_tree=False, autokwd=False,
ignore_case=False, memoization=False, **kwargs):
ignore_case=False, memoization=False, verbose=False, verbose2=False, **kwargs):
"""
Args:
skipws (bool): Should the whitespace skipping be done. Default is
Expand Down Expand Up @@ -1473,6 +1623,12 @@ def __init__(self, skipws=True, ws=None, reduce_tree=False, autokwd=False,
# Last parsing expression traversed
self.last_pexpression = None

self.verbose = verbose
self.verbose2 = verbose2
self.weakly_failed_errors: Deque = (
collections.deque() if verbose or verbose2 else collections.deque(maxlen=0)
)

@property
def ws(self):
return self._ws
Expand Down Expand Up @@ -1709,6 +1865,10 @@ def _nm_raise(self, *args):
"""

rule, position, parser = args

if not self.in_not:
self.weakly_failed_errors.append((position, rule))

if self.nm is None or not parser.in_parse_comments:
if self.nm is None or position > self.nm.position:
if self.in_not:
Expand All @@ -1718,7 +1878,16 @@ def _nm_raise(self, *args):
elif position == self.nm.position and isinstance(rule, Match) \
and not self.in_not:
self.nm.rules.append(rule)

else:
# We reach here if the _nm_raise is called on a failed parent
# expression which is not Match-based (e.g. OrderedChoice).
# Such parent expressions do not contribute to the final error
# reporting. Instead, the previously failed Match-based NoMatch
# exception is reported. Note that _nm_raise is always called
# first on the failed Match expressions and only then the
# failure is propagated to the parent _nm_raise invocation that
# reaches this branch.
pass
raise self.nm

def _clear_caches(self):
Expand Down
2 changes: 1 addition & 1 deletion arpeggio/tests/test_error_reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def grammar(): return Optional('a'), 'b', EOF
with pytest.raises(NoMatch) as e:
parser.parse("\n\n a c", file_name="test_file.peg")
assert (
"Expected 'b' at position test_file.peg:(3, 6) => ' a *c'."
"test_file.peg: Expected 'b' at position (3, 6) => ' a *c'."
) == str(e.value)
assert (e.value.line, e.value.col) == (3, 6)

Expand Down
39 changes: 39 additions & 0 deletions arpeggio/tests/test_error_reporting_verbose.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
#######################################################################
# Name: test_error_reporting_verbose
# Purpose: Test error reporting for various cases when verbose=True is enabled.
# Author: Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2015 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest

from arpeggio import Optional, Not, ParserPython, NoMatch, EOF, Sequence, RegExMatch, StrMatch, OrderedChoice
from arpeggio import RegExMatch as _


def test_optional_with_better_match():
    """
    Test verbose error reporting when an optional match has gone further
    in the input stream than the non-optional alternative.

    With `verbose=True` the NoMatch message is expected to list every
    failed match together with its own (line, col) position, while the
    exception's `line` and `col` still point to the position of the
    furthest failure ('five' at (1, 20)).
    """

    # `first` and `second` share the prefix 'one two three'; `first` then
    # fails on '4', while the optional `second` gets one token further and
    # fails on 'five'.
    def grammar(): return [first, (Optional(second), 'six')]
    def first(): return 'one', 'two', 'three', '4'
    def second(): return 'one', 'two', 'three', 'four', 'five'

    parser = ParserPython(grammar, verbose=True)
    assert parser.verbose

    with pytest.raises(NoMatch) as e:
        parser.parse('one two three four 5')

    # The message enumerates the failed alternatives ordered by position,
    # followed by the input context around the reported position.
    assert (
        "Expected "
        "'six' at position (1, 1) or "
        "'4' at position (1, 15) or "
        "'five' at position (1, 20) => "
        "'hree four *5'."
    ) == str(e.value)
    assert (e.value.line, e.value.col) == (1, 20)
Loading

0 comments on commit 66f4b3a

Please sign in to comment.