Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parser error reporting limit #672

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion parser/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import "fmt"

type options struct {
maxRecursionDepth int
errorReportingLimit int
errorRecoveryTokenLookaheadLimit int
errorRecoveryLimit int
expressionSizeCodePointLimit int
Expand Down Expand Up @@ -46,7 +47,7 @@ func MaxRecursionDepth(limit int) Option {
// successfully resume. In some pathological cases, the parser can look through quite a large set of input which
// in turn generates a lot of back-tracking and performance degredation.
//
// The limit must be > 1, and is recommended to be less than the default of 256.
// The limit must be >= 1, and is recommended to be less than the default of 256.
func ErrorRecoveryLookaheadTokenLimit(limit int) Option {
return func(opts *options) error {
if limit < 1 {
Expand All @@ -68,6 +69,19 @@ func ErrorRecoveryLimit(limit int) Option {
}
}

// ErrorReportingLimit limits the number of syntax error reports before terminating parsing.
//
// The limit must be at least 1. If unset, the limit will be 100.
func ErrorReportingLimit(limit int) Option {
return func(opts *options) error {
if limit < 1 {
return fmt.Errorf("error reporting limit must be at least 1: %d", limit)
}
opts.errorReportingLimit = limit
return nil
}
}

// ExpressionSizeCodePointLimit is an option which limits the maximum code point count of an
// expression.
func ExpressionSizeCodePointLimit(expressionSizeCodePointLimit int) Option {
Expand Down
34 changes: 30 additions & 4 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ func NewParser(opts ...Option) (*Parser, error) {
return nil, err
}
}
if p.errorReportingLimit == 0 {
p.errorReportingLimit = 100
}
if p.maxRecursionDepth == 0 {
p.maxRecursionDepth = 250
}
Expand Down Expand Up @@ -91,6 +94,7 @@ func (p *Parser) Parse(source common.Source) (*exprpb.ParsedExpr, *common.Errors
helper: newParserHelper(source),
macros: p.macros,
maxRecursionDepth: p.maxRecursionDepth,
errorReportingLimit: p.errorReportingLimit,
errorRecoveryLimit: p.errorRecoveryLimit,
errorRecoveryLookaheadTokenLimit: p.errorRecoveryTokenLookaheadLimit,
populateMacroCalls: p.populateMacroCalls,
Expand Down Expand Up @@ -200,6 +204,16 @@ func (rl *recursionListener) ExitEveryRule(ctx antlr.ParserRuleContext) {

var _ antlr.ParseTreeListener = &recursionListener{}

type tooManyErrors struct {
errorReportingLimit int
}

func (t *tooManyErrors) Error() string {
return fmt.Sprintf("More than %d syntax errors", t.errorReportingLimit)
}

var _ error = &tooManyErrors{}

type recoveryLimitError struct {
message string
}
Expand Down Expand Up @@ -274,7 +288,9 @@ type parser struct {
helper *parserHelper
macros map[string]Macro
recursionDepth int
errorReports int
maxRecursionDepth int
errorReportingLimit int
errorRecoveryLimit int
errorRecoveryLookaheadTokenLimit int
populateMacroCalls bool
Expand Down Expand Up @@ -306,14 +322,14 @@ func (p *parser) parse(expr runes.Buffer, desc string) *exprpb.Expr {
lexer := lexerPool.Get().(*gen.CELLexer)
prsr := parserPool.Get().(*gen.CELParser)

// Unfortunately ANTLR Go runtime is missing (*antlr.BaseParser).RemoveParseListeners, so this is
// good enough until that is exported.
prsrListener := &recursionListener{
maxDepth: p.maxRecursionDepth,
ruleTypeDepth: map[int]*int{},
}

defer func() {
// Unfortunately ANTLR Go runtime is missing (*antlr.BaseParser).RemoveParseListeners,
// so this is good enough until that is exported.
// Reset the lexer and parser before putting them back in the pool.
lexer.RemoveErrorListeners()
prsr.RemoveParseListener(prsrListener)
Expand Down Expand Up @@ -344,6 +360,8 @@ func (p *parser) parse(expr runes.Buffer, desc string) *exprpb.Expr {
p.errors.ReportError(common.NoLocation, err.Error())
case *recursionError:
p.errors.ReportError(common.NoLocation, err.Error())
case *tooManyErrors:
// do nothing
case *recoveryLimitError:
// do nothing, listeners already notified and error reported.
default:
Expand Down Expand Up @@ -866,15 +884,23 @@ func (p *parser) reportError(ctx any, format string, args ...any) *exprpb.Expr {

// ANTLR Parse listener implementations
func (p *parser) SyntaxError(recognizer antlr.Recognizer, offendingSymbol any, line, column int, msg string, e antlr.RecognitionException) {
// TODO: Snippet
l := p.helper.source.NewLocation(line, column)
// Hack to keep existing error messages consistent with previous versions of CEL when a reserved word
// is used as an identifier. This behavior needs to be overhauled to provide consistent, normalized error
// messages out of ANTLR to prevent future breaking changes related to error message content.
if strings.Contains(msg, "no viable alternative") {
msg = reservedIdentifier.ReplaceAllString(msg, mismatchedReservedIdentifier)
}
p.errors.syntaxError(l, msg)
// Ensure that no more than 100 syntax errors are reported as this will halt attempts to recover from a
// seriously broken expression.
if p.errorReports < p.errorReportingLimit {
p.errorReports++
p.errors.syntaxError(l, msg)
} else {
tme := &tooManyErrors{errorReportingLimit: p.errorReportingLimit}
p.errors.syntaxError(l, tme.Error())
panic(tme)
}
}

func (p *parser) ReportAmbiguity(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex int, exact bool, ambigAlts *antlr.BitSet, configs antlr.ATNConfigSet) {
Expand Down
24 changes: 21 additions & 3 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1735,6 +1735,21 @@ var testCases = []testInfo{
| x{.
| ..^`,
},
{
I: `'3# < 10" '& tru ^^`,
Opts: []Option{ErrorReportingLimit(2)},
E: `
ERROR: <input>:1:12: Syntax error: token recognition error at: '& '
| '3# < 10" '& tru ^^
| ...........^
ERROR: <input>:1:18: Syntax error: token recognition error at: '^'
| '3# < 10" '& tru ^^
| .................^
ERROR: <input>:1:19: Syntax error: More than 2 syntax errors
| '3# < 10" '& tru ^^
| ..................^
`,
},
}

type testInfo struct {
Expand Down Expand Up @@ -1921,13 +1936,16 @@ func TestParserOptionErrors(t *testing.T) {
if _, err := NewParser(Macros(AllMacros...), MaxRecursionDepth(-2)); err == nil {
t.Fatalf("got %q, want %q", err, "max recursion depth must be greater than or equal to -1: -2")
}
if _, err := NewParser(Macros(AllMacros...), ErrorRecoveryLimit(-2)); err == nil {
if _, err := NewParser(ErrorRecoveryLimit(-2)); err == nil {
t.Fatalf("got %q, want %q", err, "error recovery limit must be greater than or equal to -1: -2")
}
if _, err := NewParser(Macros(AllMacros...), ErrorRecoveryLookaheadTokenLimit(0)); err == nil {
if _, err := NewParser(ErrorRecoveryLookaheadTokenLimit(0)); err == nil {
t.Fatalf("got %q, want %q", err, "error recovery lookahead token limit must be at least 1: 0")
}
if _, err := NewParser(Macros(AllMacros...), ExpressionSizeCodePointLimit(-2)); err == nil {
if _, err := NewParser(ErrorReportingLimit(0)); err == nil {
t.Fatalf("got %q, want %q", err, "error reporting limit must be greater than 0: -2")
}
if _, err := NewParser(ExpressionSizeCodePointLimit(-2)); err == nil {
t.Fatalf("got %q, want %q", err, "expression size code point limit must be greater than or equal to -1: -2")
}
}
Expand Down