From 180860cfa723f87df63507308e24768d1728c860 Mon Sep 17 00:00:00 2001 From: Tristan Swadell Date: Tue, 4 Apr 2023 09:05:17 -0700 Subject: [PATCH 1/2] [WIP] Configurable parser error reporting limit --- parser/options.go | 1 + parser/parser.go | 32 ++++++++++++++++++++++++++++---- parser/parser_test.go | 10 ++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/parser/options.go b/parser/options.go index 8bfdae55..7304ecd5 100644 --- a/parser/options.go +++ b/parser/options.go @@ -18,6 +18,7 @@ import "fmt" type options struct { maxRecursionDepth int + maxErrorsToReport int errorRecoveryTokenLookaheadLimit int errorRecoveryLimit int expressionSizeCodePointLimit int diff --git a/parser/parser.go b/parser/parser.go index da448177..9dc790aa 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -47,6 +47,9 @@ func NewParser(opts ...Option) (*Parser, error) { return nil, err } } + if p.maxErrorsToReport == 0 { + p.maxErrorsToReport = 100 + } if p.maxRecursionDepth == 0 { p.maxRecursionDepth = 250 } @@ -91,6 +94,7 @@ func (p *Parser) Parse(source common.Source) (*exprpb.ParsedExpr, *common.Errors helper: newParserHelper(source), macros: p.macros, maxRecursionDepth: p.maxRecursionDepth, + maxErrorsToReport: p.maxErrorsToReport, errorRecoveryLimit: p.errorRecoveryLimit, errorRecoveryLookaheadTokenLimit: p.errorRecoveryTokenLookaheadLimit, populateMacroCalls: p.populateMacroCalls, @@ -200,6 +204,16 @@ func (rl *recursionListener) ExitEveryRule(ctx antlr.ParserRuleContext) { var _ antlr.ParseTreeListener = &recursionListener{} +type tooManyErrors struct { + maxErrorsToReport int +} + +func (t *tooManyErrors) Error() string { + return fmt.Sprintf("More than %d syntax errors", t.maxErrorsToReport) +} + +var _ error = &tooManyErrors{} + type recoveryLimitError struct { message string } @@ -274,7 +288,9 @@ type parser struct { helper *parserHelper macros map[string]Macro recursionDepth int + errorReports int maxRecursionDepth int + maxErrorsToReport int errorRecoveryLimit int errorRecoveryLookaheadTokenLimit int populateMacroCalls bool @@ -306,14 +322,14 @@ func (p *parser) parse(expr runes.Buffer, desc string) *exprpb.Expr { lexer := lexerPool.Get().(*gen.CELLexer) prsr := parserPool.Get().(*gen.CELParser) - // Unfortunately ANTLR Go runtime is missing (*antlr.BaseParser).RemoveParseListeners, so this is - // good enough until that is exported. prsrListener := &recursionListener{ maxDepth: p.maxRecursionDepth, ruleTypeDepth: map[int]*int{}, } defer func() { + // Unfortunately ANTLR Go runtime is missing (*antlr.BaseParser).RemoveParseListeners, + // so this is good enough until that is exported. // Reset the lexer and parser before putting them back in the pool. lexer.RemoveErrorListeners() prsr.RemoveParseListener(prsrListener) @@ -344,6 +360,8 @@ func (p *parser) parse(expr runes.Buffer, desc string) *exprpb.Expr { p.errors.ReportError(common.NoLocation, err.Error()) case *recursionError: p.errors.ReportError(common.NoLocation, err.Error()) + case *tooManyErrors: + // do nothing case *recoveryLimitError: // do nothing, listeners already notified and error reported. default: @@ -866,7 +884,6 @@ func (p *parser) reportError(ctx any, format string, args ...any) *exprpb.Expr { // ANTLR Parse listener implementations func (p *parser) SyntaxError(recognizer antlr.Recognizer, offendingSymbol any, line, column int, msg string, e antlr.RecognitionException) { - // TODO: Snippet l := p.helper.source.NewLocation(line, column) // Hack to keep existing error messages consistent with previous versions of CEL when a reserved word // is used as an identifier. This behavior needs to be overhauled to provide consistent, normalized error @@ -874,7 +891,14 @@ func (p *parser) SyntaxError(recognizer antlr.Recognizer, offendingSymbol any, l if strings.Contains(msg, "no viable alternative") { msg = reservedIdentifier.ReplaceAllString(msg, mismatchedReservedIdentifier) } - p.errors.syntaxError(l, msg) + // Ensure that no more than 100 syntax errors are reported as this will halt attempts to recover from a + // seriously broken expression. + if p.errorReports < p.maxErrorsToReport { + p.errorReports++ + p.errors.syntaxError(l, msg) + } else { + panic(&tooManyErrors{maxErrorsToReport: p.maxErrorsToReport}) + } } func (p *parser) ReportAmbiguity(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex int, exact bool, ambigAlts *antlr.BitSet, configs antlr.ATNConfigSet) { diff --git a/parser/parser_test.go b/parser/parser_test.go index a5a21c9a..8c2062b1 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -1735,6 +1735,16 @@ var testCases = []testInfo{ | x{. | ..^`, }, + { + I: `."!:��*4X+-it-*+{+{d{+{+{+k++*z�+{zPB{b"xc�Z�\'" + M[A����{-$0-8<1--849--9--9--284-8-6--9-127--��@{C�֙1"��!{!{s!--!-%-*%%-(-(-1u-%[-1[--48,-6%[954-810-.4294967296����-(- -[[*[{[* + %.2-%-2<0--1438895�������(- -[[*[{[* + %.2-%-2<0--1<\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\L\\\\\\\\\\\\\\\\\\\\\17--3%4(305!-53335e31--7e0<3---0<>92<3-6e9-)��@-21-21--8-%6029453{""%*0�d{{w,)a{igw + `, + E: ` + ERROR: Too many errors + `, + }, } type testInfo struct { From fc0e06e3e15a5fc64ed02df966c7f63e35a9e930 Mon Sep 17 00:00:00 2001 From: TristonianJones Date: Tue, 4 Apr 2023 10:04:27 -0700 Subject: [PATCH 2/2] Introduce a configurable error reporting limit in the parser. This limit is not yet exposed on the top-level CEL package --- parser/options.go | 17 +++++++++++++++-- parser/parser.go | 18 ++++++++++-------- parser/parser_test.go | 26 +++++++++++++++++--------- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/parser/options.go b/parser/options.go index 7304ecd5..674c697c 100644 --- a/parser/options.go +++ b/parser/options.go @@ -18,7 +18,7 @@ import "fmt" type options struct { maxRecursionDepth int - maxErrorsToReport int + errorReportingLimit int errorRecoveryTokenLookaheadLimit int errorRecoveryLimit int expressionSizeCodePointLimit int @@ -47,7 +47,7 @@ func MaxRecursionDepth(limit int) Option { // successfully resume. In some pathological cases, the parser can look through quite a large set of input which // in turn generates a lot of back-tracking and performance degredation. // -// The limit must be > 1, and is recommended to be less than the default of 256. +// The limit must be >= 1, and is recommended to be less than the default of 256. func ErrorRecoveryLookaheadTokenLimit(limit int) Option { return func(opts *options) error { if limit < 1 { @@ -69,6 +69,19 @@ func ErrorRecoveryLimit(limit int) Option { } } +// ErrorReportingLimit limits the number of syntax error reports before terminating parsing. +// +// The limit must be at least 1. If unset, the limit will be 100. +func ErrorReportingLimit(limit int) Option { + return func(opts *options) error { + if limit < 1 { + return fmt.Errorf("error reporting limit must be at least 1: %d", limit) + } + opts.errorReportingLimit = limit + return nil + } +} + // ExpressionSizeCodePointLimit is an option which limits the maximum code point count of an // expression. func ExpressionSizeCodePointLimit(expressionSizeCodePointLimit int) Option { diff --git a/parser/parser.go b/parser/parser.go index 9dc790aa..e6f70f90 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -47,8 +47,8 @@ func NewParser(opts ...Option) (*Parser, error) { return nil, err } } - if p.maxErrorsToReport == 0 { - p.maxErrorsToReport = 100 + if p.errorReportingLimit == 0 { + p.errorReportingLimit = 100 } if p.maxRecursionDepth == 0 { p.maxRecursionDepth = 250 @@ -94,7 +94,7 @@ func (p *Parser) Parse(source common.Source) (*exprpb.ParsedExpr, *common.Errors helper: newParserHelper(source), macros: p.macros, maxRecursionDepth: p.maxRecursionDepth, - maxErrorsToReport: p.maxErrorsToReport, + errorReportingLimit: p.errorReportingLimit, errorRecoveryLimit: p.errorRecoveryLimit, errorRecoveryLookaheadTokenLimit: p.errorRecoveryTokenLookaheadLimit, populateMacroCalls: p.populateMacroCalls, @@ -205,11 +205,11 @@ func (rl *recursionListener) ExitEveryRule(ctx antlr.ParserRuleContext) { var _ antlr.ParseTreeListener = &recursionListener{} type tooManyErrors struct { - maxErrorsToReport int + errorReportingLimit int } func (t *tooManyErrors) Error() string { - return fmt.Sprintf("More than %d syntax errors", t.maxErrorsToReport) + return fmt.Sprintf("More than %d syntax errors", t.errorReportingLimit) } var _ error = &tooManyErrors{} @@ -290,7 +290,7 @@ type parser struct { recursionDepth int errorReports int maxRecursionDepth int - maxErrorsToReport int + errorReportingLimit int errorRecoveryLimit int errorRecoveryLookaheadTokenLimit int populateMacroCalls bool @@ -893,11 +893,13 @@ func (p *parser) SyntaxError(recognizer antlr.Recognizer, offendingSymbol any, l } // Ensure that no more than 100 syntax errors are reported as this will halt attempts to recover from a // seriously broken expression. - if p.errorReports < p.maxErrorsToReport { + if p.errorReports < p.errorReportingLimit { p.errorReports++ p.errors.syntaxError(l, msg) } else { - panic(&tooManyErrors{maxErrorsToReport: p.maxErrorsToReport}) + tme := &tooManyErrors{errorReportingLimit: p.errorReportingLimit} + p.errors.syntaxError(l, tme.Error()) + panic(tme) } } diff --git a/parser/parser_test.go b/parser/parser_test.go index 8c2062b1..04472c28 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -1736,13 +1736,18 @@ var testCases = []testInfo{ | ..^`, }, { - I: `."!:��*4X+-it-*+{+{d{+{+{+k++*z�+{zPB{b"xc�Z�\'" - M[A����{-$0-8<1--849--9--9--284-8-6--9-127--��@{C�֙1"��!{!{s!--!-%-*%%-(-(-1u-%[-1[--48,-6%[954-810-.4294967296����-(- -[[*[{[* - %.2-%-2<0--1438895�������(- -[[*[{[* - %.2-%-2<0--1<\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\L\\\\\\\\\\\\\\\\\\\\\17--3%4(305!-53335e31--7e0<3---0<>92<3-6e9-)��@-21-21--8-%6029453{""%*0�d{{w,)a{igw - `, + I: `'3# < 10" '& tru ^^`, + Opts: []Option{ErrorReportingLimit(2)}, E: ` - ERROR: Too many errors + ERROR: :1:12: Syntax error: token recognition error at: '& ' + | '3# < 10" '& tru ^^ + | ...........^ + ERROR: :1:18: Syntax error: token recognition error at: '^' + | '3# < 10" '& tru ^^ + | .................^ + ERROR: :1:19: Syntax error: More than 2 syntax errors + | '3# < 10" '& tru ^^ + | ..................^ `, }, } @@ -1931,13 +1936,16 @@ func TestParserOptionErrors(t *testing.T) { if _, err := NewParser(Macros(AllMacros...), MaxRecursionDepth(-2)); err == nil { t.Fatalf("got %q, want %q", err, "max recursion depth must be greater than or equal to -1: -2") } - if _, err := NewParser(Macros(AllMacros...), ErrorRecoveryLimit(-2)); err == nil { + if _, err := NewParser(ErrorRecoveryLimit(-2)); err == nil { t.Fatalf("got %q, want %q", err, "error recovery limit must be greater than or equal to -1: -2") } - if _, err := NewParser(Macros(AllMacros...), ErrorRecoveryLookaheadTokenLimit(0)); err == nil { + if _, err := NewParser(ErrorRecoveryLookaheadTokenLimit(0)); err == nil { t.Fatalf("got %q, want %q", err, "error recovery lookahead token limit must be at least 1: 0") } - if _, err := NewParser(Macros(AllMacros...), ExpressionSizeCodePointLimit(-2)); err == nil { + if _, err := NewParser(ErrorReportingLimit(0)); err == nil { + t.Fatalf("got %q, want %q", err, "error reporting limit must be greater than 0: -2") + } + if _, err := NewParser(ExpressionSizeCodePointLimit(-2)); err == nil { t.Fatalf("got %q, want %q", err, "expression size code point limit must be greater than or equal to -1: -2") } }