diff --git a/parser/options.go b/parser/options.go index 8bfdae55..674c697c 100644 --- a/parser/options.go +++ b/parser/options.go @@ -18,6 +18,7 @@ import "fmt" type options struct { maxRecursionDepth int + errorReportingLimit int errorRecoveryTokenLookaheadLimit int errorRecoveryLimit int expressionSizeCodePointLimit int @@ -46,7 +47,7 @@ func MaxRecursionDepth(limit int) Option { // successfully resume. In some pathological cases, the parser can look through quite a large set of input which // in turn generates a lot of back-tracking and performance degredation. // -// The limit must be > 1, and is recommended to be less than the default of 256. +// The limit must be >= 1, and is recommended to be less than the default of 256. func ErrorRecoveryLookaheadTokenLimit(limit int) Option { return func(opts *options) error { if limit < 1 { @@ -68,6 +69,19 @@ func ErrorRecoveryLimit(limit int) Option { } } +// ErrorReportingLimit limits the number of syntax error reports before terminating parsing. +// +// The limit must be at least 1. If unset, the limit will be 100. +func ErrorReportingLimit(limit int) Option { + return func(opts *options) error { + if limit < 1 { + return fmt.Errorf("error reporting limit must be at least 1: %d", limit) + } + opts.errorReportingLimit = limit + return nil + } +} + // ExpressionSizeCodePointLimit is an option which limits the maximum code point count of an // expression. func ExpressionSizeCodePointLimit(expressionSizeCodePointLimit int) Option { diff --git a/parser/parser.go b/parser/parser.go index da448177..e6f70f90 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -47,6 +47,9 @@ func NewParser(opts ...Option) (*Parser, error) { return nil, err } } + if p.errorReportingLimit == 0 { + p.errorReportingLimit = 100 + } if p.maxRecursionDepth == 0 { p.maxRecursionDepth = 250 } @@ -91,6 +94,7 @@ func (p *Parser) Parse(source common.Source) (*exprpb.ParsedExpr, *common.Errors helper: newParserHelper(source), macros: p.macros, maxRecursionDepth: p.maxRecursionDepth, + errorReportingLimit: p.errorReportingLimit, errorRecoveryLimit: p.errorRecoveryLimit, errorRecoveryLookaheadTokenLimit: p.errorRecoveryTokenLookaheadLimit, populateMacroCalls: p.populateMacroCalls, @@ -200,6 +204,16 @@ func (rl *recursionListener) ExitEveryRule(ctx antlr.ParserRuleContext) { var _ antlr.ParseTreeListener = &recursionListener{} +type tooManyErrors struct { + errorReportingLimit int +} + +func (t *tooManyErrors) Error() string { + return fmt.Sprintf("More than %d syntax errors", t.errorReportingLimit) +} + +var _ error = &tooManyErrors{} + type recoveryLimitError struct { message string } @@ -274,7 +288,9 @@ type parser struct { helper *parserHelper macros map[string]Macro recursionDepth int + errorReports int maxRecursionDepth int + errorReportingLimit int errorRecoveryLimit int errorRecoveryLookaheadTokenLimit int populateMacroCalls bool @@ -306,14 +322,14 @@ func (p *parser) parse(expr runes.Buffer, desc string) *exprpb.Expr { lexer := lexerPool.Get().(*gen.CELLexer) prsr := parserPool.Get().(*gen.CELParser) - // Unfortunately ANTLR Go runtime is missing (*antlr.BaseParser).RemoveParseListeners, so this is - // good enough until that is exported. prsrListener := &recursionListener{ maxDepth: p.maxRecursionDepth, ruleTypeDepth: map[int]*int{}, } defer func() { + // Unfortunately ANTLR Go runtime is missing (*antlr.BaseParser).RemoveParseListeners, + // so this is good enough until that is exported. // Reset the lexer and parser before putting them back in the pool. lexer.RemoveErrorListeners() prsr.RemoveParseListener(prsrListener) @@ -344,6 +360,8 @@ func (p *parser) parse(expr runes.Buffer, desc string) *exprpb.Expr { p.errors.ReportError(common.NoLocation, err.Error()) case *recursionError: p.errors.ReportError(common.NoLocation, err.Error()) + case *tooManyErrors: + // do nothing case *recoveryLimitError: // do nothing, listeners already notified and error reported. default: @@ -866,7 +884,6 @@ func (p *parser) reportError(ctx any, format string, args ...any) *exprpb.Expr { // ANTLR Parse listener implementations func (p *parser) SyntaxError(recognizer antlr.Recognizer, offendingSymbol any, line, column int, msg string, e antlr.RecognitionException) { - // TODO: Snippet l := p.helper.source.NewLocation(line, column) // Hack to keep existing error messages consistent with previous versions of CEL when a reserved word // is used as an identifier. This behavior needs to be overhauled to provide consistent, normalized error @@ -874,7 +891,16 @@ func (p *parser) SyntaxError(recognizer antlr.Recognizer, offendingSymbol any, l if strings.Contains(msg, "no viable alternative") { msg = reservedIdentifier.ReplaceAllString(msg, mismatchedReservedIdentifier) } - p.errors.syntaxError(l, msg) + // Ensure that no more than 100 syntax errors are reported as this will halt attempts to recover from a + // seriously broken expression. + if p.errorReports < p.errorReportingLimit { + p.errorReports++ + p.errors.syntaxError(l, msg) + } else { + tme := &tooManyErrors{errorReportingLimit: p.errorReportingLimit} + p.errors.syntaxError(l, tme.Error()) + panic(tme) + } } func (p *parser) ReportAmbiguity(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex int, exact bool, ambigAlts *antlr.BitSet, configs antlr.ATNConfigSet) { diff --git a/parser/parser_test.go b/parser/parser_test.go index a5a21c9a..04472c28 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -1735,6 +1735,21 @@ var testCases = []testInfo{ | x{. | ..^`, }, + { + I: `'3# < 10" '& tru ^^`, + Opts: []Option{ErrorReportingLimit(2)}, + E: ` + ERROR: :1:12: Syntax error: token recognition error at: '& ' + | '3# < 10" '& tru ^^ + | ...........^ + ERROR: :1:18: Syntax error: token recognition error at: '^' + | '3# < 10" '& tru ^^ + | .................^ + ERROR: :1:19: Syntax error: More than 2 syntax errors + | '3# < 10" '& tru ^^ + | ..................^ + `, + }, } type testInfo struct { @@ -1921,13 +1936,16 @@ func TestParserOptionErrors(t *testing.T) { if _, err := NewParser(Macros(AllMacros...), MaxRecursionDepth(-2)); err == nil { t.Fatalf("got %q, want %q", err, "max recursion depth must be greater than or equal to -1: -2") } - if _, err := NewParser(Macros(AllMacros...), ErrorRecoveryLimit(-2)); err == nil { + if _, err := NewParser(ErrorRecoveryLimit(-2)); err == nil { t.Fatalf("got %q, want %q", err, "error recovery limit must be greater than or equal to -1: -2") } - if _, err := NewParser(Macros(AllMacros...), ErrorRecoveryLookaheadTokenLimit(0)); err == nil { + if _, err := NewParser(ErrorRecoveryLookaheadTokenLimit(0)); err == nil { t.Fatalf("got %q, want %q", err, "error recovery lookahead token limit must be at least 1: 0") } - if _, err := NewParser(Macros(AllMacros...), ExpressionSizeCodePointLimit(-2)); err == nil { + if _, err := NewParser(ErrorReportingLimit(0)); err == nil { + t.Fatalf("got %q, want %q", err, "error reporting limit must be greater than 0: -2") + } + if _, err := NewParser(ExpressionSizeCodePointLimit(-2)); err == nil { t.Fatalf("got %q, want %q", err, "expression size code point limit must be greater than or equal to -1: -2") } }