google · TristonianJones · Apr 5, 2023 · Apr 4, 2023 · Apr 4, 2023
diff --git a/parser/options.go b/parser/options.go
@@ -18,6 +18,7 @@ import "fmt"
 
 type options struct {
 	maxRecursionDepth                int
+	errorReportingLimit              int
 	errorRecoveryTokenLookaheadLimit int
 	errorRecoveryLimit               int
 	expressionSizeCodePointLimit     int
@@ -46,7 +47,7 @@ func MaxRecursionDepth(limit int) Option {
 // successfully resume. In some pathological cases, the parser can look through quite a large set of input which
 // in turn generates a lot of back-tracking and performance degredation.
 //
-// The limit must be > 1, and is recommended to be less than the default of 256.
+// The limit must be >= 1, and is recommended to be less than the default of 256.
 func ErrorRecoveryLookaheadTokenLimit(limit int) Option {
 	return func(opts *options) error {
 		if limit < 1 {
@@ -68,6 +69,19 @@ func ErrorRecoveryLimit(limit int) Option {
 	}
 }
 
+// ErrorReportingLimit limits the number of syntax error reports before terminating parsing.
+//
+// The limit must be at least 1. If unset, the limit will be 100.
+func ErrorReportingLimit(limit int) Option {
+	return func(opts *options) error {
+		if limit < 1 {
+			return fmt.Errorf("error reporting limit must be at least 1: %d", limit)
+		}
+		opts.errorReportingLimit = limit
+		return nil
+	}
+}
+
 // ExpressionSizeCodePointLimit is an option which limits the maximum code point count of an
 // expression.
 func ExpressionSizeCodePointLimit(expressionSizeCodePointLimit int) Option {

diff --git a/parser/parser.go b/parser/parser.go
@@ -47,6 +47,9 @@ func NewParser(opts ...Option) (*Parser, error) {
 			return nil, err
 		}
 	}
+	if p.errorReportingLimit == 0 {
+		p.errorReportingLimit = 100
+	}
 	if p.maxRecursionDepth == 0 {
 		p.maxRecursionDepth = 250
 	}
@@ -91,6 +94,7 @@ func (p *Parser) Parse(source common.Source) (*exprpb.ParsedExpr, *common.Errors
 		helper:                           newParserHelper(source),
 		macros:                           p.macros,
 		maxRecursionDepth:                p.maxRecursionDepth,
+		errorReportingLimit:              p.errorReportingLimit,
 		errorRecoveryLimit:               p.errorRecoveryLimit,
 		errorRecoveryLookaheadTokenLimit: p.errorRecoveryTokenLookaheadLimit,
 		populateMacroCalls:               p.populateMacroCalls,
@@ -200,6 +204,16 @@ func (rl *recursionListener) ExitEveryRule(ctx antlr.ParserRuleContext) {
 
 var _ antlr.ParseTreeListener = &recursionListener{}
 
+type tooManyErrors struct {
+	errorReportingLimit int
+}
+
+func (t *tooManyErrors) Error() string {
+	return fmt.Sprintf("More than %d syntax errors", t.errorReportingLimit)
+}
+
+var _ error = &tooManyErrors{}
+
 type recoveryLimitError struct {
 	message string
 }
@@ -274,7 +288,9 @@ type parser struct {
 	helper                           *parserHelper
 	macros                           map[string]Macro
 	recursionDepth                   int
+	errorReports                     int
 	maxRecursionDepth                int
+	errorReportingLimit              int
 	errorRecoveryLimit               int
 	errorRecoveryLookaheadTokenLimit int
 	populateMacroCalls               bool
@@ -306,14 +322,14 @@ func (p *parser) parse(expr runes.Buffer, desc string) *exprpb.Expr {
 	lexer := lexerPool.Get().(*gen.CELLexer)
 	prsr := parserPool.Get().(*gen.CELParser)
 
-	// Unfortunately ANTLR Go runtime is missing (*antlr.BaseParser).RemoveParseListeners, so this is
-	// good enough until that is exported.
 	prsrListener := &recursionListener{
 		maxDepth:      p.maxRecursionDepth,
 		ruleTypeDepth: map[int]*int{},
 	}
 
 	defer func() {
+		// Unfortunately ANTLR Go runtime is missing (*antlr.BaseParser).RemoveParseListeners,
+		// so this is good enough until that is exported.
 		// Reset the lexer and parser before putting them back in the pool.
 		lexer.RemoveErrorListeners()
 		prsr.RemoveParseListener(prsrListener)
@@ -344,6 +360,8 @@ func (p *parser) parse(expr runes.Buffer, desc string) *exprpb.Expr {
 				p.errors.ReportError(common.NoLocation, err.Error())
 			case *recursionError:
 				p.errors.ReportError(common.NoLocation, err.Error())
+			case *tooManyErrors:
+				// do nothing
 			case *recoveryLimitError:
 				// do nothing, listeners already notified and error reported.
 			default:
@@ -866,15 +884,23 @@ func (p *parser) reportError(ctx any, format string, args ...any) *exprpb.Expr {
 
 // ANTLR Parse listener implementations
 func (p *parser) SyntaxError(recognizer antlr.Recognizer, offendingSymbol any, line, column int, msg string, e antlr.RecognitionException) {
-	// TODO: Snippet
 	l := p.helper.source.NewLocation(line, column)
 	// Hack to keep existing error messages consistent with previous versions of CEL when a reserved word
 	// is used as an identifier. This behavior needs to be overhauled to provide consistent, normalized error
 	// messages out of ANTLR to prevent future breaking changes related to error message content.
 	if strings.Contains(msg, "no viable alternative") {
 		msg = reservedIdentifier.ReplaceAllString(msg, mismatchedReservedIdentifier)
 	}
-	p.errors.syntaxError(l, msg)
+	// Ensure that no more than 100 syntax errors are reported as this will halt attempts to recover from a
+	// seriously broken expression.
+	if p.errorReports < p.errorReportingLimit {
+		p.errorReports++
+		p.errors.syntaxError(l, msg)
+	} else {
+		tme := &tooManyErrors{errorReportingLimit: p.errorReportingLimit}
+		p.errors.syntaxError(l, tme.Error())
+		panic(tme)
+	}
 }
 
 func (p *parser) ReportAmbiguity(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex int, exact bool, ambigAlts *antlr.BitSet, configs antlr.ATNConfigSet) {

diff --git a/parser/parser_test.go b/parser/parser_test.go
@@ -1735,6 +1735,21 @@ var testCases = []testInfo{
 		 | x{.
 		 | ..^`,
 	},
+	{
+		I:    `'3# < 10" '& tru ^^`,
+		Opts: []Option{ErrorReportingLimit(2)},
+		E: `
+		ERROR: <input>:1:12: Syntax error: token recognition error at: '& '
+		 | '3# < 10" '& tru ^^
+		 | ...........^
+		ERROR: <input>:1:18: Syntax error: token recognition error at: '^'
+		 | '3# < 10" '& tru ^^
+		 | .................^
+		ERROR: <input>:1:19: Syntax error: More than 2 syntax errors
+		 | '3# < 10" '& tru ^^
+		 | ..................^
+		`,
+	},
 }
 
 type testInfo struct {
@@ -1921,13 +1936,16 @@ func TestParserOptionErrors(t *testing.T) {
 	if _, err := NewParser(Macros(AllMacros...), MaxRecursionDepth(-2)); err == nil {
 		t.Fatalf("got %q, want %q", err, "max recursion depth must be greater than or equal to -1: -2")
 	}
-	if _, err := NewParser(Macros(AllMacros...), ErrorRecoveryLimit(-2)); err == nil {
+	if _, err := NewParser(ErrorRecoveryLimit(-2)); err == nil {
 		t.Fatalf("got %q, want %q", err, "error recovery limit must be greater than or equal to -1: -2")
 	}
-	if _, err := NewParser(Macros(AllMacros...), ErrorRecoveryLookaheadTokenLimit(0)); err == nil {
+	if _, err := NewParser(ErrorRecoveryLookaheadTokenLimit(0)); err == nil {
 		t.Fatalf("got %q, want %q", err, "error recovery lookahead token limit must be at least 1: 0")
 	}
-	if _, err := NewParser(Macros(AllMacros...), ExpressionSizeCodePointLimit(-2)); err == nil {
+	if _, err := NewParser(ErrorReportingLimit(0)); err == nil {
+		t.Fatalf("got %q, want %q", err, "error reporting limit must be greater than 0: -2")
+	}
+	if _, err := NewParser(ExpressionSizeCodePointLimit(-2)); err == nil {
 		t.Fatalf("got %q, want %q", err, "expression size code point limit must be greater than or equal to -1: -2")
 	}
 }