From bb58e914a9a9ed64a346d1151b280ed16a11178a Mon Sep 17 00:00:00 2001
From: Alexander Bakker
Date: Sat, 12 Nov 2022 12:38:10 +0100
Subject: [PATCH] Make regex matcher timeout configurable

I use chroma to syntax highlight code snippets on my blog. While
profiling my static blog generator, I noticed that a lot of time was
being spent setting up the regex timeout watcher:
https://alexbakker.me/u/g987wpgo1v.svg. I'm not exactly sure why it's
spending so much time there, but disabling the timeout results in a 2x
performance improvement in my testing.
---
 regexp.go | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/regexp.go b/regexp.go
index 1794662e2..52d1e9cfa 100644
--- a/regexp.go
+++ b/regexp.go
@@ -2,6 +2,7 @@ package chroma
 
 import (
 	"fmt"
+	"math"
 	"os"
 	"path/filepath"
 	"regexp"
@@ -100,6 +101,7 @@ func NewLexer(config *Config, rulesFunc func() Rules) (*RegexLexer, error) {
 	}
 	r := &RegexLexer{
 		config:         config,
+		regexTimeout:   time.Millisecond * 250,
 		fetchRulesFunc: func() (Rules, error) { return rulesFunc(), nil },
 	}
 	// One-off code to generate XML lexers in the Chroma source tree.
@@ -261,10 +263,11 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit
 
 // RegexLexer is the default lexer implementation used in Chroma.
 type RegexLexer struct {
-	registry *LexerRegistry // The LexerRegistry this Lexer is associated with, if any.
-	config   *Config
-	analyser func(text string) float32
-	trace    bool
+	registry     *LexerRegistry // The LexerRegistry this Lexer is associated with, if any.
+	config       *Config
+	regexTimeout time.Duration
+	analyser     func(text string) float32
+	trace        bool
 
 	mu             sync.Mutex
 	compiled       bool
@@ -286,6 +289,12 @@ func (r *RegexLexer) Rules() (Rules, error) {
 	return r.rawRules, nil
 }
 
+// SetTimeout sets the timeout after which the lexer gives up. The default
+// timeout is 250 ms. To disable the timeout, set it to zero.
+func (r *RegexLexer) SetTimeout(timeout time.Duration) {
+	r.regexTimeout = timeout
+}
+
 // SetRegistry the lexer will use to lookup other lexers if necessary.
 func (r *RegexLexer) SetRegistry(registry *LexerRegistry) Lexer {
 	r.registry = registry
@@ -334,7 +343,12 @@ func (r *RegexLexer) maybeCompile() (err error) {
 				if err != nil {
 					return fmt.Errorf("failed to compile rule %s.%d: %s", state, i, err)
 				}
-				rule.Regexp.MatchTimeout = time.Millisecond * 250
+
+				timeout := r.regexTimeout
+				if timeout == 0 {
+					timeout = time.Duration(math.MaxInt64)
+				}
+				rule.Regexp.MatchTimeout = timeout
 			}
 		}
 	}