Skip to content

Commit

Permalink
Make regex matcher timeout configurable
Browse files Browse the repository at this point in the history
I use chroma to syntax highlight code snippets on my blog. While profiling my
static blog generator, I noticed that a lot of time was being spent setting up
the regex timeout watcher: https://alexbakker.me/u/g987wpgo1v.svg.

I'm not exactly sure why it's spending so much time there, but disabling the
timeout results in a 2x performance improvement in my testing.
  • Loading branch information
alexbakker committed Nov 12, 2022
1 parent 4dfc2ca commit bb58e91
Showing 1 changed file with 19 additions and 5 deletions.
24 changes: 19 additions & 5 deletions regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package chroma

import (
"fmt"
"math"
"os"
"path/filepath"
"regexp"
Expand Down Expand Up @@ -100,6 +101,7 @@ func NewLexer(config *Config, rulesFunc func() Rules) (*RegexLexer, error) {
}
r := &RegexLexer{
config: config,
regexTimeout: time.Millisecond * 250,
fetchRulesFunc: func() (Rules, error) { return rulesFunc(), nil },
}
// One-off code to generate XML lexers in the Chroma source tree.
Expand Down Expand Up @@ -261,10 +263,11 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit

// RegexLexer is the default lexer implementation used in Chroma.
type RegexLexer struct {
registry *LexerRegistry // The LexerRegistry this Lexer is associated with, if any.
config *Config
analyser func(text string) float32
trace bool
registry *LexerRegistry // The LexerRegistry this Lexer is associated with, if any.
config *Config
regexTimeout time.Duration
analyser func(text string) float32
trace bool

mu sync.Mutex
compiled bool
Expand All @@ -286,6 +289,12 @@ func (r *RegexLexer) Rules() (Rules, error) {
return r.rawRules, nil
}

// SetTimeout sets the regex match timeout for this lexer by storing it in
// r.regexTimeout. The default, assigned in NewLexer, is 250 ms.
//
// The stored value is applied in maybeCompile, where it becomes the
// MatchTimeout of every compiled rule regexp. To disable the timeout, set it
// to zero; maybeCompile then substitutes the largest representable duration.
//
// NOTE(review): because the value is read while rules are being compiled,
// calling SetTimeout after compilation presumably has no effect on rules that
// are already compiled — confirm against the full maybeCompile.
func (r *RegexLexer) SetTimeout(timeout time.Duration) {
r.regexTimeout = timeout
}

// SetRegistry the lexer will use to lookup other lexers if necessary.
func (r *RegexLexer) SetRegistry(registry *LexerRegistry) Lexer {
r.registry = registry
Expand Down Expand Up @@ -334,7 +343,12 @@ func (r *RegexLexer) maybeCompile() (err error) {
if err != nil {
return fmt.Errorf("failed to compile rule %s.%d: %s", state, i, err)
}
rule.Regexp.MatchTimeout = time.Millisecond * 250

timeout := r.regexTimeout
if timeout == 0 {
timeout = time.Duration(math.MaxInt64)
}
rule.Regexp.MatchTimeout = timeout
}
}
}
Expand Down

0 comments on commit bb58e91

Please sign in to comment.