Skip to content

Commit

Permalink
Make regex matcher timeout configurable
Browse files Browse the repository at this point in the history
I use chroma to syntax highlight code snippets on my blog. While profiling my
static blog generator, I noticed that a lot of time was being spent setting up
the regex timeout watcher: https://alexbakker.me/u/g987wpgo1v.svg.

I'm not exactly sure why it's spending so much time there, but disabling the
timeout results in a 2x performance improvement in my testing.
  • Loading branch information
alexbakker committed Oct 8, 2022
1 parent 4dfc2ca commit 275483e
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 5 deletions.
5 changes: 5 additions & 0 deletions delegate.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package chroma

import (
"bytes"
"time"
)

type delegatingLexer struct {
Expand Down Expand Up @@ -39,6 +40,10 @@ func (d *delegatingLexer) SetRegistry(r *LexerRegistry) Lexer {
return d
}

// SetTimeout accepts a timeout but currently discards it: this method has no
// body, so calling it has no effect on the delegating lexer.
// NOTE(review): the timeout is not forwarded to d.language — confirm whether
// the underlying lexers should receive it.
func (d *delegatingLexer) SetTimeout(timeout time.Duration) {
// Not implemented
}

// Config reports the configuration of the language lexer that this
// delegating lexer hands off to.
func (d *delegatingLexer) Config() *Config {
	cfg := d.language.Config()
	return cfg
}
Expand Down
5 changes: 5 additions & 0 deletions lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package chroma
import (
"fmt"
"strings"
"time"
)

var (
Expand Down Expand Up @@ -108,6 +109,10 @@ type Lexer interface {
// AnalyseText scores how likely a fragment of text is to match
// this lexer, between 0.0 and 1.0. A value of 1 indicates high confidence.
AnalyseText(text string) float32
// SetTimeout sets the timeout after which the lexer gives up. To disable
// the timeout, set it to zero. If the lexer implementation doesn't support
// setting a timeout, calling this function has no effect.
SetTimeout(timeout time.Duration)
}

// Lexers is a slice of lexers sortable by name.
Expand Down
24 changes: 19 additions & 5 deletions regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package chroma

import (
"fmt"
"math"
"os"
"path/filepath"
"regexp"
Expand Down Expand Up @@ -100,6 +101,7 @@ func NewLexer(config *Config, rulesFunc func() Rules) (*RegexLexer, error) {
}
r := &RegexLexer{
config: config,
regexTimeout: time.Millisecond * 250,
fetchRulesFunc: func() (Rules, error) { return rulesFunc(), nil },
}
// One-off code to generate XML lexers in the Chroma source tree.
Expand Down Expand Up @@ -261,10 +263,11 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit

// RegexLexer is the default lexer implementation used in Chroma.
type RegexLexer struct {
registry *LexerRegistry // The LexerRegistry this Lexer is associated with, if any.
config *Config
analyser func(text string) float32
trace bool
registry *LexerRegistry // The LexerRegistry this Lexer is associated with, if any.
config *Config
regexTimeout time.Duration
analyser func(text string) float32
trace bool

mu sync.Mutex
compiled bool
Expand All @@ -286,6 +289,12 @@ func (r *RegexLexer) Rules() (Rules, error) {
return r.rawRules, nil
}

// SetTimeout sets the timeout after which the lexer gives up. The default
// timeout is 250 ms. To disable the timeout, set it to zero.
//
// The value is only stored on the lexer here; it is copied onto each rule's
// regular expression when the rules are compiled.
// NOTE(review): if the rules are compiled only once, a call made after that
// point would presumably not take effect — confirm against maybeCompile.
func (r *RegexLexer) SetTimeout(timeout time.Duration) {
r.regexTimeout = timeout
}

// SetRegistry sets the registry the lexer will use to look up other lexers if necessary.
func (r *RegexLexer) SetRegistry(registry *LexerRegistry) Lexer {
r.registry = registry
Expand Down Expand Up @@ -334,7 +343,12 @@ func (r *RegexLexer) maybeCompile() (err error) {
if err != nil {
return fmt.Errorf("failed to compile rule %s.%d: %s", state, i, err)
}
rule.Regexp.MatchTimeout = time.Millisecond * 250

timeout := r.regexTimeout
if timeout == 0 {
timeout = time.Duration(math.MaxInt64)
}
rule.Regexp.MatchTimeout = timeout
}
}
}
Expand Down
6 changes: 6 additions & 0 deletions remap.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package chroma

import "time"

type remappingLexer struct {
lexer Lexer
mapper func(Token) []Token
Expand All @@ -24,6 +26,10 @@ func (r *remappingLexer) SetRegistry(registry *LexerRegistry) Lexer {
return r
}

// SetTimeout forwards the timeout to the wrapped lexer.
//
// A remapping lexer performs no matching of its own; it only rewrites the
// tokens produced by the lexer it wraps. Discarding the timeout here would
// silently ignore the caller's setting, so it is passed through. If the
// wrapped lexer does not support timeouts, its own SetTimeout is a no-op and
// this call has no effect either.
func (r *remappingLexer) SetTimeout(timeout time.Duration) {
	r.lexer.SetTimeout(timeout)
}

// Config reports the configuration of the lexer wrapped by this remapping
// lexer.
func (r *remappingLexer) Config() *Config {
	cfg := r.lexer.Config()
	return cfg
}
Expand Down

0 comments on commit 275483e

Please sign in to comment.