Skip to content

Commit

Permalink
Add a common regexp cache
Browse files Browse the repository at this point in the history
```
BenchmarkGetOrCompileRegexp-10    	73959368	        13.71 ns/op	       0 B/op	       0 allocs/op
BenchmarkCompileRegexp-10         	 3143529	       380.1 ns/op	     872 B/op	      10 allocs/op
```
  • Loading branch information
bep committed Jul 27, 2023
1 parent 7f058b8 commit 4d7af75
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 43 deletions.
44 changes: 44 additions & 0 deletions common/hstrings/strings.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ package hstrings

import (
"fmt"
"regexp"
"strings"
"sync"

"github.com/gohugoio/hugo/compare"
)
Expand Down Expand Up @@ -55,3 +57,45 @@ func EqualAny(a string, b ...string) bool {
}
return false
}

// regexpCache is a cache of compiled regular expressions keyed by their
// source pattern. Access to the map is guarded by an RWMutex.
//
// The zero value is ready to use: the map is allocated lazily by set.
type regexpCache struct {
	// mu guards re.
	mu sync.RWMutex
	// re maps a pattern to its compiled form. It may be nil until the first
	// call to set.
	re map[string]*regexp.Regexp
}

// getOrCompileRegexp returns the compiled regexp for pattern. On a cache miss
// the pattern is compiled with regexp.Compile and stored for later lookups.
//
// If the pattern does not compile, nothing is cached and the compile error is
// returned together with a nil regexp.
func (rc *regexpCache) getOrCompileRegexp(pattern string) (re *regexp.Regexp, err error) {
	if cached, ok := rc.get(pattern); ok {
		return cached, nil
	}

	// No lock is held while compiling. Concurrent misses for the same
	// pattern may each compile it; the last call to set wins.
	re, err = regexp.Compile(pattern)
	if err != nil {
		return nil, err
	}
	rc.set(pattern, re)

	return re, nil
}

// get looks up key while holding the read lock and reports whether an entry
// was present. Reading from a nil map is allowed, so this is safe on the zero
// value.
func (rc *regexpCache) get(key string) (re *regexp.Regexp, ok bool) {
	rc.mu.RLock()
	re, ok = rc.re[key]
	rc.mu.RUnlock()
	return re, ok
}

// set stores re under key while holding the write lock, replacing any
// existing entry for that key.
func (rc *regexpCache) set(key string, re *regexp.Regexp) {
	rc.mu.Lock()
	defer rc.mu.Unlock()

	// Writing to a nil map panics, so allocate it on first use.
	if rc.re == nil {
		rc.re = make(map[string]*regexp.Regexp)
	}
	rc.re[key] = re
}

// reCache is the package-level cache that GetOrCompileRegexp reads from and
// writes to. It is created with an initialized, empty map.
var reCache = regexpCache{re: make(map[string]*regexp.Regexp)}

// GetOrCompileRegexp returns the compiled regexp for pattern from the
// package-level cache. When the pattern has not been seen before, it is
// compiled and stored in the cache before being returned.
func GetOrCompileRegexp(pattern string) (re *regexp.Regexp, err error) {
	re, err = reCache.getOrCompileRegexp(pattern)
	return re, err
}
22 changes: 22 additions & 0 deletions common/hstrings/strings_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package hstrings

import (
"regexp"
"testing"

qt "github.com/frankban/quicktest"
Expand All @@ -34,3 +35,24 @@ func TestStringEqualFold(t *testing.T) {
c.Assert(StringEqualFold(s1).Eq("b"), qt.Equals, false)

}

// TestGetOrCompileRegexp checks that GetOrCompileRegexp compiles a valid
// pattern, returns the same instance on a repeated lookup, and reports an
// error without a regexp for an invalid pattern.
func TestGetOrCompileRegexp(t *testing.T) {
	c := qt.New(t)

	re, err := GetOrCompileRegexp(`\d+`)
	c.Assert(err, qt.IsNil)
	c.Assert(re.MatchString("123"), qt.Equals, true)

	// A second lookup of the same pattern yields the very same instance.
	again, err := GetOrCompileRegexp(`\d+`)
	c.Assert(err, qt.IsNil)
	c.Assert(again, qt.Equals, re)

	// An invalid pattern yields the compile error and no regexp.
	bad, err := GetOrCompileRegexp(`(`)
	c.Assert(err, qt.Not(qt.IsNil))
	c.Assert(bad, qt.IsNil)
}

// BenchmarkGetOrCompileRegexp calls GetOrCompileRegexp b.N times with the
// same pattern and discards the results.
func BenchmarkGetOrCompileRegexp(b *testing.B) {
	const pattern = `\d+`
	for remaining := b.N; remaining > 0; remaining-- {
		GetOrCompileRegexp(pattern)
	}
}

// BenchmarkCompileRegexp calls regexp.MustCompile b.N times with the same
// pattern, as the uncached baseline for BenchmarkGetOrCompileRegexp.
func BenchmarkCompileRegexp(b *testing.B) {
	const pattern = `\d+`
	for remaining := b.N; remaining > 0; remaining-- {
		regexp.MustCompile(pattern)
	}
}
47 changes: 4 additions & 43 deletions tpl/strings/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,14 @@
package strings

import (
"regexp"
"sync"

"github.com/gohugoio/hugo/common/hstrings"
"github.com/spf13/cast"
)

// FindRE returns a list of strings that match the regular expression. By default all matches
// will be included. The number of matches can be limited with an optional third parameter.
func (ns *Namespace) FindRE(expr string, content any, limit ...any) ([]string, error) {
re, err := reCache.Get(expr)
re, err := hstrings.GetOrCompileRegexp(expr)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -54,7 +52,7 @@ func (ns *Namespace) FindRE(expr string, content any, limit ...any) ([]string, e
// limited with the optional limit parameter. A return value of nil indicates
// no match.
func (ns *Namespace) FindRESubmatch(expr string, content any, limit ...any) ([][]string, error) {
re, err := reCache.Get(expr)
re, err := hstrings.GetOrCompileRegexp(expr)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -102,7 +100,7 @@ func (ns *Namespace) ReplaceRE(pattern, repl, s any, n ...any) (_ string, err er
}
}

re, err := reCache.Get(sp)
re, err := hstrings.GetOrCompileRegexp(sp)
if err != nil {
return "", err
}
Expand All @@ -116,40 +114,3 @@ func (ns *Namespace) ReplaceRE(pattern, repl, s any, n ...any) (_ string, err er
return re.ReplaceAllString(str, sr)
}), nil
}

// regexpCache is a cache of compiled regular expressions keyed by their
// source pattern. Access to the map is guarded by an RWMutex.
//
// The zero value is ready to use: the map is allocated lazily by set.
type regexpCache struct {
	// mu guards re.
	mu sync.RWMutex
	// re maps a pattern to its compiled form. It may be nil until the first
	// call to set.
	re map[string]*regexp.Regexp
}

// Get returns the compiled regexp for pattern. On a cache miss the pattern is
// compiled with regexp.Compile and stored for later lookups.
//
// If the pattern does not compile, nothing is cached and the compile error is
// returned together with a nil regexp.
func (rc *regexpCache) Get(pattern string) (re *regexp.Regexp, err error) {
	if cached, ok := rc.get(pattern); ok {
		return cached, nil
	}

	// No lock is held while compiling. Concurrent misses for the same
	// pattern may each compile it; the last call to set wins.
	re, err = regexp.Compile(pattern)
	if err != nil {
		return nil, err
	}
	rc.set(pattern, re)

	return re, nil
}

// get looks up key while holding the read lock and reports whether an entry
// was present. Reading from a nil map is allowed, so this is safe on the zero
// value.
func (rc *regexpCache) get(key string) (re *regexp.Regexp, ok bool) {
	rc.mu.RLock()
	re, ok = rc.re[key]
	rc.mu.RUnlock()
	return re, ok
}

// set stores re under key while holding the write lock, replacing any
// existing entry for that key.
func (rc *regexpCache) set(key string, re *regexp.Regexp) {
	rc.mu.Lock()
	defer rc.mu.Unlock()

	// Writing to a nil map panics, so allocate it on first use.
	if rc.re == nil {
		rc.re = make(map[string]*regexp.Regexp)
	}
	rc.re[key] = re
}

// reCache is the package-level regexp cache, created with an initialized,
// empty map.
var reCache = regexpCache{re: make(map[string]*regexp.Regexp)}

0 comments on commit 4d7af75

Please sign in to comment.