Skip to content

Commit

Permalink
topdown: Adding cap to caches for regex and glob built-in functions (o…
Browse files Browse the repository at this point in the history
…pen-policy-agent#6846)

Fixing possible memory leak where caches grow uncontrollably when large amounts of regexes or globs are generated or originate from the input document.

Fixes: open-policy-agent#6828

Signed-off-by: Johan Fylling <johan.dev@fylling.se>
  • Loading branch information
johanfylling authored Jul 5, 2024
1 parent 8d2b2a8 commit 34349c5
Show file tree
Hide file tree
Showing 12 changed files with 439 additions and 1 deletion.
9 changes: 8 additions & 1 deletion internal/compiler/wasm/opa/callgraph.csv

Large diffs are not rendered by default.

Binary file modified internal/compiler/wasm/opa/opa.wasm
Binary file not shown.
9 changes: 9 additions & 0 deletions topdown/glob.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"github.com/open-policy-agent/opa/topdown/builtins"
)

const globCacheMaxSize = 100

var globCacheLock = sync.Mutex{}
var globCache map[string]glob.Glob

Expand Down Expand Up @@ -64,6 +66,13 @@ func globCompileAndMatch(id, pattern, match string, delimiters []rune) (bool, er
if p, err = glob.Compile(pattern, delimiters...); err != nil {
return false, err
}
if len(globCache) >= globCacheMaxSize {
// Delete a (semi-)random key to make room for the new one.
for k := range globCache {
delete(globCache, k)
break
}
}
globCache[id] = p
}
out := p.Match(match)
Expand Down
100 changes: 100 additions & 0 deletions topdown/glob_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// Copyright 2024 The OPA Authors. All rights reserved.
// Use of this source code is governed by an Apache2
// license that can be found in the LICENSE file.

package topdown

import (
"fmt"
"sync"
"testing"

"github.com/gobwas/glob"
"github.com/open-policy-agent/opa/ast"
)

func BenchmarkBuiltinGlobMatch(b *testing.B) {
iter := func(*ast.Term) error { return nil }
ctx := BuiltinContext{}

for _, reusePattern := range []bool{true, false} {
for _, patternCount := range []int{10, 100, 1000} {
b.Run(fmt.Sprintf("reuse-pattern=%v, pattern-count=%d", reusePattern, patternCount), func(b *testing.B) {
b.ResetTimer()
for n := 0; n < b.N; n++ {
// Clearing the cache
globCache = make(map[string]glob.Glob)

for i := 0; i < patternCount; i++ {
var operands []*ast.Term
if reusePattern {
operands = []*ast.Term{
ast.NewTerm(ast.String("foo/*")),
ast.NullTerm(),
ast.NewTerm(ast.String("foo/bar")),
}
} else {
operands = []*ast.Term{
ast.NewTerm(ast.String(fmt.Sprintf("foo/*/%d", i))),
ast.NullTerm(),
ast.NewTerm(ast.String(fmt.Sprintf("foo/bar/%d", i))),
}
}
if err := builtinGlobMatch(ctx, operands, iter); err != nil {
b.Fatal(err)
}
}
}
})
}
}
}

func BenchmarkBuiltinGlobMatchAsync(b *testing.B) {
iter := func(*ast.Term) error { return nil }
ctx := BuiltinContext{}

for _, reusePattern := range []bool{true, false} {
for _, clientCount := range []int{100, 200} {
for _, patternCount := range []int{10, 100, 1000} {
b.Run(fmt.Sprintf("reuse-pattern=%v, clients=%d, pattern-count=%d", reusePattern, clientCount, patternCount), func(b *testing.B) {
b.ResetTimer()
for n := 0; n < b.N; n++ {
// Clearing the cache
globCache = make(map[string]glob.Glob)

wg := sync.WaitGroup{}
for i := 0; i < clientCount; i++ {
clientID := i
wg.Add(1)
go func() {
for j := 0; j < patternCount; j++ {
var operands []*ast.Term
if reusePattern {
operands = []*ast.Term{
ast.NewTerm(ast.String("foo/*")),
ast.NullTerm(),
ast.NewTerm(ast.String("foo/bar")),
}
} else {
operands = []*ast.Term{
ast.NewTerm(ast.String(fmt.Sprintf("foo/*/%d/%d", clientID, j))),
ast.NullTerm(),
ast.NewTerm(ast.String(fmt.Sprintf("foo/bar/%d/%d", clientID, j))),
}
}
if err := builtinGlobMatch(ctx, operands, iter); err != nil {
b.Error(err)
return
}
}
wg.Done()
}()
}
wg.Wait()
}
})
}
}
}
}
71 changes: 71 additions & 0 deletions topdown/glob_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Copyright 2024 The OPA Authors. All rights reserved.
// Use of this source code is governed by an Apache2
// license that can be found in the LICENSE file.

package topdown

import (
"fmt"
"testing"

"github.com/open-policy-agent/opa/ast"
)

func TestGlobBuiltinCache(t *testing.T) {
ctx := BuiltinContext{}
iter := func(*ast.Term) error { return nil }

// A novel glob pattern is cached.
glob1 := "foo/*"
operands := []*ast.Term{
ast.NewTerm(ast.String(glob1)),
ast.NullTerm(),
ast.NewTerm(ast.String("foo/bar")),
}
err := builtinGlobMatch(ctx, operands, iter)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}

// the glob id will have a trailing '-' rune.
if _, ok := globCache[fmt.Sprintf("%s-", glob1)]; !ok {
t.Fatalf("Expected glob to be cached: %v", glob1)
}

// Fill up the cache.
for i := 0; i < regexCacheMaxSize-1; i++ {
operands := []*ast.Term{
ast.NewTerm(ast.String(fmt.Sprintf("foo/%d/*", i))),
ast.NullTerm(),
ast.NewTerm(ast.String(fmt.Sprintf("foo/%d/bar", i))),
}
err := builtinGlobMatch(ctx, operands, iter)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
}

if len(globCache) != regexCacheMaxSize {
t.Fatalf("Expected cache to be full")
}

// A new glob pattern is cached and a random pattern is evicted.
glob2 := "bar/*"
operands = []*ast.Term{
ast.NewTerm(ast.String(glob2)),
ast.NullTerm(),
ast.NewTerm(ast.String("bar/baz")),
}
err = builtinGlobMatch(ctx, operands, iter)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}

if len(globCache) != regexCacheMaxSize {
t.Fatalf("Expected cache be capped at %d, was %d", regexCacheMaxSize, len(globCache))
}

if _, ok := globCache[fmt.Sprintf("%s-", glob2)]; !ok {
t.Fatalf("Expected glob to be cached: %v", glob2)
}
}
9 changes: 9 additions & 0 deletions topdown/regex.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import (
"github.com/open-policy-agent/opa/topdown/builtins"
)

const regexCacheMaxSize = 100

var regexpCacheLock = sync.Mutex{}
var regexpCache map[string]*regexp.Regexp

Expand Down Expand Up @@ -111,6 +113,13 @@ func getRegexp(pat string) (*regexp.Regexp, error) {
if err != nil {
return nil, err
}
if len(regexpCache) >= regexCacheMaxSize {
// Delete a (semi-)random key to make room for the new one.
for k := range regexpCache {
delete(regexpCache, k)
break
}
}
regexpCache[pat] = re
}
return re, nil
Expand Down
96 changes: 96 additions & 0 deletions topdown/regex_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright 2024 The OPA Authors. All rights reserved.
// Use of this source code is governed by an Apache2
// license that can be found in the LICENSE file.

package topdown

import (
"fmt"
"regexp"
"sync"
"testing"

"github.com/open-policy-agent/opa/ast"
)

func BenchmarkBuiltinRegexMatch(b *testing.B) {
iter := func(*ast.Term) error { return nil }
ctx := BuiltinContext{}

for _, reusePattern := range []bool{true, false} {
for _, patternCount := range []int{10, 100, 1000} {
b.Run(fmt.Sprintf("reuse-pattern=%v, pattern-count=%d", reusePattern, patternCount), func(b *testing.B) {
b.ResetTimer()
for n := 0; n < b.N; n++ {
// Clearing the cache
regexpCache = make(map[string]*regexp.Regexp)

for i := 0; i < patternCount; i++ {
var operands []*ast.Term
if reusePattern {
operands = []*ast.Term{
ast.NewTerm(ast.String("foo.*")),
ast.NewTerm(ast.String("foobar")),
}
} else {
operands = []*ast.Term{
ast.NewTerm(ast.String(fmt.Sprintf("foo%d.*", i))),
ast.NewTerm(ast.String(fmt.Sprintf("foo%dbar", i))),
}
}
if err := builtinRegexMatch(ctx, operands, iter); err != nil {
b.Fatal(err)
}
}
}
})
}
}
}

func BenchmarkBuiltinRegexMatchAsync(b *testing.B) {
iter := func(*ast.Term) error { return nil }
ctx := BuiltinContext{}

for _, reusePattern := range []bool{true, false} {
for _, clientCount := range []int{100, 200} {
for _, patternCount := range []int{10, 100, 1000} {
b.Run(fmt.Sprintf("reuse-pattern=%v, clients=%d, pattern-count=%d", reusePattern, clientCount, patternCount), func(b *testing.B) {
b.ResetTimer()
for n := 0; n < b.N; n++ {
// Clearing the cache
regexpCache = make(map[string]*regexp.Regexp)

wg := sync.WaitGroup{}
for i := 0; i < clientCount; i++ {
clientID := i
wg.Add(1)
go func() {
for j := 0; j < patternCount; j++ {
var operands []*ast.Term
if reusePattern {
operands = []*ast.Term{
ast.NewTerm(ast.String("foo.*")),
ast.NewTerm(ast.String("foobar")),
}
} else {
operands = []*ast.Term{
ast.NewTerm(ast.String(fmt.Sprintf("foo%d_%d.*", clientID, j))),
ast.NewTerm(ast.String(fmt.Sprintf("foo%d_%dbar", clientID, j))),
}
}
if err := builtinRegexMatch(ctx, operands, iter); err != nil {
b.Error(err)
return
}
}
wg.Done()
}()
}
wg.Wait()
}
})
}
}
}
}
67 changes: 67 additions & 0 deletions topdown/regex_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright 2024 The OPA Authors. All rights reserved.
// Use of this source code is governed by an Apache2
// license that can be found in the LICENSE file.

package topdown

import (
"fmt"
"testing"

"github.com/open-policy-agent/opa/ast"
)

func TestRegexBuiltinCache(t *testing.T) {
ctx := BuiltinContext{}
iter := func(*ast.Term) error { return nil }

// A novel regex pattern is cached.
regex1 := "foo.*"
operands := []*ast.Term{
ast.NewTerm(ast.String(regex1)),
ast.NewTerm(ast.String("foobar")),
}
err := builtinRegexMatch(ctx, operands, iter)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}

if _, ok := regexpCache[regex1]; !ok {
t.Fatalf("Expected regex to be cached: %v", regex1)
}

// Fill up the cache.
for i := 0; i < regexCacheMaxSize-1; i++ {
operands := []*ast.Term{
ast.NewTerm(ast.String(fmt.Sprintf("foo%d.*", i))),
ast.NewTerm(ast.String(fmt.Sprintf("foo%dbar", i))),
}
err := builtinRegexMatch(ctx, operands, iter)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
}

if len(regexpCache) != regexCacheMaxSize {
t.Fatalf("Expected cache to be full")
}

// A new regex pattern is cached and a random pattern is evicted.
regex2 := "bar.*"
operands = []*ast.Term{
ast.NewTerm(ast.String(regex2)),
ast.NewTerm(ast.String("barbaz")),
}
err = builtinRegexMatch(ctx, operands, iter)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}

if len(regexpCache) != regexCacheMaxSize {
t.Fatalf("Expected cache be capped at %d, was %d", regexCacheMaxSize, len(regexpCache))
}

if _, ok := regexpCache[regex2]; !ok {
t.Fatalf("Expected regex to be cached: %v", regex2)
}
}
Loading

0 comments on commit 34349c5

Please sign in to comment.