Skip to content

Commit

Permalink
[dbnode] Add configurability for regexp DFA and FSA limits (#2926)
Browse files Browse the repository at this point in the history
  • Loading branch information
robskillington authored Nov 19, 2020
1 parent c6fe28d commit 518d130
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 4 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ require (
github.com/m3db/stackmurmur3/v2 v2.0.2
github.com/m3db/tools v0.0.0-20181008195521-c6ded3f34878
github.com/m3dbx/pilosa v1.4.1
github.com/m3dbx/vellum v0.0.0-20200826162549-f94c029903de
github.com/m3dbx/vellum v0.0.0-20201119082309-5b47f7a70f69
github.com/mauricelam/genny v0.0.0-20180903214747-eb2c5232c885
github.com/mjibson/esc v0.1.0
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -556,8 +556,8 @@ github.com/m3db/tools v0.0.0-20181008195521-c6ded3f34878 h1:kww0LtVVfGrXR7Ofpbi/
github.com/m3db/tools v0.0.0-20181008195521-c6ded3f34878/go.mod h1:TxroQUZzb1wzOsq+4+TfVtT7z89YTz3v2UJAYfLNfLE=
github.com/m3dbx/pilosa v1.4.1 h1:/Cpp1XAHSd6orpjceXGiKpCoDdYBP5BD/6NoqGG9eVg=
github.com/m3dbx/pilosa v1.4.1/go.mod h1:Jt0+w9O08sa7qWDeRC58VBjb4OeOTDMOhfvVmyeVCO8=
github.com/m3dbx/vellum v0.0.0-20200826162549-f94c029903de h1:C4DpCfTNzJf5RhJqxOtfWAnD2d6ls7KDnK1boBGUnVg=
github.com/m3dbx/vellum v0.0.0-20200826162549-f94c029903de/go.mod h1:DOTAUfV4bzK6Nrb0dboT/oCG0DnQuX+/n0jfZPh6xxI=
github.com/m3dbx/vellum v0.0.0-20201119082309-5b47f7a70f69 h1:dANuca0xuYlZR7qWdPIIAZKG0YHvsbLTzdenj53yQvc=
github.com/m3dbx/vellum v0.0.0-20201119082309-5b47f7a70f69/go.mod h1:DOTAUfV4bzK6Nrb0dboT/oCG0DnQuX+/n0jfZPh6xxI=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
Expand Down
27 changes: 27 additions & 0 deletions src/cmd/services/m3dbnode/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
xlog "github.com/m3db/m3/src/x/log"
"github.com/m3db/m3/src/x/opentracing"

"github.com/m3dbx/vellum/regexp"
"go.etcd.io/etcd/embed"
"go.etcd.io/etcd/pkg/transport"
"go.etcd.io/etcd/pkg/types"
Expand Down Expand Up @@ -379,6 +380,14 @@ type IndexConfiguration struct {
// as they are very CPU-intensive (regex and FST matching).
MaxQueryIDsConcurrency int `yaml:"maxQueryIDsConcurrency" validate:"min=0"`

// RegexpDFALimit is the limit on the max number of states used by a
// regexp deterministic finite automaton. Default is 10,000 states.
RegexpDFALimit *int `yaml:"regexpDFALimit"`

// RegexpFSALimit is the limit on the max number of bytes used by the
// finite state automaton. Default is 10mb (10 million as int).
RegexpFSALimit *uint `yaml:"regexpFSALimit"`

// ForwardIndexProbability determines the likelihood that an incoming write is
// written to the next block, when arriving close to the block boundary.
//
Expand All @@ -396,6 +405,24 @@ type IndexConfiguration struct {
ForwardIndexThreshold float64 `yaml:"forwardIndexThreshold" validate:"min=0.0,max=1.0"`
}

// RegexpDFALimitOrDefault reports the limit on regexp deterministic finite
// automaton states: the configured RegexpDFALimit when one is set, otherwise
// the value returned by regexp.StateLimit().
func (c IndexConfiguration) RegexpDFALimitOrDefault() int {
	// A nil pointer means the option was not set in the configuration.
	if limit := c.RegexpDFALimit; limit != nil {
		return *limit
	}
	return regexp.StateLimit()
}

// RegexpFSALimitOrDefault reports the size limit for the regexp finite state
// automaton: the configured RegexpFSALimit when one is set, otherwise the
// value returned by regexp.DefaultLimit().
func (c IndexConfiguration) RegexpFSALimitOrDefault() uint {
	// A nil pointer means the option was not set in the configuration.
	if limit := c.RegexpFSALimit; limit != nil {
		return *limit
	}
	return regexp.DefaultLimit()
}

// TransformConfiguration contains configuration options that can transform
// incoming writes.
type TransformConfiguration struct {
Expand Down
2 changes: 2 additions & 0 deletions src/cmd/services/m3dbnode/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,8 @@ func TestConfiguration(t *testing.T) {
expected := `db:
index:
maxQueryIDsConcurrency: 0
regexpDFALimit: null
regexpFSALimit: null
forwardIndexProbability: 0
forwardIndexThreshold: 0
transforms:
Expand Down
13 changes: 13 additions & 0 deletions src/dbnode/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ import (
xsync "github.com/m3db/m3/src/x/sync"

apachethrift "github.com/apache/thrift/lib/go/thrift"
"github.com/m3dbx/vellum/levenshtein"
"github.com/m3dbx/vellum/levenshtein2"
"github.com/m3dbx/vellum/regexp"
opentracing "github.com/opentracing/opentracing-go"
"github.com/uber-go/tally"
"github.com/uber/tchannel-go"
Expand Down Expand Up @@ -371,6 +374,16 @@ func Run(runOpts RunOptions) {
logger.Warn("max index query IDs concurrency was not set, falling back to default value")
}

// Set global index options.
if n := cfg.Index.RegexpDFALimitOrDefault(); n > 0 {
regexp.SetStateLimit(n)
levenshtein.SetStateLimit(n)
levenshtein2.SetStateLimit(n)
}
if n := cfg.Index.RegexpFSALimitOrDefault(); n > 0 {
regexp.SetDefaultLimit(n)
}

buildReporter := instrument.NewBuildReporter(iOpts)
if err := buildReporter.Start(); err != nil {
logger.Fatal("unable to start build reporter", zap.Error(err))
Expand Down
2 changes: 1 addition & 1 deletion src/m3ninx/index/segment/fst/regexp/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func ParseRegexp(pattern string) (a *vregexp.Regexp, prefixBeg, prefixEnd []byte
// ParsedRegexp uses the pre-parsed regexp pattern and creates an equivalent matching automaton, and
// corresponding keys to bound prefix beginning and end during the FST search.
func ParsedRegexp(pattern string, parsed *syntax.Regexp) (a *vregexp.Regexp, prefixBeg, prefixEnd []byte, err error) {
re, err := vregexp.NewParsedWithLimit(pattern, parsed, vregexp.DefaultLimit)
re, err := vregexp.NewParsedWithLimit(pattern, parsed, vregexp.DefaultLimit())
if err != nil {
return nil, nil, nil, err
}
Expand Down

0 comments on commit 518d130

Please sign in to comment.