From 518d1309be20804368b705a306cb193c1f7654d1 Mon Sep 17 00:00:00 2001 From: Rob Skillington Date: Thu, 19 Nov 2020 04:37:27 -0500 Subject: [PATCH] [dbnode] Add configurability for regexp DFA and FSA limits (#2926) --- go.mod | 2 +- go.sum | 4 +-- src/cmd/services/m3dbnode/config/config.go | 27 +++++++++++++++++++ .../services/m3dbnode/config/config_test.go | 2 ++ src/dbnode/server/server.go | 13 +++++++++ src/m3ninx/index/segment/fst/regexp/regexp.go | 2 +- 6 files changed, 46 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index f9daf5794a..8fa319d98c 100644 --- a/go.mod +++ b/go.mod @@ -63,7 +63,7 @@ require ( github.com/m3db/stackmurmur3/v2 v2.0.2 github.com/m3db/tools v0.0.0-20181008195521-c6ded3f34878 github.com/m3dbx/pilosa v1.4.1 - github.com/m3dbx/vellum v0.0.0-20200826162549-f94c029903de + github.com/m3dbx/vellum v0.0.0-20201119082309-5b47f7a70f69 github.com/mauricelam/genny v0.0.0-20180903214747-eb2c5232c885 github.com/mjibson/esc v0.1.0 github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect diff --git a/go.sum b/go.sum index 40f7e67334..9ea86511f2 100644 --- a/go.sum +++ b/go.sum @@ -556,8 +556,8 @@ github.com/m3db/tools v0.0.0-20181008195521-c6ded3f34878 h1:kww0LtVVfGrXR7Ofpbi/ github.com/m3db/tools v0.0.0-20181008195521-c6ded3f34878/go.mod h1:TxroQUZzb1wzOsq+4+TfVtT7z89YTz3v2UJAYfLNfLE= github.com/m3dbx/pilosa v1.4.1 h1:/Cpp1XAHSd6orpjceXGiKpCoDdYBP5BD/6NoqGG9eVg= github.com/m3dbx/pilosa v1.4.1/go.mod h1:Jt0+w9O08sa7qWDeRC58VBjb4OeOTDMOhfvVmyeVCO8= -github.com/m3dbx/vellum v0.0.0-20200826162549-f94c029903de h1:C4DpCfTNzJf5RhJqxOtfWAnD2d6ls7KDnK1boBGUnVg= -github.com/m3dbx/vellum v0.0.0-20200826162549-f94c029903de/go.mod h1:DOTAUfV4bzK6Nrb0dboT/oCG0DnQuX+/n0jfZPh6xxI= +github.com/m3dbx/vellum v0.0.0-20201119082309-5b47f7a70f69 h1:dANuca0xuYlZR7qWdPIIAZKG0YHvsbLTzdenj53yQvc= +github.com/m3dbx/vellum v0.0.0-20201119082309-5b47f7a70f69/go.mod h1:DOTAUfV4bzK6Nrb0dboT/oCG0DnQuX+/n0jfZPh6xxI= github.com/magiconair/properties 
v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4= github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= diff --git a/src/cmd/services/m3dbnode/config/config.go b/src/cmd/services/m3dbnode/config/config.go index 2a574b9ce3..b1b3b8332d 100644 --- a/src/cmd/services/m3dbnode/config/config.go +++ b/src/cmd/services/m3dbnode/config/config.go @@ -40,6 +40,7 @@ import ( xlog "github.com/m3db/m3/src/x/log" "github.com/m3db/m3/src/x/opentracing" + "github.com/m3dbx/vellum/regexp" "go.etcd.io/etcd/embed" "go.etcd.io/etcd/pkg/transport" "go.etcd.io/etcd/pkg/types" @@ -379,6 +380,14 @@ type IndexConfiguration struct { // as they are very CPU-intensive (regex and FST matching). MaxQueryIDsConcurrency int `yaml:"maxQueryIDsConcurrency" validate:"min=0"` + // RegexpDFALimit is the limit on the max number of states used by a + // regexp deterministic finite automaton. Default is 10,000 states. + RegexpDFALimit *int `yaml:"regexpDFALimit"` + + // RegexpFSALimit is the limit on the max number of bytes used by the + // finite state automaton. Default is 10MB (10 million, as an integer). + RegexpFSALimit *uint `yaml:"regexpFSALimit"` + // ForwardIndexProbability determines the likelihood that an incoming write is // written to the next block, when arriving close to the block boundary. // @@ -396,6 +405,24 @@ type IndexConfiguration struct { ForwardIndexThreshold float64 `yaml:"forwardIndexThreshold" validate:"min=0.0,max=1.0"` } +// RegexpDFALimitOrDefault returns the deterministic finite automaton states +// limit or default. +func (c IndexConfiguration) RegexpDFALimitOrDefault() int { + if c.RegexpDFALimit == nil { + return regexp.StateLimit() + } + return *c.RegexpDFALimit +} + +// RegexpFSALimitOrDefault returns the finite state automaton size +// limit or default.
+func (c IndexConfiguration) RegexpFSALimitOrDefault() uint { + if c.RegexpFSALimit == nil { + return regexp.DefaultLimit() + } + return *c.RegexpFSALimit +} + // TransformConfiguration contains configuration options that can transform // incoming writes. type TransformConfiguration struct { diff --git a/src/cmd/services/m3dbnode/config/config_test.go b/src/cmd/services/m3dbnode/config/config_test.go index c41a099429..5beeb0a5e7 100644 --- a/src/cmd/services/m3dbnode/config/config_test.go +++ b/src/cmd/services/m3dbnode/config/config_test.go @@ -337,6 +337,8 @@ func TestConfiguration(t *testing.T) { expected := `db: index: maxQueryIDsConcurrency: 0 + regexpDFALimit: null + regexpFSALimit: null forwardIndexProbability: 0 forwardIndexThreshold: 0 transforms: diff --git a/src/dbnode/server/server.go b/src/dbnode/server/server.go index b33b73f40e..72422ee912 100644 --- a/src/dbnode/server/server.go +++ b/src/dbnode/server/server.go @@ -91,6 +91,9 @@ import ( xsync "github.com/m3db/m3/src/x/sync" apachethrift "github.com/apache/thrift/lib/go/thrift" + "github.com/m3dbx/vellum/levenshtein" + "github.com/m3dbx/vellum/levenshtein2" + "github.com/m3dbx/vellum/regexp" opentracing "github.com/opentracing/opentracing-go" "github.com/uber-go/tally" "github.com/uber/tchannel-go" @@ -371,6 +374,16 @@ func Run(runOpts RunOptions) { logger.Warn("max index query IDs concurrency was not set, falling back to default value") } + // Set global index options. 
+ if n := cfg.Index.RegexpDFALimitOrDefault(); n > 0 { + regexp.SetStateLimit(n) + levenshtein.SetStateLimit(n) + levenshtein2.SetStateLimit(n) + } + if n := cfg.Index.RegexpFSALimitOrDefault(); n > 0 { + regexp.SetDefaultLimit(n) + } + buildReporter := instrument.NewBuildReporter(iOpts) if err := buildReporter.Start(); err != nil { logger.Fatal("unable to start build reporter", zap.Error(err)) diff --git a/src/m3ninx/index/segment/fst/regexp/regexp.go b/src/m3ninx/index/segment/fst/regexp/regexp.go index cd21d99f9f..e382d09366 100644 --- a/src/m3ninx/index/segment/fst/regexp/regexp.go +++ b/src/m3ninx/index/segment/fst/regexp/regexp.go @@ -41,7 +41,7 @@ func ParseRegexp(pattern string) (a *vregexp.Regexp, prefixBeg, prefixEnd []byte // ParsedRegexp uses the pre-parsed regexp pattern and creates an equivalent matching automaton, and // corresponding keys to bound prefix beginning and end during the FST search. func ParsedRegexp(pattern string, parsed *syntax.Regexp) (a *vregexp.Regexp, prefixBeg, prefixEnd []byte, err error) { - re, err := vregexp.NewParsedWithLimit(pattern, parsed, vregexp.DefaultLimit) + re, err := vregexp.NewParsedWithLimit(pattern, parsed, vregexp.DefaultLimit()) if err != nil { return nil, nil, nil, err }