Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kaizen: Increase throughput with flexible FA traversal #332

Merged
merged 2 commits into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:

# Alert on regression
alert-threshold: "120%"
fail-on-alert: true
fail-on-alert: false
comment-on-alert: false

# Disable github pages, for now.
Expand Down
5 changes: 0 additions & 5 deletions match_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ func (m *matchSet) addXSingleThreaded(exes ...X) *matchSet {
return m
}

// contains reports whether x is present as a key in the match set's underlying map.
func (m *matchSet) contains(x X) bool {
	_, present := m.set[x]
	return present
}

func (m *matchSet) matches() []X {
matches := make([]X, 0, len(m.set))
for x := range m.set {
Expand Down
5 changes: 5 additions & 0 deletions match_set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ func TestAddXSingleThreaded(t *testing.T) {
}
}

// contains reports whether x is present as a key in the match set's underlying map.
func (m *matchSet) contains(x X) bool {
	_, present := m.set[x]
	return present
}

func isSameMatches(matchSet *matchSet, exes ...X) bool {
if len(exes) == 0 && len(matchSet.matches()) == 0 {
return true
Expand Down
24 changes: 23 additions & 1 deletion nfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,29 @@ func (tm *transmap) all() []*fieldMatcher {
return all
}

func traverseFA(table *smallTable, val []byte, transitions []*fieldMatcher, bufs *bufpair) []*fieldMatcher {
// traverseDFA walks val through the automaton rooted at table one byte at a time, taking exactly
// one step per byte via dStep, and finishes with a single step on valueTerminator. The field
// transitions of every state reached along the way are appended to transitions, which is returned.
// The walk stops early as soon as a byte has no transition.
//
// Some Quamina patterns require an NFA, but many do not. The data structure is NFA-capable either
// way, so this deterministic walk is only valid when it is known in advance that every combination
// of an faState with a byte transitions to at most one other faState.
func traverseDFA(table *smallTable, val []byte, transitions []*fieldMatcher) []*fieldMatcher {
	for _, b := range val {
		state := table.dStep(b)
		if state == nil {
			// dead end: nothing further can match
			return transitions
		}
		transitions = append(transitions, state.fieldTransitions...)
		table = state.table
	}

	// every byte of val was consumed; take the final step on the end-of-value marker
	if last := table.dStep(valueTerminator); last != nil {
		transitions = append(transitions, last.fieldTransitions...)
	}
	return transitions
}

func traverseNFA(table *smallTable, val []byte, transitions []*fieldMatcher, bufs *bufpair) []*fieldMatcher {
currentStates := bufs.buf1
currentStates = append(currentStates, &faState{table: table})
nextStates := bufs.buf2
Expand Down
4 changes: 2 additions & 2 deletions shell_style_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,14 @@ func TestMakeShellStyleFA(t *testing.T) {
var bufs bufpair
for _, should := range shouldsForPatterns[i] {
var transitions []*fieldMatcher
gotTrans := traverseFA(a, []byte(should), transitions, &bufs)
gotTrans := traverseNFA(a, []byte(should), transitions, &bufs)
if len(gotTrans) != 1 || gotTrans[0] != wanted {
t.Errorf("Failure for %s on %s", pattern, should)
}
}
for _, shouldNot := range shouldNotForPatterns[i] {
var transitions []*fieldMatcher
gotTrans := traverseFA(a, []byte(shouldNot), transitions, &bufs)
gotTrans := traverseNFA(a, []byte(shouldNot), transitions, &bufs)
if gotTrans != nil {
t.Errorf("bogus match for %s on %s", pattern, shouldNot)
}
Expand Down
16 changes: 16 additions & 0 deletions small_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,22 @@ func (t *smallTable) step(utf8Byte byte, out *stepOut) {
panic("Malformed smallTable")
}

// dStep takes a single step through an automaton in the case where it is known to be
// deterministic, i.e. each combination of an faState and a byte value transitions to at
// most one other faState.
//
// It finds the first entry in t.ceilings that is strictly greater than utf8Byte and looks at the
// step stored at the same index. It returns nil if that step is nil, otherwise the first state
// of that step. It panics if no ceiling exceeds utf8Byte, which means the table is malformed.
func (t *smallTable) dStep(utf8Byte byte) *faState {
	for index, ceiling := range t.ceilings {
		if utf8Byte >= ceiling {
			continue
		}
		step := t.steps[index]
		if step == nil {
			return nil
		}
		// deterministic by contract, so only the first state is relevant
		return step.states[0]
	}
	panic("Malformed smallTable")
}

// makeSmallTable creates a pre-loaded small table, with all bytes not otherwise specified having the defaultStep
// value, and then a few other values with their indexes and values specified in the other two arguments. The
// goal is to reduce memory churn
Expand Down
18 changes: 15 additions & 3 deletions value_matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type vmFields struct {
singletonMatch []byte
singletonTransition *fieldMatcher
hasQNumbers bool
isNondeterministic bool
}

func (m *valueMatcher) fields() *vmFields {
Expand Down Expand Up @@ -71,14 +72,22 @@ func (m *valueMatcher) transitionOn(eventField *Field, bufs *bufpair) []*fieldMa
case vmFields.startTable != nil:
// if there is a potential for a numeric match, try making a Q number from the event
if vmFields.hasQNumbers && eventField.IsQNumber {
qNumber, err := qNumFromBytes(val)
qNum, err := qNumFromBytes(val)
if err == nil {
return traverseFA(vmFields.startTable, qNumber, transitions, bufs)
if vmFields.isNondeterministic {
return traverseNFA(vmFields.startTable, qNum, transitions, bufs)
} else {
return traverseDFA(vmFields.startTable, qNum, transitions)
}
}
}

// if it doesn't work as a Q number for some reason, go ahead and compare the string values
return traverseFA(vmFields.startTable, val, transitions, bufs)
if vmFields.isNondeterministic {
return traverseNFA(vmFields.startTable, val, transitions, bufs)
} else {
return traverseDFA(vmFields.startTable, val, transitions)
}

default:
// no FA, no singleton, nothing to do, this probably can't happen because a flattener
Expand Down Expand Up @@ -108,6 +117,7 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche
newFA, nextField = makeMultiAnythingButFA(val.list)
case shellStyleType:
newFA, nextField = makeShellStyleFA(valBytes, printer)
fields.isNondeterministic = true
case prefixType:
newFA, nextField = makePrefixFA(valBytes)
default:
Expand Down Expand Up @@ -150,6 +160,7 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche
case shellStyleType:
newAutomaton, nextField := makeShellStyleFA(valBytes, printer)
fields.startTable = newAutomaton
fields.isNondeterministic = true
m.update(fields)
return nextField
case prefixType:
Expand Down Expand Up @@ -187,6 +198,7 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche
newFA, nextField = makeMultiAnythingButFA(val.list)
case shellStyleType:
newFA, nextField = makeShellStyleFA(valBytes, printer)
fields.isNondeterministic = true
case prefixType:
newFA, nextField = makePrefixFA(valBytes)
default:
Expand Down