Skip to content

Commit

Permalink
Save 5% of process line time by skipping creating a string slice and … (
Browse files Browse the repository at this point in the history
#13)

* Save 5% of process line time by skipping creating a string slice and giving direct access to the indexed results

* Add overall extractor benchmarking

* Fix out of bounds bug
  • Loading branch information
zix99 authored Nov 20, 2019
1 parent 19ac1ef commit 5ce7781
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 43 deletions.
40 changes: 12 additions & 28 deletions pkg/extractor/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,12 @@ type BString []byte

// Match is a single given match. processLineSync returns one when a line
// yields a non-empty extracted key.
// NOTE(review): this span is a rendered diff, so the field list appears twice:
// the first listing (with Groups) is the pre-change struct and the second
// listing (without Groups) is the post-change struct.
type Match struct {
bLine BString // Keep the pointer around next to line
Line string // Unsafe pointer to bLine (no-copy)
Groups []string // Groups of the matched regex expression
Indices []int // match indices as returned by regexp
Extracted string // The extracted expression
LineNumber uint64 // Line number
MatchNumber uint64 // Match number
bLine BString // Keep the pointer around next to line
Line string // Unsafe pointer to bLine (no-copy)
Indices []int // match indices as returned by regexp
Extracted string // The extracted expression
LineNumber uint64 // Line number
MatchNumber uint64 // Match number
}

// Config for the extractor
Expand Down Expand Up @@ -67,20 +66,6 @@ func (s *Extractor) ReadChan() <-chan []Match {
return s.readChan
}

// indexToSlices turns a flat list of start/end index pairs into the
// corresponding substrings of s. Pair i occupies indexMatches[2*i] and
// indexMatches[2*i+1]; a pair with a negative start or end yields "".
// A trailing unpaired index is ignored.
func indexToSlices(s string, indexMatches []int) []string {
	groups := make([]string, len(indexMatches)/2)
	for g := range groups {
		lo, hi := indexMatches[2*g], indexMatches[2*g+1]
		// Entries are already "" from make; only fill in real matches.
		if lo >= 0 && hi >= 0 {
			groups[g] = s[lo:hi]
		}
	}
	return groups
}

// async safe
func (s *Extractor) processLineSync(line BString) (Match, bool) {
lineNum := atomic.AddUint64(&s.readLines, 1)
Expand All @@ -93,19 +78,18 @@ func (s *Extractor) processLineSync(line BString) (Match, bool) {
// a string instance, but we can safely point to the existing bytes
// as a pointer instead
lineStringPtr := *(*string)(unsafe.Pointer(&line))
slices := indexToSlices(lineStringPtr, matches)
if s.ignore == nil || !s.ignore.IgnoreMatch(slices...) {
context := expressions.KeyBuilderContextArray{
Elements: slices,
}
extractedKey := s.keyBuilder.BuildKey(&context)
expContext := SliceSpaceExpressionContext{
linePtr: lineStringPtr,
indices: matches,
}
if s.ignore == nil || !s.ignore.IgnoreMatch(&expContext) {
extractedKey := s.keyBuilder.BuildKey(&expContext)

if len(extractedKey) > 0 {
matchNum := atomic.AddUint64(&s.matchedLines, 1)
return Match{
bLine: line,
Line: lineStringPtr,
Groups: slices,
Indices: matches,
Extracted: extractedKey,
LineNumber: lineNum,
Expand Down
8 changes: 2 additions & 6 deletions pkg/extractor/extractor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ func TestBasicExtractor(t *testing.T) {

vals := unbatchMatches(ex.ReadChan())
assert.Equal(t, "abc 123", vals[0].Line)
assert.Equal(t, 2, len(vals[0].Groups))
assert.Equal(t, 4, len(vals[0].Indices))
assert.Equal(t, "123", vals[0].Groups[0])
assert.Equal(t, "val:123", vals[0].Extracted)
assert.Equal(t, uint64(1), vals[0].LineNumber)
assert.Equal(t, uint64(1), vals[0].MatchNumber)
Expand All @@ -42,14 +40,12 @@ func TestGH10SliceBoundsPanic(t *testing.T) {
input := ConvertReaderToStringChan(ioutil.NopCloser(strings.NewReader("this is an [ERROR] message")), 1)
ex, err := New(input, &Config{
Regex: `\[(INFO)|(ERROR)|(WARNING)|(CRITICAL)\]`,
Extract: "val:{2}",
Extract: "val:{2} val:{3}",
Workers: 1,
})
assert.NoError(t, err)

vals := unbatchMatches(ex.ReadChan())
assert.Equal(t, "val:ERROR", vals[0].Extracted)
assert.Equal(t, "val:ERROR val:", vals[0].Extracted)
assert.Equal(t, []int{12, 17, -1, -1, 12, 17, -1, -1, -1, -1}, vals[0].Indices)
assert.Equal(t, "ERROR", vals[0].Groups[0])
assert.Equal(t, "ERROR", vals[0].Groups[2])
}
11 changes: 4 additions & 7 deletions pkg/extractor/ignoreset.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
)

// IgnoreSet decides whether a match should be discarded: processLineSync only
// builds a key when IgnoreMatch returns false.
// NOTE(review): this span is a rendered diff, so IgnoreMatch appears twice:
// the variadic-string signature is the pre-change form and the
// KeyBuilderContext signature is its replacement.
type IgnoreSet interface {
IgnoreMatch(matchSet ...string) bool
IgnoreMatch(context expressions.KeyBuilderContext) bool
}

type ExpressionIgnoreSet struct {
Expand All @@ -32,15 +32,12 @@ func NewIgnoreExpressions(expSet ...string) (IgnoreSet, error) {
return igSet, nil
}

func (s *ExpressionIgnoreSet) IgnoreMatch(matchSet ...string) bool {
if len(matchSet) == 0 || len(s.expressions) == 0 {
func (s *ExpressionIgnoreSet) IgnoreMatch(context expressions.KeyBuilderContext) bool {
if len(s.expressions) == 0 {
return false
}
context := expressions.KeyBuilderContextArray{
Elements: matchSet,
}
for _, exp := range s.expressions {
result := strings.TrimSpace(exp.BuildKey(&context))
result := strings.TrimSpace(exp.BuildKey(context))
if expressions.Truthy(result) {
return true
}
Expand Down
11 changes: 9 additions & 2 deletions pkg/extractor/ignoreset_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
package extractor

import (
"rare/pkg/expressions"
"testing"

"github.com/stretchr/testify/assert"
)

// mockArrayContext wraps the given strings in a KeyBuilderContextArray so
// tests can supply fixed elements wherever a KeyBuilderContext is expected.
func mockArrayContext(elements ...string) expressions.KeyBuilderContext {
	ctx := new(expressions.KeyBuilderContextArray)
	ctx.Elements = elements
	return ctx
}

func TestEmptyIgnoreSet(t *testing.T) {
is, err := NewIgnoreExpressions()
assert.NoError(t, err)
Expand All @@ -15,6 +22,6 @@ func TestEmptyIgnoreSet(t *testing.T) {
// TestSimpleIgnoreSet checks that an ignore set built from
// "{eq {0} ignoreme}" reports true when the only element is "ignoreme" and
// false when it is "notme".
// NOTE(review): this span is a rendered diff, so each assertion appears twice:
// the plain-string calls are the pre-change form and the mockArrayContext
// calls are their replacements.
func TestSimpleIgnoreSet(t *testing.T) {
is, err := NewIgnoreExpressions("{eq {0} ignoreme}")
assert.NoError(t, err)
assert.True(t, is.IgnoreMatch("ignoreme"))
assert.False(t, is.IgnoreMatch("notme"))
assert.True(t, is.IgnoreMatch(mockArrayContext("ignoreme")))
assert.False(t, is.IgnoreMatch(mockArrayContext("notme")))
}
19 changes: 19 additions & 0 deletions pkg/extractor/sliceSpaceExpressionContext.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package extractor

// SliceSpaceExpressionContext serves match groups directly out of the matched
// line using the flat index pairs, so no intermediate []string of groups has
// to be built per line.
type SliceSpaceExpressionContext struct {
	linePtr string // the line that indices refer into
	indices []int  // flat start/end pairs; pair i is indices[2*i], indices[2*i+1]
}

// GetMatch returns the text of match group idx, or "" when idx is out of range
// or the group has a negative start or end index.
func (s *SliceSpaceExpressionContext) GetMatch(idx int) string {
	// Validate idx against the number of pairs before doubling it. idx*2 wraps
	// around for extreme values (Go does not trap integer overflow), which
	// could turn a huge negative idx into a small, valid-looking slot.
	if idx < 0 || idx >= len(s.indices)/2 {
		return ""
	}
	start, end := s.indices[idx*2], s.indices[idx*2+1]
	if start < 0 || end < 0 {
		return ""
	}
	return s.linePtr[start:end]
}
40 changes: 40 additions & 0 deletions pkg/extractor_test/benchmark_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package benchmark_test

import (
"rare/pkg/extractor"
"testing"
)

// batchInputGenerator returns a channel (buffered to 128) that a background
// goroutine fills with `batches` batches, each holding batchSize copies of the
// line "abcdefg 123". The channel is closed once every batch has been sent.
func batchInputGenerator(batches int, batchSize int) <-chan []extractor.BString {
	out := make(chan []extractor.BString, 128)
	produce := func() {
		for sent := 0; sent < batches; sent++ {
			lines := make([]extractor.BString, batchSize)
			for slot := range lines {
				lines[slot] = extractor.BString("abcdefg 123")
			}
			out <- lines
		}
		close(out)
	}
	go produce()
	return out
}

// BenchmarkExtractor runs the extractor end to end: each iteration feeds
// 10000 batches of 100 generated lines through two workers and drains the
// results, checking the first match of every result batch.
func BenchmarkExtractor(b *testing.B) {
	for n := 0; n < b.N; n++ {
		gen := batchInputGenerator(10000, 100)
		// Named ex rather than extractor so the imported package is not shadowed.
		ex, err := extractor.New(gen, &extractor.Config{
			Regex:   `(\d{3})`,
			Extract: "{bucket {1} 10}",
			Workers: 2,
		})
		if err != nil {
			// Fail with the real cause instead of a nil dereference below.
			b.Fatalf("creating extractor: %v", err)
		}

		// Drain the reader until the extractor closes it.
		for batch := range ex.ReadChan() {
			if len(batch) > 0 && batch[0].Extracted != "120" {
				b.Fatalf("NO MATCH: got %q, want %q", batch[0].Extracted, "120")
			}
		}
	}
}

0 comments on commit 5ce7781

Please sign in to comment.