Skip to content

Commit

Permalink
perf: Limit tokens per pattern to 128 (#13376)
Browse files Browse the repository at this point in the history
  • Loading branch information
benclive authored Jul 3, 2024
1 parent 11e1976 commit a1efafd
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 18 deletions.
9 changes: 4 additions & 5 deletions pkg/pattern/drain/drain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func TestDrain_TrainExtractsPatterns(t *testing.T) {
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:181 level=info component=logs logs_config=default msg="received file watcher event" name=/var/log/pods/<_><_><_>/<_><_><_>.<_> op=CREATE`,
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:181 level=info component=logs logs_config=default msg="received file watcher event" name=/var/log/pods/<_><_><_>/<_><_><_>.<_>.<_> op=CREATE`,
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:181 level=info component=logs logs_config=default msg="received file watcher event" name=/var/log/pods/hosted-grafana_.<_>/<_>/0.log.<_>.<_> op=CREATE`,
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:<_> level=info component=logs logs_config=default msg="<_> target" key="/var/log/pods/*<_>/<_>/*.log:{app=\"grafana\", conprof=\"true\", container=\"<_>\", instanceId=\"<_>\", job=\"hosted-grafana/grafana\", name=\"grafana\", namespace=\"hosted-grafana\", org=\"<_>\", plan=\"free\", pod=\"<_>\", pod_template_hash=\"<_>\", resource_version=\"<_>\", slug=\"<_>\", stackId=\"<_>\"}"`,
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:<_> level=info component=logs logs_config=default msg="<_> target" key="/var/log/pods/*<_>/<_>/*.log:{app=\"grafana\", conprof=\"true\", container=\"<_>\", instanceId=\"<_>\", job=\"hosted-grafana/grafana\", name=\"grafana\", namespace=\"hosted-grafana\", org=\"<_>\", plan=\"free\", pod=<_>`,
`ts=2024-04-16T15:10:<_>.<_> caller=log.go:168 component=logs logs_config=default level=info msg="Re-opening moved/deleted file /var/log/pods/<_>/<_>/<_>.log ..."`,
`ts=2024-04-16T15:10:<_>.<_> caller=log.go:168 component=logs logs_config=default level=info msg="Re-opening moved/deleted file /var/log/pods/hosted-grafana_.<_>/<_>/0.log ..."`,
`ts=2024-04-16T15:10:<_>.<_> caller=log.go:168 component=logs logs_config=default level=info msg="Seeked /var/log/pods/<_>/<_>/0.log - &{Offset:0 Whence:0}"`,
Expand Down Expand Up @@ -118,12 +118,12 @@ func TestDrain_TrainExtractsPatterns(t *testing.T) {
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"<_>\" with CreateContainerConfigError: \"secret \\\"<_>\\\" not found\"" pod="<_>/<_>" podUID="<_>"`,
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"<_>\" with ImagePullBackOff: \"Back-off pulling image \\\"us.gcr.io/hosted-grafana/<_>:<_>.<_>.<_>\\\"\"" pod="<_>/<_>" podUID="<_>"`,
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"<_>\" with ImagePullBackOff: \"Back-off pulling image \\\"us.gcr.io/kubernetes-dev/<_>:<_>\\\"\"" pod="<_>/<_>" podUID="<_>"`,
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"grafana\" with ErrImagePull: \"[rpc error: code = NotFound desc = failed to pull and unpack image \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": failed to resolve reference \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>: not found, failed to pull and unpack image \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": failed to resolve reference \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": unexpected status from HEAD request to https://us.gcr.io/v2/hosted-grafana/hosted-grafana-pro/manifests/<_>.1.<_>: 403 Forbidden]\"" pod="hosted-grafana/<_>" podUID="<_>"`,
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"grafana\" with ErrImagePull: \"[rpc error: code = NotFound desc = failed to pull and unpack image \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": failed to resolve reference \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>: not <_>`,
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"pdc\" with ErrImageNeverPull: \"Container image \\\"us.gcr.io/hosted-grafana/pdc:0.1.415\\\" is not present with pull policy of Never\"" pod="pdc/<_>" podUID="<_>"`,
`E0507 11:59:<_>.<_> <_> prober.go:104] "Probe errored" err="rpc error: code = NotFound desc = failed to exec in container: failed to load task: no running task found: task <_> not found: not found" probeType="Readiness" pod="hosted-grafana/<_>" podUID="<_>" containerName="grafana"`,
`E0507 11:59:<_>.<_> <_> prober.go:239] "Unable to write all bytes from execInContainer" err="short write" expectedBytes=<_> actualBytes=10240`,
`E0507 11:59:<_>.<_> <_> remote_image.go:180] "PullImage from image service failed" err="rpc error: code = NotFound desc = failed to pull and unpack image \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": failed to resolve reference \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>: not found" image="us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>"`,
`E0507 11:59:<_>.<_> <_> remote_image.go:180] "PullImage from image service failed" err="rpc error: code = Unknown desc = failed to pull and unpack image \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": failed to resolve reference \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": unexpected status from HEAD request to https://us.gcr.io/v2/hosted-grafana/hosted-grafana-pro/manifests/<_>.1.<_>: 403 Forbidden" image="us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>"`,
`E0507 11:59:<_>.<_> <_> remote_image.go:180] "PullImage from image service failed" err="rpc error: code = Unknown desc = failed to pull and unpack image \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": failed to resolve reference \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": unexpected status from HEAD request to https://us.gcr.io/v2/hosted-grafana/hosted-grafana-pro/manifests/<_>.1.<_>: 403 Forbidden" image="us.gcr.io/hosted-grafana/<_>`,
`E0507 11:59:<_>.<_> <_> remote_runtime.go:432] "ContainerStatus from runtime service failed" err="rpc error: code = NotFound desc = an error occurred when try to find container \"<_>\": not found" containerID="<_>"`,
`E0507 11:59:<_>.<_> <_> remote_runtime.go:496] "ExecSync cmd from runtime service failed" err="rpc error: code = NotFound desc = failed to exec in container: failed to load task: no running task found: task <_> not found: not found" containerID="<_>" cmd=["/bin/hgrun","check"]`,
`I0507 11:59:31.815514 2791 azure_credentials.go:220] image(us.gcr.io/hosted-grafana/hosted-grafana-pro) is not from ACR, return empty authentication`,
Expand Down Expand Up @@ -311,10 +311,9 @@ func TestDrain_TrainExtractsPatterns(t *testing.T) {
`2024-05-08 15:23:56.<_> [DEBUG][76] felix/route_table.go 686: Reconcile against kernel programming ifaceName="<_>" ifaceRegex="^azv.*" ipVersion=0x4 tableIndex=0`,
`2024-05-08 15:23:56.<_> [DEBUG][76] felix/route_table.go 880: Processing route: 254 <_> 10.68.10.<_>/32 ifaceName="<_>" ifaceRegex="^azv.*" ipVersion=0x4 tableIndex=0`,
`2024-05-08 15:23:56.<_> [DEBUG][76] felix/route_table.go 915: Route is correct dest=10.68.10.<_>/32 ifaceName="<_>" ifaceRegex="^azv.*" ipVersion=0x4 tableIndex=0`,
`2024-05-08 15:23:57.942 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "", "", "", "", "", "", "", "", "tVnHkvAo15HuiPy0", "", ""} chainName="OUTPUT" expectedRuleIDs=[]string{"tVnHkvAo15HuiPy0", "", "", "", "", "", "", "", "", "", "", "", "", "", ""} ipVersion=0x4 table="raw"`,
`2024-05-08 15:23:57.942 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "6gwbT8clXdHdC1b1"} chainName="PREROUTING" expectedRuleIDs=[]string{"6gwbT8clXdHdC1b1", "", "", "", ""} ipVersion=0x4 table="raw"`,
`2024-05-08 15:23:57.969 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "Cz_u1IQiXIMmKD4c", "", "", "", "", "", "", "", "", "", "", "", ""} chainName="INPUT" expectedRuleIDs=[]string{"Cz_u1IQiXIMmKD4c", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""} ipVersion=0x4 table="filter"`,
`2024-05-08 15:23:57.969 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "tVnHkvAo15HuiPy0", "", "", "", "", ""} chainName="OUTPUT" expectedRuleIDs=[]string{"tVnHkvAo15HuiPy0", "", "", "", "", "", "", "", "", ""} ipVersion=0x4 table="filter"`,
`2024-05-08 15:23:57.<_> [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "<_><_><_> "<_><_> "<_><_> "<_><_> "<_><_> "<_><_> "<_><_> "<_><_> <_>", "", ""<_> <_><_><_> <_>"<_> <_><_><_><_><_> <_><_><_><_><_><_><_><_>", "", "", "", "", "", "", "", "", ""<_>`,
`2024-05-08 15:23:58.169 [INFO][2333] felix/summary.go 100: Summarising 35 dataplane reconciliation loops over 1m2s: avg=12ms longest=46ms (resync-filter-v4,resync-filter-v6,resync-mangle-v4,resync-mangle-v6,update-filter-v4,update-filter-v6)`,
`2024-05-08 15:23:58.566 [DEBUG][3576126] felix/int_dataplane.go 957: Examining link for MTU calculation mtu=1500 name="eth0"`,
`2024-05-08 15:23:58.680 [DEBUG][216945] felix/int_dataplane.go 1785: Reschedule kick received`,
Expand Down
23 changes: 10 additions & 13 deletions pkg/pattern/drain/line_tokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,39 +38,36 @@ func newPunctuationTokenizer() *punctuationTokenizer {
}

func (p *punctuationTokenizer) Tokenize(line string) ([]string, interface{}) {
tokens := make([]string, len(line)) // Maximum size is every character is punctuation
spacesAfter := make([]int, strings.Count(line, " ")) // Could be a bitmap, but it's not worth it for a few bytes.
tokens := make([]string, 0, 128)
spacesAfter := make([]int, 0, 64)

start := 0
nextTokenIdx := 0
nextSpaceIdx := 0
for i, char := range line {
if len(tokens) >= cap(tokens)-1 {
break
}
if unicode.IsLetter(char) || unicode.IsNumber(char) || char < 128 && p.excludeDelimiters[char] != 0 {
continue
}
included := char < 128 && p.includeDelimiters[char] != 0
if char == ' ' || included || unicode.IsPunct(char) {
if i > start {
tokens[nextTokenIdx] = line[start:i]
nextTokenIdx++
tokens = append(tokens, line[start:i])
}
if char == ' ' {
spacesAfter[nextSpaceIdx] = nextTokenIdx - 1
nextSpaceIdx++
spacesAfter = append(spacesAfter, len(tokens)-1)
} else {
tokens[nextTokenIdx] = line[i : i+1]
nextTokenIdx++
tokens = append(tokens, line[i:i+1])
}
start = i + 1
}
}

if start < len(line) {
tokens[nextTokenIdx] = line[start:]
nextTokenIdx++
tokens = append(tokens, line[start:])
}

return tokens[:nextTokenIdx], spacesAfter[:nextSpaceIdx]
return tokens, spacesAfter
}

func (p *punctuationTokenizer) Join(tokens []string, state interface{}) string {
Expand Down
17 changes: 17 additions & 0 deletions pkg/pattern/drain/line_tokenizer_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package drain

import (
"strings"
"testing"

"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -88,6 +89,22 @@ var testCases = []TestCase{
typeSplitting: {"ts=", "2024-05-30T12:50:36.648377186Z", "caller=", "scheduler_processor.go:143", "level=", "warn", "msg=", "\"error", "contacting", "scheduler\"", "err=", "\"rpc", "error:", "code", "=", ``, "Unavailable", "desc", "=", ``, "connection", "error:", "desc", "=", ``, `\"error`, "reading", "server", "preface:", `EOF\""`, "addr=", "10.0.151.101:9095"},
},
},
{
name: "Exactly 128 tokens are not combined",
line: strings.Repeat(`A `, 126) + "127 128",
want: map[string][]string{
typePunctuation: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128"),
typeSplitting: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128"),
},
},
{
name: "More than 128 tokens combined suffix into one token",
line: strings.Repeat(`A `, 126) + "127 128 129",
want: map[string][]string{
typePunctuation: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128 129"),
typeSplitting: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128", "129"),
},
},
{
name: "Only punctation",
line: `!@£$%^&*()`,
Expand Down

0 comments on commit a1efafd

Please sign in to comment.