Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: Limit tokens per pattern to 128 #13376

Merged
merged 6 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions pkg/pattern/drain/drain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func TestDrain_TrainExtractsPatterns(t *testing.T) {
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:181 level=info component=logs logs_config=default msg="received file watcher event" name=/var/log/pods/<_><_><_>/<_><_><_>.<_> op=CREATE`,
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:181 level=info component=logs logs_config=default msg="received file watcher event" name=/var/log/pods/<_><_><_>/<_><_><_>.<_>.<_> op=CREATE`,
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:181 level=info component=logs logs_config=default msg="received file watcher event" name=/var/log/pods/hosted-grafana_.<_>/<_>/0.log.<_>.<_> op=CREATE`,
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:<_> level=info component=logs logs_config=default msg="<_> target" key="/var/log/pods/*<_>/<_>/*.log:{app=\"grafana\", conprof=\"true\", container=\"<_>\", instanceId=\"<_>\", job=\"hosted-grafana/grafana\", name=\"grafana\", namespace=\"hosted-grafana\", org=\"<_>\", plan=\"free\", pod=\"<_>\", pod_template_hash=\"<_>\", resource_version=\"<_>\", slug=\"<_>\", stackId=\"<_>\"}"`,
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:<_> level=info component=logs logs_config=default msg="<_> target" key="/var/log/pods/*<_>/<_>/*.log:{app=\"grafana\", conprof=\"true\", container=\"<_>\", instanceId=\"<_>\", job=\"hosted-grafana/grafana\", name=\"grafana\", namespace=\"hosted-grafana\", org=\"<_>\", plan=\"free\", pod=<_>`,
`ts=2024-04-16T15:10:<_>.<_> caller=log.go:168 component=logs logs_config=default level=info msg="Re-opening moved/deleted file /var/log/pods/<_>/<_>/<_>.log ..."`,
`ts=2024-04-16T15:10:<_>.<_> caller=log.go:168 component=logs logs_config=default level=info msg="Re-opening moved/deleted file /var/log/pods/hosted-grafana_.<_>/<_>/0.log ..."`,
`ts=2024-04-16T15:10:<_>.<_> caller=log.go:168 component=logs logs_config=default level=info msg="Seeked /var/log/pods/<_>/<_>/0.log - &{Offset:0 Whence:0}"`,
Expand Down Expand Up @@ -112,12 +112,12 @@ func TestDrain_TrainExtractsPatterns(t *testing.T) {
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"<_>\" with CreateContainerConfigError: \"secret \\\"<_>\\\" not found\"" pod="<_>/<_>" podUID="<_>"`,
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"<_>\" with ImagePullBackOff: \"Back-off pulling image \\\"us.gcr.io/hosted-grafana/<_>:<_>.<_>.<_>\\\"\"" pod="<_>/<_>" podUID="<_>"`,
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"<_>\" with ImagePullBackOff: \"Back-off pulling image \\\"us.gcr.io/kubernetes-dev/<_>:<_>\\\"\"" pod="<_>/<_>" podUID="<_>"`,
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"grafana\" with ErrImagePull: \"[rpc error: code = NotFound desc = failed to pull and unpack image \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": failed to resolve reference \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>: not found, failed to pull and unpack image \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": failed to resolve reference \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": unexpected status from HEAD request to https://us.gcr.io/v2/hosted-grafana/hosted-grafana-pro/manifests/<_>.1.<_>: 403 Forbidden]\"" pod="hosted-grafana/<_>" podUID="<_>"`,
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"grafana\" with ErrImagePull: \"[rpc error: code = NotFound desc = failed to pull and unpack image \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": failed to resolve reference \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>: not <_>`,
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"pdc\" with ErrImageNeverPull: \"Container image \\\"us.gcr.io/hosted-grafana/pdc:0.1.415\\\" is not present with pull policy of Never\"" pod="pdc/<_>" podUID="<_>"`,
`E0507 11:59:<_>.<_> <_> prober.go:104] "Probe errored" err="rpc error: code = NotFound desc = failed to exec in container: failed to load task: no running task found: task <_> not found: not found" probeType="Readiness" pod="hosted-grafana/<_>" podUID="<_>" containerName="grafana"`,
`E0507 11:59:<_>.<_> <_> prober.go:239] "Unable to write all bytes from execInContainer" err="short write" expectedBytes=<_> actualBytes=10240`,
`E0507 11:59:<_>.<_> <_> remote_image.go:180] "PullImage from image service failed" err="rpc error: code = NotFound desc = failed to pull and unpack image \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": failed to resolve reference \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>: not found" image="us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>"`,
`E0507 11:59:<_>.<_> <_> remote_image.go:180] "PullImage from image service failed" err="rpc error: code = Unknown desc = failed to pull and unpack image \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": failed to resolve reference \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": unexpected status from HEAD request to https://us.gcr.io/v2/hosted-grafana/hosted-grafana-pro/manifests/<_>.1.<_>: 403 Forbidden" image="us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>"`,
`E0507 11:59:<_>.<_> <_> remote_image.go:180] "PullImage from image service failed" err="rpc error: code = Unknown desc = failed to pull and unpack image \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": failed to resolve reference \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": unexpected status from HEAD request to https://us.gcr.io/v2/hosted-grafana/hosted-grafana-pro/manifests/<_>.1.<_>: 403 Forbidden" image="us.gcr.io/hosted-grafana/<_>`,
`E0507 11:59:<_>.<_> <_> remote_runtime.go:432] "ContainerStatus from runtime service failed" err="rpc error: code = NotFound desc = an error occurred when try to find container \"<_>\": not found" containerID="<_>"`,
`E0507 11:59:<_>.<_> <_> remote_runtime.go:496] "ExecSync cmd from runtime service failed" err="rpc error: code = NotFound desc = failed to exec in container: failed to load task: no running task found: task <_> not found: not found" containerID="<_>" cmd=["/bin/hgrun","check"]`,
`I0507 11:59:31.815514 2791 azure_credentials.go:220] image(us.gcr.io/hosted-grafana/hosted-grafana-pro) is not from ACR, return empty authentication`,
Expand Down Expand Up @@ -301,10 +301,9 @@ func TestDrain_TrainExtractsPatterns(t *testing.T) {
`2024-05-08 15:23:56.<_> [DEBUG][76] felix/route_table.go 686: Reconcile against kernel programming ifaceName="<_>" ifaceRegex="^azv.*" ipVersion=0x4 tableIndex=0`,
`2024-05-08 15:23:56.<_> [DEBUG][76] felix/route_table.go 880: Processing route: 254 <_> 10.68.10.<_>/32 ifaceName="<_>" ifaceRegex="^azv.*" ipVersion=0x4 tableIndex=0`,
`2024-05-08 15:23:56.<_> [DEBUG][76] felix/route_table.go 915: Route is correct dest=10.68.10.<_>/32 ifaceName="<_>" ifaceRegex="^azv.*" ipVersion=0x4 tableIndex=0`,
`2024-05-08 15:23:57.942 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "", "", "", "", "", "", "", "", "tVnHkvAo15HuiPy0", "", ""} chainName="OUTPUT" expectedRuleIDs=[]string{"tVnHkvAo15HuiPy0", "", "", "", "", "", "", "", "", "", "", "", "", "", ""} ipVersion=0x4 table="raw"`,
`2024-05-08 15:23:57.942 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "6gwbT8clXdHdC1b1"} chainName="PREROUTING" expectedRuleIDs=[]string{"6gwbT8clXdHdC1b1", "", "", "", ""} ipVersion=0x4 table="raw"`,
`2024-05-08 15:23:57.969 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "Cz_u1IQiXIMmKD4c", "", "", "", "", "", "", "", "", "", "", "", ""} chainName="INPUT" expectedRuleIDs=[]string{"Cz_u1IQiXIMmKD4c", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""} ipVersion=0x4 table="filter"`,
`2024-05-08 15:23:57.969 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "tVnHkvAo15HuiPy0", "", "", "", "", ""} chainName="OUTPUT" expectedRuleIDs=[]string{"tVnHkvAo15HuiPy0", "", "", "", "", "", "", "", "", ""} ipVersion=0x4 table="filter"`,
`2024-05-08 15:23:57.<_> [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "<_><_><_> "<_><_> "<_><_> "<_><_> "<_><_> "<_><_> "<_><_> "<_><_> <_>", "", ""<_> <_><_><_> <_>"<_> <_><_><_><_><_> <_><_><_><_><_><_><_><_>", "", "", "", "", "", "", "", "", ""<_>`,
`2024-05-08 15:23:58.169 [INFO][2333] felix/summary.go 100: Summarising 35 dataplane reconciliation loops over 1m2s: avg=12ms longest=46ms (resync-filter-v4,resync-filter-v6,resync-mangle-v4,resync-mangle-v6,update-filter-v4,update-filter-v6)`,
`2024-05-08 15:23:58.566 [DEBUG][3576126] felix/int_dataplane.go 957: Examining link for MTU calculation mtu=1500 name="eth0"`,
`2024-05-08 15:23:58.680 [DEBUG][216945] felix/int_dataplane.go 1785: Reschedule kick received`,
Expand Down
23 changes: 10 additions & 13 deletions pkg/pattern/drain/line_tokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,39 +38,36 @@ func newPunctuationTokenizer() *punctuationTokenizer {
}

func (p *punctuationTokenizer) Tokenize(line string) ([]string, interface{}) {
tokens := make([]string, len(line)) // Maximum size is every character is punctuation
spacesAfter := make([]int, strings.Count(line, " ")) // Could be a bitmap, but it's not worth it for a few bytes.
tokens := make([]string, 0, 128)
spacesAfter := make([]int, 0, 64)

start := 0
nextTokenIdx := 0
nextSpaceIdx := 0
for i, char := range line {
if len(tokens) >= cap(tokens)-1 {
break
}
if unicode.IsLetter(char) || unicode.IsNumber(char) || char < 128 && p.excludeDelimiters[char] != 0 {
continue
}
included := char < 128 && p.includeDelimiters[char] != 0
if char == ' ' || included || unicode.IsPunct(char) {
if i > start {
tokens[nextTokenIdx] = line[start:i]
nextTokenIdx++
tokens = append(tokens, line[start:i])
}
if char == ' ' {
spacesAfter[nextSpaceIdx] = nextTokenIdx - 1
nextSpaceIdx++
spacesAfter = append(spacesAfter, len(tokens)-1)
} else {
tokens[nextTokenIdx] = line[i : i+1]
nextTokenIdx++
tokens = append(tokens, line[i:i+1])
}
start = i + 1
}
}

if start < len(line) {
tokens[nextTokenIdx] = line[start:]
nextTokenIdx++
tokens = append(tokens, line[start:])
}

return tokens[:nextTokenIdx], spacesAfter[:nextSpaceIdx]
return tokens, spacesAfter
}

func (p *punctuationTokenizer) Join(tokens []string, state interface{}) string {
Expand Down
17 changes: 17 additions & 0 deletions pkg/pattern/drain/line_tokenizer_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package drain

import (
"strings"
"testing"

"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -88,6 +89,22 @@ var testCases = []TestCase{
typeSplitting: {"ts=", "2024-05-30T12:50:36.648377186Z", "caller=", "scheduler_processor.go:143", "level=", "warn", "msg=", "\"error", "contacting", "scheduler\"", "err=", "\"rpc", "error:", "code", "=", ``, "Unavailable", "desc", "=", ``, "connection", "error:", "desc", "=", ``, `\"error`, "reading", "server", "preface:", `EOF\""`, "addr=", "10.0.151.101:9095"},
},
},
{
name: "Exactly 128 tokens are not combined",
line: strings.Repeat(`A `, 126) + "127 128",
want: map[string][]string{
typePunctuation: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128"),
typeSplitting: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128"),
},
},
{
name: "More than 128 tokens combined suffix into one token",
line: strings.Repeat(`A `, 126) + "127 128 129",
want: map[string][]string{
typePunctuation: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128 129"),
typeSplitting: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128", "129"),
},
},
{
name: "Only punctation",
line: `!@£$%^&*()`,
Expand Down
Loading