From 1d6b5673eb55e8fbb0bc5e67213aee0ea6a208fa Mon Sep 17 00:00:00 2001 From: Cyril Tovena Date: Tue, 26 Apr 2022 15:38:29 +0200 Subject: [PATCH 1/2] Remove empty match within concat of regex --- pkg/logql/log/filter.go | 10 ++++ pkg/logql/log/filter_test.go | 102 ++++++++++++++++++----------------- 2 files changed, 62 insertions(+), 50 deletions(-) diff --git a/pkg/logql/log/filter.go b/pkg/logql/log/filter.go index e1530fec2c49..764584b5889e 100644 --- a/pkg/logql/log/filter.go +++ b/pkg/logql/log/filter.go @@ -471,6 +471,16 @@ func simplifyAlternate(reg *syntax.Regexp) (Filterer, bool) { // Anything else is rejected. func simplifyConcat(reg *syntax.Regexp, baseLiteral []byte) (Filterer, bool) { clearCapture(reg.Sub...) + // remove empty match as we don't need them for filtering + i := 0 + for _, r := range reg.Sub { + if r.Op == syntax.OpEmptyMatch { + continue + } + reg.Sub[i] = r + i++ + } + reg.Sub = reg.Sub[:i] // we support only simplication of concat operation with 3 sub expressions. // for instance .*foo.*bar contains 4 subs (.*+foo+.*+bar) and can't be simplified. if len(reg.Sub) > 3 { diff --git a/pkg/logql/log/filter_test.go b/pkg/logql/log/filter_test.go index bd28f26868a9..be721df641d7 100644 --- a/pkg/logql/log/filter_test.go +++ b/pkg/logql/log/filter_test.go @@ -19,58 +19,59 @@ func Test_SimplifiedRegex(t *testing.T) { match bool }{ // regex we intend to support. - {"foo", true, newContainsFilter([]byte("foo"), false), true}, - {"not", true, newNotFilter(newContainsFilter([]byte("not"), false)), false}, - {"(foo)", true, newContainsFilter([]byte("foo"), false), true}, - {"(foo|ba)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), true}, - {"(foo|ba|ar)", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), newContainsFilter([]byte("ar"), false)), true}, - {"(foo|(ba|ar))", true, newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newContainsFilter([]byte("ba"), false), newContainsFilter([]byte("ar"), false))), true}, - {"foo.*", true, newContainsFilter([]byte("foo"), false), true}, - {".*foo", true, newNotFilter(newContainsFilter([]byte("foo"), false)), false}, - {".*foo.*", true, newContainsFilter([]byte("foo"), false), true}, - {"(.*)(foo).*", true, newContainsFilter([]byte("foo"), false), true}, - {"(foo.*|.*ba)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), true}, - {"(foo.*|.*bar.*)", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, - {".*foo.*|bar", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, - {".*foo|bar", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, - // This construct is similar to (...), but won't create a capture group. - {"(?:.*foo.*|bar)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), true}, - // named capture group - {"(?P.*foo.*|bar)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), true}, - // parsed as (?-s:.)*foo(?-s:.)*|b(?:ar|uzz) - {".*foo.*|bar|buzz", true, newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newContainsFilter([]byte("bar"), false), newContainsFilter([]byte("buzz"), false))), true}, - // parsed as (?-s:.)*foo(?-s:.)*|bar|uzz - {".*foo.*|bar|uzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), newContainsFilter([]byte("uzz"), false)), true}, - // parsed as foo|b(?:ar|(?:)|uzz)|zz - {"foo|bar|b|buzz|zz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newOrFilter(newContainsFilter([]byte("bar"), false), newContainsFilter([]byte("b"), false)), newContainsFilter([]byte("buzz"), false))), newContainsFilter([]byte("zz"), false)), true}, - // parsed as f(?:(?:)|oo(?:(?:)|bar)) - {"f|foo|foobar", true, newOrFilter(newContainsFilter([]byte("f"), false), newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("foobar"), false))), true}, - // parsed as f(?:(?-s:.)*|oobar(?-s:.)*)|(?-s:.)*buzz - {"f.*|foobar.*|.*buzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("f"), false), newContainsFilter([]byte("foobar"), false)), newContainsFilter([]byte("buzz"), false)), true}, - // parsed as ((f(?-s:.)*)|foobar(?-s:.)*)|(?-s:.)*buzz - {"((f.*)|foobar.*)|.*buzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("f"), false), newContainsFilter([]byte("foobar"), false)), newContainsFilter([]byte("buzz"), false)), true}, - {".*", true, TrueFilter, true}, - {".*|.*", true, TrueFilter, true}, - {".*||||", true, TrueFilter, true}, - {"", true, TrueFilter, true}, - {"(?i)foo", true, newContainsFilter([]byte("foo"), true), true}, - {"(?i)界", true, newContainsFilter([]byte("界"), true), true}, - {"(?i)ïB", true, newContainsFilter([]byte("ïB"), true), true}, + // {"foo", true, newContainsFilter([]byte("foo"), false), true}, + // {"not", true, newNotFilter(newContainsFilter([]byte("not"), false)), false}, + // {"(foo)", true, newContainsFilter([]byte("foo"), false), true}, + // {"(foo|ba)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), true}, + // {"(foo|ba|ar)", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), newContainsFilter([]byte("ar"), false)), true}, + // {"(foo|(ba|ar))", true, newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newContainsFilter([]byte("ba"), false), newContainsFilter([]byte("ar"), false))), true}, + // {"foo.*", true, newContainsFilter([]byte("foo"), false), true}, + // {".*foo", true, newNotFilter(newContainsFilter([]byte("foo"), false)), false}, + // {".*foo.*", true, newContainsFilter([]byte("foo"), false), true}, + // {"(.*)(foo).*", true, newContainsFilter([]byte("foo"), false), true}, + // {"(foo.*|.*ba)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), true}, + // {"(foo.*|.*bar.*)", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, + // {".*foo.*|bar", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, + // {".*foo|bar", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, + // // This construct is similar to (...), but won't create a capture group. + // {"(?:.*foo.*|bar)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), true}, + // // named capture group + // {"(?P.*foo.*|bar)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), true}, + // // parsed as (?-s:.)*foo(?-s:.)*|b(?:ar|uzz) + // {".*foo.*|bar|buzz", true, newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newContainsFilter([]byte("bar"), false), newContainsFilter([]byte("buzz"), false))), true}, + // // parsed as (?-s:.)*foo(?-s:.)*|bar|uzz + // {".*foo.*|bar|uzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), newContainsFilter([]byte("uzz"), false)), true}, + // // parsed as foo|b(?:ar|(?:)|uzz)|zz + // {"foo|bar|b|buzz|zz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newOrFilter(newContainsFilter([]byte("bar"), false), newContainsFilter([]byte("b"), false)), newContainsFilter([]byte("buzz"), false))), newContainsFilter([]byte("zz"), false)), true}, + // // parsed as f(?:(?:)|oo(?:(?:)|bar)) + // {"f|foo|foobar", true, newOrFilter(newContainsFilter([]byte("f"), false), newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("foobar"), false))), true}, + // // parsed as f(?:(?-s:.)*|oobar(?-s:.)*)|(?-s:.)*buzz + // {"f.*|foobar.*|.*buzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("f"), false), newContainsFilter([]byte("foobar"), false)), newContainsFilter([]byte("buzz"), false)), true}, + // // parsed as ((f(?-s:.)*)|foobar(?-s:.)*)|(?-s:.)*buzz + // {"((f.*)|foobar.*)|.*buzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("f"), false), newContainsFilter([]byte("foobar"), false)), newContainsFilter([]byte("buzz"), false)), true}, + // {".*", true, TrueFilter, true}, + // {".*|.*", true, TrueFilter, true}, + // {".*||||", true, TrueFilter, true}, + // {"", true, TrueFilter, true}, + // {"(?i)foo", true, newContainsFilter([]byte("foo"), true), true}, + // {"(?i)界", true, newContainsFilter([]byte("界"), true), true}, + // {"(?i)ïB", true, newContainsFilter([]byte("ïB"), true), true}, + {"(?:)foo|fatal|exception", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("fatal"), false)), newContainsFilter([]byte("exception"), false)), true}, // regex we are not supporting. - {"[a-z]+foo", true, nil, false}, - {".+foo", true, nil, false}, - {".*fo.*o", true, nil, false}, - {`\d`, true, nil, false}, - {`\sfoo`, true, nil, false}, - {`foo?`, false, nil, false}, - {`foo{1,2}bar{2,3}`, true, nil, false}, - {`foo|\d*bar`, true, nil, false}, - {`foo|fo{1,2}`, true, nil, false}, - {`foo|fo\d*`, true, nil, false}, - {`foo|fo\d+`, true, nil, false}, - {`(\w\d+)`, true, nil, false}, - {`.*f.*oo|fo{1,2}`, true, nil, false}, + // {"[a-z]+foo", true, nil, false}, + // {".+foo", true, nil, false}, + // {".*fo.*o", true, nil, false}, + // {`\d`, true, nil, false}, + // {`\sfoo`, true, nil, false}, + // {`foo?`, false, nil, false}, + // {`foo{1,2}bar{2,3}`, true, nil, false}, + // {`foo|\d*bar`, true, nil, false}, + // {`foo|fo{1,2}`, true, nil, false}, + // {`foo|fo\d*`, true, nil, false}, + // {`foo|fo\d+`, true, nil, false}, + // {`(\w\d+)`, true, nil, false}, + // {`.*f.*oo|fo{1,2}`, true, nil, false}, } { t.Run(test.re, func(t *testing.T) { d, err := newRegexpFilter(test.re, test.match) @@ -160,6 +161,7 @@ func Benchmark_LineFilter(b *testing.B) { {"(node:24) buzz*"}, {"(HTTP/.*\\\"|HEAD|GET) (2..|5..)"}, {"\"@l\":\"(Warning|Error|Fatal)\""}, + {"(?:)foo|fatal|exception"}, } { benchmarkRegex(b, test.re, logline, true) benchmarkRegex(b, test.re, logline, false) From f8df80aa644a4c32cef0e1cbd1e32a3ea2385fbe Mon Sep 17 00:00:00 2001 From: Cyril Tovena Date: Tue, 26 Apr 2022 15:39:48 +0200 Subject: [PATCH 2/2] add back tests --- pkg/logql/log/filter_test.go | 100 +++++++++++++++++------------------ 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/pkg/logql/log/filter_test.go b/pkg/logql/log/filter_test.go index be721df641d7..0e3d112e24c9 100644 --- a/pkg/logql/log/filter_test.go +++ b/pkg/logql/log/filter_test.go @@ -19,59 +19,59 @@ func Test_SimplifiedRegex(t *testing.T) { match bool }{ // regex we intend to support. - // {"foo", true, newContainsFilter([]byte("foo"), false), true}, - // {"not", true, newNotFilter(newContainsFilter([]byte("not"), false)), false}, - // {"(foo)", true, newContainsFilter([]byte("foo"), false), true}, - // {"(foo|ba)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), true}, - // {"(foo|ba|ar)", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), newContainsFilter([]byte("ar"), false)), true}, - // {"(foo|(ba|ar))", true, newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newContainsFilter([]byte("ba"), false), newContainsFilter([]byte("ar"), false))), true}, - // {"foo.*", true, newContainsFilter([]byte("foo"), false), true}, - // {".*foo", true, newNotFilter(newContainsFilter([]byte("foo"), false)), false}, - // {".*foo.*", true, newContainsFilter([]byte("foo"), false), true}, - // {"(.*)(foo).*", true, newContainsFilter([]byte("foo"), false), true}, - // {"(foo.*|.*ba)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), true}, - // {"(foo.*|.*bar.*)", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, - // {".*foo.*|bar", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, - // {".*foo|bar", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, - // // This construct is similar to (...), but won't create a capture group. - // {"(?:.*foo.*|bar)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), true}, - // // named capture group - // {"(?P.*foo.*|bar)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), true}, - // // parsed as (?-s:.)*foo(?-s:.)*|b(?:ar|uzz) - // {".*foo.*|bar|buzz", true, newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newContainsFilter([]byte("bar"), false), newContainsFilter([]byte("buzz"), false))), true}, - // // parsed as (?-s:.)*foo(?-s:.)*|bar|uzz - // {".*foo.*|bar|uzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), newContainsFilter([]byte("uzz"), false)), true}, - // // parsed as foo|b(?:ar|(?:)|uzz)|zz - // {"foo|bar|b|buzz|zz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newOrFilter(newContainsFilter([]byte("bar"), false), newContainsFilter([]byte("b"), false)), newContainsFilter([]byte("buzz"), false))), newContainsFilter([]byte("zz"), false)), true}, - // // parsed as f(?:(?:)|oo(?:(?:)|bar)) - // {"f|foo|foobar", true, newOrFilter(newContainsFilter([]byte("f"), false), newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("foobar"), false))), true}, - // // parsed as f(?:(?-s:.)*|oobar(?-s:.)*)|(?-s:.)*buzz - // {"f.*|foobar.*|.*buzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("f"), false), newContainsFilter([]byte("foobar"), false)), newContainsFilter([]byte("buzz"), false)), true}, - // // parsed as ((f(?-s:.)*)|foobar(?-s:.)*)|(?-s:.)*buzz - // {"((f.*)|foobar.*)|.*buzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("f"), false), newContainsFilter([]byte("foobar"), false)), newContainsFilter([]byte("buzz"), false)), true}, - // {".*", true, TrueFilter, true}, - // {".*|.*", true, TrueFilter, true}, - // {".*||||", true, TrueFilter, true}, - // {"", true, TrueFilter, true}, - // {"(?i)foo", true, newContainsFilter([]byte("foo"), true), true}, - // {"(?i)界", true, newContainsFilter([]byte("界"), true), true}, - // {"(?i)ïB", true, newContainsFilter([]byte("ïB"), true), true}, + {"foo", true, newContainsFilter([]byte("foo"), false), true}, + {"not", true, newNotFilter(newContainsFilter([]byte("not"), false)), false}, + {"(foo)", true, newContainsFilter([]byte("foo"), false), true}, + {"(foo|ba)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), true}, + {"(foo|ba|ar)", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), newContainsFilter([]byte("ar"), false)), true}, + {"(foo|(ba|ar))", true, newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newContainsFilter([]byte("ba"), false), newContainsFilter([]byte("ar"), false))), true}, + {"foo.*", true, newContainsFilter([]byte("foo"), false), true}, + {".*foo", true, newNotFilter(newContainsFilter([]byte("foo"), false)), false}, + {".*foo.*", true, newContainsFilter([]byte("foo"), false), true}, + {"(.*)(foo).*", true, newContainsFilter([]byte("foo"), false), true}, + {"(foo.*|.*ba)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), true}, + {"(foo.*|.*bar.*)", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, + {".*foo.*|bar", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, + {".*foo|bar", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, + // This construct is similar to (...), but won't create a capture group. + {"(?:.*foo.*|bar)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), true}, + // named capture group + {"(?P.*foo.*|bar)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), true}, + // parsed as (?-s:.)*foo(?-s:.)*|b(?:ar|uzz) + {".*foo.*|bar|buzz", true, newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newContainsFilter([]byte("bar"), false), newContainsFilter([]byte("buzz"), false))), true}, + // parsed as (?-s:.)*foo(?-s:.)*|bar|uzz + {".*foo.*|bar|uzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), newContainsFilter([]byte("uzz"), false)), true}, + // parsed as foo|b(?:ar|(?:)|uzz)|zz + {"foo|bar|b|buzz|zz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newOrFilter(newContainsFilter([]byte("bar"), false), newContainsFilter([]byte("b"), false)), newContainsFilter([]byte("buzz"), false))), newContainsFilter([]byte("zz"), false)), true}, + // parsed as f(?:(?:)|oo(?:(?:)|bar)) + {"f|foo|foobar", true, newOrFilter(newContainsFilter([]byte("f"), false), newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("foobar"), false))), true}, + // parsed as f(?:(?-s:.)*|oobar(?-s:.)*)|(?-s:.)*buzz + {"f.*|foobar.*|.*buzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("f"), false), newContainsFilter([]byte("foobar"), false)), newContainsFilter([]byte("buzz"), false)), true}, + // parsed as ((f(?-s:.)*)|foobar(?-s:.)*)|(?-s:.)*buzz + {"((f.*)|foobar.*)|.*buzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("f"), false), newContainsFilter([]byte("foobar"), false)), newContainsFilter([]byte("buzz"), false)), true}, + {".*", true, TrueFilter, true}, + {".*|.*", true, TrueFilter, true}, + {".*||||", true, TrueFilter, true}, + {"", true, TrueFilter, true}, + {"(?i)foo", true, newContainsFilter([]byte("foo"), true), true}, + {"(?i)界", true, newContainsFilter([]byte("界"), true), true}, + {"(?i)ïB", true, newContainsFilter([]byte("ïB"), true), true}, {"(?:)foo|fatal|exception", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("fatal"), false)), newContainsFilter([]byte("exception"), false)), true}, // regex we are not supporting. - // {"[a-z]+foo", true, nil, false}, - // {".+foo", true, nil, false}, - // {".*fo.*o", true, nil, false}, - // {`\d`, true, nil, false}, - // {`\sfoo`, true, nil, false}, - // {`foo?`, false, nil, false}, - // {`foo{1,2}bar{2,3}`, true, nil, false}, - // {`foo|\d*bar`, true, nil, false}, - // {`foo|fo{1,2}`, true, nil, false}, - // {`foo|fo\d*`, true, nil, false}, - // {`foo|fo\d+`, true, nil, false}, - // {`(\w\d+)`, true, nil, false}, - // {`.*f.*oo|fo{1,2}`, true, nil, false}, + {"[a-z]+foo", true, nil, false}, + {".+foo", true, nil, false}, + {".*fo.*o", true, nil, false}, + {`\d`, true, nil, false}, + {`\sfoo`, true, nil, false}, + {`foo?`, false, nil, false}, + {`foo{1,2}bar{2,3}`, true, nil, false}, + {`foo|\d*bar`, true, nil, false}, + {`foo|fo{1,2}`, true, nil, false}, + {`foo|fo\d*`, true, nil, false}, + {`foo|fo\d+`, true, nil, false}, + {`(\w\d+)`, true, nil, false}, + {`.*f.*oo|fo{1,2}`, true, nil, false}, } { t.Run(test.re, func(t *testing.T) { d, err := newRegexpFilter(test.re, test.match)