From efa114ac89d4d6bd79ced0fd442000f0dbc5dc6f Mon Sep 17 00:00:00 2001 From: Tim Bray Date: Sat, 28 Jan 2023 10:38:02 -0800 Subject: [PATCH 1/3] pat: add prefix pattern fixes: #62 Signed-off-by: Tim Bray --- PATTERNS.md | 23 ++++++++++++++++++++--- README.md | 8 ++++++++ cl2_test.go | 33 +++++++++++++++++++++++++++++++++ core_matcher_test.go | 12 ++++++++++++ pattern.go | 22 ++++++++++++++++++++++ pattern_test.go | 5 +++++ value_matcher.go | 33 +++++++++++++++++++++++++++++++++ 7 files changed, 133 insertions(+), 3 deletions(-) diff --git a/PATTERNS.md b/PATTERNS.md index 5868795..ddc0941 100644 --- a/PATTERNS.md +++ b/PATTERNS.md @@ -60,6 +60,23 @@ Thus, the following Pattern would match both JSON events above: An **Extended Pattern** **MUST** be a JSON object containing a single field whose name is called the **Pattern Type**. +### Prefix Pattern + +The Pattern Type of a Prefix Pattern is `prefix` and its value +**MUST** be a string. + +The following event: + +```json +{"a": "alpha"} +``` + +would be matched by this Prefix Pattern: + +```json +{"a": [ { "prefix": "al" } ] } +``` + ### Exists Pattern The Pattern Type of an Exists Pattern is `exists` and its @@ -132,9 +149,9 @@ Consider the following Event: ``` The following Shellstyle Patterns would match it: ```json -{"img": [ {"shellstyle": "*.jpg"} ]} -{"img": [ {"shellstyle": "https://example.com/*"} ]} -{"img": [ {"shellstyle": "https://example.com/*.jpg"} ]} +{"img": [ {"shellstyle": "*.jpg"} ] } +{"img": [ {"shellstyle": "https://example.com/*"} ] } +{"img": [ {"shellstyle": "https://example.com/*.jpg"} ] } ``` ## EventBridge Patterns diff --git a/README.md b/README.md index 8e0f626..737a298 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,14 @@ The following Patterns would match it: } } ``` +```json +{ + "Image": { + "Thumbnail": { + "Url": [ "a", { "prefix": "https:" } ] } + } +} +``` The syntax and semantics of Patterns are fully specified in [Patterns in Quamina](PATTERNS.md). 
diff --git a/cl2_test.go b/cl2_test.go index 7238ff2..8168943 100644 --- a/cl2_test.go +++ b/cl2_test.go @@ -78,6 +78,35 @@ func TestRulerCl2(t *testing.T) { } exactMatches := []int{1, 101, 35, 655, 1} + prefixRules := []string{ + "{\n" + + " \"properties\": {\n" + + " \"STREET\": [ { \"prefix\": \"AC\" } ]\n" + + " }\n" + + "}", + "{\n" + + " \"properties\": {\n" + + " \"STREET\": [ { \"prefix\": \"BL\" } ]\n" + + " }\n" + + "}", + "{\n" + + " \"properties\": {\n" + + " \"STREET\": [ { \"prefix\": \"DR\" } ]\n" + + " }\n" + + "}", + "{\n" + + " \"properties\": {\n" + + " \"STREET\": [ { \"prefix\": \"FU\" } ]\n" + + " }\n" + + "}", + "{\n" + + " \"properties\": {\n" + + " \"STREET\": [ { \"prefix\": \"RH\" } ]\n" + + " }\n" + + "}", + } + prefixMatches := []int{24, 442, 38, 2387, 328} + anythingButRules := []string{ "{\n" + " \"properties\": {\n" + @@ -166,6 +195,10 @@ func TestRulerCl2(t *testing.T) { bm.addRules(exactRules, exactMatches) fmt.Printf("EXACT events/sec: %.1f\n", bm.run(t, lines)) + bm = newBenchmarker() + bm.addRules(prefixRules, prefixMatches) + fmt.Printf("PREFIX events/sec: %.1f\n", bm.run(t, lines)) + bm = newBenchmarker() bm.addRules(anythingButRules, anythingButMatches) fmt.Printf("ANYTHING-BUT events/sec: %.1f\n", bm.run(t, lines)) diff --git a/core_matcher_test.go b/core_matcher_test.go index 9bd1001..4d098bd 100644 --- a/core_matcher_test.go +++ b/core_matcher_test.go @@ -165,9 +165,20 @@ func TestExerciseMatching(t *testing.T) { `{"Image": { "Thumbnail": { "Url": [ { "shellstyle": "https://www.example.com/*" } ] } } }`, `{"Image": { "Thumbnail": { "Url": [ { "shellstyle": "https://www.example.com/*9943" } ] } } }`, `{"Image": { "Title": [ {"anything-but": ["Pikachu", "Eevee"] } ] } }`, + `{"Image": { "Thumbnail": { "Url": [ { "prefix": "https:" } ] } } }`, + `{"Image": { "Thumbnail": { "Url": [ "a", { "prefix": "https:" } ] } } }`, } var err error + blankMatcher := newCoreMatcher() + empty, err := 
blankMatcher.matchesForJSONEvent([]byte(j)) + if err != nil { + t.Error("blank: " + err.Error()) + } + if len(empty) != 0 { + t.Error("matches on blank matcher") + } + for i, should := range patternsFromReadme { m := newCoreMatcher() err = m.addPattern(fmt.Sprintf("should %d", i), should) @@ -187,6 +198,7 @@ func TestExerciseMatching(t *testing.T) { `{"Image": { "Animated": [ { "exists": false } ] } }`, `{"Image": { "NotThere": [ { "exists": true } ] } }`, `{"Image": { "IDs": [ { "exists": false } ], "Animated": [ false ] } }`, + `{"Image": { "Thumbnail": { "Url": [ { "prefix": "http:" } ] } } }`, } for i, shouldNot := range shouldNotMatches { m := newCoreMatcher() diff --git a/pattern.go b/pattern.go index 51d04a0..9715956 100644 --- a/pattern.go +++ b/pattern.go @@ -19,6 +19,7 @@ const ( existsFalseType shellStyleType anythingButType + prefixType ) // typedVal represents the value of a field in a pattern, giving the value and the type of pattern. @@ -196,12 +197,33 @@ func readSpecialPattern(pb *patternBuild, valsIn []typedVal) (pathVals []typedVa pathVals, err = readExistsSpecial(pb, pathVals) case "shellstyle": pathVals, err = readShellStyleSpecial(pb, pathVals) + case "prefix": + pathVals, err = readPrefixSpecial(pb, pathVals) default: err = errors.New("unrecognized in special pattern: " + tt) } return } +func readPrefixSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []typedVal, err error) { + t, err := pb.jd.Token() + if err != nil { + return + } + pathVals = valsIn + + prefixString, ok := t.(string) + if !ok { + err = errors.New("value for 'prefix' must be a string") + return + } + pathVals = append(pathVals, typedVal{vType: prefixType, val: `"` + prefixString + `"`}) + + // has to be } or tokenizer will throw error + _, err = pb.jd.Token() + return +} + func readExistsSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []typedVal, err error) { t, err := pb.jd.Token() if err != nil { diff --git a/pattern_test.go b/pattern_test.go index 
2bdbb30..4e218c6 100644 --- a/pattern_test.go +++ b/pattern_test.go @@ -61,6 +61,11 @@ func TestPatternFromJSON(t *testing.T) { `{"xxx": [ { "exists": false, "x": ["a", 3 ] }] }`, `{"abc": [ {"shellstyle":15} ] }`, `{"abc": [ {"shellstyle":"a**b"}, "foo" ] }`, + `{"abc": [ {"prefix":23}, "foo" ] }`, + `{"abc": [ {"prefix":["a", "b"]}, "foo" ] }`, + `{"abc": [ {"prefix": - }, "foo" ] }`, + `{"abc": [ {"prefix": - "a" }, "foo" ] }`, + `{"abc": [ {"prefix": "a" {, "foo" ] }`, } for _, b := range bads { _, err := patternFromJSON([]byte(b)) diff --git a/value_matcher.go b/value_matcher.go index 1e5100f..e2f9b5e 100644 --- a/value_matcher.go +++ b/value_matcher.go @@ -71,6 +71,7 @@ func (m *valueMatcher) transitionOn(val []byte) []*fieldMatcher { default: // no dfa, no singleton, nothing to do + // this probably can't happen because a flattener shouldn't preserve a field that hasn't appeared in a pattern return transitions } } @@ -116,6 +117,8 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher { var newNfa *smallTable[*nfaStepList] newNfa, nextField = makeShellStyleAutomaton(valBytes, nil) newDfa = nfa2Dfa(newNfa) + case prefixType: + newDfa, nextField = makePrefixAutomaton(valBytes, nil) default: panic("unknown value type") } @@ -145,6 +148,12 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher { fields.startDfa = nfa2Dfa(newAutomaton) m.update(fields) return nextField + case prefixType: + newAutomaton, nextField := makePrefixAutomaton(valBytes, nil) + fields.startDfa = newAutomaton + m.update(fields) + return nextField + default: panic("unknown value type") } @@ -171,6 +180,8 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher { var newNfa *smallTable[*nfaStepList] newNfa, nextField = makeShellStyleAutomaton(valBytes, nil) newDfa = nfa2Dfa(newNfa) + case prefixType: + newDfa, nextField = makePrefixAutomaton(valBytes, nil) default: panic("unknown val type") } @@ -183,6 +194,28 @@ func (m *valueMatcher) addTransition(val 
typedVal) *fieldMatcher { return nextField } +func makePrefixAutomaton(val []byte, useThisTransition *fieldMatcher) (*smallTable[*dfaStep], *fieldMatcher) { + var nextField *fieldMatcher + if useThisTransition != nil { + nextField = useThisTransition + } else { + nextField = newFieldMatcher() + } + return onePrefixStep(val, 0, nextField), nextField +} + +func onePrefixStep(val []byte, index int, nextField *fieldMatcher) *smallTable[*dfaStep] { + var nextStep *dfaStep + + // have to stop one short to skip the closing " + if index == len(val)-2 { + nextStep = &dfaStep{table: newSmallTable[*dfaStep](), fieldTransitions: []*fieldMatcher{nextField}} + } else { + nextStep = &dfaStep{table: onePrefixStep(val, index+1, nextField)} + } + return makeSmallDfaTable(nil, []byte{val[index]}, []*dfaStep{nextStep}) +} + // makeStringAutomaton creates a utf8-based automaton from a literal string // using smallTables. Note the addition of a valueTerminator. The implementation // is recursive because this allows the use of the makeSmallDfaTable call, which From 3f22f7ea5f8b65ed9996b586f85360501ce5fe35 Mon Sep 17 00:00:00 2001 From: Tim Bray Date: Sat, 28 Jan 2023 10:38:02 -0800 Subject: [PATCH 2/3] pat: add prefix pattern fixes: #62 Signed-off-by: Tim Bray --- value_matcher.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/value_matcher.go b/value_matcher.go index e2f9b5e..b8856a5 100644 --- a/value_matcher.go +++ b/value_matcher.go @@ -119,8 +119,6 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher { newDfa = nfa2Dfa(newNfa) case prefixType: newDfa, nextField = makePrefixAutomaton(valBytes, nil) - default: - panic("unknown value type") } fields.startDfa = mergeDfas(fields.startDfa, newDfa) m.update(fields) @@ -153,9 +151,6 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher { fields.startDfa = newAutomaton m.update(fields) return nextField - - default: - panic("unknown value type") } } @@ -182,8 +177,6 @@ func (m *valueMatcher) 
addTransition(val typedVal) *fieldMatcher { newDfa = nfa2Dfa(newNfa) case prefixType: newDfa, nextField = makePrefixAutomaton(valBytes, nil) - default: - panic("unknown val type") } // now table is ready for use, nuke singleton to signal threads to use it From 865f6c37f3b4787f96887c6ed957146b94aeb7d4 Mon Sep 17 00:00:00 2001 From: Tim Bray Date: Wed, 15 Feb 2023 14:09:50 -0800 Subject: [PATCH 3/3] pat: add prefix pattern fixes: #62 Signed-off-by: Tim Bray --- pattern.go | 6 +++++- small_table.go | 3 +-- value_matcher.go | 11 +++++++++-- value_matcher_test.go | 40 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 5 deletions(-) diff --git a/pattern.go b/pattern.go index 9715956..e9aa127 100644 --- a/pattern.go +++ b/pattern.go @@ -217,7 +217,11 @@ func readPrefixSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []typedVal err = errors.New("value for 'prefix' must be a string") return } - pathVals = append(pathVals, typedVal{vType: prefixType, val: `"` + prefixString + `"`}) + val := typedVal{ + vType: prefixType, + val: `"` + prefixString + `"`, + } + pathVals = append(pathVals, val) // has to be } or tokenizer will throw error _, err = pb.jd.Token() diff --git a/small_table.go b/small_table.go index 8416dc8..9484cb5 100644 --- a/small_table.go +++ b/small_table.go @@ -107,8 +107,7 @@ func mergeOneDfaStep(step1, step2 *dfaStep, memoize map[dfaStepKey]*dfaStep) *df return combined } - // TODO: this works, all the tests pass, but I'm not satisfied with it. 
My intuition is that you ought - // to be able to come out of this with just one *fieldMatcher + // TODO: this works and all the tests pass, but it should be possible to come out of this with just one *fieldMatcher newTable := newSmallTable[*dfaStep]() switch { case step1.fieldTransitions == nil && step2.fieldTransitions == nil: diff --git a/value_matcher.go b/value_matcher.go index b8856a5..8219349 100644 --- a/value_matcher.go +++ b/value_matcher.go @@ -70,8 +70,8 @@ func (m *valueMatcher) transitionOn(val []byte) []*fieldMatcher { return transitionDfa(fields.startDfa, val, transitions) default: - // no dfa, no singleton, nothing to do - // this probably can't happen because a flattener shouldn't preserve a field that hasn't appeared in a pattern + // no dfa, no singleton, nothing to do, this probably can't happen because a flattener + // shouldn't preserve a field that hasn't appeared in a pattern return transitions } } @@ -119,6 +119,8 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher { newDfa = nfa2Dfa(newNfa) case prefixType: newDfa, nextField = makePrefixAutomaton(valBytes, nil) + default: + panic("unknown value type") } fields.startDfa = mergeDfas(fields.startDfa, newDfa) m.update(fields) @@ -151,6 +153,8 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher { fields.startDfa = newAutomaton m.update(fields) return nextField + default: + panic("unknown value type") } } @@ -177,6 +181,8 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher { newDfa = nfa2Dfa(newNfa) case prefixType: newDfa, nextField = makePrefixAutomaton(valBytes, nil) + default: + panic("unknown value type") } // now table is ready for use, nuke singleton to signal threads to use it @@ -189,6 +195,7 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher { func makePrefixAutomaton(val []byte, useThisTransition *fieldMatcher) (*smallTable[*dfaStep], *fieldMatcher) { var nextField *fieldMatcher + if useThisTransition != nil { nextField = 
useThisTransition } else { diff --git a/value_matcher_test.go b/value_matcher_test.go index 7bb59f9..54fa670 100644 --- a/value_matcher_test.go +++ b/value_matcher_test.go @@ -7,6 +7,46 @@ import ( "testing" ) +func TestInvalidValueTypes(t *testing.T) { + var before []typedVal + addInvalid(t, before) + + before = append(before, typedVal{vType: stringType, val: "foo"}) + addInvalid(t, before) + + before = append(before, typedVal{vType: stringType, val: "bar"}) + addInvalid(t, before) +} +func addInvalid(t *testing.T, before []typedVal) { + t.Helper() + defer func() { + if recover() == nil { + t.Errorf("TestAddInvalidTransition should have panicked") + } + }() + + panicType := valType(999) + + // empty value matcher + m := newValueMatcher() + invalidField := typedVal{ + vType: panicType, + val: "one", + } + for _, addBefore := range before { + m.addTransition(addBefore) + } + m.addTransition(invalidField) +} + +func TestNoOpTransition(t *testing.T) { + vm := newValueMatcher() + tr := vm.transitionOn([]byte("foo")) + if len(tr) != 0 { + t.Error("matched on empty valuematcher") + } +} + func TestAddTransition(t *testing.T) { m := newValueMatcher() v1 := typedVal{