diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 026aaa98f30..af2bfc11f2a 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -278,6 +278,9 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Add config example of how to skip the `add_host_metadata` processor when forwarding logs. {issue}13920[13920] {pull}18153[18153] - When using the `decode_json_fields` processor, decoded fields are now deep-merged into existing event. {pull}17958[17958] - Add backoff configuration options for the Kafka output. {issue}16777[16777] {pull}17808[17808] +- Add keystore support for autodiscover static configurations. {pull}16306[16306] +- Add Kerberos support to Elasticsearch output. {pull}17927[17927] +- Add support for fixed length extraction in `dissect` processor. {pull}17191[17191] *Auditbeat* diff --git a/libbeat/processors/dissect/const.go b/libbeat/processors/dissect/const.go index 610f27ec50b..aa0349cf82d 100644 --- a/libbeat/processors/dissect/const.go +++ b/libbeat/processors/dissect/const.go @@ -28,8 +28,8 @@ var ( // ` %{key}, %{key/2}` // into: // [["", "key" ], [", ", "key/2"]] - delimiterRE = regexp.MustCompile("(?s)(.*?)%\\{([^}]*?)}") - suffixRE = regexp.MustCompile("(.+?)(/(\\d{1,2}))?(->)?$") + ordinalIndicator = "/" + fixedLengthIndicator = "#" skipFieldPrefix = "?" appendFieldPrefix = "+" @@ -39,6 +39,14 @@ var ( greedySuffix = "->" pointerFieldPrefix = "*" + numberRE = "\\d{1,2}" + + delimiterRE = regexp.MustCompile("(?s)(.*?)%\\{([^}]*?)}") + suffixRE = regexp.MustCompile("(.+?)" + // group 1 for key name + "(" + ordinalIndicator + "(" + numberRE + ")" + ")?" + // group 2, 3 for ordinal + "(" + fixedLengthIndicator + "(" + numberRE + ")" + ")?" 
+ // group 4, 5 for fixed length + "(" + greedySuffix + ")?$") // group 6 for greedy + defaultJoinString = " " errParsingFailure = errors.New("parsing failure") diff --git a/libbeat/processors/dissect/dissect.go b/libbeat/processors/dissect/dissect.go index c9093c476f8..406027adfa3 100644 --- a/libbeat/processors/dissect/dissect.go +++ b/libbeat/processors/dissect/dissect.go @@ -89,12 +89,27 @@ func (d *Dissector) extract(s string) (positions, error) { // move through all the other delimiters, until we have consumed all of them. for dl.Next() != nil { start = offset - end = dl.Next().IndexOf(s, offset) - if end == -1 { - return nil, fmt.Errorf( - "could not find delimiter: `%s` in remaining: `%s`, (offset: %d)", - dl.Delimiter(), s[offset:], offset, - ) + + // corresponding field of the delimiter + field := d.parser.fields[d.parser.fieldsIdMap[i]] + + // for fixed-length field, just step the same size of its length + if field.IsFixedLength() { + end = offset + field.Length() + if end > len(s) { + return nil, fmt.Errorf( + "field length is grater than string length: remaining: `%s`, (offset: %d), field: %s", + s[offset:], offset, field, + ) + } + } else { + end = dl.Next().IndexOf(s, offset) + if end == -1 { + return nil, fmt.Errorf( + "could not find delimiter: `%s` in remaining: `%s`, (offset: %d)", + dl.Delimiter(), s[offset:], offset, + ) + } } offset = end @@ -118,6 +133,13 @@ func (d *Dissector) extract(s string) (positions, error) { dl = dl.Next() } + field := d.parser.fields[d.parser.fieldsIdMap[i]] + + if field.IsFixedLength() && offset+field.Length() != len(s) { + return nil, fmt.Errorf("last fixed length key `%s` (length: %d) does not fit into remaining: `%s`, (offset: %d)", + field, field.Length(), s, offset, + ) + } // If we have remaining contents and have not captured all the requested fields if offset < len(s) && i < len(d.parser.fields) { positions[i] = position{start: offset, end: len(s)} diff --git 
a/libbeat/processors/dissect/docs/dissect.asciidoc b/libbeat/processors/dissect/docs/dissect.asciidoc index c5f1e566793..e11d8ed50b9 100644 --- a/libbeat/processors/dissect/docs/dissect.asciidoc +++ b/libbeat/processors/dissect/docs/dissect.asciidoc @@ -30,7 +30,7 @@ an error; you need to either drop or rename the key before using dissect. For tokenization to be successful, all keys must be found and extracted, if one of them cannot be found an error will be logged and no modification is done on the original event. -NOTE: A key can contain any characters except reserved suffix or prefix modifiers: `/`,`&`, `+` +NOTE: A key can contain any characters except reserved suffix or prefix modifiers: `/`,`&`, `+`, `#` and `?`. See <<conditions>> for a list of supported conditions. diff --git a/libbeat/processors/dissect/field.go b/libbeat/processors/dissect/field.go index eae6ba7cdf7..bb92db0c18f 100644 --- a/libbeat/processors/dissect/field.go +++ b/libbeat/processors/dissect/field.go @@ -27,17 +27,20 @@ type field interface { MarkGreedy() IsGreedy() bool Ordinal() int + Length() int Key() string ID() int Apply(b string, m Map) String() string IsSaveable() bool + IsFixedLength() bool } type baseField struct { id int key string ordinal int + length int greedy bool } @@ -53,6 +56,10 @@ func (f baseField) Ordinal() int { return f.ordinal } +func (f baseField) Length() int { + return f.length +} + func (f baseField) Key() string { return f.key } @@ -65,6 +72,10 @@ func (f baseField) IsSaveable() bool { return true } +func (f baseField) IsFixedLength() bool { + return f.length > 0 +} + func (f baseField) String() string { return fmt.Sprintf("field: %s, ordinal: %d, greedy: %v", f.key, f.ordinal, f.IsGreedy()) } @@ -193,7 +204,7 @@ func newField(id int, rawKey string, previous delimiter) (field, error) { return newSkipField(id), nil } - key, ordinal, greedy := extractKeyParts(rawKey) + key, ordinal, length, greedy := extractKeyParts(rawKey) // Conflicting prefix used. 
if strings.HasPrefix(key, appendIndirectPrefix) { @@ -205,81 +216,88 @@ func newField(id int, rawKey string, previous delimiter) (field, error) { } if strings.HasPrefix(key, skipFieldPrefix) { - return newNamedSkipField(id, key[1:]), nil + return newNamedSkipField(id, key[1:], length), nil } if strings.HasPrefix(key, pointerFieldPrefix) { - return newPointerField(id, key[1:]), nil + return newPointerField(id, key[1:], length), nil } if strings.HasPrefix(key, appendFieldPrefix) { - return newAppendField(id, key[1:], ordinal, greedy, previous), nil + return newAppendField(id, key[1:], ordinal, length, greedy, previous), nil } if strings.HasPrefix(key, indirectFieldPrefix) { - return newIndirectField(id, key[1:]), nil + return newIndirectField(id, key[1:], length), nil } - - return newNormalField(id, key, ordinal, greedy), nil + return newNormalField(id, key, ordinal, length, greedy), nil } func newSkipField(id int) skipField { return skipField{baseField{id: id}} } -func newNamedSkipField(id int, key string) namedSkipField { +func newNamedSkipField(id int, key string, length int) namedSkipField { return namedSkipField{ - baseField{id: id, key: key}, + baseField{id: id, key: key, length: length}, } } -func newPointerField(id int, key string) pointerField { +func newPointerField(id int, key string, length int) pointerField { return pointerField{ - baseField{id: id, key: key}, + baseField{id: id, key: key, length: length}, } } -func newAppendField(id int, key string, ordinal int, greedy bool, previous delimiter) appendField { +func newAppendField(id int, key string, ordinal int, length int, greedy bool, previous delimiter) appendField { return appendField{ baseField: baseField{ id: id, key: key, ordinal: ordinal, + length: length, greedy: greedy, }, previous: previous, } } -func newIndirectField(id int, key string) indirectField { +func newIndirectField(id int, key string, length int) indirectField { return indirectField{ baseField{ - id: id, - key: key, + id: id, + key: 
key, + length: length, }, } } -func newNormalField(id int, key string, ordinal int, greedy bool) normalField { +func newNormalField(id int, key string, ordinal int, length int, greedy bool) normalField { return normalField{ baseField{ id: id, key: key, ordinal: ordinal, + length: length, greedy: greedy, }, } } -func extractKeyParts(rawKey string) (key string, ordinal int, greedy bool) { +func extractKeyParts(rawKey string) (key string, ordinal int, length int, greedy bool) { m := suffixRE.FindAllStringSubmatch(rawKey, -1) if m[0][3] != "" { ordinal, _ = strconv.Atoi(m[0][3]) } - if strings.EqualFold(greedySuffix, m[0][4]) { + if m[0][5] != "" { + length, _ = strconv.Atoi(m[0][5]) + } + + if strings.EqualFold(greedySuffix, m[0][6]) { greedy = true } - return m[0][1], ordinal, greedy + + return m[0][1], ordinal, length, greedy } diff --git a/libbeat/processors/dissect/parser.go b/libbeat/processors/dissect/parser.go index 35c4c48028c..73f42917f7f 100644 --- a/libbeat/processors/dissect/parser.go +++ b/libbeat/processors/dissect/parser.go @@ -26,6 +26,7 @@ import ( type parser struct { delimiters []delimiter fields []field + fieldsIdMap map[int]int referenceFields []field } @@ -81,6 +82,10 @@ func newParser(tokenizer string) (*parser, error) { sort.Slice(fields, func(i, j int) bool { return fields[i].Ordinal() < fields[j].Ordinal() }) + fieldsIdMap := make(map[int]int) + for i, f := range fields { + fieldsIdMap[f.ID()] = i + } // List of fields needed for indirection but don't need to appear in the final event. 
var referenceFields []field @@ -93,6 +98,7 @@ func newParser(tokenizer string) (*parser, error) { return &parser{ delimiters: delimiters, fields: fields, + fieldsIdMap: fieldsIdMap, referenceFields: referenceFields, }, nil } diff --git a/libbeat/processors/dissect/testdata/dissect_tests.json b/libbeat/processors/dissect/testdata/dissect_tests.json index 35b7ad61a33..6c2b642f969 100644 --- a/libbeat/processors/dissect/testdata/dissect_tests.json +++ b/libbeat/processors/dissect/testdata/dissect_tests.json @@ -230,5 +230,63 @@ }, "skip": false, "fail": false + }, + { + "name": "simple fixed length", + "tok": "%{class#1}%{month#2}%{day#2}", + "msg": "A0118", + "expected": { + "class": "A", + "month": "01", + "day": "18" + }, + "skip": false, + "fail": false + }, + { + "name": "simple ordered and fixed length field", + "tok": "%{+key/3#1}%{+key/1#1} %{+key/2}", + "msg": "12 3", + "expected": { + "key": "2 3 1" + }, + "skip": false, + "fail": false + }, + { + "name": "simple padding and fixed length field", + "tok": "%{+key/3#1}%{+key/1#1->} %{+key/2}", + "msg": "12 3", + "expected": { + "key": "2 3 1" + }, + "skip": false, + "fail": false + }, + { + "name": "mixed pointer and indirect and fixed length", + "tok": "%{*key#5}%{\u0026key#5}", + "msg": "helloworld", + "expected": { + "hello": "world" + }, + "skip": false, + "fail": false + }, + { + "name": "fails when there is remaining string after the fixed-length key", + "tok": "%{class#1}%{month#2}%{day#2}", + "msg": "A0118 ", + "expected": null, + "skip": false, + "fail": true + }, + { + "name": "fails when there is no enough string for the fixed-length key", + "tok": "%{key#10}", + "msg": "foobar", + "expected": null, + "skip": false, + "fail": true } -] +] \ No newline at end of file diff --git a/libbeat/processors/dissect/validate_test.go b/libbeat/processors/dissect/validate_test.go index 8d575a655f6..dd19b688355 100644 --- a/libbeat/processors/dissect/validate_test.go +++ 
b/libbeat/processors/dissect/validate_test.go @@ -32,16 +32,16 @@ func TestValidate(t *testing.T) { { name: "when we find reference field for all indirect field", p: &parser{ - fields: []field{newIndirectField(1, "hello"), newNormalField(0, "hola", 1, false)}, - referenceFields: []field{newPointerField(2, "hello")}, + fields: []field{newIndirectField(1, "hello", 0), newNormalField(0, "hola", 1, 0, false)}, + referenceFields: []field{newPointerField(2, "hello", 0)}, }, expectError: false, }, { name: "when we cannot find all the reference field for all indirect field", p: &parser{ - fields: []field{newIndirectField(1, "hello"), newNormalField(0, "hola", 1, false)}, - referenceFields: []field{newPointerField(2, "okhello")}, + fields: []field{newIndirectField(1, "hello", 0), newNormalField(0, "hola", 1, 0, false)}, + referenceFields: []field{newPointerField(2, "okhello", 0)}, }, expectError: true, },