Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove nested fields in remove_fields plugin #652

Merged
merged 11 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ tests-offsets
testdata

.idea/
.DS_Store
42 changes: 42 additions & 0 deletions plugin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,48 @@ It parses string from the event field using re2 expression with named subgroups
[More details...](plugin/action/parse_re2/README.md)
## remove_fields
It removes the list of the event fields and keeps others.
Nested fields supported: list subfield names separated with dot.
Example:
```
fields: ["a.b.c"]

# event before processing
{
"a": {
"b": {
"c": 100,
"d": "some"
}
}
}

# event after processing
{
"a": {
"b": {
"d": "some" # "c" removed
}
}
}
```

If field name contains dots use backslash for escaping.
Example:
```
fields:
- exception\.type

# event before processing
{
"message": "Exception occurred",
"exception.type": "SomeType"
}

# event after processing
{
"message": "Exception occurred" # "exception.type" removed
}
```

[More details...](plugin/action/remove_fields/README.md)
## rename
Expand Down
42 changes: 42 additions & 0 deletions plugin/action/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,48 @@ It parses string from the event field using re2 expression with named subgroups
[More details...](plugin/action/parse_re2/README.md)
## remove_fields
It removes the list of the event fields and keeps others.
Nested fields supported: list subfield names separated with dot.
Example:
```
fields: ["a.b.c"]

# event before processing
{
"a": {
"b": {
"c": 100,
"d": "some"
}
}
}

# event after processing
{
"a": {
"b": {
"d": "some" # "c" removed
}
}
}
```

If field name contains dots use backslash for escaping.
Example:
```
fields:
- exception\.type

# event before processing
{
"message": "Exception occurred",
"exception.type": "SomeType"
}

# event after processing
{
"message": "Exception occurred" # "exception.type" removed
}
```

[More details...](plugin/action/remove_fields/README.md)
## rename
Expand Down
42 changes: 42 additions & 0 deletions plugin/action/remove_fields/README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,47 @@
# Remove fields plugin
It removes the list of the event fields and keeps others.
Nested fields supported: list subfield names separated with dot.
Example:
```
fields: ["a.b.c"]

# event before processing
{
"a": {
"b": {
"c": 100,
"d": "some"
}
}
}

# event after processing
{
"a": {
"b": {
"d": "some" # "c" removed
}
}
}
```

If field name contains dots use backslash for escaping.
Example:
```
fields:
- exception\.type

# event before processing
{
"message": "Exception occurred",
"exception.type": "SomeType"
}

# event after processing
{
"message": "Exception occurred" # "exception.type" removed
}
```

### Config params
**`fields`** *`[]string`*
Expand Down
93 changes: 87 additions & 6 deletions plugin/action/remove_fields/remove_fields.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,64 @@
package remove_fields

import (
"sort"
"strings"

"github.com/ozontech/file.d/cfg"
"github.com/ozontech/file.d/fd"
"github.com/ozontech/file.d/logger"
"github.com/ozontech/file.d/pipeline"
)

/*{ introduction
It removes the list of the event fields and keeps others.
Nested fields supported: list subfield names separated with dot.
Example:
```
fields: ["a.b.c"]

# event before processing
{
"a": {
"b": {
"c": 100,
"d": "some"
}
}
}

# event after processing
{
"a": {
"b": {
"d": "some" # "c" removed
}
}
}
```

If field name contains dots use backslash for escaping.
Example:
```
fields:
- exception\.type

# event before processing
{
"message": "Exception occurred",
"exception.type": "SomeType"
}

# event after processing
{
"message": "Exception occurred" # "exception.type" removed
}
```
}*/

type Plugin struct {
config *Config
fieldsBuf []string
config *Config
fieldPaths [][]string
}

// ! config-params
Expand Down Expand Up @@ -40,20 +86,55 @@ func (p *Plugin) Start(config pipeline.AnyConfig, _ *pipeline.ActionPluginParams
if p.config == nil {
logger.Panicf("config is nil for the remove fields plugin")
}

// remove nested fields selection;
// for example:
// config `fields: ["a", "a.b"]` is equal to
// config `fields: ["a"]`
// see tests: TestDuplicatingFieldSelectors, TestNestedFieldSelectors

fields := p.config.Fields
sort.Slice(fields, func(i, j int) bool {
return len(fields[i]) < len(fields[j])
})

p.fieldPaths = make([][]string, 0, len(fields))

for i, f1 := range fields {
if f1 == "" {
logger.Warn("empty field found")
continue
}

ok := true
for _, f2 := range fields[:i] {
if strings.HasPrefix(f1, f2) {
logger.Warnf("path '%s' included in path '%s'; remove nested path", f1, f2)
ok = false
break
}
}

if ok {
p.fieldPaths = append(p.fieldPaths, cfg.ParseFieldSelector(f1))
}
}

if len(p.fieldPaths) == 0 {
logger.Warn("no fields will be removed")
}
}

func (p *Plugin) Stop() {
}

func (p *Plugin) Do(event *pipeline.Event) pipeline.ActionResult {
p.fieldsBuf = p.fieldsBuf[:0]

if !event.Root.IsObject() {
return pipeline.ActionPass
}

for _, field := range p.config.Fields {
event.Root.Dig(field).Suicide()
for _, fieldPath := range p.fieldPaths {
event.Root.Dig(fieldPath...).Suicide()
}

return pipeline.ActionPass
Expand Down
43 changes: 43 additions & 0 deletions plugin/action/remove_fields/remove_fields_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"github.com/ozontech/file.d/pipeline"
"github.com/ozontech/file.d/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestRemoveFields(t *testing.T) {
Expand All @@ -33,3 +34,45 @@ func TestRemoveFields(t *testing.T) {
assert.Equal(t, `{"b":"c"}`, outEvents[1], "wrong event")
assert.Equal(t, `{"field_3":"value_3","a":"b"}`, outEvents[2], "wrong event")
}

func TestRemoveNestedFields(t *testing.T) {
config := test.NewConfig(&Config{Fields: []string{"a.b"}}, nil)
p, input, output := test.NewPipelineMock(test.NewActionPluginStaticInfo(factory, config, pipeline.MatchModeAnd, nil, false))
wg := &sync.WaitGroup{}
wg.Add(3)

outEvents := make([]string, 0, 3)
output.SetOutFn(func(e *pipeline.Event) {
outEvents = append(outEvents, e.Root.EncodeToString())
wg.Done()
})

input.In(0, "test.log", 0, []byte(`{"a":"some"}`))
input.In(0, "test.log", 0, []byte(`{"a":{"b":"deleted"}}`))
input.In(0, "test.log", 0, []byte(`{"a":{"b":{"c":["deleted"]},"d":"saved"}}`))

wg.Wait()
p.Stop()

assert.Equal(t, 3, len(outEvents), "wrong out events count")
assert.Equal(t, `{"a":"some"}`, outEvents[0], "wrong event")
assert.Equal(t, `{"a":{}}`, outEvents[1], "wrong event")
assert.Equal(t, `{"a":{"d":"saved"}}`, outEvents[2], "wrong event")
}

func TestDuplicatingFieldSelectors(t *testing.T) {
config := test.NewConfig(&Config{Fields: []string{"a.b", "a.b"}}, nil)
p := Plugin{}
p.Start(config, nil)

require.NotEmpty(t, p.fieldPaths)
require.Equal(t, []string{"a", "b"}, p.fieldPaths[0])
}

func TestNestedFieldSelectors(t *testing.T) {
config := test.NewConfig(&Config{Fields: []string{"a.b", "a.b.c", "a.d", "a"}}, nil)
p := Plugin{}
p.Start(config, nil)

require.Equal(t, [][]string{{"a"}}, p.fieldPaths)
}
Loading