Skip to content

Commit

Permalink
[pkg/ottl] Add new ExtractPatterns converter that extract regex pat…
Browse files Browse the repository at this point in the history
…terns from string (#25878)

**Description:** <Describe what has changed.>
<!--Ex. Fixing a bug - Describe the bug and how this fixes the issue.
Ex. Adding a feature - Explain what this achieves.--> 
[pkg/ottl] Add new `ExtractPatterns` converter that extracts regex
patterns from a string

**Link to tracking Issue:** <Issue number if applicable>
#25834, #25856

**Testing:** <Describe what testing was performed and which tests were
added.>
Unit tests

**Documentation:** <Describe the documentation added.>
Added documentation to pkg/ottl/ottlfuncs/README.md.

---------

Co-authored-by: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com>
  • Loading branch information
newly12 and TylerHelmuth authored Aug 21, 2023
1 parent 5d26321 commit 9d607f5
Show file tree
Hide file tree
Showing 5 changed files with 260 additions and 0 deletions.
27 changes: 27 additions & 0 deletions .chloggen/ottl-func-extract-pattern.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/ottl

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add new `ExtractPatterns` converter that extracts regex patterns from a string.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [25834, 25856]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
17 changes: 17 additions & 0 deletions pkg/ottl/ottlfuncs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ Unlike functions, they do not modify any input telemetry and always return a val
Available Converters:
- [Concat](#concat)
- [ConvertCase](#convertcase)
- [ExtractPatterns](#extractpatterns)
- [FNV](#fnv)
- [Duration](#duration)
- [Int](#int)
Expand Down Expand Up @@ -353,6 +354,22 @@ Examples:
- `Duration("333ms")`
- `Duration("1000000h")`

### ExtractPatterns

`ExtractPatterns(target, pattern)`

The `ExtractPatterns` Converter returns a `pcommon.Map` struct that is the result of extracting named capture groups from the target string. If no matches are found then an empty `pcommon.Map` is returned.

`target` is a Getter that returns a string. `pattern` is a regex string.

If `target` is nil or is not a string, `ExtractPatterns` will return an error. If `pattern` does not contain at least 1 named capture group then `ExtractPatterns` will error on startup.

Examples:

- `ExtractPatterns(attributes["k8s.change_cause"], "GIT_SHA=(?P<git_sha>\\w+)")`

- `ExtractPatterns(body, "^(?P<timestamp>\\w+ \\w+ [0-9]+:[0-9]+:[0-9]+) (?P<hostname>([A-Za-z0-9-_]+)) (?P<process>\\w+)(\\[(?P<pid>\\d+)\\])?: (?P<message>.*)$")`

### FNV

`FNV(value)`
Expand Down
75 changes: 75 additions & 0 deletions pkg/ottl/ottlfuncs/func_extract_patterns.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"

import (
"context"
"fmt"
"regexp"

"go.opentelemetry.io/collector/pdata/pcommon"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
)

// ExtractPatternsArguments holds the arguments accepted by the ExtractPatterns
// converter. The ottlarg tags presumably give each field's positional index in
// the OTTL call — confirm against the ottl package.
type ExtractPatternsArguments[K any] struct {
// Target supplies the string that Pattern is matched against.
Target ottl.StringGetter[K] `ottlarg:"0"`
// Pattern is the regular expression source; it is compiled when the
// converter is created and must contain at least one named capture group.
Pattern string `ottlarg:"1"`
}

// NewExtractPatternsFactory returns the ottl.Factory that registers the
// "ExtractPatterns" converter, pairing an empty ExtractPatternsArguments value
// with the function that builds the converter from parsed arguments.
func NewExtractPatternsFactory[K any]() ottl.Factory[K] {
	const converterName = "ExtractPatterns"
	emptyArgs := &ExtractPatternsArguments[K]{}
	return ottl.NewFactory(converterName, emptyArgs, createExtractPatternsFunction[K])
}

// createExtractPatternsFunction adapts the generic ottl.Arguments value to the
// concrete *ExtractPatternsArguments[K] and builds the converter from it.
// It returns an error when oArgs is of any other type, or when extractPatterns
// rejects the supplied pattern.
func createExtractPatternsFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) {
	if extractArgs, isExtractArgs := oArgs.(*ExtractPatternsArguments[K]); isExtractArgs {
		return extractPatterns(extractArgs.Target, extractArgs.Pattern)
	}
	return nil, fmt.Errorf("ExtractPatternsFactory args must be of type *ExtractPatternsArguments[K]")
}

// extractPatterns builds an OTTL expression that matches pattern against the
// string produced by target and returns the named capture groups as a
// pcommon.Map keyed by group name.
//
// The pattern is compiled once, when the expression is created. An error is
// returned at that point if the pattern is not a valid regular expression or
// does not contain at least one named capture group.
//
// At evaluation time the returned function propagates any error from
// target.Get. If the pattern does not match, an empty map is returned. Unnamed
// groups are ignored; a named group that did not participate in the match is
// stored as an empty string.
func extractPatterns[K any](target ottl.StringGetter[K], pattern string) (ottl.ExprFunc[K], error) {
	r, err := regexp.Compile(pattern)
	if err != nil {
		return nil, fmt.Errorf("the pattern supplied to ExtractPatterns is not a valid pattern: %w", err)
	}

	// The group names are fixed for a compiled regexp, so look them up once
	// here instead of on every evaluation.
	groupNames := r.SubexpNames()

	hasNamedGroup := false
	for _, name := range groupNames {
		if name != "" {
			hasNamedGroup = true
			break
		}
	}
	if !hasNamedGroup {
		return nil, fmt.Errorf("at least 1 named capture group must be supplied in the given regex")
	}

	return func(ctx context.Context, tCtx K) (any, error) {
		val, err := target.Get(ctx, tCtx)
		if err != nil {
			return nil, err
		}

		result := pcommon.NewMap()
		matches := r.FindStringSubmatch(val)
		if matches == nil {
			return result, nil
		}

		for i, name := range groupNames {
			// Index 0 is the whole match and always has an empty name, so
			// this check skips it together with any unnamed groups.
			if name == "" {
				continue
			}
			result.PutStr(name, matches[i])
		}
		// Return a literal nil rather than the (always nil) err variable so
		// the success path cannot leak a stale error.
		return result, nil
	}, nil
}
140 changes: 140 additions & 0 deletions pkg/ottl/ottlfuncs/func_extract_patterns_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package ottlfuncs

import (
"context"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/collector/pdata/pcommon"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
)

// Test_extractPatterns checks the maps produced by extractPatterns for a
// pattern that matches the target and for one that does not.
func Test_extractPatterns(t *testing.T) {
	target := &ottl.StandardStringGetter[any]{
		Getter: func(ctx context.Context, tCtx any) (interface{}, error) {
			return `a=b c=d`, nil
		},
	}
	tests := []struct {
		name    string
		target  ottl.StringGetter[any]
		pattern string
		want    func(pcommon.Map)
	}{
		{
			name:    "extract patterns",
			target:  target,
			pattern: `^a=(?P<a>\w+)\s+c=(?P<c>\w+)$`,
			want: func(expectedMap pcommon.Map) {
				expectedMap.PutStr("a", "b")
				expectedMap.PutStr("c", "d")
			},
		},
		{
			name:    "no pattern found",
			target:  target,
			pattern: `^a=(?P<a>\w+)$`,
			want:    func(expectedMap pcommon.Map) {},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// require (not assert): exprFunc is nil on error and calling it
			// would panic instead of reporting a clean failure.
			exprFunc, err := extractPatterns(tt.target, tt.pattern)
			require.NoError(t, err)

			result, err := exprFunc(context.Background(), nil)
			require.NoError(t, err)

			resultMap, ok := result.(pcommon.Map)
			require.True(t, ok)

			expected := pcommon.NewMap()
			tt.want(expected)

			assert.Equal(t, expected.Len(), resultMap.Len())
			expected.Range(func(k string, ev pcommon.Value) bool {
				av, found := resultMap.Get(k)
				// Report a missing key explicitly rather than comparing
				// against whatever Get returns for an absent entry.
				if assert.True(t, found, "missing key %q in result", k) {
					assert.Equal(t, ev, av)
				}
				return true
			})
		})
	}
}

// Test_extractPatterns_validation verifies that extractPatterns rejects bad
// patterns at construction time: it must return an error and no expression.
func Test_extractPatterns_validation(t *testing.T) {
	// Every case uses an equivalent getter; the target is never evaluated
	// because construction is expected to fail first.
	newFoobarGetter := func() ottl.StringGetter[any] {
		return &ottl.StandardStringGetter[any]{
			Getter: func(ctx context.Context, tCtx any) (interface{}, error) {
				return "foobar", nil
			},
		}
	}

	type validationCase struct {
		name    string
		target  ottl.StringGetter[any]
		pattern string
	}
	cases := []validationCase{
		{name: "bad regex", target: newFoobarGetter(), pattern: "("},
		{name: "no named capture group", target: newFoobarGetter(), pattern: "(.*)"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			fn, buildErr := extractPatterns[any](tc.target, tc.pattern)
			assert.Error(t, buildErr)
			assert.Nil(t, fn)
		})
	}
}

// Test_extractPatterns_bad_input verifies that a well-formed ExtractPatterns
// expression returns an error and a nil result when the target evaluates to a
// non-string value or to nil.
func Test_extractPatterns_bad_input(t *testing.T) {
	tests := []struct {
		name    string
		target  ottl.StringGetter[any]
		pattern string
	}{
		{
			name: "target is non-string",
			target: &ottl.StandardStringGetter[any]{
				Getter: func(ctx context.Context, tCtx any) (interface{}, error) {
					return 123, nil
				},
			},
			pattern: "(?P<line>.*)",
		},
		{
			name: "target is nil",
			target: &ottl.StandardStringGetter[any]{
				Getter: func(ctx context.Context, tCtx any) (interface{}, error) {
					return nil, nil
				},
			},
			pattern: "(?P<line>.*)",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// require (not assert): exprFunc is nil on error and calling it
			// would panic instead of reporting a clean failure.
			exprFunc, err := extractPatterns[any](tt.target, tt.pattern)
			require.NoError(t, err)

			// Never pass a nil context; use context.Background() instead.
			result, err := exprFunc(context.Background(), nil)
			assert.Error(t, err)
			assert.Nil(t, result)
		})
	}
}
1 change: 1 addition & 0 deletions pkg/ottl/ottlfuncs/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ func converters[K any]() []ottl.Factory[K] {
NewConcatFactory[K](),
NewConvertCaseFactory[K](),
NewDurationFactory[K](),
NewExtractPatternsFactory[K](),
NewFnvFactory[K](),
NewIntFactory[K](),
NewIsMapFactory[K](),
Expand Down

0 comments on commit 9d607f5

Please sign in to comment.