Skip to content

Commit

Permalink
[pkg/ottl] Add GetXML Converter (#35462)
Browse files Browse the repository at this point in the history
  • Loading branch information
djaglowski authored Sep 27, 2024
1 parent 2613b89 commit 70b26af
Show file tree
Hide file tree
Showing 6 changed files with 272 additions and 0 deletions.
27 changes: 27 additions & 0 deletions .chloggen/ottl-get-xml.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/ottl

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add GetXML Converter

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [35462]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
6 changes: 6 additions & 0 deletions pkg/ottl/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,12 @@ func Test_e2e_converters(t *testing.T) {
tCtx.GetLogRecord().Attributes().PutInt("test", 1)
},
},
{
statement: `set(attributes["test"], GetXML("<a><b>1</b><c><b>2</b></c></a>", "/a//b"))`,
want: func(tCtx ottllog.TransformContext) {
tCtx.GetLogRecord().Attributes().PutStr("test", "<b>1</b><b>2</b>")
},
},
{
statement: `set(attributes["test"], Hex(1.0))`,
want: func(tCtx ottllog.TransformContext) {
Expand Down
32 changes: 32 additions & 0 deletions pkg/ottl/ottlfuncs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@ Available Converters:
- [ExtractGrokPatterns](#extractgrokpatterns)
- [FNV](#fnv)
- [Format](#format)
- [GetXML](#getxml)
- [Hex](#hex)
- [Hour](#hour)
- [Hours](#hours)
Expand Down Expand Up @@ -742,6 +743,37 @@ Examples:
- `Format("%04d-%02d-%02d", [Year(Now()), Month(Now()), Day(Now())])`
- `Format("%s/%s/%04d-%02d-%02d.log", [attributes["hostname"], body["program"], Year(Now()), Month(Now()), Day(Now())])`


### GetXML

`GetXML(target, xpath)`

The `GetXML` Converter returns an XML string with selected elements.

`target` is a Getter that returns a string. This string should be in XML format.
If `target` is not a string, is nil, or is not valid XML, `GetXML` will return an error.

`xpath` is a string that specifies an [XPath](https://www.w3.org/TR/1999/REC-xpath-19991116/) expression that
selects one or more elements. Currently, this converter only supports selecting elements.

Examples:

Get all elements at the root of the document with tag "a"

- `GetXML(body, "/a")`

Get all elements anywhere in the document with tag "a"

- `GetXML(body, "//a")`

Get the first element at the root of the document with tag "a"

- `GetXML(body, "/a[1]")`

Get all elements in the document with tag "a" that have an attribute "b" with value "c"

- `GetXML(body, "//a[@b='c']")`

### Hex

`Hex(value)`
Expand Down
62 changes: 62 additions & 0 deletions pkg/ottl/ottlfuncs/func_get_xml.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"

import (
"context"
"fmt"

"github.com/antchfx/xmlquery"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
)

// GetXMLArguments holds the arguments accepted by the GetXML Converter.
type GetXMLArguments[K any] struct {
// Target supplies the string that is parsed as the XML document.
Target ottl.StringGetter[K]
// XPath is the XPath expression used to select nodes from the parsed
// document; it is validated once, when the function is created.
XPath string
}

// NewGetXMLFactory builds the ottl.Factory for the "GetXML" Converter, pairing
// an empty GetXMLArguments value with createGetXMLFunction.
func NewGetXMLFactory[K any]() ottl.Factory[K] {
	args := &GetXMLArguments[K]{}
	return ottl.NewFactory("GetXML", args, createGetXMLFunction[K])
}

// createGetXMLFunction builds the GetXML expression from parsed OTTL arguments.
//
// It returns an error if oArgs is not a *GetXMLArguments[K] or if the XPath
// expression fails validation, so an invalid XPath is rejected when the
// function is created rather than each time it is executed.
func createGetXMLFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) {
	args, ok := oArgs.(*GetXMLArguments[K])
	if !ok {
		return nil, fmt.Errorf("GetXML args must be of type *GetXMLArguments[K]")
	}

	if err := validateXPath(args.XPath); err != nil {
		return nil, err
	}

	return getXML(args.Target, args.XPath), nil
}

// getXML builds an expression that evaluates target, parses the resulting
// string as XML, runs the xPath query against it, and returns the matched
// elements serialized back to a single XML string.
//
// Matches that are not element nodes are skipped, so a query that selects
// only non-element nodes yields an empty string. Errors from the getter, the
// parser, or the query are returned unchanged.
func getXML[K any](target ottl.StringGetter[K], xPath string) ottl.ExprFunc[K] {
	return func(ctx context.Context, tCtx K) (any, error) {
		raw, err := target.Get(ctx, tCtx)
		if err != nil {
			return nil, err
		}

		root, err := parseNodesXML(raw)
		if err != nil {
			return nil, err
		}

		matches, err := xmlquery.QueryAll(root, xPath)
		if err != nil {
			return nil, err
		}

		// Collect the matched elements under a fresh document node so they can
		// be serialized together.
		out := &xmlquery.Node{Type: xmlquery.DocumentNode}
		for _, match := range matches {
			if match.Type == xmlquery.ElementNode {
				xmlquery.AddChild(out, match)
			}
		}
		return out.OutputXML(false), nil
	}
}
144 changes: 144 additions & 0 deletions pkg/ottl/ottlfuncs/func_get_xml_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"

import (
"context"
"testing"

"github.com/stretchr/testify/assert"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
)

// Test_GetXML runs GetXML against a table of documents and XPath expressions
// and checks the serialized XML string that each combination produces.
func Test_GetXML(t *testing.T) {
	tests := []struct {
		name     string
		document string
		xPath    string
		want     string
	}{
		{
			name:     "get single element",
			document: `<a><b/></a>`,
			xPath:    "/a/b",
			want:     `<b></b>`,
		},
		{
			name:     "get single complex element",
			document: `<a foo="bar"><b>hello</b></a>`,
			xPath:    "/a",
			want:     `<a foo="bar"><b>hello</b></a>`,
		},
		{
			name:     "get uniform elements from same parent",
			document: `<a><b>hello</b><b>world</b></a>`,
			xPath:    "/a/b",
			want:     `<b>hello</b><b>world</b>`,
		},
		{
			name:     "get nonuniform elements from same parent",
			document: `<a><b>hello</b><b><c>world</c></b><d/></a>`,
			xPath:    "/a/*",
			want:     `<b>hello</b><b><c>world</c></b><d></d>`,
		},
		{
			name:     "get elements from various places",
			document: `<a><x>1</x><b><x>2</x></b><d><e><f><x>3</x></f></e></d></a>`,
			xPath:    "/a//x",
			want:     `<x>1</x><x>2</x><x>3</x>`,
		},
		{
			name:     "get filtered elements from various places",
			document: `<a><x env="prod">1</x><b><x env="dev">2</x></b><d><e><f><x env="prod">3</x></f></e></d></a>`,
			xPath:    "/a//x[@env='prod']",
			want:     `<x env="prod">1</x><x env="prod">3</x>`,
		},
		{
			name:     "ignore empty",
			document: ``,
			xPath:    "/",
			want:     ``,
		},
		{
			name:     "ignore declaration",
			document: `<?xml version="1.0" encoding="UTF-8"?><a></a>`,
			xPath:    "/*",
			want:     `<a></a>`,
		},
		{
			name:     "ignore comments",
			document: `<!-- comment --><a></a><!-- comment -->`,
			xPath:    "/*",
			want:     `<a></a>`,
		},
		{
			name:     "ignore attribute selection",
			document: `<a foo="bar"></a>`,
			xPath:    "/@foo",
			want:     ``,
		},
		{
			name:     "ignore text selection",
			document: `<a>hello</a>`,
			xPath:    "/a/text()",
			want:     ``,
		},
		{
			name:     "ignore chardata selection",
			document: `<a><![CDATA[hello]]></a>`,
			xPath:    "/a/text()",
			want:     ``,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			factory := NewGetXMLFactory[any]()
			exprFunc, err := factory.CreateFunction(
				ottl.FunctionContext{},
				&GetXMLArguments[any]{
					Target: ottl.StandardStringGetter[any]{
						Getter: func(_ context.Context, _ any) (any, error) {
							return tt.document, nil
						},
					},
					XPath: tt.xPath,
				})
			if !assert.NoError(t, err) {
				// Creation failed, so exprFunc is nil; calling it would panic
				// instead of reporting a normal test failure.
				return
			}

			result, err := exprFunc(context.Background(), nil)
			assert.NoError(t, err)
			assert.Equal(t, tt.want, result)
		})
	}
}

// TestCreateGetXMLFunc checks the error paths of the GetXML factory: a wrong
// argument type and an invalid XPath must fail at creation, while invalid XML
// must only fail when the created function is executed.
func TestCreateGetXMLFunc(t *testing.T) {
	factory := NewGetXMLFactory[any]()
	fCtx := ottl.FunctionContext{}

	// Invalid arg type
	exprFunc, err := factory.CreateFunction(fCtx, nil)
	assert.Error(t, err)
	assert.Nil(t, exprFunc)

	// Invalid XPath should error on function creation
	exprFunc, err = factory.CreateFunction(
		fCtx, &GetXMLArguments[any]{
			XPath: "!",
		})
	assert.Error(t, err)
	assert.Nil(t, exprFunc)

	// Invalid XML should error on function execution
	exprFunc, err = factory.CreateFunction(
		fCtx, &GetXMLArguments[any]{
			Target: invalidXMLGetter(),
			XPath:  "/",
		})
	assert.NoError(t, err)
	if !assert.NotNil(t, exprFunc) {
		// Calling a nil exprFunc would panic rather than fail the test cleanly.
		return
	}
	_, err = exprFunc(context.Background(), nil)
	assert.Error(t, err)
}
1 change: 1 addition & 0 deletions pkg/ottl/ottlfuncs/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func converters[K any]() []ottl.Factory[K] {
NewExtractPatternsFactory[K](),
NewExtractGrokPatternsFactory[K](),
NewFnvFactory[K](),
NewGetXMLFactory[K](),
NewHourFactory[K](),
NewHoursFactory[K](),
NewIntFactory[K](),
Expand Down

0 comments on commit 70b26af

Please sign in to comment.