diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index d809be9498d..3a4fa0f0d6e 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -240,6 +240,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Add Kerberos support to Kafka input and output. {pull}16781[16781] - Update supported versions of `redis` output. {pull}17198[17198] - Update documentation for system.process.memory fields to include clarification on Windows os's. {pull}17268[17268] +- Add `urldecode` processor for decoding URL-encoded fields. {pull}17505[17505] *Auditbeat* diff --git a/auditbeat/auditbeat.reference.yml b/auditbeat/auditbeat.reference.yml index 7db29b96a2c..b2258d22288 100644 --- a/auditbeat/auditbeat.reference.yml +++ b/auditbeat/auditbeat.reference.yml @@ -378,6 +378,16 @@ auditbeat.modules: # max_bytes: 1024 # fail_on_error: false # ignore_missing: true +# +# The following example URL-decodes the value of field1 to field2 +# +#processors: +#- urldecode: +# fields: +# - from: "field1" +# to: "field2" +# ignore_missing: false +# fail_on_error: true #============================= Elastic Cloud ================================== diff --git a/filebeat/filebeat.reference.yml b/filebeat/filebeat.reference.yml index 6047f09a704..798457b1532 100644 --- a/filebeat/filebeat.reference.yml +++ b/filebeat/filebeat.reference.yml @@ -1084,6 +1084,16 @@ filebeat.inputs: # max_bytes: 1024 # fail_on_error: false # ignore_missing: true +# +# The following example URL-decodes the value of field1 to field2 +# +#processors: +#- urldecode: +# fields: +# - from: "field1" +# to: "field2" +# ignore_missing: false +# fail_on_error: true #============================= Elastic Cloud ================================== diff --git a/filebeat/tests/system/test_processors.py b/filebeat/tests/system/test_processors.py index dae4698a0c9..1dc0b975138 100644 --- a/filebeat/tests/system/test_processors.py +++ b/filebeat/tests/system/test_processors.py @@ 
-302,6 +302,32 @@ def test_decode_csv_fields_all_options(self): ["42", "hello world", "string\twith tabs and \"broken\" quotes"], ]) + def test_urldecode_defaults(self): + """ + Check URL-decoding using defaults + """ + self.render_config_template( + path=os.path.abspath(self.working_dir) + "/test.log", + processors=[{ + "urldecode": { + "fields": [{ + "from": "message", + "to": "decoded" + }] + }, + }] + ) + + self._init_and_read_test_input([ + "correct data\n", + "correct%20data\n", + ]) + + self._assert_expected_lines([ + "correct data", + "correct data", + ], field="decoded") + def test_javascript_processor_add_host_metadata(self): """ Check JS processor with add_host_metadata diff --git a/heartbeat/heartbeat.reference.yml b/heartbeat/heartbeat.reference.yml index ccb155c00b2..9e032e8b326 100644 --- a/heartbeat/heartbeat.reference.yml +++ b/heartbeat/heartbeat.reference.yml @@ -522,6 +522,16 @@ heartbeat.scheduler: # max_bytes: 1024 # fail_on_error: false # ignore_missing: true +# +# The following example URL-decodes the value of field1 to field2 +# +#processors: +#- urldecode: +# fields: +# - from: "field1" +# to: "field2" +# ignore_missing: false +# fail_on_error: true #============================= Elastic Cloud ================================== diff --git a/journalbeat/journalbeat.reference.yml b/journalbeat/journalbeat.reference.yml index efb984f9708..8555b49eecb 100644 --- a/journalbeat/journalbeat.reference.yml +++ b/journalbeat/journalbeat.reference.yml @@ -316,6 +316,16 @@ setup.template.settings: # max_bytes: 1024 # fail_on_error: false # ignore_missing: true +# +# The following example URL-decodes the value of field1 to field2 +# +#processors: +#- urldecode: +# fields: +# - from: "field1" +# to: "field2" +# ignore_missing: false +# fail_on_error: true #============================= Elastic Cloud ================================== diff --git a/libbeat/_meta/config.reference.yml.tmpl b/libbeat/_meta/config.reference.yml.tmpl index 
5f60ae8f5f8..bce27f64055 100644 --- a/libbeat/_meta/config.reference.yml.tmpl +++ b/libbeat/_meta/config.reference.yml.tmpl @@ -259,6 +259,16 @@ # max_bytes: 1024 # fail_on_error: false # ignore_missing: true +# +# The following example URL-decodes the value of field1 to field2 +# +#processors: +#- urldecode: +# fields: +# - from: "field1" +# to: "field2" +# ignore_missing: false +# fail_on_error: true #============================= Elastic Cloud ================================== diff --git a/libbeat/cmd/instance/imports_common.go b/libbeat/cmd/instance/imports_common.go index 5e993f6a2a0..a2b2569d61c 100644 --- a/libbeat/cmd/instance/imports_common.go +++ b/libbeat/cmd/instance/imports_common.go @@ -36,5 +36,6 @@ import ( _ "github.com/elastic/beats/v7/libbeat/processors/fingerprint" _ "github.com/elastic/beats/v7/libbeat/processors/registered_domain" _ "github.com/elastic/beats/v7/libbeat/processors/translate_sid" + _ "github.com/elastic/beats/v7/libbeat/processors/urldecode" _ "github.com/elastic/beats/v7/libbeat/publisher/includes" // Register publisher pipeline modules ) diff --git a/libbeat/docs/processors-list.asciidoc b/libbeat/docs/processors-list.asciidoc index bebffb9d30c..169657aecc4 100644 --- a/libbeat/docs/processors-list.asciidoc +++ b/libbeat/docs/processors-list.asciidoc @@ -101,6 +101,9 @@ endif::[] ifndef::no_translate_sid_processor[] * <> endif::[] +ifndef::no_urldecode_processor[] +* <<urldecode,`urldecode`>> +endif::[] //# end::processors-list[] //# tag::processors-include[] @@ -204,5 +207,8 @@ endif::[] ifndef::no_translate_sid_processor[] include::{libbeat-processors-dir}/translate_sid/docs/translate_sid.asciidoc[] endif::[] +ifndef::no_urldecode_processor[] +include::{libbeat-processors-dir}/urldecode/docs/urldecode.asciidoc[] +endif::[] //# end::processors-include[] diff --git a/libbeat/processors/urldecode/docs/urldecode.asciidoc b/libbeat/processors/urldecode/docs/urldecode.asciidoc new file mode 100644 index 00000000000..6a544749d2c --- /dev/null +++ 
b/libbeat/processors/urldecode/docs/urldecode.asciidoc @@ -0,0 +1,38 @@ +[[urldecode]] +=== URL Decode + +++++ +<titleabbrev>urldecode</titleabbrev> +++++ + +The `urldecode` processor specifies a list of fields to decode from URL-encoded format. Under the `fields` +key, each entry contains a `from: source-field` and a `to: target-field` pair, where: + +* `from` is the source field name +* `to` is the target field name (defaults to the `from` value) + +[source,yaml] +------- +processors: +- urldecode: + fields: + - from: "field1" + to: "field2" + ignore_missing: false + fail_on_error: true +------- + +In the example above: + +- field1 is decoded into field2 + +The `urldecode` processor has the following configuration settings: + +`ignore_missing`:: (Optional) If set to true, no error is logged if a key +that should be URL-decoded is missing. Default is `false`. + +`fail_on_error`:: (Optional) If set to true, in case of an error the URL-decoding +of fields is stopped and the original event is returned. If set to false, decoding +continues even if an error occurs. Default is `true`. + +See <<conditions>> for a list of supported conditions. diff --git a/libbeat/processors/urldecode/urldecode.go b/libbeat/processors/urldecode/urldecode.go new file mode 100644 index 00000000000..6fc9cb8386d --- /dev/null +++ b/libbeat/processors/urldecode/urldecode.go @@ -0,0 +1,130 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package urldecode + +import ( + "fmt" + "net/url" + + "github.com/pkg/errors" + + "github.com/elastic/beats/v7/libbeat/beat" + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" + "github.com/elastic/beats/v7/libbeat/processors" + "github.com/elastic/beats/v7/libbeat/processors/checks" + jsprocessor "github.com/elastic/beats/v7/libbeat/processors/script/javascript/module/processor" +) + +type urlDecode struct { + config urlDecodeConfig + log *logp.Logger +} + +type urlDecodeConfig struct { + Fields []fromTo `config:"fields" validate:"required"` + IgnoreMissing bool `config:"ignore_missing"` + FailOnError bool `config:"fail_on_error"` +} + +type fromTo struct { + From string `config:"from" validate:"required"` + To string `config:"to"` +} + +func init() { + processors.RegisterPlugin("urldecode", + checks.ConfigChecked(New, + checks.RequireFields("fields"), + checks.AllowedFields("fields", "ignore_missing", "fail_on_error"))) + jsprocessor.RegisterPlugin("URLDecode", New) +} + +func New(c *common.Config) (processors.Processor, error) { + config := urlDecodeConfig{ + IgnoreMissing: false, + FailOnError: true, + } + + if err := c.Unpack(&config); err != nil { + return nil, fmt.Errorf("failed to unpack the configuration of urldecode processor: %s", err) + } + + return &urlDecode{ + config: config, + log: logp.NewLogger("urldecode"), + }, nil + +} + +func (p *urlDecode) Run(event *beat.Event) (*beat.Event, error) { + var backup common.MapStr + if p.config.FailOnError { + backup = event.Fields.Clone() + } + + 
for _, field := range p.config.Fields { + err := p.decodeField(field.From, field.To, event) + if err != nil { + errMsg := fmt.Errorf("failed to decode fields in urldecode processor: %v", err) + p.log.Debug(errMsg.Error()) + if p.config.FailOnError { + event.Fields = backup + event.PutValue("error.message", errMsg.Error()) + return event, err + } + } + } + + return event, nil +} + +func (p *urlDecode) decodeField(from string, to string, event *beat.Event) error { + value, err := event.GetValue(from) + if err != nil { + if p.config.IgnoreMissing && errors.Cause(err) == common.ErrKeyNotFound { + return nil + } + return fmt.Errorf("could not fetch value for key: %s, Error: %v", from, err) + } + + encodedString, ok := value.(string) + if !ok { + return fmt.Errorf("invalid type for `from`, expecting a string received %T", value) + } + + decodedData, err := url.QueryUnescape(encodedString) + if err != nil { + return fmt.Errorf("error trying to URL-decode %s: %v", encodedString, err) + } + + target := to + if to == "" { + target = from + } + + if _, err := event.PutValue(target, decodedData); err != nil { + return fmt.Errorf("could not put value: %s: %v, %v", decodedData, target, err) + } + + return nil +} + +func (p *urlDecode) String() string { + return "urldecode=" + fmt.Sprintf("%+v", p.config.Fields) +} diff --git a/libbeat/processors/urldecode/urldecode_test.go b/libbeat/processors/urldecode/urldecode_test.go new file mode 100644 index 00000000000..bf10244965d --- /dev/null +++ b/libbeat/processors/urldecode/urldecode_test.go @@ -0,0 +1,214 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package urldecode + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/elastic/beats/v7/libbeat/beat" + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +func TestURLDecode(t *testing.T) { + var testCases = []struct { + description string + config urlDecodeConfig + Input common.MapStr + Output common.MapStr + error bool + }{ + { + description: "simple field urldecode", + config: urlDecodeConfig{ + Fields: []fromTo{{ + From: "field1", To: "field2", + }}, + IgnoreMissing: false, + FailOnError: true, + }, + Input: common.MapStr{ + "field1": "correct%20data", + }, + Output: common.MapStr{ + "field1": "correct%20data", + "field2": "correct data", + }, + error: false, + }, + { + description: "simple multiple fields urldecode", + config: urlDecodeConfig{ + Fields: []fromTo{ + {From: "field1", To: "field2"}, + {From: "field3", To: "field4"}, + }, + IgnoreMissing: false, + FailOnError: true, + }, + Input: common.MapStr{ + "field1": "correct%20field1", + "field3": "correct%20field3", + }, + Output: common.MapStr{ + "field1": "correct%20field1", + "field2": "correct field1", + "field3": "correct%20field3", + "field4": "correct field3", + }, + error: false, + }, + { + description: "simple field urldecode To empty", + config: urlDecodeConfig{ + Fields: []fromTo{{ + From: "field1", To: "", + }}, + IgnoreMissing: false, + FailOnError: true, + }, + Input: common.MapStr{ + "field1": "correct%20data", + }, + Output: common.MapStr{ + "field1": "correct data", + }, + error: false, + }, + 
{ + description: "simple field urldecode from and to equals", + config: urlDecodeConfig{ + Fields: []fromTo{{ + From: "field1", To: "field1", + }}, + IgnoreMissing: false, + FailOnError: true, + }, + Input: common.MapStr{ + "field1": "correct%20data", + }, + Output: common.MapStr{ + "field1": "correct data", + }, + error: false, + }, + { + description: "simple field bad data - fail on error", + config: urlDecodeConfig{ + Fields: []fromTo{{ + From: "field1", To: "field1", + }}, + IgnoreMissing: false, + FailOnError: true, + }, + Input: common.MapStr{ + "field1": "Hello G%ünter", + }, + Output: common.MapStr{ + "field1": "Hello G%ünter", + "error": common.MapStr{ + "message": "failed to decode fields in urldecode processor: error trying to URL-decode Hello G%ünter: invalid URL escape \"%ü\"", + }, + }, + error: true, + }, + { + description: "simple field bad data fail on error false", + config: urlDecodeConfig{ + Fields: []fromTo{{ + From: "field1", To: "field1", + }}, + IgnoreMissing: false, + FailOnError: false, + }, + Input: common.MapStr{ + "field1": "Hello G%ünter", + }, + Output: common.MapStr{ + "field1": "Hello G%ünter", + }, + error: false, + }, + { + description: "missing field", + config: urlDecodeConfig{ + Fields: []fromTo{{ + From: "field2", To: "field3", + }}, + IgnoreMissing: false, + FailOnError: true, + }, + Input: common.MapStr{ + "field1": "correct%20data", + }, + Output: common.MapStr{ + "field1": "correct%20data", + "error": common.MapStr{ + "message": "failed to decode fields in urldecode processor: could not fetch value for key: field2, Error: key not found", + }, + }, + error: true, + }, + { + description: "missing field ignore", + config: urlDecodeConfig{ + Fields: []fromTo{{ + From: "field2", To: "field3", + }}, + IgnoreMissing: true, + FailOnError: true, + }, + Input: common.MapStr{ + "field1": "correct%20data", + }, + Output: common.MapStr{ + "field1": "correct%20data", + }, + error: false, + }, + } + + for _, test := range testCases { + 
test := test + t.Run(test.description, func(t *testing.T) { + //t.Parallel() + + f := &urlDecode{ + log: logp.NewLogger("urldecode"), + config: test.config, + } + + event := &beat.Event{ + Fields: test.Input, + } + + newEvent, err := f.Run(event) + if !test.error { + assert.Nil(t, err) + } else { + assert.NotNil(t, err) + } + + assert.Equal(t, test.Output, newEvent.Fields) + + }) + } + +} diff --git a/metricbeat/metricbeat.reference.yml b/metricbeat/metricbeat.reference.yml index 0f2954298da..f8eced27245 100644 --- a/metricbeat/metricbeat.reference.yml +++ b/metricbeat/metricbeat.reference.yml @@ -1114,6 +1114,16 @@ metricbeat.modules: # max_bytes: 1024 # fail_on_error: false # ignore_missing: true +# +# The following example URL-decodes the value of field1 to field2 +# +#processors: +#- urldecode: +# fields: +# - from: "field1" +# to: "field2" +# ignore_missing: false +# fail_on_error: true #============================= Elastic Cloud ================================== diff --git a/packetbeat/packetbeat.reference.yml b/packetbeat/packetbeat.reference.yml index 7960abdab60..48581265a02 100644 --- a/packetbeat/packetbeat.reference.yml +++ b/packetbeat/packetbeat.reference.yml @@ -805,6 +805,16 @@ packetbeat.ignore_outgoing: false # max_bytes: 1024 # fail_on_error: false # ignore_missing: true +# +# The following example URL-decodes the value of field1 to field2 +# +#processors: +#- urldecode: +# fields: +# - from: "field1" +# to: "field2" +# ignore_missing: false +# fail_on_error: true #============================= Elastic Cloud ================================== diff --git a/winlogbeat/winlogbeat.reference.yml b/winlogbeat/winlogbeat.reference.yml index c3b115c4272..043655dc907 100644 --- a/winlogbeat/winlogbeat.reference.yml +++ b/winlogbeat/winlogbeat.reference.yml @@ -301,6 +301,16 @@ winlogbeat.event_logs: # max_bytes: 1024 # fail_on_error: false # ignore_missing: true +# +# The following example URL-decodes the value of field1 to field2 +# +#processors: +#- 
urldecode: +# fields: +# - from: "field1" +# to: "field2" +# ignore_missing: false +# fail_on_error: true #============================= Elastic Cloud ================================== diff --git a/x-pack/auditbeat/auditbeat.reference.yml b/x-pack/auditbeat/auditbeat.reference.yml index a64b66f4767..fb635aa636e 100644 --- a/x-pack/auditbeat/auditbeat.reference.yml +++ b/x-pack/auditbeat/auditbeat.reference.yml @@ -434,6 +434,16 @@ auditbeat.modules: # max_bytes: 1024 # fail_on_error: false # ignore_missing: true +# +# The following example URL-decodes the value of field1 to field2 +# +#processors: +#- urldecode: +# fields: +# - from: "field1" +# to: "field2" +# ignore_missing: false +# fail_on_error: true #============================= Elastic Cloud ================================== diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml index 21397e876d1..e6d8f8ca773 100644 --- a/x-pack/filebeat/filebeat.reference.yml +++ b/x-pack/filebeat/filebeat.reference.yml @@ -1775,6 +1775,16 @@ filebeat.inputs: # max_bytes: 1024 # fail_on_error: false # ignore_missing: true +# +# The following example URL-decodes the value of field1 to field2 +# +#processors: +#- urldecode: +# fields: +# - from: "field1" +# to: "field2" +# ignore_missing: false +# fail_on_error: true #============================= Elastic Cloud ================================== diff --git a/x-pack/functionbeat/functionbeat.reference.yml b/x-pack/functionbeat/functionbeat.reference.yml index 1424fa868aa..0ee90e6742b 100644 --- a/x-pack/functionbeat/functionbeat.reference.yml +++ b/x-pack/functionbeat/functionbeat.reference.yml @@ -644,6 +644,16 @@ functionbeat.provider.gcp.functions: # max_bytes: 1024 # fail_on_error: false # ignore_missing: true +# +# The following example URL-decodes the value of field1 to field2 +# +#processors: +#- urldecode: +# fields: +# - from: "field1" +# to: "field2" +# ignore_missing: false +# fail_on_error: true 
#============================= Elastic Cloud ================================== diff --git a/x-pack/metricbeat/metricbeat.reference.yml b/x-pack/metricbeat/metricbeat.reference.yml index 25ad9a2af29..2f35699f5ab 100644 --- a/x-pack/metricbeat/metricbeat.reference.yml +++ b/x-pack/metricbeat/metricbeat.reference.yml @@ -1523,6 +1523,16 @@ metricbeat.modules: # max_bytes: 1024 # fail_on_error: false # ignore_missing: true +# +# The following example URL-decodes the value of field1 to field2 +# +#processors: +#- urldecode: +# fields: +# - from: "field1" +# to: "field2" +# ignore_missing: false +# fail_on_error: true #============================= Elastic Cloud ================================== diff --git a/x-pack/winlogbeat/winlogbeat.reference.yml b/x-pack/winlogbeat/winlogbeat.reference.yml index 437a7aaa676..dffdab31f66 100644 --- a/x-pack/winlogbeat/winlogbeat.reference.yml +++ b/x-pack/winlogbeat/winlogbeat.reference.yml @@ -304,6 +304,16 @@ winlogbeat.event_logs: # max_bytes: 1024 # fail_on_error: false # ignore_missing: true +# +# The following example URL-decodes the value of field1 to field2 +# +#processors: +#- urldecode: +# fields: +# - from: "field1" +# to: "field2" +# ignore_missing: false +# fail_on_error: true #============================= Elastic Cloud ==================================