From dc98361e356434461f920e82964c57deb43667c9 Mon Sep 17 00:00:00 2001 From: Lee Hinman <57081003+leehinman@users.noreply.github.com> Date: Tue, 25 Feb 2020 08:39:14 -0600 Subject: [PATCH] Improve ECS field mapping in haproxy module (#16529) - event.category - event.kind - event.outcome - event.type - related.ip - switch haproxy pipeline to yaml Closes #16162 --- CHANGELOG.next.asciidoc | 1 + .../module/haproxy/log/ingest/pipeline.json | 146 ------------------ .../module/haproxy/log/ingest/pipeline.yml | 138 +++++++++++++++++ filebeat/module/haproxy/log/manifest.yml | 2 +- .../log/test/default.log-expected.json | 12 ++ .../log/test/haproxy.log-expected.json | 8 + .../test/httplog-no-headers.log-expected.json | 24 +++ .../haproxy/log/test/tcplog.log-expected.json | 4 + 8 files changed, 188 insertions(+), 147 deletions(-) delete mode 100644 filebeat/module/haproxy/log/ingest/pipeline.json create mode 100644 filebeat/module/haproxy/log/ingest/pipeline.yml diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index ef7c8514f3e..c5b6f756e96 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -136,6 +136,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Improve ECS categorization, container & process field mappings in auditd module. {issue}16153[16153] {pull}16280[16280] - Improve ECS field mappings in aws module. {issue}16154[16154] {pull}16307[16307] - Improve ECS categorization field mappings in googlecloud module. {issue}16030[16030] {pull}16500[16500] +- Improve ECS field mappings in haproxy module. {issue}16162[16162] {pull}16529[16529] *Heartbeat* diff --git a/filebeat/module/haproxy/log/ingest/pipeline.json b/filebeat/module/haproxy/log/ingest/pipeline.json deleted file mode 100644 index 3f215a9a13a..00000000000 --- a/filebeat/module/haproxy/log/ingest/pipeline.json +++ /dev/null @@ -1,146 +0,0 @@ -{ - "description": "Pipeline for parsing HAProxy http, tcp and default logs. 
Requires the geoip plugin.", - "processors": [ - { - "grok": { - "field": "message", - "patterns": [ - "%{HAPROXY_DATE:haproxy.request_date} %{IPORHOST:haproxy.source} %{PROG:process.name}(?:\\[%{POSINT:process.pid:long}\\])?: %{GREEDYDATA} %{IPORHOST:source.address}:%{POSINT:source.port:long} %{WORD} %{IPORHOST:destination.ip}:%{POSINT:destination.port:long} \\(%{WORD:haproxy.frontend_name}/%{WORD:haproxy.mode}\\)", - - "(%{NOTSPACE:process.name}\\[%{NUMBER:process.pid:long}\\]: )?%{IP:source.address}:%{NUMBER:source.port:long} \\[%{NOTSPACE:haproxy.request_date}\\] %{NOTSPACE:haproxy.frontend_name} %{NOTSPACE:haproxy.backend_name}/%{NOTSPACE:haproxy.server_name} %{NUMBER:haproxy.http.request.time_wait_ms:long}/%{NUMBER:haproxy.total_waiting_time_ms:long}/%{NUMBER:haproxy.connection_wait_time_ms:long}/%{NUMBER:haproxy.http.request.time_wait_without_data_ms:long}/%{NUMBER:temp.duration:long} %{NUMBER:http.response.status_code:long} %{NUMBER:haproxy.bytes_read:long} %{NOTSPACE:haproxy.http.request.captured_cookie} %{NOTSPACE:haproxy.http.response.captured_cookie} %{NOTSPACE:haproxy.termination_state} %{NUMBER:haproxy.connections.active:long}/%{NUMBER:haproxy.connections.frontend:long}/%{NUMBER:haproxy.connections.backend:long}/%{NUMBER:haproxy.connections.server:long}/%{NUMBER:haproxy.connections.retries:long} %{NUMBER:haproxy.server_queue:long}/%{NUMBER:haproxy.backend_queue:long} (\\{%{DATA:haproxy.http.request.captured_headers}\\} \\{%{DATA:haproxy.http.response.captured_headers}\\} |\\{%{DATA}\\} )?\"%{GREEDYDATA:haproxy.http.request.raw_request_line}\"", - - "(%{NOTSPACE:process.name}\\[%{NUMBER:process.pid:long}\\]: )?%{IP:source.address}:%{NUMBER:source.port:long} \\[%{NOTSPACE:haproxy.request_date}\\] %{NOTSPACE:haproxy.frontend_name}/%{NOTSPACE:haproxy.bind_name} %{GREEDYDATA:haproxy.error_message}", - - "%{HAPROXY_DATE} %{IPORHOST:haproxy.source} (%{NOTSPACE:process.name}\\[%{NUMBER:process.pid:long}\\]: )?%{IP:source.address}:%{NUMBER:source.port:long} 
\\[%{NOTSPACE:haproxy.request_date}\\] %{NOTSPACE:haproxy.frontend_name} %{NOTSPACE:haproxy.backend_name}/%{NOTSPACE:haproxy.server_name} %{NUMBER:haproxy.total_waiting_time_ms:long}/%{NUMBER:haproxy.connection_wait_time_ms:long}/%{NUMBER:temp.duration:long} %{NUMBER:haproxy.bytes_read:long} %{NOTSPACE:haproxy.termination_state} %{NUMBER:haproxy.connections.active:long}/%{NUMBER:haproxy.connections.frontend:long}/%{NUMBER:haproxy.connections.backend:long}/%{NUMBER:haproxy.connections.server:long}/%{NUMBER:haproxy.connections.retries:long} %{NUMBER:haproxy.server_queue:long}/%{NUMBER:haproxy.backend_queue:long}" - ], - "ignore_missing": false, - "pattern_definitions": { - "HAPROXY_DATE": "(%{MONTHDAY}[/-]%{MONTH}[/-]%{YEAR}:%{HOUR}:%{MINUTE}:%{SECOND})|%{SYSLOGTIMESTAMP}" - } - } - }, - { - "date": { - "if" : "ctx.event.timezone == null", - "field": "haproxy.request_date", - "target_field": "@timestamp", - "formats": [ - "dd/MMM/yyyy:HH:mm:ss.SSS", - "MMM dd HH:mm:ss" - ], - "on_failure": [{"append": {"field": "error.message", "value": "{{ _ingest.on_failure_message }}"}}] - } - }, - { - "date": { - "if" : "ctx.event.timezone != null", - "field": "haproxy.request_date", - "target_field": "@timestamp", - "formats": [ - "dd/MMM/yyyy:HH:mm:ss.SSS", - "MMM dd HH:mm:ss" - ], - "timezone" : "{{ event.timezone }}", - "on_failure": [{"append": {"field": "error.message", "value": "{{ _ingest.on_failure_message }}"}}] - } - }, - { - "remove": { - "field": "haproxy.request_date" - } - }, - { - "remove": { - "field": "message" - } - }, - { - "grok": { - "field": "source.address", - "ignore_failure": true, - "patterns": [ - "^%{IP:source.ip}$" - ] - } - }, - { - "geoip": { - "field": "source.ip", - "target_field": "source.geo", - "ignore_missing": true - } - }, - { - "geoip": { - "database_file": "GeoLite2-ASN.mmdb", - "field": "source.ip", - "target_field": "source.as", - "properties": [ - "asn", - "organization_name" - ], - "ignore_missing": true - } - }, - { - "rename": { - 
"field": "source.as.asn", - "target_field": "source.as.number", - "ignore_missing": true - } - }, - { - "rename": { - "field": "source.as.organization_name", - "target_field": "source.as.organization.name", - "ignore_missing": true - } - }, - { - "split": { - "field": "haproxy.http.request.captured_headers", - "separator": "\\|", - "ignore_failure": true - } - }, - { - "split": { - "field": "haproxy.http.response.captured_headers", - "separator": "\\|", - "ignore_failure": true - } - }, - - { - "script": { - "lang": "painless", - "source": "ctx.event.duration = Math.round(ctx.temp.duration * params.scale)", - "params": { "scale": 1000000 }, - "if": "ctx.temp?.duration != null" - } - }, - { - "remove": { - "field": "temp.duration", - "ignore_missing": true - } - }, - - { - "convert": { - "field": "haproxy.bytes_read", - "target_field": "http.response.bytes", - "type": "long", - "if": "ctx.containsKey('http')" - } - } - ], - "on_failure": [ - { - "set": { - "field": "error.message", - "value": "{{ _ingest.on_failure_message }}" - } - } - ] -} diff --git a/filebeat/module/haproxy/log/ingest/pipeline.yml b/filebeat/module/haproxy/log/ingest/pipeline.yml new file mode 100644 index 00000000000..fdcfc828701 --- /dev/null +++ b/filebeat/module/haproxy/log/ingest/pipeline.yml @@ -0,0 +1,138 @@ +description: Pipeline for parsing HAProxy http, tcp and default logs. Requires the + geoip plugin. 
+processors: +- grok: + field: message + patterns: + - '%{HAPROXY_DATE:haproxy.request_date} %{IPORHOST:haproxy.source} %{PROG:process.name}(?:\[%{POSINT:process.pid:long}\])?: + %{GREEDYDATA} %{IPORHOST:source.address}:%{POSINT:source.port:long} %{WORD} + %{IPORHOST:destination.ip}:%{POSINT:destination.port:long} \(%{WORD:haproxy.frontend_name}/%{WORD:haproxy.mode}\)' + - '(%{NOTSPACE:process.name}\[%{NUMBER:process.pid:long}\]: )?%{IP:source.address}:%{NUMBER:source.port:long} + \[%{NOTSPACE:haproxy.request_date}\] %{NOTSPACE:haproxy.frontend_name} %{NOTSPACE:haproxy.backend_name}/%{NOTSPACE:haproxy.server_name} + %{NUMBER:haproxy.http.request.time_wait_ms:long}/%{NUMBER:haproxy.total_waiting_time_ms:long}/%{NUMBER:haproxy.connection_wait_time_ms:long}/%{NUMBER:haproxy.http.request.time_wait_without_data_ms:long}/%{NUMBER:temp.duration:long} + %{NUMBER:http.response.status_code:long} %{NUMBER:haproxy.bytes_read:long} %{NOTSPACE:haproxy.http.request.captured_cookie} + %{NOTSPACE:haproxy.http.response.captured_cookie} %{NOTSPACE:haproxy.termination_state} + %{NUMBER:haproxy.connections.active:long}/%{NUMBER:haproxy.connections.frontend:long}/%{NUMBER:haproxy.connections.backend:long}/%{NUMBER:haproxy.connections.server:long}/%{NUMBER:haproxy.connections.retries:long} + %{NUMBER:haproxy.server_queue:long}/%{NUMBER:haproxy.backend_queue:long} (\{%{DATA:haproxy.http.request.captured_headers}\} + \{%{DATA:haproxy.http.response.captured_headers}\} |\{%{DATA}\} )?"%{GREEDYDATA:haproxy.http.request.raw_request_line}"' + - '(%{NOTSPACE:process.name}\[%{NUMBER:process.pid:long}\]: )?%{IP:source.address}:%{NUMBER:source.port:long} + \[%{NOTSPACE:haproxy.request_date}\] %{NOTSPACE:haproxy.frontend_name}/%{NOTSPACE:haproxy.bind_name} + %{GREEDYDATA:haproxy.error_message}' + - '%{HAPROXY_DATE} %{IPORHOST:haproxy.source} (%{NOTSPACE:process.name}\[%{NUMBER:process.pid:long}\]: + )?%{IP:source.address}:%{NUMBER:source.port:long} \[%{NOTSPACE:haproxy.request_date}\] + 
%{NOTSPACE:haproxy.frontend_name} %{NOTSPACE:haproxy.backend_name}/%{NOTSPACE:haproxy.server_name} + %{NUMBER:haproxy.total_waiting_time_ms:long}/%{NUMBER:haproxy.connection_wait_time_ms:long}/%{NUMBER:temp.duration:long} + %{NUMBER:haproxy.bytes_read:long} %{NOTSPACE:haproxy.termination_state} %{NUMBER:haproxy.connections.active:long}/%{NUMBER:haproxy.connections.frontend:long}/%{NUMBER:haproxy.connections.backend:long}/%{NUMBER:haproxy.connections.server:long}/%{NUMBER:haproxy.connections.retries:long} + %{NUMBER:haproxy.server_queue:long}/%{NUMBER:haproxy.backend_queue:long}' + ignore_missing: false + pattern_definitions: + HAPROXY_DATE: (%{MONTHDAY}[/-]%{MONTH}[/-]%{YEAR}:%{HOUR}:%{MINUTE}:%{SECOND})|%{SYSLOGTIMESTAMP} +- date: + if: ctx.event.timezone == null + field: haproxy.request_date + target_field: '@timestamp' + formats: + - dd/MMM/yyyy:HH:mm:ss.SSS + - MMM dd HH:mm:ss + on_failure: + - append: + field: error.message + value: '{{ _ingest.on_failure_message }}' +- date: + if: ctx.event.timezone != null + field: haproxy.request_date + target_field: '@timestamp' + formats: + - dd/MMM/yyyy:HH:mm:ss.SSS + - MMM dd HH:mm:ss + timezone: '{{ event.timezone }}' + on_failure: + - append: + field: error.message + value: '{{ _ingest.on_failure_message }}' +- remove: + field: haproxy.request_date +- remove: + field: message +- grok: + field: source.address + ignore_failure: true + patterns: + - ^%{IP:source.ip}$ +- geoip: + field: source.ip + target_field: source.geo + ignore_missing: true +- geoip: + database_file: GeoLite2-ASN.mmdb + field: source.ip + target_field: source.as + properties: + - asn + - organization_name + ignore_missing: true +- rename: + field: source.as.asn + target_field: source.as.number + ignore_missing: true +- rename: + field: source.as.organization_name + target_field: source.as.organization.name + ignore_missing: true +- split: + field: haproxy.http.request.captured_headers + separator: \| + ignore_failure: true +- split: + field: 
haproxy.http.response.captured_headers + separator: \| + ignore_failure: true +- script: + lang: painless + source: ctx.event.duration = Math.round(ctx.temp.duration * params.scale) + params: + scale: 1000000 + if: ctx.temp?.duration != null +- remove: + field: temp.duration + ignore_missing: true +- convert: + field: haproxy.bytes_read + target_field: http.response.bytes + type: long + if: ctx.containsKey('http') +- append: + field: related.ip + value: "{{source.ip}}" + if: "ctx?.source?.ip != null" +- append: + field: related.ip + value: "{{destination.ip}}" + if: "ctx?.destination?.ip != null" +- set: + field: event.kind + value: event +- append: + field: event.category + value: web + if: "ctx?.haproxy?.mode == 'HTTP' || ctx?.haproxy?.http != null" +- append: + field: event.category + value: network + if: "ctx?.source?.ip != null && ctx?.destination?.ip != null" +- append: + field: event.type + value: connection + if: "ctx?.source?.ip != null && ctx?.destination?.ip != null" +- set: + field: event.outcome + value: success + if: "ctx?.http?.response?.status_code != null && ctx.http.response.status_code < 400" +- set: + field: event.outcome + value: failure + if: "ctx?.http?.response?.status_code != null && ctx.http.response.status_code >= 400" +on_failure: +- set: + field: error.message + value: '{{ _ingest.on_failure_message }}' diff --git a/filebeat/module/haproxy/log/manifest.yml b/filebeat/module/haproxy/log/manifest.yml index 399d029bd64..81bf46f9c49 100644 --- a/filebeat/module/haproxy/log/manifest.yml +++ b/filebeat/module/haproxy/log/manifest.yml @@ -11,5 +11,5 @@ var: - name: input default: syslog -ingest_pipeline: ingest/pipeline.json +ingest_pipeline: ingest/pipeline.yml input: config/{{.input}}.yml diff --git a/filebeat/module/haproxy/log/test/default.log-expected.json b/filebeat/module/haproxy/log/test/default.log-expected.json index a5f890bf5ad..f58515d6172 100644 --- a/filebeat/module/haproxy/log/test/default.log-expected.json +++ 
b/filebeat/module/haproxy/log/test/default.log-expected.json @@ -2,9 +2,17 @@ { "destination.ip": "1.2.3.4", "destination.port": 5000, + "event.category": [ + "web", + "network" + ], "event.dataset": "haproxy.log", + "event.kind": "event", "event.module": "haproxy", "event.timezone": "-02:00", + "event.type": [ + "connection" + ], "fileset.name": "log", "haproxy.frontend_name": "main", "haproxy.mode": "HTTP", @@ -13,6 +21,10 @@ "log.offset": 0, "process.name": "haproxy", "process.pid": 24551, + "related.ip": [ + "1.2.3.4", + "1.2.3.4" + ], "service.type": "haproxy", "source.address": "1.2.3.4", "source.geo.city_name": "Moscow", diff --git a/filebeat/module/haproxy/log/test/haproxy.log-expected.json b/filebeat/module/haproxy/log/test/haproxy.log-expected.json index a1385fcffe8..474b7a5e5d3 100644 --- a/filebeat/module/haproxy/log/test/haproxy.log-expected.json +++ b/filebeat/module/haproxy/log/test/haproxy.log-expected.json @@ -1,8 +1,13 @@ [ { + "event.category": [ + "web" + ], "event.dataset": "haproxy.log", "event.duration": 2000000, + "event.kind": "event", "event.module": "haproxy", + "event.outcome": "success", "event.timezone": "-02:00", "fileset.name": "log", "haproxy.backend_name": "docs_microservice", @@ -34,6 +39,9 @@ "log.offset": 0, "process.name": "haproxy", "process.pid": 32450, + "related.ip": [ + "1.2.3.4" + ], "service.type": "haproxy", "source.address": "1.2.3.4", "source.geo.city_name": "Moscow", diff --git a/filebeat/module/haproxy/log/test/httplog-no-headers.log-expected.json b/filebeat/module/haproxy/log/test/httplog-no-headers.log-expected.json index 7e2fb0b502c..560c271c94b 100644 --- a/filebeat/module/haproxy/log/test/httplog-no-headers.log-expected.json +++ b/filebeat/module/haproxy/log/test/httplog-no-headers.log-expected.json @@ -1,8 +1,13 @@ [ { + "event.category": [ + "web" + ], "event.dataset": "haproxy.log", "event.duration": 0, + "event.kind": "event", "event.module": "haproxy", + "event.outcome": "failure", "event.timezone": 
"-02:00", "fileset.name": "log", "haproxy.backend_name": "http-webservices", @@ -30,15 +35,23 @@ "log.offset": 0, "process.name": "haproxy", "process.pid": 19312, + "related.ip": [ + "127.0.0.1" + ], "service.type": "haproxy", "source.address": "127.0.0.1", "source.ip": "127.0.0.1", "source.port": 35982 }, { + "event.category": [ + "web" + ], "event.dataset": "haproxy.log", "event.duration": 0, + "event.kind": "event", "event.module": "haproxy", + "event.outcome": "failure", "event.timezone": "-02:00", "fileset.name": "log", "haproxy.backend_name": "http-webservices", @@ -66,15 +79,23 @@ "log.offset": 186, "process.name": "haproxy", "process.pid": 29785, + "related.ip": [ + "127.0.0.1" + ], "service.type": "haproxy", "source.address": "127.0.0.1", "source.ip": "127.0.0.1", "source.port": 43738 }, { + "event.category": [ + "web" + ], "event.dataset": "haproxy.log", "event.duration": 0, + "event.kind": "event", "event.module": "haproxy", + "event.outcome": "failure", "event.timezone": "-02:00", "fileset.name": "log", "haproxy.backend_name": "http-webservices", @@ -106,6 +127,9 @@ "log.offset": 394, "process.name": "haproxy", "process.pid": 7873, + "related.ip": [ + "127.0.0.1" + ], "service.type": "haproxy", "source.address": "127.0.0.1", "source.ip": "127.0.0.1", diff --git a/filebeat/module/haproxy/log/test/tcplog.log-expected.json b/filebeat/module/haproxy/log/test/tcplog.log-expected.json index 2b019bb9308..fc5395e5d38 100644 --- a/filebeat/module/haproxy/log/test/tcplog.log-expected.json +++ b/filebeat/module/haproxy/log/test/tcplog.log-expected.json @@ -2,6 +2,7 @@ { "event.dataset": "haproxy.log", "event.duration": 1000000, + "event.kind": "event", "event.module": "haproxy", "event.timezone": "-02:00", "fileset.name": "log", @@ -24,6 +25,9 @@ "log.offset": 0, "process.name": "haproxy", "process.pid": 25457, + "related.ip": [ + "127.0.0.1" + ], "service.type": "haproxy", "source.address": "127.0.0.1", "source.ip": "127.0.0.1",