Update suricata integration with wildcard fields

elastic · Jan 19, 2021 · commit c0b5d00 (1 parent: 20ea344)
Show file tree

Hide file tree

Showing 4 changed files with 37 additions and 91 deletions.
diff --git a/packages/suricata/data_stream/eve/fields/agent.yml b/packages/suricata/data_stream/eve/fields/agent.yml
@@ -2,16 +2,14 @@
   title: Cloud
   group: 2
   description: Fields related to the cloud or infrastructure the events are coming from.
-  footnote: 'Examples: If Metricbeat is running on an EC2 host and fetches data from its host, the cloud info contains the data about this machine. If Metricbeat runs on a remote machine outside the cloud and fetches data from a service running in the cloud, the field contains cloud data from the machine the service is running on.'
+  footnote: "Examples: If Metricbeat is running on an EC2 host and fetches data from its host, the cloud info contains the data about this machine. If Metricbeat runs on a remote machine outside the cloud and fetches data from a service running in the cloud, the field contains cloud data from the machine the service is running on."
   type: group
   fields:
     - name: account.id
       level: extended
       type: keyword
       ignore_above: 1024
-      description: 'The cloud account or organization id used to identify different entities in a multi-tenant environment.
-
-        Examples: AWS account id, Google Cloud ORG Id, or other unique identifier.'
+      description: "The cloud account or organization id used to identify different entities in a multi-tenant environment.\nExamples: AWS account id, Google Cloud ORG Id, or other unique identifier."
       example: 666777888999
     - name: availability_zone
       level: extended
@@ -57,9 +55,7 @@
 - name: container
   title: Container
   group: 2
-  description: 'Container fields are used for meta information about the specific container that is the source of information.
-
-    These fields help correlate data based containers from any runtime.'
+  description: "Container fields are used for meta information about the specific container that is the source of information.\nThese fields help correlate data based containers from any runtime."
   type: group
   fields:
     - name: id
@@ -85,9 +81,7 @@
 - name: host
   title: Host
   group: 2
-  description: 'A host is defined as a general computing instance.
-
-    ECS host.* fields should be populated with details about the host on which the event happened, or from which the measurement was taken. Host types include hardware, virtual machines, Docker containers, and Kubernetes nodes.'
+  description: "A host is defined as a general computing instance.\nECS host.* fields should be populated with details about the host on which the event happened, or from which the measurement was taken. Host types include hardware, virtual machines, Docker containers, and Kubernetes nodes."
   type: group
   fields:
     - name: architecture
@@ -100,27 +94,19 @@
       level: extended
       type: keyword
       ignore_above: 1024
-      description: 'Name of the domain of which the host is a member.
-
-        For example, on Windows this could be the host''s Active Directory domain or NetBIOS domain name. For Linux this could be the domain of the host''s LDAP provider.'
+      description: "Name of the domain of which the host is a member.\nFor example, on Windows this could be the host's Active Directory domain or NetBIOS domain name. For Linux this could be the domain of the host's LDAP provider."
       example: CONTOSO
       default_field: false
     - name: hostname
       level: core
-      type: keyword
+      type: wildcard
       ignore_above: 1024
-      description: 'Hostname of the host.
-
-        It normally contains what the `hostname` command returns on the host machine.'
+      description: "Hostname of the host.\nIt normally contains what the `hostname` command returns on the host machine."
     - name: id
       level: core
       type: keyword
       ignore_above: 1024
-      description: 'Unique host id.
-
-        As hostname is not always unique, use values that are meaningful in your environment.
-
-        Example: The current usage of `beat.name`.'
+      description: "Unique host id.\nAs hostname is not always unique, use values that are meaningful in your environment.\nExample: The current usage of `beat.name`."
     - name: ip
       level: core
       type: ip
@@ -134,9 +120,7 @@
       level: core
       type: keyword
       ignore_above: 1024
-      description: 'Name of the host.
-
-        It can contain what `hostname` returns on Unix systems, the fully qualified domain name, or a name specified by the user. The sender decides which value to use.'
+      description: "Name of the host.\nIt can contain what `hostname` returns on Unix systems, the fully qualified domain name, or a name specified by the user. The sender decides which value to use."
     - name: os.family
       level: extended
       type: keyword
@@ -151,7 +135,7 @@
       example: 4.4.0-112-generic
     - name: os.name
       level: extended
-      type: keyword
+      type: wildcard
       ignore_above: 1024
       multi_fields:
         - name: text
@@ -176,9 +160,7 @@
       level: core
       type: keyword
       ignore_above: 1024
-      description: 'Type of host.
-
-        For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment.'
+      description: "Type of host.\nFor Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment."
     - name: containerized
       type: boolean
       description: >

diff --git a/packages/suricata/data_stream/eve/fields/fields-epr.yml b/packages/suricata/data_stream/eve/fields/fields-epr.yml
@@ -1,56 +1,36 @@
 - name: event
   title: Event
   group: 2
-  description: 'The event fields are used for context information about the log or metric event itself.
-
-    A log is defined as an event containing details of something that happened. Log events must include the time at which the thing happened. Examples of log events include a process starting on a host, a network packet being sent from a source to a destination, or a network connection between a client and a server being initiated or closed. A metric is defined as an event containing one or more numerical measurements and the time at which the measurement was taken. Examples of metric events include memory pressure measured on a host and device temperature. See the `event.kind` definition in this section for additional details about metric and state events.'
+  description: "The event fields are used for context information about the log or metric event itself.\nA log is defined as an event containing details of something that happened. Log events must include the time at which the thing happened. Examples of log events include a process starting on a host, a network packet being sent from a source to a destination, or a network connection between a client and a server being initiated or closed. A metric is defined as an event containing one or more numerical measurements and the time at which the measurement was taken. Examples of metric events include memory pressure measured on a host and device temperature. See the `event.kind` definition in this section for additional details about metric and state events."
   type: group
   fields:
     - name: created
       level: core
       type: date
-      description: 'event.created contains the date/time when the event was first read by an agent, or by your pipeline.
-
-        This field is distinct from @timestamp in that @timestamp typically contain the time extracted from the original event.
-
-        In most situations, these two timestamps will be slightly different. The difference can be used to calculate the delay between your source generating an event, and the time when your agent first processed it. This can be used to monitor your agent''s or pipeline''s ability to keep up with your event source.
-
-        In case the two timestamps are identical, @timestamp should be used.'
-      example: '2016-05-23T08:05:34.857Z'
+      description: "event.created contains the date/time when the event was first read by an agent, or by your pipeline.\nThis field is distinct from @timestamp in that @timestamp typically contain the time extracted from the original event.\nIn most situations, these two timestamps will be slightly different. The difference can be used to calculate the delay between your source generating an event, and the time when your agent first processed it. This can be used to monitor your agent's or pipeline's ability to keep up with your event source.\nIn case the two timestamps are identical, @timestamp should be used."
+      example: "2016-05-23T08:05:34.857Z"
     - name: ingested
       level: core
       type: date
-      description: 'Timestamp when an event arrived in the central data store.
-
-        This is different from `@timestamp`, which is when the event originally occurred.  It''s also different from `event.created`, which is meant to capture the first time an agent saw the event.
-
-        In normal conditions, assuming no tampering, the timestamps should chronologically look like this: `@timestamp` < `event.created` < `event.ingested`.'
-      example: '2016-05-23T08:05:35.101Z'
+      description: "Timestamp when an event arrived in the central data store.\nThis is different from `@timestamp`, which is when the event originally occurred.  It's also different from `event.created`, which is meant to capture the first time an agent saw the event.\nIn normal conditions, assuming no tampering, the timestamps should chronologically look like this: `@timestamp` < `event.created` < `event.ingested`."
+      example: "2016-05-23T08:05:35.101Z"
     - name: original
       level: core
       type: keyword
       ignore_above: 1024
-      description: 'Raw text message of entire event. Used to demonstrate log integrity.
-
-        This field is not indexed and doc_values are disabled. It cannot be searched, but it can be retrieved from `_source`.'
+      description: "Raw text message of entire event. Used to demonstrate log integrity.\nThis field is not indexed and doc_values are disabled. It cannot be searched, but it can be retrieved from `_source`."
       example: Sep 19 08:26:10 host CEF:0&#124;Security&#124; threatmanager&#124;1.0&#124;100&#124; worm successfully stopped&#124;10&#124;src=10.0.0.1 dst=2.1.2.2spt=1232
 - name: dns
   title: DNS
   group: 2
-  description: 'Fields describing DNS queries and answers.
-
-    DNS events should either represent a single DNS query prior to getting answers (`dns.type:query`) or they should represent a full exchange and contain the query details as well as all of the answers that were provided for this query (`dns.type:answer`).'
+  description: "Fields describing DNS queries and answers.\nDNS events should either represent a single DNS query prior to getting answers (`dns.type:query`) or they should represent a full exchange and contain the query details as well as all of the answers that were provided for this query (`dns.type:answer`)."
   type: group
   fields:
     - name: answers
       level: extended
       type: object
       object_type: keyword
-      description: 'An array containing an object for each answer section returned by the server.
-
-        The main keys that should be present in these objects are defined by ECS. Records that have more information may contain more keys than what ECS defines.
-
-        Not all DNS data sources give all details about DNS answers. At minimum, answer objects must contain the `data` key. If more information is available, map as much of it to ECS as possible, and add any additional fields to the answer objects as custom fields.'
+      description: "An array containing an object for each answer section returned by the server.\nThe main keys that should be present in these objects are defined by ECS. Records that have more information may contain more keys than what ECS defines.\nNot all DNS data sources give all details about DNS answers. At minimum, answer objects must contain the `data` key. If more information is available, map as much of it to ECS as possible, and add any additional fields to the answer objects as custom fields."
     - name: answers.class
       level: extended
       type: keyword
@@ -59,19 +39,15 @@
       example: IN
     - name: answers.data
       level: extended
-      type: keyword
+      type: wildcard
       ignore_above: 1024
-      description: 'The data describing the resource.
-
-        The meaning of this data depends on the type and class of the resource record.'
+      description: "The data describing the resource.\nThe meaning of this data depends on the type and class of the resource record."
       example: 10.10.10.10
     - name: answers.name
       level: extended
       type: keyword
       ignore_above: 1024
-      description: 'The domain name to which this resource record pertains.
-
-        If a chain of CNAME is being resolved, each answer''s `name` should be the one that corresponds with the answer''s `data`. It should not simply be the original `question.name` repeated.'
+      description: "The domain name to which this resource record pertains.\nIf a chain of CNAME is being resolved, each answer's `name` should be the one that corresponds with the answer's `data`. It should not simply be the original `question.name` repeated."
       example: www.google.com
     - name: answers.ttl
       level: extended
@@ -88,9 +64,7 @@
       level: extended
       type: keyword
       ignore_above: 1024
-      description: 'Array of 2 letter DNS header flags.
-
-        Expected values are: AA, TC, RD, RA, AD, CD, DO.'
+      description: "Array of 2 letter DNS header flags.\nExpected values are: AA, TC, RD, RA, AD, CD, DO."
       example:
         - RD
         - RA
@@ -114,7 +88,7 @@
       example: IN
     - name: question.name
       level: extended
-      type: keyword
+      type: wildcard
       ignore_above: 1024
       description: 'The name being queried.
 
@@ -155,9 +129,7 @@
     - name: resolved_ip
       level: extended
       type: ip
-      description: 'Array containing all IPs seen in `answers.data`.
-
-        The `answers` array can be difficult to use, because of the variety of data formats it can contain. Extracting all IP addresses seen in there to `dns.resolved_ip` makes it possible to index them as IP addresses, and makes them easier to visualize and query for.'
+      description: "Array containing all IPs seen in `answers.data`.\nThe `answers` array can be difficult to use, because of the variety of data formats it can contain. Extracting all IP addresses seen in there to `dns.resolved_ip` makes it possible to index them as IP addresses, and makes them easier to visualize and query for."
       example:
         - 10.10.10.10
         - 10.10.10.11
@@ -171,20 +143,12 @@
       level: extended
       type: keyword
       ignore_above: 1024
-      description: 'The type of DNS event captured, query or answer.
-
-        If your source of DNS events only gives you DNS queries, you should only create dns events of type `dns.type:query`.
-
-        If your source of DNS events gives you answers as well, you should create one event per query (optionally as soon as the query is seen). And a second event containing all query details as well as an array of answers.'
+      description: "The type of DNS event captured, query or answer.\nIf your source of DNS events only gives you DNS queries, you should only create dns events of type `dns.type:query`.\nIf your source of DNS events gives you answers as well, you should create one event per query (optionally as soon as the query is seen). And a second event containing all query details as well as an array of answers."
       example: answer
 - name: related
   title: Related
   group: 2
-  description: 'This field set is meant to facilitate pivoting around a piece of data.
-
-    Some pieces of information can be seen in many places in an ECS event. To facilitate searching for them, store an array of all seen values to their corresponding field in `related.`.
-
-    A concrete example is IP addresses, which can be under host, observer, source, destination, client, server, and network.forwarded_ip. If you append all IPs to `related.ip`, you can then search for a given IP trivially, no matter where it appeared, by querying `related.ip:192.0.2.15`.'
+  description: "This field set is meant to facilitate pivoting around a piece of data.\nSome pieces of information can be seen in many places in an ECS event. To facilitate searching for them, store an array of all seen values to their corresponding field in `related.`.\nA concrete example is IP addresses, which can be under host, observer, source, destination, client, server, and network.forwarded_ip. If you append all IPs to `related.ip`, you can then search for a given IP trivially, no matter where it appeared, by querying `related.ip:192.0.2.15`."
   type: group
   fields:
     - name: ip
@@ -195,7 +159,7 @@
   type: keyword
   description: Filebeat input type used to collect the log.
 - name: log.file.path
-  type: keyword
+  type: wildcard
   description: >
     The file from which the line was read. This field contains the absolute path to the file. For example: `/var/log/system.log`.