From 3cb70538979485d865cd932fd13cb8fa25b0b4e1 Mon Sep 17 00:00:00 2001 From: Sandra Gonzales Date: Tue, 24 Mar 2020 07:36:23 -0400 Subject: [PATCH] [EPM] merge duplicate fields when creating index patterns (#60957) * fix bug where an already installed package was installed twice through the endpoint * fix bug where datasets like network and network_summary were not differentiated * handle duplicate fields by merging fields of the same type --- .../__snapshots__/install.test.ts.snap | 366 ------------------ .../epm/kibana/index_pattern/install.test.ts | 29 +- .../epm/kibana/index_pattern/install.ts | 30 +- .../kibana/index_pattern/tests/test_data.ts | 98 +++++ .../server/services/epm/packages/assets.ts | 3 +- 5 files changed, 148 insertions(+), 378 deletions(-) create mode 100644 x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/tests/test_data.ts diff --git a/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/__snapshots__/install.test.ts.snap b/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/__snapshots__/install.test.ts.snap index 79f778f9bba8f..d063ebe92f938 100644 --- a/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/__snapshots__/install.test.ts.snap +++ b/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/__snapshots__/install.test.ts.snap @@ -565,372 +565,6 @@ exports[`creating index patterns from yaml fields createIndexPatternFields funct } `; -exports[`creating index patterns from yaml fields dedupFields function remove duplicated fields when parsing multiple files: dedupeFields 1`] = ` -[ - { - "name": "coredns", - "type": "group", - "description": "coredns fields after normalization\\n", - "fields": [ - { - "name": "id", - "type": "keyword", - "description": "id of the DNS transaction\\n" - }, - { - "name": "allParams", - "type": "integer", - "format": "bytes", - "pattern": "patternValQueryWeight", - "input_format": "inputFormatVal,", - "output_format":
"outputFormalVal,", - "output_precision": "3,", - "label_template": "labelTemplateVal,", - "url_template": "urlTemplateVal,", - "openLinkInCurrentTab": "true,", - "description": "weight of the DNS query\\n" - }, - { - "name": "query.length", - "type": "integer", - "pattern": "patternValQueryLength", - "description": "length of the DNS query\\n" - }, - { - "name": "query.size", - "type": "integer", - "format": "bytes", - "pattern": "patternValQuerySize", - "description": "size of the DNS query\\n" - }, - { - "name": "query.class", - "type": "keyword", - "description": "DNS query class\\n" - }, - { - "name": "query.name", - "type": "keyword", - "description": "DNS query name\\n" - }, - { - "name": "query.type", - "type": "keyword", - "description": "DNS query type\\n" - }, - { - "name": "response.code", - "type": "keyword", - "description": "DNS response code\\n" - }, - { - "name": "response.flags", - "type": "keyword", - "description": "DNS response flags\\n" - }, - { - "name": "response.size", - "type": "integer", - "format": "bytes", - "description": "size of the DNS response\\n" - }, - { - "name": "dnssec_ok", - "type": "boolean", - "description": "dnssec flag\\n" - } - ] - }, - { - "name": "@timestamp", - "level": "core", - "required": true, - "type": "date", - "description": "Date/time when the event originated. This is the date/time extracted from the event, typically representing when the event was generated by the source. If the event source has no original timestamp, this value is typically populated by the first time the event was received by the pipeline. Required field for all events.", - "example": "2016-05-23T08:05:34.853Z" - }, - { - "name": "labels", - "level": "core", - "type": "object", - "object_type": "keyword", - "description": "Custom key/value pairs. Can be used to add meta information to events. Should not contain nested objects. All values are stored as keyword. 
Example: \`docker\` and \`k8s\` labels.", - "example": { - "application": "foo-bar", - "env": "production" - } - }, - { - "name": "message", - "level": "core", - "type": "text", - "description": "For log events the message field contains the log message, optimized for viewing in a log viewer. For structured logs without an original message field, other fields can be concatenated to form a human-readable summary of the event. If multiple messages exist, they can be combined into one message.", - "example": "Hello World" - }, - { - "name": "tags", - "level": "core", - "type": "keyword", - "ignore_above": 1024, - "description": "List of keywords used to tag each event.", - "example": "[\\"production\\", \\"env2\\"]" - }, - { - "name": "agent", - "title": "Agent", - "group": 2, - "description": "The agent fields contain the data about the software entity, if any, that collects, detects, or observes events on a host, or takes measurements on a host. Examples include Beats. Agents may also run on observers. ECS agent.* fields shall be populated with details of the agent running on the host or observer where the event happened or the measurement was taken.", - "footnote": "Examples: In the case of Beats for logs, the agent.name is filebeat. For APM, it is the agent running in the app/service. The agent information does not change if data is sent through queuing systems like Kafka, Redis, or processing systems such as Logstash or APM Server.", - "type": "group", - "fields": [ - { - "name": "ephemeral_id", - "level": "extended", - "type": "keyword", - "ignore_above": 1024, - "description": "Ephemeral identifier of this agent (if one exists). This id normally changes across restarts, but \`agent.id\` does not.", - "example": "8a4f500f" - }, - { - "name": "id", - "level": "core", - "type": "keyword", - "ignore_above": 1024, - "description": "Unique identifier of this agent (if one exists). 
Example: For Beats this would be beat.id.", - "example": "8a4f500d" - }, - { - "name": "name", - "level": "core", - "type": "keyword", - "ignore_above": 1024, - "description": "Custom name of the agent. This is a name that can be given to an agent. This can be helpful if for example two Filebeat instances are running on the same host but a human readable separation is needed on which Filebeat instance data is coming from. If no name is given, the name is often left empty.", - "example": "foo" - }, - { - "name": "type", - "level": "core", - "type": "keyword", - "ignore_above": 1024, - "description": "Type of the agent. The agent type stays always the same and should be given by the agent used. In case of Filebeat the agent would always be Filebeat also if two Filebeat instances are run on the same machine.", - "example": "filebeat" - }, - { - "name": "version", - "level": "core", - "type": "keyword", - "ignore_above": 1024, - "description": "Version of the agent.", - "example": "6.0.0-rc2" - } - ] - }, - { - "name": "as", - "title": "Autonomous System", - "group": 2, - "description": "An autonomous system (AS) is a collection of connected Internet Protocol (IP) routing prefixes under the control of one or more network operators on behalf of a single administrative entity or domain that presents a common, clearly defined routing policy to the internet.", - "type": "group", - "fields": [ - { - "name": "number", - "level": "extended", - "type": "long", - "description": "Unique number allocated to the autonomous system. 
The autonomous system number (ASN) uniquely identifies each network on the Internet.", - "example": 15169 - }, - { - "name": "organization.name", - "level": "extended", - "type": "keyword", - "ignore_above": 1024, - "description": "Organization name.", - "example": "Google LLC" - } - ] - }, - { - "name": "nginx.access", - "type": "group", - "description": "Contains fields for the Nginx access logs.\\n", - "fields": [ - { - "name": "group_disabled", - "type": "group", - "enabled": false, - "fields": [ - { - "name": "message", - "type": "text" - } - ] - }, - { - "name": "remote_ip_list", - "type": "array", - "description": "An array of remote IP addresses. It is a list because it is common to include, besides the client IP address, IP addresses from headers like \`X-Forwarded-For\`. Real source IP is restored to \`source.ip\`.\\n" - }, - { - "name": "body_sent.bytes", - "type": "alias", - "path": "http.response.body.bytes", - "migration": true - }, - { - "name": "user_name", - "type": "alias", - "path": "user.name", - "migration": true - }, - { - "name": "method", - "type": "alias", - "path": "http.request.method", - "migration": true - }, - { - "name": "url", - "type": "alias", - "path": "url.original", - "migration": true - }, - { - "name": "http_version", - "type": "alias", - "path": "http.version", - "migration": true - }, - { - "name": "response_code", - "type": "alias", - "path": "http.response.status_code", - "migration": true - }, - { - "name": "referrer", - "type": "alias", - "path": "http.request.referrer", - "migration": true - }, - { - "name": "agent", - "type": "alias", - "path": "user_agent.original", - "migration": true - }, - { - "name": "user_agent", - "type": "group", - "fields": [ - { - "name": "device", - "type": "alias", - "path": "user_agent.device.name", - "migration": true - }, - { - "name": "name", - "type": "alias", - "path": "user_agent.name", - "migration": true - }, - { - "name": "os", - "type": "alias", - "path": 
"user_agent.os.full_name", - "migration": true - }, - { - "name": "os_name", - "type": "alias", - "path": "user_agent.os.name", - "migration": true - }, - { - "name": "original", - "type": "alias", - "path": "user_agent.original", - "migration": true - } - ] - }, - { - "name": "geoip", - "type": "group", - "fields": [ - { - "name": "continent_name", - "type": "alias", - "path": "source.geo.continent_name", - "migration": true - }, - { - "name": "country_iso_code", - "type": "alias", - "path": "source.geo.country_iso_code", - "migration": true - }, - { - "name": "location", - "type": "alias", - "path": "source.geo.location", - "migration": true - }, - { - "name": "region_name", - "type": "alias", - "path": "source.geo.region_name", - "migration": true - }, - { - "name": "city_name", - "type": "alias", - "path": "source.geo.city_name", - "migration": true - }, - { - "name": "region_iso_code", - "type": "alias", - "path": "source.geo.region_iso_code", - "migration": true - } - ] - } - ] - }, - { - "name": "source", - "type": "group", - "fields": [ - { - "name": "geo", - "type": "group", - "fields": [ - { - "name": "continent_name", - "type": "text" - } - ] - } - ] - }, - { - "name": "country", - "type": "", - "multi_fields": [ - { - "name": "keyword", - "type": "keyword" - }, - { - "name": "text", - "type": "text" - } - ] - } -] -`; - exports[`creating index patterns from yaml fields flattenFields function flattens recursively and handles copying alias fields: flattenFields 1`] = ` [ { diff --git a/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/install.test.ts b/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/install.test.ts index 5e883772957d2..bc1694348b4c2 100644 --- a/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/install.test.ts +++ b/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/install.test.ts @@ -19,6 +19,7 @@ import { createIndexPattern, } from './install'; import 
{ Fields, Field } from '../../fields/field'; +import { dupeFields } from './tests/test_data'; // Add our own serialiser to just do JSON.stringify expect.addSnapshotSerializer({ @@ -60,9 +61,31 @@ describe('creating index patterns from yaml fields', () => { expect(flattened).toMatchSnapshot('flattenFields'); }); - test('dedupFields function remove duplicated fields when parsing multiple files', () => { - const deduped = dedupeFields(fields); - expect(deduped).toMatchSnapshot('dedupeFields'); + describe('dedupFields', () => { + const deduped = dedupeFields(dupeFields); + const checkIfDup = (field: Field) => { + return deduped.filter(item => item.name === field.name); + }; + test('there there is one field object with name of "1"', () => { + expect(checkIfDup({ name: '1' }).length).toBe(1); + }); + test('there there is one field object with name of "1.1"', () => { + expect(checkIfDup({ name: '1.1' }).length).toBe(1); + }); + test('there there is one field object with name of "2"', () => { + expect(checkIfDup({ name: '2' }).length).toBe(1); + }); + test('there there is one field object with name of "4"', () => { + expect(checkIfDup({ name: '4' }).length).toBe(1); + }); + // existing field takes precendence + test('the new merged field has correct attributes', () => { + const mergedField = deduped.find(field => field.name === '1'); + expect(mergedField?.searchable).toBe(true); + expect(mergedField?.aggregatable).toBe(true); + expect(mergedField?.analyzed).toBe(true); + expect(mergedField?.count).toBe(0); + }); }); describe('getFieldByPath searches recursively for field in fields given dot separated path', () => { diff --git a/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/install.ts b/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/install.ts index 7aecc408e05fe..0657fb7759b49 100644 --- a/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/install.ts +++ 
b/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/install.ts @@ -78,8 +78,12 @@ export async function installIndexPatterns( savedObjectsClient, InstallationStatus.installed ); - // add this package - if (pkgkey) installedPackages.push(pkgkey); + // add this package to the array if it doesn't already exist + // this should not happen because a user can't "reinstall" a package + // if it does because the install endpoint is called directly, the install continues + if (pkgkey && !installedPackages.includes(pkgkey)) { + installedPackages.push(pkgkey); + } // get each package's registry info const installedPackagesFetchInfoPromise = installedPackages.map(pkg => Registry.fetchInfo(pkg)); @@ -151,17 +155,29 @@ export const createIndexPattern = (indexPatternType: string, fields: Fields) => export const createIndexPatternFields = ( fields: Fields ): { indexPatternFields: IndexPatternField[]; fieldFormatMap: FieldFormatMap } => { - const dedupedFields = dedupeFields(fields); - const flattenedFields = flattenFields(dedupedFields); + const flattenedFields = flattenFields(fields); const fieldFormatMap = createFieldFormatMap(flattenedFields); const transformedFields = flattenedFields.map(transformField); - return { indexPatternFields: transformedFields, fieldFormatMap }; + const dedupedFields = dedupeFields(transformedFields); + return { indexPatternFields: dedupedFields, fieldFormatMap }; }; -export const dedupeFields = (fields: Fields) => { - const uniqueObj = fields.reduce<{ [name: string]: Field }>((acc, field) => { +// merges fields that are duplicates with the existing taking precedence +export const dedupeFields = (fields: IndexPatternField[]) => { + const uniqueObj = fields.reduce<{ [name: string]: IndexPatternField }>((acc, field) => { + // if field doesn't exist yet if (!acc[field.name]) { acc[field.name] = field; + // if field exists already + } else { + const existingField = acc[field.name]; + // if the existing field and this field have the 
same type, merge + if (existingField.type === field.type) { + const mergedField = { ...field, ...existingField }; + acc[field.name] = mergedField; + } else { + // log when there is a dup with different types + } } return acc; }, {}); diff --git a/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/tests/test_data.ts b/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/tests/test_data.ts new file mode 100644 index 0000000000000..13bef1b6ddb2c --- /dev/null +++ b/x-pack/plugins/ingest_manager/server/services/epm/kibana/index_pattern/tests/test_data.ts @@ -0,0 +1,98 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +import { IndexPatternField } from '../install'; + +export const dupeFields: IndexPatternField[] = [ + { + name: '1', + type: 'integer', + searchable: true, + aggregatable: true, + count: 0, + indexed: true, + doc_values: true, + scripted: false, + analyzed: true, + }, + { + name: '2', + type: 'integer', + searchable: true, + aggregatable: true, + count: 0, + indexed: true, + doc_values: true, + scripted: false, + analyzed: true, + }, + { + name: '3', + type: 'integer', + searchable: true, + aggregatable: true, + count: 0, + indexed: true, + doc_values: true, + scripted: false, + analyzed: true, + }, + { + name: '1', + type: 'integer', + searchable: false, + aggregatable: false, + count: 2, + indexed: true, + doc_values: true, + scripted: false, + analyzed: true, + }, + { + name: '1.1', + type: 'integer', + searchable: false, + aggregatable: false, + count: 0, + indexed: true, + doc_values: true, + scripted: false, + analyzed: true, + }, + { + name: '4', + type: 'integer', + searchable: false, + aggregatable: false, + count: 0, + indexed: true, + doc_values: true, + scripted: false, + analyzed: true, + }, + { + name: 
'2', + type: 'integer', + searchable: false, + aggregatable: false, + count: 0, + indexed: true, + doc_values: true, + scripted: false, + analyzed: true, + }, + { + name: '1', + type: 'integer', + searchable: false, + aggregatable: false, + count: 1, + indexed: true, + doc_values: true, + scripted: false, + analyzed: false, + }, +]; diff --git a/x-pack/plugins/ingest_manager/server/services/epm/packages/assets.ts b/x-pack/plugins/ingest_manager/server/services/epm/packages/assets.ts index ecc882d9c2e70..e36c2de1b4e80 100644 --- a/x-pack/plugins/ingest_manager/server/services/epm/packages/assets.ts +++ b/x-pack/plugins/ingest_manager/server/services/epm/packages/assets.ts @@ -35,8 +35,7 @@ export function getAssets( // if dataset, filter for them if (datasetName) { - // TODO: Filter for dataset path - const comparePath = `${EPR_PATH_PREFIX}/${packageInfo.name}-${packageInfo.version}/dataset/${datasetName}`; + const comparePath = `${EPR_PATH_PREFIX}/${packageInfo.name}-${packageInfo.version}/dataset/${datasetName}/`; if (!path.includes(comparePath)) { continue; }