diff --git a/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_expanded_row/keyword_content.tsx b/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_expanded_row/keyword_content.tsx index 22fe8244ef760..1baea4b3f2f7c 100644 --- a/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_expanded_row/keyword_content.tsx +++ b/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_expanded_row/keyword_content.tsx @@ -14,13 +14,6 @@ import { DocumentStatsTable } from './document_stats'; import { ExpandedRowContent } from './expanded_row_content'; import { ChoroplethMap } from './choropleth_map'; -const COMMON_EMS_LAYER_IDS = [ - 'world_countries', - 'administrative_regions_lvl2', - 'usa_zip_codes', - 'usa_states', -]; - export const KeywordContent: FC = ({ config }) => { const [EMSSuggestion, setEMSSuggestion] = useState(); const { stats, fieldName } = config; @@ -32,7 +25,6 @@ export const KeywordContent: FC = ({ config }) => { const loadEMSTermSuggestions = useCallback(async () => { if (!mapsPlugin) return; const suggestion: EMSTermJoinConfig | null = await mapsPlugin.suggestEMSTermJoinConfig({ - emsLayerIds: COMMON_EMS_LAYER_IDS, sampleValues: Array.isArray(stats?.topValues) ? stats?.topValues.map((value) => value.key) : [], diff --git a/x-pack/plugins/maps/public/ems_autosuggest/ems_autosuggest.test.ts b/x-pack/plugins/maps/public/ems_autosuggest/ems_autosuggest.test.ts index eff49c1b1242e..cc0ed19db0b40 100644 --- a/x-pack/plugins/maps/public/ems_autosuggest/ems_autosuggest.test.ts +++ b/x-pack/plugins/maps/public/ems_autosuggest/ems_autosuggest.test.ts @@ -6,40 +6,22 @@ */ import { suggestEMSTermJoinConfig } from './ems_autosuggest'; -import { FeatureCollection } from 'geojson'; class MockFileLayer { - private readonly _url: string; private readonly _id: string; private readonly _fields: Array<{ id: string }>; - constructor(url: string, fields: Array<{ id: string }>) { - this._url = url; - this._id = url; + constructor(id: string, fields: Array<{ id: string; alias?: string[]; values?: string[] }>) { + this._id = id; this._fields = fields; } - getFields() { - return this._fields; + getId() { + return this._id; } - getGeoJson() { - if (this._url === 'world_countries') { - return ({ - type: 'FeatureCollection', - features: [ - { properties: { iso2: 'CA', iso3: 'CAN' } }, - { properties: { iso2: 'US', iso3: 'USA' } }, - ], - } as unknown) as FeatureCollection; - } else if (this._url === 'zips') { - return ({ - type: 'FeatureCollection', - features: [{ properties: { zip: '40204' } }, { properties: { zip: '40205' } }], - } as unknown) as FeatureCollection; - } else { - throw new Error(`unrecognized mock url ${this._url}`); - } + getFields() { + return this._fields; } hasId(id: string) { @@ -51,31 +33,31 @@ jest.mock('../util', () => { return { async getEmsFileLayers() { return [ - new MockFileLayer('world_countries', [{ id: 'iso2' }, { id: 'iso3' }]), - new MockFileLayer('zips', [{ id: 'zip' }]), + new MockFileLayer('world_countries', [ + { + id: 'iso2', + alias: ['(geo\\.){0,}country_iso_code$', '(country|countries)'], + values: ['CA', 'US'], + }, + { id: 'iso3', values: ['CAN', 'USA'] }, + { id: 'name', alias: ['(country|countries)'] }, + ]), + new MockFileLayer('usa_zip_codes', [ + { id: 'zip', alias: ['zip'], values: ['40204', '40205'] }, + ]), ]; }, }; }); describe('suggestEMSTermJoinConfig', () => { - test('no info provided', async () => { + test('Should not validate when no info provided', async () => { const termJoinConfig = await suggestEMSTermJoinConfig({}); expect(termJoinConfig).toBe(null); }); - describe('validate common column names', () => { - test('ecs region', async () => { - const termJoinConfig = await suggestEMSTermJoinConfig({ - sampleValuesColumnName: 'destination.geo.region_iso_code', - }); - expect(termJoinConfig).toEqual({ - layerId: 'administrative_regions_lvl2', - field: 'region_iso_code', - }); - }); - - test('ecs country', async () => { + describe('With common column names', () => { + test('should match first match', async () => { const termJoinConfig = await suggestEMSTermJoinConfig({ sampleValuesColumnName: 'country_iso_code', }); @@ -85,78 +67,61 @@ describe('suggestEMSTermJoinConfig', () => { }); }); - test('country', async () => { + test('When sampleValues are provided, should reject match if no sampleValues for a layer, even though the name matches', async () => { const termJoinConfig = await suggestEMSTermJoinConfig({ - sampleValuesColumnName: 'Country_name', - }); - expect(termJoinConfig).toEqual({ - layerId: 'world_countries', - field: 'name', + sampleValuesColumnName: 'country_iso_code', + sampleValues: ['FO', 'US', 'CA'], }); + expect(termJoinConfig).toEqual(null); }); - test('unknown name', async () => { + test('should reject match if sampleValues not in id-list', async () => { const termJoinConfig = await suggestEMSTermJoinConfig({ - sampleValuesColumnName: 'cntry', + sampleValuesColumnName: 'zip', + sampleValues: ['90201', '40205'], }); expect(termJoinConfig).toEqual(null); }); - }); - describe('validate well known formats', () => { - test('5-digit zip code', async () => { + test('should return first match (regex matches both iso2 and name)', async () => { const termJoinConfig = await suggestEMSTermJoinConfig({ - sampleValues: ['90201', 40204], + sampleValuesColumnName: 'Country_name', }); expect(termJoinConfig).toEqual({ - layerId: 'usa_zip_codes', - field: 'zip', + layerId: 'world_countries', + field: 'iso2', }); }); - test('mismatch', async () => { + test('unknown name', async () => { const termJoinConfig = await suggestEMSTermJoinConfig({ - sampleValues: ['90201', 'foobar'], + sampleValuesColumnName: 'cntry', }); expect(termJoinConfig).toEqual(null); }); }); - describe('validate based on EMS data', () => { - test('Should validate with zip codes layer', async () => { + describe('validate well known formats (using id-values in manifest)', () => { + test('Should validate known zipcodes', async () => { const termJoinConfig = await suggestEMSTermJoinConfig({ - sampleValues: ['40204', 40205], - emsLayerIds: ['world_countries', 'zips'], + sampleValues: ['40205', 40204], }); expect(termJoinConfig).toEqual({ - layerId: 'zips', + layerId: 'usa_zip_codes', field: 'zip', }); }); - test('Should not validate with faulty zip codes', async () => { + test('Should not validate unknown zipcode (in this case, 90201)', async () => { const termJoinConfig = await suggestEMSTermJoinConfig({ - sampleValues: ['40204', '00000'], - emsLayerIds: ['world_countries', 'zips'], + sampleValues: ['90201', 40204], }); expect(termJoinConfig).toEqual(null); }); - test('Should validate against countries', async () => { + test('Should not validate mismatches', async () => { const termJoinConfig = await suggestEMSTermJoinConfig({ - sampleValues: ['USA', 'USA', 'CAN'], - emsLayerIds: ['world_countries', 'zips'], - }); - expect(termJoinConfig).toEqual({ - layerId: 'world_countries', - field: 'iso3', - }); - }); - - test('Should not validate against missing countries', async () => { - const termJoinConfig = await suggestEMSTermJoinConfig({ - sampleValues: ['USA', 'BEL', 'CAN'], - emsLayerIds: ['world_countries', 'zips'], + sampleValues: ['90201', 'foobar'], }); expect(termJoinConfig).toEqual(null); }); diff --git a/x-pack/plugins/maps/public/ems_autosuggest/ems_autosuggest.ts b/x-pack/plugins/maps/public/ems_autosuggest/ems_autosuggest.ts index 952e48a71a9dc..66fcbd805f53e 100644 --- a/x-pack/plugins/maps/public/ems_autosuggest/ems_autosuggest.ts +++ b/x-pack/plugins/maps/public/ems_autosuggest/ems_autosuggest.ts @@ -7,10 +7,8 @@ import type { FileLayer } from '@elastic/ems-client'; import { getEmsFileLayers } from '../util'; -import { emsWorldLayerId, emsRegionLayerId, emsUsaZipLayerId } from '../../common'; export interface SampleValuesConfig { - emsLayerIds?: string[]; sampleValues?: Array; sampleValuesColumnName?: string; } @@ -20,44 +18,16 @@ export interface EMSTermJoinConfig { field: string; } -const wellKnownColumnNames = [ - { - regex: /(geo\.){0,}country_iso_code$/i, // ECS postfix for country - emsConfig: { - layerId: emsWorldLayerId, - field: 'iso2', - }, - }, - { - regex: /(geo\.){0,}region_iso_code$/i, // ECS postfixn for region - emsConfig: { - layerId: emsRegionLayerId, - field: 'region_iso_code', - }, - }, - { - regex: /^country/i, // anything starting with country - emsConfig: { - layerId: emsWorldLayerId, - field: 'name', - }, - }, -]; - -const wellKnownColumnFormats = [ - { - regex: /(^\d{5}$)/i, // 5-digit zipcode - emsConfig: { - layerId: emsUsaZipLayerId, - field: 'zip', - }, - }, -]; - interface UniqueMatch { - config: { layerId: string; field: string }; + config: EMSTermJoinConfig; count: number; } +interface FileLayerFieldShim { + id: string; + values?: string[]; + regex?: string; + alias?: string[]; +} export async function suggestEMSTermJoinConfig( sampleValuesConfig: SampleValuesConfig @@ -65,20 +35,17 @@ export async function suggestEMSTermJoinConfig( const matches: EMSTermJoinConfig[] = []; if (sampleValuesConfig.sampleValuesColumnName) { - matches.push(...suggestByName(sampleValuesConfig.sampleValuesColumnName)); + const matchesBasedOnColumnName = await suggestByName( + sampleValuesConfig.sampleValuesColumnName, + sampleValuesConfig.sampleValues + ); + matches.push(...matchesBasedOnColumnName); } if (sampleValuesConfig.sampleValues && sampleValuesConfig.sampleValues.length) { - if (sampleValuesConfig.emsLayerIds && sampleValuesConfig.emsLayerIds.length) { - matches.push( - ...(await suggestByEMSLayerIds( - sampleValuesConfig.emsLayerIds, - sampleValuesConfig.sampleValues - )) - ); - } else { - matches.push(...suggestByValues(sampleValuesConfig.sampleValues)); - } + // Only looks at id-values in main manifest + const matchesBasedOnIds = await suggestByIdValues(sampleValuesConfig.sampleValues); + matches.push(...matchesBasedOnIds); } const uniqMatches: UniqueMatch[] = matches.reduce((accum: UniqueMatch[], match) => { @@ -105,92 +72,80 @@ export async function suggestEMSTermJoinConfig( return uniqMatches.length ? uniqMatches[0].config : null; } -function suggestByName(columnName: string): EMSTermJoinConfig[] { - const matches = wellKnownColumnNames.filter((wellknown) => { - return columnName.match(wellknown.regex); - }); - - return matches.map((m) => { - return m.emsConfig; - }); -} +async function suggestByName( + columnName: string, + sampleValues?: Array +): Promise { + const fileLayers = await getEmsFileLayers(); -function suggestByValues(values: Array): EMSTermJoinConfig[] { - const matches = wellKnownColumnFormats.filter((wellknown) => { - for (let i = 0; i < values.length; i++) { - const value = values[i].toString(); - if (!value.match(wellknown.regex)) { - return false; + const matches: EMSTermJoinConfig[] = []; + fileLayers.forEach((fileLayer) => { + const emsFields: FileLayerFieldShim[] = fileLayer.getFields(); + emsFields.forEach((emsField: FileLayerFieldShim) => { + if (!emsField.alias || !emsField.alias.length) { + return; } - } - return true; - }); - return matches.map((m) => { - return m.emsConfig; + const emsConfig = { + layerId: fileLayer.getId(), + field: emsField.id, + }; + emsField.alias.forEach((alias: string) => { + const regex = new RegExp(alias, 'i'); + const nameMatchesAlias = !!columnName.match(regex); + // Check if this violates any known id-values. + + let isMatch: boolean; + if (sampleValues) { + if (emsField.values && emsField.values.length) { + isMatch = nameMatchesAlias && allSamplesMatch(sampleValues, emsField.values); + } else { + // requires validation against sample-values but EMS provides no meta to do so. + isMatch = false; + } + } else { + isMatch = nameMatchesAlias; + } + + if (isMatch) { + matches.push(emsConfig); + } + }); + }); }); -} -function existsInEMS(emsJson: any, emsFieldId: string, sampleValue: string): boolean { - for (let i = 0; i < emsJson.features.length; i++) { - const emsFieldValue = emsJson.features[i].properties[emsFieldId].toString(); - if (emsFieldValue.toString() === sampleValue) { - return true; - } - } - return false; + return matches; } -function matchesEmsField(emsJson: any, emsFieldId: string, sampleValues: Array) { +function allSamplesMatch(sampleValues: Array, ids: string[]) { for (let j = 0; j < sampleValues.length; j++) { const sampleValue = sampleValues[j].toString(); - if (!existsInEMS(emsJson, emsFieldId, sampleValue)) { + if (!ids.includes(sampleValue)) { return false; } } return true; } -async function getMatchesForEMSLayer( - emsLayerId: string, +async function suggestByIdValues( sampleValues: Array ): Promise { + const matches: EMSTermJoinConfig[] = []; const fileLayers: FileLayer[] = await getEmsFileLayers(); - const emsFileLayer: FileLayer | undefined = fileLayers.find((fl: FileLayer) => - fl.hasId(emsLayerId) - ); - - if (!emsFileLayer) { - return []; - } - - const emsFields = emsFileLayer.getFields(); - - try { - const emsJson = await emsFileLayer.getGeoJson(); - const matches: EMSTermJoinConfig[] = []; - for (let f = 0; f < emsFields.length; f++) { - if (matchesEmsField(emsJson, emsFields[f].id, sampleValues)) { - matches.push({ - layerId: emsLayerId, - field: emsFields[f].id, - }); + fileLayers.forEach((fileLayer) => { + const emsFields: FileLayerFieldShim[] = fileLayer.getFields(); + emsFields.forEach((emsField: FileLayerFieldShim) => { + if (!emsField.values || !emsField.values.length) { + return; } - } - return matches; - } catch (e) { - return []; - } -} - -async function suggestByEMSLayerIds( - emsLayerIds: string[], - values: Array -): Promise { - const matches = []; - for (const emsLayerId of emsLayerIds) { - const layerIdMathes = await getMatchesForEMSLayer(emsLayerId, values); - matches.push(...layerIdMathes); - } + const emsConfig = { + layerId: fileLayer.getId(), + field: emsField.id, + }; + if (allSamplesMatch(sampleValues, emsField.values)) { + matches.push(emsConfig); + } + }); + }); return matches; } diff --git a/x-pack/plugins/ml/common/constants/embeddable_map.ts b/x-pack/plugins/ml/common/constants/embeddable_map.ts deleted file mode 100644 index 6cb345bae630e..0000000000000 --- a/x-pack/plugins/ml/common/constants/embeddable_map.ts +++ /dev/null @@ -1,13 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -export const COMMON_EMS_LAYER_IDS = [ - 'world_countries', - 'administrative_regions_lvl2', - 'usa_zip_codes', - 'usa_states', -]; diff --git a/x-pack/plugins/ml/public/application/explorer/anomalies_map.tsx b/x-pack/plugins/ml/public/application/explorer/anomalies_map.tsx index 73a6a9d64b60e..fe43bd659131f 100644 --- a/x-pack/plugins/ml/public/application/explorer/anomalies_map.tsx +++ b/x-pack/plugins/ml/public/application/explorer/anomalies_map.tsx @@ -28,7 +28,6 @@ import { isDefined } from '../../../common/types/guards'; import { MlEmbeddedMapComponent } from '../components/ml_embedded_map'; import { EMSTermJoinConfig } from '../../../../maps/public'; import { AnomaliesTableRecord } from '../../../common/types/anomalies'; -import { COMMON_EMS_LAYER_IDS } from '../../../common/constants/embeddable_map'; const MAX_ENTITY_VALUES = 3; @@ -177,7 +176,6 @@ export const AnomaliesMap: FC = ({ anomalies, jobIds }) => { } const suggestion: EMSTermJoinConfig | null = await mapsPlugin.suggestEMSTermJoinConfig({ - emsLayerIds: COMMON_EMS_LAYER_IDS, sampleValues: Array.from(entityValues), sampleValuesColumnName: entityName || '', });