Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Transforms: Support for terms agg in pivot configurations. #123634

Merged
merged 7 commits into from
Jan 27, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions x-pack/plugins/transform/common/types/pivot_aggs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export const PIVOT_SUPPORTED_AGGS = {
VALUE_COUNT: 'value_count',
FILTER: 'filter',
TOP_METRICS: 'top_metrics',
TERMS: 'terms',
} as const;

export type PivotSupportedAggs = typeof PIVOT_SUPPORTED_AGGS[keyof typeof PIVOT_SUPPORTED_AGGS];
Expand Down
2 changes: 2 additions & 0 deletions x-pack/plugins/transform/public/app/common/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ export {
getEsAggFromAggConfig,
isPivotAggsConfigWithUiSupport,
isPivotAggsConfigPercentiles,
isPivotAggsConfigTerms,
PERCENTILES_AGG_DEFAULT_PERCENTS,
TERMS_AGG_DEFAULT_SIZE,
pivotAggsFieldSupport,
} from './pivot_aggs';
export type {
Expand Down
13 changes: 13 additions & 0 deletions x-pack/plugins/transform/public/app/common/pivot_aggs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export function isPivotSupportedAggs(arg: unknown): arg is PivotSupportedAggs {
}

export const PERCENTILES_AGG_DEFAULT_PERCENTS = [1, 5, 25, 50, 75, 95, 99];
export const TERMS_AGG_DEFAULT_SIZE = 10;

export const pivotAggsFieldSupport = {
[KBN_FIELD_TYPES.ATTACHMENT]: [PIVOT_SUPPORTED_AGGS.VALUE_COUNT, PIVOT_SUPPORTED_AGGS.FILTER],
Expand All @@ -45,6 +46,7 @@ export const pivotAggsFieldSupport = {
PIVOT_SUPPORTED_AGGS.VALUE_COUNT,
PIVOT_SUPPORTED_AGGS.FILTER,
PIVOT_SUPPORTED_AGGS.TOP_METRICS,
PIVOT_SUPPORTED_AGGS.TERMS,
],
[KBN_FIELD_TYPES.MURMUR3]: [PIVOT_SUPPORTED_AGGS.VALUE_COUNT, PIVOT_SUPPORTED_AGGS.FILTER],
[KBN_FIELD_TYPES.NUMBER]: [
Expand All @@ -63,6 +65,7 @@ export const pivotAggsFieldSupport = {
PIVOT_SUPPORTED_AGGS.VALUE_COUNT,
PIVOT_SUPPORTED_AGGS.FILTER,
PIVOT_SUPPORTED_AGGS.TOP_METRICS,
PIVOT_SUPPORTED_AGGS.TERMS,
],
[KBN_FIELD_TYPES._SOURCE]: [PIVOT_SUPPORTED_AGGS.VALUE_COUNT, PIVOT_SUPPORTED_AGGS.FILTER],
[KBN_FIELD_TYPES.UNKNOWN]: [PIVOT_SUPPORTED_AGGS.VALUE_COUNT, PIVOT_SUPPORTED_AGGS.FILTER],
Expand Down Expand Up @@ -226,9 +229,15 @@ interface PivotAggsConfigPercentiles extends PivotAggsConfigWithUiBase {
percents: number[];
}

interface PivotAggsConfigTerms extends PivotAggsConfigWithUiBase {
agg: typeof PIVOT_SUPPORTED_AGGS.TERMS;
size: number;
}

export type PivotAggsConfigWithUiSupport =
| PivotAggsConfigWithUiBase
| PivotAggsConfigPercentiles
| PivotAggsConfigTerms
| PivotAggsConfigWithExtendedForm;

export function isPivotAggsConfigWithUiSupport(arg: unknown): arg is PivotAggsConfigWithUiSupport {
Expand Down Expand Up @@ -258,6 +267,10 @@ export function isPivotAggsConfigPercentiles(arg: unknown): arg is PivotAggsConf
);
}

export function isPivotAggsConfigTerms(arg: unknown): arg is PivotAggsConfigTerms {
return isPopulatedObject(arg, ['agg', 'field', 'size']) && arg.agg === PIVOT_SUPPORTED_AGGS.TERMS;
}

export type PivotAggsConfig = PivotAggsConfigBase | PivotAggsConfigWithUiSupport;

export type PivotAggsConfigWithUiSupportDict = Dictionary<PivotAggsConfigWithUiSupport>;
Expand Down
12 changes: 8 additions & 4 deletions x-pack/plugins/transform/public/app/hooks/use_pivot_data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { EuiDataGridColumn } from '@elastic/eui';
import { i18n } from '@kbn/i18n';
import { getFlattenedObject } from '@kbn/std';

import { sample, difference } from 'lodash';
import { difference } from 'lodash';
import { ES_FIELD_TYPES } from '../../../../../../src/plugins/data/common';

import type { PreviewMappingsProperties } from '../../../common/api_schemas/transforms';
Expand Down Expand Up @@ -79,12 +79,16 @@ export function getCombinedProperties(
populatedProperties: PreviewMappingsProperties,
docs: Array<Record<string, unknown>>
): PreviewMappingsProperties {
// Take a sample from docs and resolve missing mappings
const sampleDoc = sample(docs) ?? {};
const missingMappings = difference(Object.keys(sampleDoc), Object.keys(populatedProperties));
// Identify missing mappings
const missingMappings = difference(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The grid isn't inferring the type of the value correctly - it thinks it's a string, so left aligns the cell contents, and gives the alphabetical sort options (note sort isn't working on nested fields but I will raise a separate issue for that).

image

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It makes sense to address this as part of #123796 as even if the type for the terms agg columns is correctly set to numeric it still won't be sorted correctly as the column name will contain a dot.

// Create an array of unique flattened field names across all docs
[...new Set(docs.flatMap(Object.keys))],
Object.keys(populatedProperties)
);
return {
...populatedProperties,
...missingMappings.reduce((acc, curr) => {
const sampleDoc = docs.find((d) => typeof d[curr] !== 'undefined') ?? {};
acc[curr] = { type: typeof sampleDoc[curr] as ES_FIELD_TYPES };
return acc;
}, {} as PreviewMappingsProperties),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ import {
import {
isAggName,
isPivotAggsConfigPercentiles,
isPivotAggsConfigTerms,
isPivotAggsConfigWithUiSupport,
getEsAggFromAggConfig,
PERCENTILES_AGG_DEFAULT_PERCENTS,
TERMS_AGG_DEFAULT_SIZE,
PivotAggsConfig,
PivotAggsConfigWithUiSupportDict,
} from '../../../../common';
Expand Down Expand Up @@ -75,6 +77,20 @@ function parsePercentsInput(inputValue: string | undefined) {
return [];
}

function getDefaultSize(defaultData: PivotAggsConfig): number | undefined {
if (isPivotAggsConfigTerms(defaultData)) {
return defaultData.size;
}
}

function parseSizeInput(inputValue: string | undefined) {
if (inputValue !== undefined) {
return parseInt(inputValue, 10);
}

return TERMS_AGG_DEFAULT_SIZE;
}

export const PopoverForm: React.FC<Props> = ({ defaultData, otherAggNames, onChange, options }) => {
const [aggConfigDef, setAggConfigDef] = useState(cloneDeep(defaultData));

Expand All @@ -85,6 +101,7 @@ export const PopoverForm: React.FC<Props> = ({ defaultData, otherAggNames, onCha
);

const [percents, setPercents] = useState(getDefaultPercents(defaultData));
const [size, setSize] = useState(getDefaultSize(defaultData));

const isUnsupportedAgg = !isPivotAggsConfigWithUiSupport(defaultData);

Expand Down Expand Up @@ -118,12 +135,19 @@ export const PopoverForm: React.FC<Props> = ({ defaultData, otherAggNames, onCha
if (aggVal === PIVOT_SUPPORTED_AGGS.PERCENTILES && percents === undefined) {
setPercents(PERCENTILES_AGG_DEFAULT_PERCENTS);
}
if (aggVal === PIVOT_SUPPORTED_AGGS.TERMS && size === undefined) {
setSize(TERMS_AGG_DEFAULT_SIZE);
}
}

function updatePercents(inputValue: string) {
setPercents(parsePercentsInput(inputValue));
}

function updateSize(inputValue: string) {
setSize(parseSizeInput(inputValue));
}

function getUpdatedItem(): PivotAggsConfig {
let updatedItem: PivotAggsConfig;

Expand All @@ -137,21 +161,29 @@ export const PopoverForm: React.FC<Props> = ({ defaultData, otherAggNames, onCha
resultField = field[0];
}

if (agg !== PIVOT_SUPPORTED_AGGS.PERCENTILES) {
if (agg === PIVOT_SUPPORTED_AGGS.PERCENTILES) {
updatedItem = {
agg,
aggName,
field: resultField,
dropDownName: defaultData.dropDownName,
percents,
};
} else if (agg === PIVOT_SUPPORTED_AGGS.TERMS) {
updatedItem = {
...aggConfigDef,
agg,
aggName,
field: resultField,
dropDownName: defaultData.dropDownName,
size,
};
} else {
updatedItem = {
...aggConfigDef,
agg,
aggName,
field: resultField,
dropDownName: defaultData.dropDownName,
percents,
};
}

Expand Down Expand Up @@ -205,10 +237,20 @@ export const PopoverForm: React.FC<Props> = ({ defaultData, otherAggNames, onCha
const validPercents =
agg === PIVOT_SUPPORTED_AGGS.PERCENTILES && parsePercentsInput(percentsText).length > 0;

let sizeText;
if (size !== undefined) {
sizeText = size.toString();
}

const validSize = agg === PIVOT_SUPPORTED_AGGS.TERMS && parseSizeInput(sizeText) > 0;

let formValid = validAggName;
if (formValid && agg === PIVOT_SUPPORTED_AGGS.PERCENTILES) {
formValid = validPercents;
}
if (formValid && agg === PIVOT_SUPPORTED_AGGS.TERMS) {
formValid = validSize;
}
if (isPivotAggsWithExtendedForm(aggConfigDef)) {
formValid = validAggName && aggConfigDef.isValid();
}
Expand Down Expand Up @@ -325,6 +367,23 @@ export const PopoverForm: React.FC<Props> = ({ defaultData, otherAggNames, onCha
/>
</EuiFormRow>
)}
{agg === PIVOT_SUPPORTED_AGGS.TERMS && (
<EuiFormRow
label={i18n.translate('xpack.transform.agg.popoverForm.sizeLabel', {
defaultMessage: 'Size',
})}
error={
!validSize && [
i18n.translate('xpack.transform.groupBy.popoverForm.invalidSizeErrorMessage', {
defaultMessage: 'Enter a valid positive number',
}),
]
}
isInvalid={!validSize}
>
<EuiFieldText defaultValue={sizeText} onChange={(e) => updateSize(e.target.value)} />
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

size is a number type, shall we use https://elastic.github.io/eui/#/forms/form-controls#number-field control instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember we intentionally went with the text field in other cases too because we had problems in the past with custom validations. So for consistency I went with the text based one here again. See the input for 'Maximum page search size' for example.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Found EuiFieldNumber in

. But in case you still experience issues with custom validation we can stick to the text field

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be worth another look at EuiFieldNumber as currently it silently accepts fractional values (rounding down to nearest integer):

image

Copy link
Contributor Author

@walterra walterra Jan 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the additional feedback, I switched to EuiFieldNumber in 4878e18. Looking into this, I noticed the form validation approach I copied over from the percentiles variant was a bit flawed. I fixed that in the same commit. For both percentiles and size it's now validating the input string and keeping that as separate state. Previously, similar like the screenshot above with a float not triggering an error, for percentiles it also wouldn't show an error but just fall back to the default value when applied.

</EuiFormRow>
)}
{isUnsupportedAgg && (
<EuiCodeBlock
aria-label={i18n.translate('xpack.transform.agg.popoverForm.codeBlock', {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,64 @@ export default function ({ getService }: FtrProviderContext) {
discoverQueryHits: '10',
},
} as PivotTransformTestData,
{
type: 'pivot',
suiteTitle: 'batch transform with terms group and terms agg',
source: 'ft_ecommerce',
groupByEntries: [
{
identifier: 'terms(customer_gender)',
label: 'customer_gender',
} as GroupByEntry,
],
aggregationEntries: [
{
identifier: 'terms(geoip.city_name)',
label: 'geoip.city_name.terms',
},
],
transformId: `ec_3_${Date.now()}`,
transformDescription:
'ecommerce batch transform with group by terms(customer_gender) and aggregation terms(geoip.city_name)',
get destinationIndex(): string {
return `user-${this.transformId}`;
},
discoverAdjustSuperDatePicker: false,
expected: {
pivotAdvancedEditorValueArr: ['{', ' "group_by": {', ' "customer_gender": {'],
pivotAdvancedEditorValue: {
group_by: {
customer_gender: {
terms: {
field: 'customer_gender',
},
},
},
aggregations: {
'geoip.city_name': {
terms: {
field: 'geoip.city_name',
size: 3,
},
},
},
},
transformPreview: {
column: 0,
values: ['FEMALE', 'MALE'],
},
row: {
status: TRANSFORM_STATE.STOPPED,
mode: 'batch',
progress: '100',
},
indexPreview: {
columns: 10,
rows: 5,
},
discoverQueryHits: '2',
},
} as PivotTransformTestData,
{
type: 'latest',
suiteTitle: 'batch transform with the latest function',
Expand All @@ -351,7 +409,7 @@ export default function ({ getService }: FtrProviderContext) {
identifier: 'order_date',
label: 'order_date',
},
transformId: `ec_3_${Date.now()}`,
transformId: `ec_4_${Date.now()}`,

transformDescription:
'ecommerce batch transform with the latest function config, sort by order_data, country code as unique key',
Expand Down