Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Visualizations] Expensive queries are causing unnecessary load and delays on Elasticsearch #99031

Merged
merged 9 commits into from
Aug 30, 2021
24 changes: 10 additions & 14 deletions src/plugins/visualizations/server/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,33 +8,29 @@

import { i18n } from '@kbn/i18n';
import { schema } from '@kbn/config-schema';
import { Observable } from 'rxjs';
import { UsageCollectionSetup } from 'src/plugins/usage_collection/server';
import {

import { VISUALIZE_ENABLE_LABS_SETTING } from '../common/constants';
import { visualizationSavedObjectType } from './saved_objects';
import { registerVisualizationsCollector } from './usage_collector';

import type { VisualizationsPluginSetup, VisualizationsPluginStart } from './types';
import type {
PluginInitializerContext,
CoreSetup,
CoreStart,
Plugin,
Logger,
} from '../../../core/server';

import { VISUALIZE_ENABLE_LABS_SETTING } from '../common/constants';

import { visualizationSavedObjectType } from './saved_objects';

import { VisualizationsPluginSetup, VisualizationsPluginStart } from './types';
import { registerVisualizationsCollector } from './usage_collector';
import { EmbeddableSetup } from '../../embeddable/server';
import type { UsageCollectionSetup } from '../../usage_collection/server';
import type { EmbeddableSetup } from '../../embeddable/server';
import { visualizeEmbeddableFactory } from './embeddable/visualize_embeddable_factory';

export class VisualizationsPlugin
implements Plugin<VisualizationsPluginSetup, VisualizationsPluginStart> {
private readonly logger: Logger;
private readonly config: Observable<{ kibana: { index: string } }>;

constructor(initializerContext: PluginInitializerContext) {
this.logger = initializerContext.logger.get();
this.config = initializerContext.config.legacy.globalConfig$;
}

public setup(
Expand All @@ -61,7 +57,7 @@ export class VisualizationsPlugin
});

if (plugins.usageCollection) {
registerVisualizationsCollector(plugins.usageCollection, this.config);
registerVisualizationsCollector(plugins.usageCollection);
}

plugins.embeddable.registerEmbeddableFactory(visualizeEmbeddableFactory());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,126 +7,97 @@
*/

import moment from 'moment';
import { ElasticsearchClient } from 'src/core/server';
import { getStats } from './get_usage_collector';
import type { SavedObjectsClientContract } from '../../../../core/server';

const defaultMockSavedObjects = [
{
_id: 'visualization:coolviz-123',
_source: {
type: 'visualization',
visualization: { visState: '{"type": "shell_beads"}' },
updated_at: moment().subtract(7, 'days').startOf('day').toString(),
},
id: 'visualization:coolviz-123',
attributes: { visState: '{"type": "shell_beads"}' },
updated_at: moment().subtract(7, 'days').startOf('day').toString(),
},
];

const enlargedMockSavedObjects = [
// default space
{
_id: 'visualization:coolviz-123',
_source: {
type: 'visualization',
visualization: { visState: '{"type": "cave_painting"}' },
updated_at: moment().subtract(7, 'days').startOf('day').toString(),
},
id: 'visualization:coolviz-123',
attributes: { visState: '{"type": "cave_painting"}' },
updated_at: moment().subtract(7, 'days').startOf('day').toString(),
},
{
_id: 'visualization:coolviz-456',
_source: {
type: 'visualization',
visualization: { visState: '{"type": "printing_press"}' },
updated_at: moment().subtract(20, 'days').startOf('day').toString(),
},
id: 'visualization:coolviz-456',
attributes: { visState: '{"type": "printing_press"}' },
updated_at: moment().subtract(20, 'days').startOf('day').toString(),
},
{
_id: 'meat:visualization:coolviz-789',
_source: {
type: 'visualization',
visualization: { visState: '{"type": "floppy_disk"}' },
updated_at: moment().subtract(2, 'months').startOf('day').toString(),
},
id: 'meat:visualization:coolviz-789',
attributes: { visState: '{"type": "floppy_disk"}' },
updated_at: moment().subtract(2, 'months').startOf('day').toString(),
},
// meat space
{
_id: 'meat:visualization:coolviz-789',
_source: {
type: 'visualization',
visualization: { visState: '{"type": "cave_painting"}' },
updated_at: moment().subtract(89, 'days').startOf('day').toString(),
},
id: 'meat:visualization:coolviz-789',
attributes: { visState: '{"type": "cave_painting"}' },
updated_at: moment().subtract(89, 'days').startOf('day').toString(),
},
{
_id: 'meat:visualization:coolviz-789',
_source: {
type: 'visualization',
visualization: { visState: '{"type": "cuneiform"}' },
updated_at: moment().subtract(5, 'months').startOf('day').toString(),
},
id: 'meat:visualization:coolviz-789',
attributes: { visState: '{"type": "cuneiform"}' },
updated_at: moment().subtract(5, 'months').startOf('day').toString(),
},
{
_id: 'meat:visualization:coolviz-789',
_source: {
type: 'visualization',
visualization: { visState: '{"type": "cuneiform"}' },
updated_at: moment().subtract(2, 'days').startOf('day').toString(),
},
id: 'meat:visualization:coolviz-789',
attributes: { visState: '{"type": "cuneiform"}' },
updated_at: moment().subtract(2, 'days').startOf('day').toString(),
},
{
_id: 'meat:visualization:coolviz-789',
_source: {
type: 'visualization',
visualization: { visState: '{"type": "floppy_disk"}' },
updated_at: moment().subtract(7, 'days').startOf('day').toString(),
},
id: 'meat:visualization:coolviz-789',
attributes: { visState: '{"type": "floppy_disk"}' },
updated_at: moment().subtract(7, 'days').startOf('day').toString(),
},
// cyber space
{
_id: 'cyber:visualization:coolviz-789',
_source: {
type: 'visualization',
visualization: { visState: '{"type": "floppy_disk"}' },
updated_at: moment().subtract(7, 'months').startOf('day').toString(),
},
id: 'cyber:visualization:coolviz-789',
attributes: { visState: '{"type": "floppy_disk"}' },
updated_at: moment().subtract(7, 'months').startOf('day').toString(),
},
{
_id: 'cyber:visualization:coolviz-789',
_source: {
type: 'visualization',
visualization: { visState: '{"type": "floppy_disk"}' },
updated_at: moment().subtract(3, 'days').startOf('day').toString(),
},
id: 'cyber:visualization:coolviz-789',
attributes: { visState: '{"type": "floppy_disk"}' },
updated_at: moment().subtract(3, 'days').startOf('day').toString(),
},
{
_id: 'cyber:visualization:coolviz-123',
_source: {
type: 'visualization',
visualization: { visState: '{"type": "cave_painting"}' },
updated_at: moment().subtract(15, 'days').startOf('day').toString(),
},
id: 'cyber:visualization:coolviz-123',
attributes: { visState: '{"type": "cave_painting"}' },
updated_at: moment().subtract(15, 'days').startOf('day').toString(),
},
];

describe('Visualizations usage collector', () => {
const mockIndex = '';

const getMockCallCluster = (hits: unknown[]) =>
({
search: () => Promise.resolve({ body: { hits: { hits } } }) as unknown,
} as ElasticsearchClient);
const getMockCallCluster = (savedObjects: unknown[]) =>
(({
createPointInTimeFinder: jest.fn().mockResolvedValue({
close: jest.fn(),
find: function* asyncGenerator() {
yield { saved_objects: savedObjects };
},
}),
} as unknown) as SavedObjectsClientContract);

test('Returns undefined when no results found (undefined)', async () => {
const result = await getStats(getMockCallCluster(undefined as any), mockIndex);
const result = await getStats(getMockCallCluster(undefined as any));

expect(result).toBeUndefined();
});

test('Returns undefined when no results found (0 results)', async () => {
const result = await getStats(getMockCallCluster([]), mockIndex);
const result = await getStats(getMockCallCluster([]));
expect(result).toBeUndefined();
});

test('Summarizes visualizations response data', async () => {
const result = await getStats(getMockCallCluster(defaultMockSavedObjects), mockIndex);
const result = await getStats(getMockCallCluster(defaultMockSavedObjects));

expect(result).toMatchObject({
shell_beads: {
Expand Down Expand Up @@ -181,7 +152,7 @@ describe('Visualizations usage collector', () => {
},
};

const result = await getStats(getMockCallCluster(enlargedMockSavedObjects), mockIndex);
const result = await getStats(getMockCallCluster(enlargedMockSavedObjects));

expect(result).toMatchObject(expectedStats);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,11 @@
* Side Public License, v 1.
*/

import { countBy, get, groupBy, mapValues, max, min, values } from 'lodash';
import { ElasticsearchClient } from 'kibana/server';
import type { estypes } from '@elastic/elasticsearch';

import { countBy, groupBy, mapValues, max, min, values } from 'lodash';
import { getPastDays } from './get_past_days';
type ESResponse = estypes.SearchResponse<{ visualization: { visState: string } }>;

import type { SavedObjectsClientContract, SavedObjectsFindResult } from '../../../../core/server';
import type { SavedVisState } from '../../../visualizations/common';

interface VisSummary {
type: string;
Expand All @@ -35,61 +34,50 @@ export interface VisualizationUsage {
* Parse the response data into telemetry payload
*/
export async function getStats(
esClient: ElasticsearchClient,
index: string
soClient: SavedObjectsClientContract
): Promise<VisualizationUsage | undefined> {
const searchParams = {
size: 10000, // elasticsearch index.max_result_window default value
index,
ignoreUnavailable: true,
filterPath: [
'hits.hits._id',
'hits.hits._source.visualization',
'hits.hits._source.updated_at',
],
body: {
query: {
bool: { filter: { term: { type: 'visualization' } } },
},
},
};
const { body: esResponse } = await esClient.search<ESResponse>(searchParams);
const size = get(esResponse, 'hits.hits.length', 0);
if (size < 1) {
return;
}

// `map` to get the raw types
const visSummaries: VisSummary[] = esResponse.hits.hits.map((hit) => {
const spacePhrases = hit._id.toString().split(':');
const lastUpdated: string = get(hit, '_source.updated_at');
const space = spacePhrases.length === 3 ? spacePhrases[0] : 'default'; // if in a custom space, the format of a saved object ID is space:type:id
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this code was removed because we can get space in more legal way from so.namespaces

const visualization = get(hit, '_source.visualization', { visState: '{}' });
const visState: { type?: string } = JSON.parse(visualization.visState);
return {
type: visState.type || '_na_',
space,
past_days: getPastDays(lastUpdated),
};
const finder = await soClient.createPointInTimeFinder({
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my question applies to all refactored telemetries (#98903, #98914, #99023) where we replaced esClient with soClient.
During my testing today, I found that this finder returns only data for the default namespace. If I create visualizations for a non-default namespace, this will not be reflected in the telemetry.

I've tried to fix that by adding namespaces: ['*'], but with that argument pagination doesn't work. see:
image

@lukeelmers @afharo could you please help me a little cause we have a recommendation to use soClient ( #88604 (comment) ) for telemetry with pagination.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yeah we specifically decided against adding namespace support when using PIT until we had a concrete need for it. I guess we do now 🙂

I've created an issue to track this enhancement: #99044

@afharo WDYT, do we push forward on this work without soClient (or with using traditional pagination which typically works with <10k results), or do we wait until such a time as we have namespace support in #99044?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I vote to wait, because the code with soClient looks easier to me + we don't need to implement paging ourselves. If I'm not mistaken in esClient we don't have a simple wrapper like createPointInTimeFinder . + #88604 (comment)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we push forward on this work without soClient

IMO, it feels like we keep coming back and forth with this conversation. And, usually, AFAIK, the general recommendation is to use the SO Client whenever possible because using plain ES client requests would break if/when we change the internal implementations of SOs (underlying indices, ways to store them, ...).

I'm not fully familiar with the limitations of allowing namespaces: ['*']. I need to better understand the reasons exposed in #99044 before recommending migrating to esClient as a workaround :)

So ++ to wait :)

Copy link
Member

@afharo afharo May 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been playing around with this PR locally, and I think these changes would solve the issue: alexwizp#6

I tested it locally and it works as expected to me :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nevermind, I don't know how I thought it worked when I tried... It's def not working 🤷

Sorry about that! Then, we need to rely on the outcome of #99044

Copy link
Member

@lukeelmers lukeelmers May 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I guess the question remains, do we adjust the scope of this PR as follows?

  • Size parameter was changed from 10000 to 1000
  • Code was refactored to use soClient instead of esClient
  • The paging is used through the soClient.createPointInTimeFinder method
  • Code was optimized, extra cycles were removed

Basically, since @alexwizp has already started the work, is it worth merging with the same 10k size, only switching to SO client instead? Or do we close this PR and consider it blocked by #99044? WDYT @alexwizp @afharo

(Either way it doesn't actually resolve #93770, it would just be a small refactor if we moved ahead)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see a lot of sense just to switch esClient -> soClient. We started that work to resolve #93770.
But on the other hand I'm ok to use soClient with size === 10000. it adds more contributions to my piggy bank =)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

++ to what @alexwizp said. I guess the question is how long would it take us to fix #99044? If it's just a couple of weeks, probably it's worth putting this PR on hold and rebase when ready?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will add #99044 to our list of items to discuss for our next sprint. I don't think it would be a particularly large amount of work, it's mostly about deciding on a strategy with the security team & ensuring there's adequate test coverage.

type: 'visualization',
perPage: 1000,
namespaces: ['*'],
});

// organize stats per type
const visTypes = groupBy(visSummaries, 'type');
const visSummaries: VisSummary[] = [];

// get the final result
return mapValues(visTypes, (curr) => {
const total = curr.length;
const spacesBreakdown = countBy(curr, 'space');
const spaceCounts: number[] = values(spacesBreakdown);
for await (const response of finder.find()) {
(response.saved_objects || []).forEach((so: SavedObjectsFindResult<any>) => {
if (so.attributes?.visState) {
const visState: SavedVisState = JSON.parse(so.attributes.visState);

return {
total,
spaces_min: min(spaceCounts),
spaces_max: max(spaceCounts),
spaces_avg: total / spaceCounts.length,
saved_7_days_total: curr.filter((c) => c.past_days <= 7).length,
saved_30_days_total: curr.filter((c) => c.past_days <= 30).length,
saved_90_days_total: curr.filter((c) => c.past_days <= 90).length,
};
});
visSummaries.push({
type: visState.type ?? '_na_',
space: so.namespaces?.[0] ?? 'default',
past_days: getPastDays(so.updated_at!),
});
}
});
}
await finder.close();

if (visSummaries.length) {
// organize stats per type
const visTypes = groupBy(visSummaries, 'type');

// get the final result
return mapValues(visTypes, (curr) => {
const total = curr.length;
const spacesBreakdown = countBy(curr, 'space');
const spaceCounts: number[] = values(spacesBreakdown);

return {
total,
spaces_min: min(spaceCounts),
spaces_max: max(spaceCounts),
spaces_avg: total / spaceCounts.length,
saved_7_days_total: curr.filter((c) => c.past_days <= 7).length,
saved_30_days_total: curr.filter((c) => c.past_days <= 30).length,
saved_90_days_total: curr.filter((c) => c.past_days <= 90).length,
};
});
}
}
Loading