diff --git a/x-pack/plugins/event_log/server/es/context.mock.ts b/x-pack/plugins/event_log/server/es/context.mock.ts index e0c3e34f84431..c974a63a6407f 100644 --- a/x-pack/plugins/event_log/server/es/context.mock.ts +++ b/x-pack/plugins/event_log/server/es/context.mock.ts @@ -12,6 +12,8 @@ import { namesMock } from './names.mock'; import { IClusterClientAdapter } from './cluster_client_adapter'; import { clusterClientAdapterMock } from './cluster_client_adapter.mock'; +export const MOCK_RETRY_DELAY = 20; + const createContextMock = () => { const mock: jest.Mocked & { esAdapter: jest.Mocked; @@ -23,6 +25,7 @@ const createContextMock = () => { waitTillReady: jest.fn(async () => true), esAdapter: clusterClientAdapterMock.create(), initialized: true, + retryDelay: MOCK_RETRY_DELAY, }; return mock; }; diff --git a/x-pack/plugins/event_log/server/es/context.ts b/x-pack/plugins/event_log/server/es/context.ts index c98710a53f96d..614ea79d97257 100644 --- a/x-pack/plugins/event_log/server/es/context.ts +++ b/x-pack/plugins/event_log/server/es/context.ts @@ -12,14 +12,17 @@ import { initializeEs } from './init'; import { ClusterClientAdapter, IClusterClientAdapter } from './cluster_client_adapter'; import { createReadySignal, ReadySignal } from '../lib/ready_signal'; +export const RETRY_DELAY = 2000; + export interface EsContext { - logger: Logger; - esNames: EsNames; - esAdapter: IClusterClientAdapter; + readonly logger: Logger; + readonly esNames: EsNames; + readonly esAdapter: IClusterClientAdapter; initialize(): void; shutdown(): Promise; waitTillReady(): Promise; - initialized: boolean; + readonly initialized: boolean; + readonly retryDelay: number; } export interface EsError { @@ -44,12 +47,14 @@ class EsContextImpl implements EsContext { public esAdapter: IClusterClientAdapter; private readonly readySignal: ReadySignal; public initialized: boolean; + public readonly retryDelay: number; constructor(params: EsContextCtorParams) { this.logger = params.logger; this.esNames = getEsNames(params.indexNameRoot, params.kibanaVersion); this.readySignal = createReadySignal(); this.initialized = false; + this.retryDelay = RETRY_DELAY; this.esAdapter = new ClusterClientAdapter({ logger: params.logger, elasticsearchClientPromise: params.elasticsearchClientPromise, diff --git a/x-pack/plugins/event_log/server/es/init.test.ts b/x-pack/plugins/event_log/server/es/init.test.ts index 6a261438de1af..5165dc69cd7ef 100644 --- a/x-pack/plugins/event_log/server/es/init.test.ts +++ b/x-pack/plugins/event_log/server/es/init.test.ts @@ -5,7 +5,7 @@ * 2.0. */ -import { contextMock } from './context.mock'; +import { contextMock, MOCK_RETRY_DELAY } from './context.mock'; import { initializeEs, parseIndexAliases } from './init'; describe('initializeEs', () => { @@ -454,3 +454,91 @@ describe('parseIndexAliases', () => { ]); }); }); + +describe('retries', () => { + let esContext = contextMock.create(); + // set up context APIs to return defaults indicating already created + beforeEach(() => { + esContext = contextMock.create(); + esContext.esAdapter.getExistingLegacyIndexTemplates.mockResolvedValue({}); + esContext.esAdapter.getExistingIndices.mockResolvedValue({}); + esContext.esAdapter.getExistingIndexAliases.mockResolvedValue({}); + esContext.esAdapter.doesIlmPolicyExist.mockResolvedValue(true); + esContext.esAdapter.doesIndexTemplateExist.mockResolvedValue(true); + esContext.esAdapter.doesAliasExist.mockResolvedValue(true); + }); + + test('createIlmPolicyIfNotExists with 1 retry', async () => { + esContext.esAdapter.doesIlmPolicyExist.mockRejectedValueOnce(new Error('retry 1')); + + const timeStart = Date.now(); + await initializeEs(esContext); + const timeElapsed = Date.now() - timeStart; + + expect(timeElapsed).toBeGreaterThanOrEqual(MOCK_RETRY_DELAY); + + expect(esContext.esAdapter.getExistingLegacyIndexTemplates).toHaveBeenCalledTimes(1); + expect(esContext.esAdapter.doesIlmPolicyExist).toHaveBeenCalledTimes(2); + expect(esContext.esAdapter.doesIndexTemplateExist).toHaveBeenCalledTimes(1); + expect(esContext.esAdapter.doesAliasExist).toHaveBeenCalledTimes(1); + + const prefix = `eventLog initialization operation failed and will be retried: createIlmPolicyIfNotExists`; + expect(esContext.logger.warn).toHaveBeenCalledTimes(1); + expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 4 more times; error: retry 1`); + }); + + test('createIndexTemplateIfNotExists with 2 retries', async () => { + esContext.esAdapter.doesIndexTemplateExist.mockRejectedValueOnce(new Error('retry 2a')); + esContext.esAdapter.doesIndexTemplateExist.mockRejectedValueOnce(new Error('retry 2b')); + + const timeStart = Date.now(); + await initializeEs(esContext); + const timeElapsed = Date.now() - timeStart; + + expect(timeElapsed).toBeGreaterThanOrEqual(MOCK_RETRY_DELAY * (1 + 2)); + + expect(esContext.esAdapter.getExistingLegacyIndexTemplates).toHaveBeenCalledTimes(1); + expect(esContext.esAdapter.doesIlmPolicyExist).toHaveBeenCalledTimes(1); + expect(esContext.esAdapter.doesIndexTemplateExist).toHaveBeenCalledTimes(3); + expect(esContext.esAdapter.doesAliasExist).toHaveBeenCalledTimes(1); + + const prefix = `eventLog initialization operation failed and will be retried: createIndexTemplateIfNotExists`; + expect(esContext.logger.warn).toHaveBeenCalledTimes(2); + expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 4 more times; error: retry 2a`); + expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 3 more times; error: retry 2b`); + }); + + test('createInitialIndexIfNotExists', async () => { + esContext.esAdapter.doesAliasExist.mockRejectedValueOnce(new Error('retry 5a')); + esContext.esAdapter.doesAliasExist.mockRejectedValueOnce(new Error('retry 5b')); + esContext.esAdapter.doesAliasExist.mockRejectedValueOnce(new Error('retry 5c')); + esContext.esAdapter.doesAliasExist.mockRejectedValueOnce(new Error('retry 5d')); + esContext.esAdapter.doesAliasExist.mockRejectedValueOnce(new Error('retry 5e')); + // make sure it only tries 5 times - this one should not be reported + esContext.esAdapter.doesAliasExist.mockRejectedValueOnce(new Error('retry 5f')); + + const timeStart = Date.now(); + await initializeEs(esContext); + const timeElapsed = Date.now() - timeStart; + + expect(timeElapsed).toBeGreaterThanOrEqual(MOCK_RETRY_DELAY * (1 + 2 + 4 + 8)); + + expect(esContext.esAdapter.getExistingLegacyIndexTemplates).toHaveBeenCalledTimes(1); + expect(esContext.esAdapter.doesIlmPolicyExist).toHaveBeenCalledTimes(1); + expect(esContext.esAdapter.doesIndexTemplateExist).toHaveBeenCalledTimes(1); + expect(esContext.esAdapter.doesAliasExist).toHaveBeenCalledTimes(5); + + const prefix = `eventLog initialization operation failed and will be retried: createInitialIndexIfNotExists`; + expect(esContext.logger.warn).toHaveBeenCalledTimes(5); + expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 4 more times; error: retry 5a`); + expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 3 more times; error: retry 5b`); + expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 2 more times; error: retry 5c`); + expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 1 more times; error: retry 5d`); + expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 0 more times; error: retry 5e`); + + expect(esContext.logger.error).toHaveBeenCalledTimes(1); + expect(esContext.logger.error).toHaveBeenCalledWith( + `error initializing elasticsearch resources: retry 5e` + ); + }); +}); diff --git a/x-pack/plugins/event_log/server/es/init.ts b/x-pack/plugins/event_log/server/es/init.ts index 4440102fcd381..da98a18e9d758 100644 --- a/x-pack/plugins/event_log/server/es/init.ts +++ b/x-pack/plugins/event_log/server/es/init.ts @@ -8,9 +8,12 @@ import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; import { asyncForEach } from '@kbn/std'; import { groupBy } from 'lodash'; +import pRetry, { FailedAttemptError } from 'p-retry'; import { getIlmPolicy, getIndexTemplate } from './documents'; import { EsContext } from './context'; +const MAX_RETRY_DELAY = 30000; + export async function initializeEs(esContext: EsContext): Promise { esContext.logger.debug('initializing elasticsearch resources starting'); @@ -28,10 +31,33 @@ export async function initializeEs(esContext: EsContext): Promise { async function initializeEsResources(esContext: EsContext) { const steps = new EsInitializationSteps(esContext); - await steps.setExistingAssetsToHidden(); - await steps.createIlmPolicyIfNotExists(); - await steps.createIndexTemplateIfNotExists(); - await steps.createInitialIndexIfNotExists(); + // today, setExistingAssetsToHidden() never throws, but just in case ... + await retry(steps.setExistingAssetsToHidden); + await retry(steps.createIlmPolicyIfNotExists); + await retry(steps.createIndexTemplateIfNotExists); + await retry(steps.createInitialIndexIfNotExists); + + async function retry(stepMethod: () => Promise): Promise { + // call the step method with retry options via p-retry + await pRetry(() => stepMethod.call(steps), getRetryOptions(esContext, stepMethod.name)); + } +} + +function getRetryOptions(esContext: EsContext, operation: string) { + const logger = esContext.logger; + // should retry on the order of 2s, 4s, 8s, 16s + // see: https://github.com/tim-kos/node-retry#retryoperationoptions + return { + minTimeout: esContext.retryDelay, + maxTimeout: MAX_RETRY_DELAY, + retries: 4, + factor: 2, + randomize: true, + onFailedAttempt: (err: FailedAttemptError) => { + const message = `eventLog initialization operation failed and will be retried: ${operation}; ${err.retriesLeft} more times; error: ${err.message}`; + logger.warn(message); + }, + }; } export interface ParsedIndexAlias extends estypes.IndicesAliasDefinition {