From 620171140803e7054535b8651d29b9c0be529f57 Mon Sep 17 00:00:00 2001 From: Victor Martinez Date: Tue, 6 Jul 2021 13:17:59 +0100 Subject: [PATCH 1/2] CI: retry the step only --- Jenkinsfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 3070fba1903..4f9e4d9a2a2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -562,7 +562,11 @@ def target(Map args = [:]) { // make commands use -C while mage commands require the dir(folder) // let's support this scenario with the location variable. dir(isMage ? directory : '') { - cmd(label: "${args.id?.trim() ? args.id : env.STAGE_NAME} - ${command}", script: "${command}") + // Easy proposal to retry the same command to bypass any kind of flakiness. + // Downside: genuine failures will be repeated. + retry(3) { + cmd(label: "${args.id?.trim() ? args.id : env.STAGE_NAME} - ${command}", script: "${command}") + } } // TODO: // Packaging should happen only after the e2e? From d4290ec45723125717fc3ba41f47bd93ea84d260 Mon Sep 17 00:00:00 2001 From: Victor Martinez Date: Tue, 6 Jul 2021 14:26:17 +0100 Subject: [PATCH 2/2] Enable retry flag for more granularity --- Jenkinsfile | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 4f9e4d9a2a2..36ab95e4126 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -555,6 +555,7 @@ def target(Map args = [:]) { def isE2E = args.e2e?.get('enabled', false) def isPackaging = args.get('package', false) def dockerArch = args.get('dockerArch', 'amd64') + def enableRetry = args.get('enableRetry', false) withNode(labels: args.label, forceWorkspace: true){ withGithubNotify(context: "${context}") { withBeatsEnv(archive: true, withModule: withModule, directory: directory, id: args.id) { @@ -562,9 +563,13 @@ def target(Map args = [:]) { // make commands use -C while mage commands require the dir(folder) // let's support this scenario with the location variable. dir(isMage ? 
directory : '') { - // Easy proposal to retry the same command to bypass any kind of flakiness. - // Downside: genuine failures will be repeated. - retry(3) { + if (enableRetry) { + // Retry the same command to bypass any kind of flakiness. + // Downside: genuine failures will be repeated. + retry(3) { + cmd(label: "${args.id?.trim() ? args.id : env.STAGE_NAME} - ${command}", script: "${command}") + } + } else { cmd(label: "${args.id?.trim() ? args.id : env.STAGE_NAME} - ${command}", script: "${command}") } } @@ -1028,11 +1033,35 @@ class RunCommand extends co.elastic.beats.BeatsFunction { public run(Map args = [:]){ steps.stageStatusCache(args){ def withModule = args.content.get('withModule', false) + // + // What's the retry policy for fighting the flakiness: + // 1) Lint/Packaging/Cloud/k8sTest stages don't retry, since their failures are normally legitimate + // 2) All the remaining stages will retry the command within the same worker/workspace if any failure occurs + // + // NOTE: stage: lint uses target function while cloud and k8sTest use a different function + // + def enableRetry = (args.content.get('stage', 'enabled').toLowerCase().equals('lint') || + args?.content?.containsKey('packaging-arm') || + args?.content?.containsKey('packaging-linux')) ?
false : true if(args?.content?.containsKey('make')) { - steps.target(context: args.context, command: args.content.make, directory: args.project, label: args.label, withModule: withModule, isMage: false, id: args.id) + steps.target(context: args.context, + command: args.content.make, + directory: args.project, + label: args.label, + withModule: withModule, + isMage: false, + id: args.id, + enableRetry: enableRetry) } if(args?.content?.containsKey('mage')) { - steps.target(context: args.context, command: args.content.mage, directory: args.project, label: args.label, withModule: withModule, isMage: true, id: args.id) + steps.target(context: args.context, + command: args.content.mage, + directory: args.project, + label: args.label, + withModule: withModule, + isMage: true, + id: args.id, + enableRetry: enableRetry) } if(args?.content?.containsKey('packaging-arm')) { steps.packagingArm(context: args.context, @@ -1043,7 +1072,8 @@ class RunCommand extends co.elastic.beats.BeatsFunction { id: args.id, e2e: args.content.get('e2e'), package: true, - dockerArch: 'arm64') + dockerArch: 'arm64', + enableRetry: enableRetry) } if(args?.content?.containsKey('packaging-linux')) { steps.packagingLinux(context: args.context, @@ -1054,7 +1084,8 @@ class RunCommand extends co.elastic.beats.BeatsFunction { id: args.id, e2e: args.content.get('e2e'), package: true, - dockerArch: 'amd64') + dockerArch: 'amd64', + enableRetry: enableRetry) } if(args?.content?.containsKey('k8sTest')) { steps.k8sTest(context: args.context, versions: args.content.k8sTest.split(','), label: args.label, id: args.id)