From 61211f9db1568190dd46b7469fa79eb6530bba73 Mon Sep 17 00:00:00 2001 From: Ezequiel Muns Date: Wed, 29 Jun 2022 17:49:59 +0200 Subject: [PATCH] fix: Add workflow failures before hooks run. Fixes #8882 (#9009) Signed-off-by: Ezequiel Muns --- USERS.md | 1 + workflow/controller/hooks_test.go | 57 ++++++++++++++++++++++++++++++- workflow/controller/operator.go | 44 ++++++++++++------------ 3 files changed, 79 insertions(+), 23 deletions(-) diff --git a/USERS.md b/USERS.md index 72b0f8ae7423..f5d53999bc8f 100644 --- a/USERS.md +++ b/USERS.md @@ -83,6 +83,7 @@ Currently, the following organizations are **officially** using Argo Workflows: 1. [H2O.ai](https://h2o.ai/) 1. [Habx](https://www.habx.com/) 1. [Helio](https://helio.exchange) +1. [Hemisphere Digital](https://hemisphere.digital) 1. [HOVER](https://hover.to) 1. [HSBC](https://hsbc.com) 1. [IBM](https://ibm.com) diff --git a/workflow/controller/hooks_test.go b/workflow/controller/hooks_test.go index 9751876ace20..d60b0e3bbc84 100644 --- a/workflow/controller/hooks_test.go +++ b/workflow/controller/hooks_test.go @@ -6,8 +6,10 @@ import ( "testing" "github.com/stretchr/testify/assert" + apiv1 "k8s.io/api/core/v1" wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" + "github.com/argoproj/argo-workflows/v3/workflow/common" ) func TestExecuteWfLifeCycleHook(t *testing.T) { @@ -324,7 +326,7 @@ spec: parameters: - name: message value: test - entrypoint: main + entrypoint: main workflowTemplateRef: name: workflow-template-whalesay-template status: @@ -932,3 +934,56 @@ status: node := woc.wf.Status.Nodes.FindByDisplayName("lifecycle-hook-fh7t4.hooks.Failed") assert.NotNil(t, node) } + +func TestWfHookHasFailures(t *testing.T) { + wf := wfv1.MustUnmarshalWorkflow(` +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + name: hook-failures + namespace: argo +spec: + entrypoint: intentional-fail + hooks: + failure: + expression: workflow.status == "Failed" + template: message + arguments: + parameters: + - name: message + value: | + Workflow {{ workflow.name }} {{ workflow.status }} {{ workflow.failures }} + templates: + - name: intentional-fail + container: + image: alpine:latest + command: [sh, -c] + args: ["echo intentional failure; exit 1"] + - name: message + inputs: + parameters: + - name: message + script: + image: alpine:latest + command: [sh] + source: | + echo {{ inputs.parameters.message }} +`) + + cancel, controller := newController(wf) + defer cancel() + + ctx := context.Background() + woc := newWorkflowOperationCtx(wf, controller) + woc.operate(ctx) + makePodsPhase(ctx, woc, apiv1.PodFailed) + + woc = newWorkflowOperationCtx(woc.wf, controller) + woc.operate(ctx) + node := woc.wf.Status.Nodes.FindByDisplayName("hook-failures.hooks.failure") + assert.NotNil(t, node) + assert.Contains(t, + woc.globalParams[common.GlobalVarWorkflowFailures], + `[{\"displayName\":\"hook-failures\",\"message\":\"Pod failed\",\"templateName\":\"intentional-fail\",\"phase\":\"Failed\",\"podName\":\"hook-failures\"`, + ) +} diff --git a/workflow/controller/operator.go b/workflow/controller/operator.go index dc6b9c80ada8..ebe860d59c0a 100644 --- a/workflow/controller/operator.go +++ b/workflow/controller/operator.go @@ -360,6 +360,28 @@ func (woc *wfOperationCtx) operate(ctx context.Context) { woc.globalParams[common.GlobalVarWorkflowStatus] = string(workflowStatus) + var failures []failedNodeStatus + for _, node := range woc.wf.Status.Nodes { + if node.Phase == wfv1.NodeFailed || node.Phase == wfv1.NodeError { + failures = append(failures, + failedNodeStatus{ + DisplayName: node.DisplayName, + Message: node.Message, + TemplateName: node.TemplateName, + Phase: string(node.Phase), + PodName: node.ID, + FinishedAt: node.FinishedAt, + }) + } + } + failedNodeBytes, err := json.Marshal(failures) + if err != nil { + woc.log.Errorf("Error marshalling failed nodes list: %+v", err) + // No need to return here + } + // This strconv.Quote is necessary so that the escaped quotes are not removed during parameter substitution + woc.globalParams[common.GlobalVarWorkflowFailures] = strconv.Quote(string(failedNodeBytes)) + err = woc.executeWfLifeCycleHook(ctx, tmplCtx) if err != nil { woc.markNodeError(node.Name, err) @@ -374,28 +396,6 @@ func (woc *wfOperationCtx) operate(ctx context.Context) { var onExitNode *wfv1.NodeStatus if woc.execWf.Spec.HasExitHook() && woc.GetShutdownStrategy().ShouldExecute(true) { - var failures []failedNodeStatus - for _, node := range woc.wf.Status.Nodes { - if node.Phase == wfv1.NodeFailed || node.Phase == wfv1.NodeError { - failures = append(failures, - failedNodeStatus{ - DisplayName: node.DisplayName, - Message: node.Message, - TemplateName: node.TemplateName, - Phase: string(node.Phase), - PodName: node.ID, - FinishedAt: node.FinishedAt, - }) - } - } - failedNodeBytes, err := json.Marshal(failures) - if err != nil { - woc.log.Errorf("Error marshalling failed nodes list: %+v", err) - // No need to return here - } - // This strconv.Quote is necessary so that the escaped quotes are not removed during parameter substitution - woc.globalParams[common.GlobalVarWorkflowFailures] = strconv.Quote(string(failedNodeBytes)) - woc.log.Infof("Running OnExit handler: %s", woc.execWf.Spec.OnExit) onExitNodeName := common.GenerateOnExitNodeName(woc.wf.ObjectMeta.Name) exitHook := woc.execWf.Spec.GetExitHook(woc.execWf.Spec.Arguments)