From 49a3b5e0454aceabb4d2b81aadc88899d5d5b13c Mon Sep 17 00:00:00 2001 From: smile-luobin Date: Thu, 4 Aug 2022 18:00:22 +0800 Subject: [PATCH 1/5] feat: fix bugs in retry workflow if failed node has children nodes. Signed-off-by: smile-luobin --- workflow/util/util.go | 18 +++ workflow/util/util_test.go | 277 +++++++++++++++++++++++++++++++++++++ 2 files changed, 295 insertions(+) diff --git a/workflow/util/util.go b/workflow/util/util.go index 6b224de67eba..2567e088d700 100644 --- a/workflow/util/util.go +++ b/workflow/util/util.go @@ -745,6 +745,15 @@ func convertNodeID(newWf *wfv1.Workflow, regex *regexp.Regexp, oldNodeID string, return newWf.NodeID(newNodeName) } +func getDescendantNodes(wf *wfv1.Workflow, node wfv1.NodeStatus) []string { + var descendantNodes []string + descendantNodes = append(descendantNodes, node.Children...) + for _, child := range node.Children { + descendantNodes = append(descendantNodes, getDescendantNodes(wf, wf.Status.Nodes[child])...) + } + return descendantNodes +} + // FormulateRetryWorkflow formulates a previous workflow to be retried, deleting all failed steps as well as the onExit node (and children) func FormulateRetryWorkflow(ctx context.Context, wf *wfv1.Workflow, restartSuccessful bool, nodeFieldSelector string, parameters []string) (*wfv1.Workflow, []string, error) { @@ -836,6 +845,10 @@ func FormulateRetryWorkflow(ctx context.Context, wf *wfv1.Workflow, restartSucce continue } else { deletedNodes[node.ID] = true + descendantNodes := getDescendantNodes(wf, node) + for _, descendantNode := range descendantNodes { + deletedNodes[descendantNode] = true + } } // do not add this status to the node. pretend as if this node never existed. default: @@ -857,6 +870,11 @@ func FormulateRetryWorkflow(ctx context.Context, wf *wfv1.Workflow, restartSucce if len(deletedNodes) > 0 { for _, node := range newWF.Status.Nodes { + if deletedNodes[node.ID] { + delete(newWF.Status.Nodes, node.ID) + continue + } + var newChildren []string for _, child := range node.Children { if !deletedNodes[child] { diff --git a/workflow/util/util_test.go b/workflow/util/util_test.go index 8f42d4b0f03a..3ea915a92641 100644 --- a/workflow/util/util_test.go +++ b/workflow/util/util_test.go @@ -1048,3 +1048,280 @@ func TestGetTemplateFromNode(t *testing.T) { assert.Equal(t, tc.expectedTemplateName, actual) } } + +var retryWorkflowWithFailedNodeHasChildrenNodes = ` +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + annotations: + workflows.argoproj.io/pod-name-format: v1 + creationTimestamp: "2022-08-04T08:35:10Z" + generateName: test-pipeline- + generation: 10 + labels: + workflows.argoproj.io/completed: "true" + workflows.argoproj.io/phase: Failed + workflows.argoproj.io/resubmitted-from-workflow: test-pipeline-t5h77 + managedFields: + - apiVersion: argoproj.io/v1alpha1 + fieldsType: FieldsV1 + fieldsV1: + f:metadata: + f:annotations: + .: {} + f:workflows.argoproj.io/pod-name-format: {} + f:generateName: {} + f:labels: + .: {} + f:workflows.argoproj.io/resubmitted-from-workflow: {} + f:spec: {} + manager: argo + operation: Update + time: "2022-08-04T08:35:10Z" + - apiVersion: argoproj.io/v1alpha1 + fieldsType: FieldsV1 + fieldsV1: + f:metadata: + f:labels: + f:workflows.argoproj.io/completed: {} + f:workflows.argoproj.io/phase: {} + f:status: {} + manager: workflow-controller + operation: Update + time: "2022-08-04T08:36:31Z" + name: test-pipeline-bnzwv + namespace: argo-system + resourceVersion: "2588589" + uid: 1ce08a8a-0d95-46a3-b7d8-b6e04a0e6136 +spec: + arguments: {} + entrypoint: test-pipeline-dag + templates: + - dag: + tasks: + - arguments: {} + name: t1 + template: succeeded + - arguments: {} + depends: t1 + name: t2 + template: failed + - arguments: {} + depends: t2 || t2.Failed + name: t3 + template: succeeded + - arguments: {} + depends: t3 + name: t4-1 + template: succeeded + - arguments: {} + depends: t3 + name: t4-2 + template: succeeded + - arguments: {} + depends: t3 + name: t4-3 + template: failed + inputs: {} + metadata: {} + name: test-pipeline-dag + outputs: {} + - container: + command: + - "true" + image: alpine + name: "" + resources: {} + inputs: {} + metadata: {} + name: succeeded + outputs: {} + - container: + command: + - "false" + image: alpine + name: "" + resources: {} + inputs: {} + metadata: {} + name: failed + outputs: {} +status: + artifactRepositoryRef: + artifactRepository: + s3: + accessKeySecret: + key: accessKey + name: minio-secret + bucket: argo-artifact + endpoint: minio-service.middleware-system.svc:9000 + insecure: true + secretKeySecret: + key: secretKey + name: minio-secret + default: true + conditions: + - status: "False" + type: PodRunning + - status: "True" + type: Completed + finishedAt: "2022-08-04T08:36:31Z" + nodes: + test-pipeline-bnzwv: + children: + - test-pipeline-bnzwv-929756297 + displayName: test-pipeline-bnzwv + finishedAt: "2022-08-04T08:36:31Z" + id: test-pipeline-bnzwv + name: test-pipeline-bnzwv + outboundNodes: + - test-pipeline-bnzwv-748480356 + - test-pipeline-bnzwv-798813213 + - test-pipeline-bnzwv-782035594 + phase: Failed + progress: 4/6 + resourcesDuration: + cpu: 32 + memory: 32 + startedAt: "2022-08-04T08:35:10Z" + templateName: test-pipeline-dag + templateScope: local/test-pipeline-bnzwv + type: DAG + test-pipeline-bnzwv-748480356: + boundaryID: test-pipeline-bnzwv + displayName: t4-1 + finishedAt: "2022-08-04T08:36:19Z" + hostNodeName: node2 + id: test-pipeline-bnzwv-748480356 + name: test-pipeline-bnzwv.t4-1 + outputs: + exitCode: "0" + phase: Succeeded + progress: 1/1 + resourcesDuration: + cpu: 5 + memory: 5 + startedAt: "2022-08-04T08:36:11Z" + templateName: succeeded + templateScope: local/test-pipeline-bnzwv + type: Pod + test-pipeline-bnzwv-782035594: + boundaryID: test-pipeline-bnzwv + displayName: t4-3 + finishedAt: "2022-08-04T08:36:16Z" + hostNodeName: node1 + id: test-pipeline-bnzwv-782035594 + message: Error (exit code 1) + name: test-pipeline-bnzwv.t4-3 + outputs: + exitCode: "1" + phase: Failed + progress: 0/1 + resourcesDuration: + cpu: 4 + memory: 4 + startedAt: "2022-08-04T08:36:11Z" + templateName: failed + templateScope: local/test-pipeline-bnzwv + type: Pod + test-pipeline-bnzwv-798813213: + boundaryID: test-pipeline-bnzwv + displayName: t4-2 + finishedAt: "2022-08-04T08:36:19Z" + hostNodeName: node1 + id: test-pipeline-bnzwv-798813213 + name: test-pipeline-bnzwv.t4-2 + outputs: + exitCode: "0" + phase: Succeeded + progress: 1/1 + resourcesDuration: + cpu: 8 + memory: 8 + startedAt: "2022-08-04T08:36:11Z" + templateName: succeeded + templateScope: local/test-pipeline-bnzwv + type: Pod + test-pipeline-bnzwv-879423440: + boundaryID: test-pipeline-bnzwv + children: + - test-pipeline-bnzwv-896201059 + displayName: t2 + finishedAt: "2022-08-04T08:35:39Z" + hostNodeName: node2 + id: test-pipeline-bnzwv-879423440 + message: Error (exit code 1) + name: test-pipeline-bnzwv.t2 + outputs: + exitCode: "1" + phase: Failed + progress: 0/1 + resourcesDuration: + cpu: 5 + memory: 5 + startedAt: "2022-08-04T08:35:30Z" + templateName: failed + templateScope: local/test-pipeline-bnzwv + type: Pod + test-pipeline-bnzwv-896201059: + boundaryID: test-pipeline-bnzwv + children: + - test-pipeline-bnzwv-748480356 + - test-pipeline-bnzwv-798813213 + - test-pipeline-bnzwv-782035594 + displayName: t3 + finishedAt: "2022-08-04T08:36:00Z" + hostNodeName: node2 + id: test-pipeline-bnzwv-896201059 + name: test-pipeline-bnzwv.t3 + outputs: + exitCode: "0" + phase: Succeeded + progress: 1/1 + resourcesDuration: + cpu: 5 + memory: 5 + startedAt: "2022-08-04T08:35:50Z" + templateName: succeeded + templateScope: local/test-pipeline-bnzwv + type: Pod + test-pipeline-bnzwv-929756297: + boundaryID: test-pipeline-bnzwv + children: + - test-pipeline-bnzwv-879423440 + displayName: t1 + finishedAt: "2022-08-04T08:35:18Z" + hostNodeName: node2 + id: test-pipeline-bnzwv-929756297 + name: test-pipeline-bnzwv.t1 + outputs: + exitCode: "0" + phase: Succeeded + progress: 1/1 + resourcesDuration: + cpu: 5 + memory: 5 + startedAt: "2022-08-04T08:35:10Z" + templateName: succeeded + templateScope: local/test-pipeline-bnzwv + type: Pod + phase: Failed + progress: 4/6 + resourcesDuration: + cpu: 32 + memory: 32 + startedAt: "2022-08-04T08:35:10Z" +` + +func TestRetryWorkflowWithFailedNodeHasChildrenNodes(t *testing.T) { + ctx := context.Background() + wf := wfv1.MustUnmarshalWorkflow(retryWorkflowWithFailedNodeHasChildrenNodes) + + wf, _, err := FormulateRetryWorkflow(ctx, wf, false, "", nil) + assert.NoError(t, err) + assert.Equal(t, 2, len(wf.Status.Nodes)) + t2Node := wf.Status.Nodes.FindByDisplayName("t2") + assert.Nil(t, t2Node) + t3Node := wf.Status.Nodes.FindByDisplayName("t3") + assert.Nil(t, t3Node) +} From 7e7cd4bb91ad641a83d6ceab177be667f5eee8a4 Mon Sep 17 00:00:00 2001 From: smile-luobin Date: Fri, 5 Aug 2022 16:56:50 +0800 Subject: [PATCH 2/5] Fix bugs in retryWorkflow Signed-off-by: smile-luobin --- workflow/util/util.go | 25 ++++++++++++++++++------- workflow/util/util_test.go | 12 +++++++----- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/workflow/util/util.go b/workflow/util/util.go index 2567e088d700..a07e23a86825 100644 --- a/workflow/util/util.go +++ b/workflow/util/util.go @@ -745,9 +745,11 @@ func convertNodeID(newWf *wfv1.Workflow, regex *regexp.Regexp, oldNodeID string, return newWf.NodeID(newNodeName) } -func getDescendantNodes(wf *wfv1.Workflow, node wfv1.NodeStatus) []string { - var descendantNodes []string - descendantNodes = append(descendantNodes, node.Children...) +func getDescendantNodes(wf *wfv1.Workflow, node wfv1.NodeStatus) []wfv1.NodeStatus { + var descendantNodes []wfv1.NodeStatus + for _, child := range node.Children { + descendantNodes = append(descendantNodes, wf.Status.Nodes[child]) + } for _, child := range node.Children { descendantNodes = append(descendantNodes, getDescendantNodes(wf, wf.Status.Nodes[child])...) } @@ -845,10 +847,6 @@ func FormulateRetryWorkflow(ctx context.Context, wf *wfv1.Workflow, restartSucce continue } else { deletedNodes[node.ID] = true - descendantNodes := getDescendantNodes(wf, node) - for _, descendantNode := range descendantNodes { - deletedNodes[descendantNode] = true - } } // do not add this status to the node. pretend as if this node never existed. default: @@ -861,6 +859,19 @@ func FormulateRetryWorkflow(ctx context.Context, wf *wfv1.Workflow, restartSucce version := GetWorkflowPodNameVersion(wf) podName := PodName(wf.Name, node.Name, templateName, node.ID, version) podsToDelete = append(podsToDelete, podName) + + deletedNodes[node.ID] = true + + descendantNodes := getDescendantNodes(wf, node) + for _, descendantNode := range descendantNodes { + deletedNodes[descendantNode.ID] = true + if descendantNode.Type == wfv1.NodeTypePod { + templateName := getTemplateFromNode(descendantNode) + version := GetWorkflowPodNameVersion(wf) + podName := PodName(wf.Name, descendantNode.Name, templateName, descendantNode.ID, version) + podsToDelete = append(podsToDelete, podName) + } + } } else if node.Name == wf.ObjectMeta.Name { newNode := node.DeepCopy() newWF.Status.Nodes[newNode.ID] = resetNode(*newNode) diff --git a/workflow/util/util_test.go b/workflow/util/util_test.go index 3ea915a92641..1407c0700654 100644 --- a/workflow/util/util_test.go +++ b/workflow/util/util_test.go @@ -920,13 +920,14 @@ func TestFormulateRetryWorkflow(t *testing.T) { assert.NoError(t, err) wf, _, err = FormulateRetryWorkflow(ctx, wf, true, "id=3", nil) if assert.NoError(t, err) { - if assert.Len(t, wf.Status.Nodes, 5) { + // node #3 & node #4 are deleted and will be recreated. so only 3 nodes in wf.Status.Nodes + if assert.Len(t, wf.Status.Nodes, 3) { assert.Equal(t, wfv1.NodeSucceeded, wf.Status.Nodes["my-nested-dag-1"].Phase) // These should all be running since the child node #3 belongs up to node #1. assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["1"].Phase) assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["2"].Phase) - assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["3"].Phase) - assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["4"].Phase) + //assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["3"].Phase) + //assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["4"].Phase) } } }) @@ -950,14 +951,15 @@ func TestFormulateRetryWorkflow(t *testing.T) { assert.NoError(t, err) wf, _, err = FormulateRetryWorkflow(ctx, wf, true, "", nil) if assert.NoError(t, err) { - if assert.Len(t, wf.Status.Nodes, 5) { + // node #4 is deleted and will be recreated. so only 4 nodes in wf.Status.Nodes + if assert.Len(t, wf.Status.Nodes, 4) { assert.Equal(t, wfv1.NodeSucceeded, wf.Status.Nodes["my-nested-dag-2"].Phase) // This should be running since it's node #4's parent node. assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["1"].Phase) // This should be running since it's node #1's child node and node #1 is being retried. assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["2"].Phase) assert.Equal(t, wfv1.NodeSucceeded, wf.Status.Nodes["3"].Phase) - assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["4"].Phase) + //assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["4"].Phase) } } }) From ca7f8aa24e688c5ff82317a2792782064385dca2 Mon Sep 17 00:00:00 2001 From: smile-luobin Date: Sat, 6 Aug 2022 22:59:15 +0800 Subject: [PATCH 3/5] refactor the method that gets the descendantNodes Signed-off-by: smile-luobin --- workflow/util/util.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/workflow/util/util.go b/workflow/util/util.go index a07e23a86825..b3848d8037ee 100644 --- a/workflow/util/util.go +++ b/workflow/util/util.go @@ -745,15 +745,13 @@ func convertNodeID(newWf *wfv1.Workflow, regex *regexp.Regexp, oldNodeID string, return newWf.NodeID(newNodeName) } -func getDescendantNodes(wf *wfv1.Workflow, node wfv1.NodeStatus) []wfv1.NodeStatus { - var descendantNodes []wfv1.NodeStatus +func getDescendantNodeIDs(wf *wfv1.Workflow, node wfv1.NodeStatus) []string { + var descendantNodeIDs []string + descendantNodeIDs = append(descendantNodeIDs, node.Children...) for _, child := range node.Children { - descendantNodes = append(descendantNodes, wf.Status.Nodes[child]) + descendantNodeIDs = append(descendantNodeIDs, getDescendantNodeIDs(wf, wf.Status.Nodes[child])...) } - for _, child := range node.Children { - descendantNodes = append(descendantNodes, getDescendantNodes(wf, wf.Status.Nodes[child])...) - } - return descendantNodes + return descendantNodeIDs } // FormulateRetryWorkflow formulates a previous workflow to be retried, deleting all failed steps as well as the onExit node (and children) @@ -862,9 +860,10 @@ func FormulateRetryWorkflow(ctx context.Context, wf *wfv1.Workflow, restartSucce deletedNodes[node.ID] = true - descendantNodes := getDescendantNodes(wf, node) - for _, descendantNode := range descendantNodes { - deletedNodes[descendantNode.ID] = true + descendantNodeIDs := getDescendantNodeIDs(wf, node) + for _, descendantNodeID := range descendantNodeIDs { + deletedNodes[descendantNodeID] = true + descendantNode := wf.Status.Nodes[descendantNodeID] if descendantNode.Type == wfv1.NodeTypePod { templateName := getTemplateFromNode(descendantNode) version := GetWorkflowPodNameVersion(wf) From 58cb0c8077e2fa9a185a8538eeaff70ed368be2d Mon Sep 17 00:00:00 2001 From: smile-luobin Date: Thu, 11 Aug 2022 14:09:01 +0800 Subject: [PATCH 4/5] do some refactoring Signed-off-by: smile-luobin --- workflow/util/util.go | 23 ++++---- workflow/util/util_test.go | 104 +++---------------------------------- 2 files changed, 20 insertions(+), 107 deletions(-) diff --git a/workflow/util/util.go b/workflow/util/util.go index b3848d8037ee..df082588dfd6 100644 --- a/workflow/util/util.go +++ b/workflow/util/util.go @@ -754,6 +754,17 @@ func getDescendantNodeIDs(wf *wfv1.Workflow, node wfv1.NodeStatus) []string { return descendantNodeIDs } +func deletePodNodeDuringRetryWorkflow(wf *wfv1.Workflow, node wfv1.NodeStatus, deletedPods map[string]bool, podsToDelete []string) []string { + templateName := getTemplateFromNode(node) + version := GetWorkflowPodNameVersion(wf) + podName := PodName(wf.Name, node.Name, templateName, node.ID, version) + if _, ok := deletedPods[podName]; !ok { + deletedPods[podName] = true + podsToDelete = append(podsToDelete, podName) + } + return podsToDelete +} + // FormulateRetryWorkflow formulates a previous workflow to be retried, deleting all failed steps as well as the onExit node (and children) func FormulateRetryWorkflow(ctx context.Context, wf *wfv1.Workflow, restartSuccessful bool, nodeFieldSelector string, parameters []string) (*wfv1.Workflow, []string, error) { @@ -800,6 +811,7 @@ func FormulateRetryWorkflow(ctx context.Context, wf *wfv1.Workflow, restartSucce // Iterate the previous nodes. If it was successful Pod carry it forward deletedNodes := make(map[string]bool) + deletedPods := make(map[string]bool) var podsToDelete []string for _, node := range wf.Status.Nodes { doForceResetNode := false @@ -853,22 +865,15 @@ func FormulateRetryWorkflow(ctx context.Context, wf *wfv1.Workflow, restartSucce } if node.Type == wfv1.NodeTypePod { - templateName := getTemplateFromNode(node) - version := GetWorkflowPodNameVersion(wf) - podName := PodName(wf.Name, node.Name, templateName, node.ID, version) - podsToDelete = append(podsToDelete, podName) - deletedNodes[node.ID] = true + podsToDelete = deletePodNodeDuringRetryWorkflow(wf, node, deletedPods, podsToDelete) descendantNodeIDs := getDescendantNodeIDs(wf, node) for _, descendantNodeID := range descendantNodeIDs { deletedNodes[descendantNodeID] = true descendantNode := wf.Status.Nodes[descendantNodeID] if descendantNode.Type == wfv1.NodeTypePod { - templateName := getTemplateFromNode(descendantNode) - version := GetWorkflowPodNameVersion(wf) - podName := PodName(wf.Name, descendantNode.Name, templateName, descendantNode.ID, version) - podsToDelete = append(podsToDelete, podName) + podsToDelete = deletePodNodeDuringRetryWorkflow(wf, node, deletedPods, podsToDelete) } } } else if node.Name == wf.ObjectMeta.Name { diff --git a/workflow/util/util_test.go b/workflow/util/util_test.go index 1407c0700654..2eec8ecb2767 100644 --- a/workflow/util/util_test.go +++ b/workflow/util/util_test.go @@ -926,8 +926,6 @@ func TestFormulateRetryWorkflow(t *testing.T) { // These should all be running since the child node #3 belongs up to node #1. assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["1"].Phase) assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["2"].Phase) - //assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["3"].Phase) - //assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["4"].Phase) } } }) @@ -959,7 +957,6 @@ func TestFormulateRetryWorkflow(t *testing.T) { // This should be running since it's node #1's child node and node #1 is being retried. assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["2"].Phase) assert.Equal(t, wfv1.NodeSucceeded, wf.Status.Nodes["3"].Phase) - //assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["4"].Phase) } } }) @@ -1055,113 +1052,45 @@ var retryWorkflowWithFailedNodeHasChildrenNodes = ` apiVersion: argoproj.io/v1alpha1 kind: Workflow metadata: - annotations: - workflows.argoproj.io/pod-name-format: v1 - creationTimestamp: "2022-08-04T08:35:10Z" - generateName: test-pipeline- - generation: 10 labels: workflows.argoproj.io/completed: "true" workflows.argoproj.io/phase: Failed - workflows.argoproj.io/resubmitted-from-workflow: test-pipeline-t5h77 - managedFields: - - apiVersion: argoproj.io/v1alpha1 - fieldsType: FieldsV1 - fieldsV1: - f:metadata: - f:annotations: - .: {} - f:workflows.argoproj.io/pod-name-format: {} - f:generateName: {} - f:labels: - .: {} - f:workflows.argoproj.io/resubmitted-from-workflow: {} - f:spec: {} - manager: argo - operation: Update - time: "2022-08-04T08:35:10Z" - - apiVersion: argoproj.io/v1alpha1 - fieldsType: FieldsV1 - fieldsV1: - f:metadata: - f:labels: - f:workflows.argoproj.io/completed: {} - f:workflows.argoproj.io/phase: {} - f:status: {} - manager: workflow-controller - operation: Update - time: "2022-08-04T08:36:31Z" name: test-pipeline-bnzwv namespace: argo-system - resourceVersion: "2588589" - uid: 1ce08a8a-0d95-46a3-b7d8-b6e04a0e6136 spec: - arguments: {} entrypoint: test-pipeline-dag templates: - dag: tasks: - - arguments: {} - name: t1 + - name: t1 template: succeeded - - arguments: {} - depends: t1 + - depends: t1 name: t2 template: failed - - arguments: {} - depends: t2 || t2.Failed + - depends: t2 || t2.Failed name: t3 template: succeeded - - arguments: {} - depends: t3 + - depends: t3 name: t4-1 template: succeeded - - arguments: {} - depends: t3 + - depends: t3 name: t4-2 template: succeeded - - arguments: {} - depends: t3 + - depends: t3 name: t4-3 template: failed - inputs: {} - metadata: {} name: test-pipeline-dag - outputs: {} - container: command: - "true" image: alpine - name: "" - resources: {} - inputs: {} - metadata: {} name: succeeded - outputs: {} - container: command: - "false" image: alpine - name: "" - resources: {} - inputs: {} - metadata: {} name: failed - outputs: {} status: - artifactRepositoryRef: - artifactRepository: - s3: - accessKeySecret: - key: accessKey - name: minio-secret - bucket: argo-artifact - endpoint: minio-service.middleware-system.svc:9000 - insecure: true - secretKeySecret: - key: secretKey - name: minio-secret - default: true conditions: - status: "False" type: PodRunning @@ -1182,9 +1111,6 @@ status: - test-pipeline-bnzwv-782035594 phase: Failed progress: 4/6 - resourcesDuration: - cpu: 32 - memory: 32 startedAt: "2022-08-04T08:35:10Z" templateName: test-pipeline-dag templateScope: local/test-pipeline-bnzwv @@ -1200,9 +1126,6 @@ status: exitCode: "0" phase: Succeeded progress: 1/1 - resourcesDuration: - cpu: 5 - memory: 5 startedAt: "2022-08-04T08:36:11Z" templateName: succeeded templateScope: local/test-pipeline-bnzwv @@ -1219,9 +1142,6 @@ status: exitCode: "1" phase: Failed progress: 0/1 - resourcesDuration: - cpu: 4 - memory: 4 startedAt: "2022-08-04T08:36:11Z" templateName: failed templateScope: local/test-pipeline-bnzwv @@ -1237,9 +1157,6 @@ status: exitCode: "0" phase: Succeeded progress: 1/1 - resourcesDuration: - cpu: 8 - memory: 8 startedAt: "2022-08-04T08:36:11Z" templateName: succeeded templateScope: local/test-pipeline-bnzwv @@ -1258,9 +1175,6 @@ status: exitCode: "1" phase: Failed progress: 0/1 - resourcesDuration: - cpu: 5 - memory: 5 startedAt: "2022-08-04T08:35:30Z" templateName: failed templateScope: local/test-pipeline-bnzwv @@ -1280,9 +1194,6 @@ status: exitCode: "0" phase: Succeeded progress: 1/1 - resourcesDuration: - cpu: 5 - memory: 5 startedAt: "2022-08-04T08:35:50Z" templateName: succeeded templateScope: local/test-pipeline-bnzwv @@ -1300,9 +1211,6 @@ status: exitCode: "0" phase: Succeeded progress: 1/1 - resourcesDuration: - cpu: 5 - memory: 5 startedAt: "2022-08-04T08:35:10Z" templateName: succeeded templateScope: local/test-pipeline-bnzwv From 691ffe32e2e9fdcaba4de60707c2c909736d5ad8 Mon Sep 17 00:00:00 2001 From: smile-luobin Date: Thu, 11 Aug 2022 16:58:47 +0800 Subject: [PATCH 5/5] fix bugs, and add more checks in test Signed-off-by: smile-luobin --- workflow/util/util.go | 2 +- workflow/util/util_test.go | 25 ++++++++++++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/workflow/util/util.go b/workflow/util/util.go index df082588dfd6..6a44d3d4d1bd 100644 --- a/workflow/util/util.go +++ b/workflow/util/util.go @@ -873,7 +873,7 @@ func FormulateRetryWorkflow(ctx context.Context, wf *wfv1.Workflow, restartSucce deletedNodes[descendantNodeID] = true descendantNode := wf.Status.Nodes[descendantNodeID] if descendantNode.Type == wfv1.NodeTypePod { - podsToDelete = deletePodNodeDuringRetryWorkflow(wf, node, deletedPods, podsToDelete) + podsToDelete = deletePodNodeDuringRetryWorkflow(wf, descendantNode, deletedPods, podsToDelete) } } } else if node.Name == wf.ObjectMeta.Name { diff --git a/workflow/util/util_test.go b/workflow/util/util_test.go index 2eec8ecb2767..c55f920f53dc 100644 --- a/workflow/util/util_test.go +++ b/workflow/util/util_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/stretchr/testify/assert" + "golang.org/x/exp/slices" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -1226,12 +1227,26 @@ status: func TestRetryWorkflowWithFailedNodeHasChildrenNodes(t *testing.T) { ctx := context.Background() wf := wfv1.MustUnmarshalWorkflow(retryWorkflowWithFailedNodeHasChildrenNodes) + version := GetWorkflowPodNameVersion(wf) + needDeletedNodeNames := []string{"t2", "t3", "t4-1", "t4-2", "t4-3"} + needDeletedPodNames := make([]string, 5) + for i, nodeName := range needDeletedNodeNames { + node := wf.Status.Nodes.FindByDisplayName(nodeName) + templateName := getTemplateFromNode(*node) + podName := PodName(wf.Name, node.Name, templateName, node.ID, version) + needDeletedPodNames[i] = podName + } + slices.Sort(needDeletedPodNames) - wf, _, err := FormulateRetryWorkflow(ctx, wf, false, "", nil) + wf, podsToDelete, err := FormulateRetryWorkflow(ctx, wf, false, "", nil) assert.NoError(t, err) assert.Equal(t, 2, len(wf.Status.Nodes)) - t2Node := wf.Status.Nodes.FindByDisplayName("t2") - assert.Nil(t, t2Node) - t3Node := wf.Status.Nodes.FindByDisplayName("t3") - assert.Nil(t, t3Node) + for _, nodeName := range needDeletedNodeNames { + node := wf.Status.Nodes.FindByDisplayName(nodeName) + assert.Nil(t, node) + } + + assert.Equal(t, 5, len(podsToDelete)) + slices.Sort(podsToDelete) + assert.Equal(t, needDeletedPodNames, podsToDelete) }