-
Notifications
You must be signed in to change notification settings - Fork 3.2k
/
dag.go
897 lines (808 loc) · 32.3 KB
/
dag.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
package controller
import (
"context"
"encoding/json"
"fmt"
"sort"
"strings"
"time"
log "github.com/sirupsen/logrus"
"github.com/argoproj/argo-workflows/v3/errors"
wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
"github.com/argoproj/argo-workflows/v3/util/expr/argoexpr"
"github.com/argoproj/argo-workflows/v3/util/template"
"github.com/argoproj/argo-workflows/v3/workflow/common"
controllercache "github.com/argoproj/argo-workflows/v3/workflow/controller/cache"
"github.com/argoproj/argo-workflows/v3/workflow/templateresolution"
)
// dagContext holds the state needed while evaluating a single DAG template invocation:
// the boundary node identity, the DAG's tasks, and per-operation caches.
type dagContext struct {
// boundaryName is the node name of the boundary node to this DAG.
// This is used to incorporate into each of the task's node names.
boundaryName string
// boundaryID is the node ID of the boundary node to this DAG.
boundaryID string
// tasks are all the tasks in the template
tasks []wfv1.DAGTask
// visited keeps track of tasks we have already visited during an invocation of executeDAG
// in order to avoid duplicating work
visited map[string]bool
// tmpl is the template spec. it is needed to resolve hard-wired artifacts
tmpl *wfv1.Template
// wf is stored to formulate nodeIDs
wf *wfv1.Workflow
// tmplCtx is the context of template search.
tmplCtx *templateresolution.Context
// onExitTemplate is a flag denoting this template as part of an onExit handler. This is necessary to ensure that
// further nodes stemming from this template are allowed to run when using "ShutdownStrategy: Stop"
onExitTemplate bool
// dependencies is a list of all the tasks a specific task depends on. Because dependencies are computed using regex
// and regex is expensive, we cache the results so that they are only computed once per operation
dependencies map[string][]string
// dependsLogic is the resolved "depends" string of a particular task. A resolved "depends" simply contains
// tasks with their explicit results, since we allow them to be omitted for convenience
// (i.e., "A || (B.Succeeded || B.Failed)" -> "(A.Succeeded || A.Skipped || A.Daemoned) || (B.Succeeded || B.Failed)").
// Because this resolved "depends" is computed using regex and regex is expensive, we cache the results so that they
// are only computed once per operation
dependsLogic map[string]string
}
// GetTaskDependencies returns the names of the tasks that taskName depends on.
// The answer is served from the dependencies cache when present; otherwise the
// task's dependencies are resolved (which fills the cache) and then returned.
func (d *dagContext) GetTaskDependencies(taskName string) []string {
	cached, found := d.dependencies[taskName]
	if !found {
		d.resolveDependencies(taskName)
		cached = d.dependencies[taskName]
	}
	return cached
}
// GetTaskFinishedAtTime reports when the node of taskName finished.
// It returns the zero time when the task has no node, and falls back to the
// node's start time when no finish time has been recorded.
func (d *dagContext) GetTaskFinishedAtTime(taskName string) time.Time {
	taskNode := d.getTaskNode(taskName)
	switch {
	case taskNode == nil:
		return time.Time{}
	case taskNode.FinishedAt.IsZero():
		return taskNode.StartedAt.Time
	default:
		return taskNode.FinishedAt.Time
	}
}
// GetTask looks up the DAG task called taskName and returns a pointer to a copy of it.
// It panics when the DAG has no task with that name.
func (d *dagContext) GetTask(taskName string) *wfv1.DAGTask {
	for i := range d.tasks {
		if d.tasks[i].Name != taskName {
			continue
		}
		// Return the address of a copy, so the result never aliases the slice element.
		match := d.tasks[i]
		return &match
	}
	panic("target " + taskName + " does not exist")
}
// GetTaskDependsLogic returns the resolved "depends" expression of taskName.
// The answer is served from the dependsLogic cache when present; otherwise the
// task's dependencies are resolved (which fills the cache) and then returned.
func (d *dagContext) GetTaskDependsLogic(taskName string) string {
	cached, found := d.dependsLogic[taskName]
	if !found {
		d.resolveDependencies(taskName)
		cached = d.dependsLogic[taskName]
	}
	return cached
}
// resolveDependencies computes the dependencies and the resolved "depends" logic of
// taskName and stores both in the dagContext caches.
//
// The dependency names are collected by ranging over a map, whose iteration order is
// random in Go. They are sorted before being cached so that everything that walks the
// list (recursion into dependencies, linking of child nodes, log output) behaves the
// same way on every operation.
func (d *dagContext) resolveDependencies(taskName string) {
	dependencies, resolvedDependsLogic := common.GetTaskDependencies(d.GetTask(taskName), d)
	var dependencyTasks []string
	for dep := range dependencies {
		dependencyTasks = append(dependencyTasks, dep)
	}
	sort.Strings(dependencyTasks)

	d.dependencies[taskName] = dependencyTasks
	d.dependsLogic[taskName] = resolvedDependsLogic
}
// taskNodeName builds the node name of a DAG task: the boundary node name and the
// task name joined by a dot.
func (d *dagContext) taskNodeName(taskName string) string {
	return d.boundaryName + "." + taskName
}
// taskNodeID builds the node ID of a DAG task by asking the workflow for the ID
// that corresponds to the task's node name.
func (d *dagContext) taskNodeID(taskName string) string {
	return d.wf.NodeID(d.taskNodeName(taskName))
}
// getTaskNode fetches the node status recorded for a task.
// When the workflow has no node under the task's ID, a warning is logged and nil is returned.
func (d *dagContext) getTaskNode(taskName string) *wfv1.NodeStatus {
	id := d.taskNodeID(taskName)
	if status, lookupErr := d.wf.Status.Nodes.Get(id); lookupErr == nil {
		return status
	}
	log.Warnf("was unable to obtain the node for %s, taskName %s", id, taskName)
	return nil
}
// assessDAGPhase assesses the overall DAG status.
//
// targetTasks are the names of the DAG's target tasks, nodes is the set of node statuses to
// inspect, and isShutdown short-circuits the assessment to NodeFailed.
//
// It returns NodeRunning while any reachable node is missing or unfulfilled, or while a target
// task has no phase yet; a Failed/Error phase when a target task failed without a matching
// continueOn; and NodeSucceeded otherwise. An error is returned only when the boundary node
// cannot be found in nodes.
func (d *dagContext) assessDAGPhase(targetTasks []string, nodes wfv1.Nodes, isShutdown bool) (wfv1.NodePhase, error) {
// We cannot only rely on the DAG traversal. Conditionals, self-references,
// and ContinuesOn (every one of those features in unison) make this an undecidable problem.
// However, we can just use isShutdown to automatically fail the DAG.
if isShutdown {
return wfv1.NodeFailed, nil
}
// targetTaskPhases keeps track of all the phases of the target tasks. This is necessary because some target tasks may
// be omitted and will not have an explicit phase. We would still like to deduce a phase for those tasks in order to
// determine the overall phase of the DAG. To do so, an omitted task always inherits the phase of its parents, with
// preference of Failed or Error phases over Succeeded. This means that if a task in a branch fails, all of its descendants
// will be considered Failed unless they themselves complete with a different phase, in which case that different phase
// will take precedence as the branch phase for their descendants.
targetTaskPhases := make(map[string]wfv1.NodePhase)
for _, task := range targetTasks {
// The empty phase marks a target task that has not been reached by the traversal yet
targetTaskPhases[d.taskNodeID(task)] = ""
}
boundaryNode, err := nodes.Get(d.boundaryID)
if err != nil {
return "", err
}
// BFS over the children of the DAG. Every child of the boundary node starts with a Succeeded branch phase.
uniqueQueue := newUniquePhaseNodeQueue(generatePhaseNodes(boundaryNode.Children, wfv1.NodeSucceeded)...)
for !uniqueQueue.empty() {
curr := uniqueQueue.pop()
node, err := nodes.Get(curr.nodeId)
if err != nil {
// this is okay, this means that
// we are still running
return wfv1.NodeRunning, nil
}
// We need to store the current branchPhase to remember the last completed phase in this branch so that we can apply it to omitted nodes
branchPhase := curr.phase
if !node.Fulfilled() {
return wfv1.NodeRunning, nil
}
// Only overwrite the branchPhase if this node completed. (If it didn't we can just inherit our parent's branchPhase).
if node.Completed() {
branchPhase = node.Phase
}
// This node is a target task, so it will not have any children. Store or deduce its phase
if previousPhase, isTargetTask := targetTaskPhases[node.ID]; isTargetTask {
// Since we want Failed or Errored phases to have preference over Succeeded in case of ambiguity, only update
// the deduced phase of the target task if it is not already Failed or Errored.
// Note that if the target task is NOT omitted (i.e. it Completed), then this check is moot, because every time
// we arrive at said target task it will have the same branchPhase.
if !previousPhase.FailedOrError() {
targetTaskPhases[node.ID] = branchPhase
}
}
// Retry nodes contribute the children computed by getRetryNodeChildrenIds instead of their direct children
if node.Type == wfv1.NodeTypeRetry {
uniqueQueue.add(generatePhaseNodes(getRetryNodeChildrenIds(node, nodes), branchPhase)...)
} else {
uniqueQueue.add(generatePhaseNodes(node.Children, branchPhase)...)
}
}
// We only succeed if all the target tasks have been considered (i.e. its nodes created) and there are no failures
// failFast defaults to true when the template does not set it
failFast := d.tmpl.DAG.FailFast == nil || *d.tmpl.DAG.FailFast
result := wfv1.NodeSucceeded
for _, depName := range targetTasks {
branchPhase := targetTaskPhases[d.taskNodeID(depName)]
if branchPhase == "" {
result = wfv1.NodeRunning
// If failFast is disabled, we will want to let all tasks complete before checking for failures
if !failFast {
break
}
} else if branchPhase.FailedOrError() {
// If this target task has continueOn set for its current phase, then don't treat it as failed for the purposes
// of determining DAG status. This is so that target tasks with said continueOn do not fail the overall DAG.
// For non-leaf tasks, this is done by setting all of its dependents to allow for their failure or error in
// their "depends" clause during their respective "dependencies" to "depends" conversion. See "expandDependency"
// in ancestry.go
if task := d.GetTask(depName); task.ContinuesOn(branchPhase) {
continue
}
result = branchPhase
// If failFast is enabled, don't check to see if other target tasks are complete and fail now instead
if failFast {
break
}
}
}
return result, nil
}
// executeDAG drives one evaluation pass of the DAG template tmpl for the node nodeName.
//
// It creates the DAG node when it does not exist yet, executes every target task (and, through
// executeDAGTask, their dependencies), runs lifecycle hooks and exit handlers of the target tasks,
// and then assesses the DAG phase:
//   - while the DAG is still running the node is returned unchanged;
//   - on Failed/Error the outbound nodes are recorded and the node is marked with that phase;
//   - otherwise task outputs are collected into the DAG node's outputs, the outputs are saved to
//     the memoization cache when the node has a memoization status, outbound nodes are recorded,
//     and the node is marked Succeeded.
//
// When the function returns, daemoned children of the DAG node are killed if the node is fulfilled.
func (woc *wfOperationCtx) executeDAG(ctx context.Context, nodeName string, tmplCtx *templateresolution.Context, templateScope string, tmpl *wfv1.Template, orgTmpl wfv1.TemplateReferenceHolder, opts *executeTemplateOpts) (*wfv1.NodeStatus, error) {
	node, err := woc.wf.GetNodeByName(nodeName)
	if err != nil {
		node = woc.initializeExecutableNode(nodeName, wfv1.NodeTypeDAG, templateScope, tmpl, orgTmpl, opts.boundaryID, wfv1.NodeRunning, opts.nodeFlag)
	}

	// Capture the ID up front. The deferred function must not read it from the result of the
	// lookup below: when that lookup fails its result is not usable (presumably nil), and
	// dereferencing it while formatting the panic message would hide the intended message
	// behind a nil-pointer panic.
	dagNodeID := node.ID
	defer func() {
		latest, err := woc.wf.Status.Nodes.Get(dagNodeID)
		if err != nil {
			// CRITICAL ERROR IF THIS BRANCH IS REACHED -> PANIC
			panic(fmt.Sprintf("expected node for %s due to preceded initializeExecutableNode but couldn't find it", dagNodeID))
		}
		if latest.Fulfilled() {
			woc.killDaemonedChildren(latest.ID)
		}
	}()

	dagCtx := &dagContext{
		boundaryName:   nodeName,
		boundaryID:     node.ID,
		tasks:          tmpl.DAG.Tasks,
		visited:        make(map[string]bool),
		tmpl:           tmpl,
		wf:             woc.wf,
		tmplCtx:        tmplCtx,
		onExitTemplate: opts.onExitTemplate,
		dependencies:   make(map[string][]string),
		dependsLogic:   make(map[string]string),
	}

	// Identify our target tasks. If user did not specify any, then we choose all tasks which have
	// no dependants.
	var targetTasks []string
	if tmpl.DAG.Target == "" {
		targetTasks = dagCtx.findLeafTaskNames(tmpl.DAG.Tasks)
	} else {
		targetTasks = strings.Split(tmpl.DAG.Target, " ")
	}

	// kick off execution of each target task
	for _, taskName := range targetTasks {
		woc.executeDAGTask(ctx, dagCtx, taskName)

		// It is possible that target tasks are not reconsidered (i.e. executeDAGTask is not called on them) once they are
		// complete (since the DAG itself will have succeeded). To ensure that their exit handlers are run we also run them here. Note that
		// calls to runOnExitNode are idempotent: it is fine if they are called more than once for the same task.
		taskNode := dagCtx.getTaskNode(taskName)
		if taskNode != nil {
			task := dagCtx.GetTask(taskName)
			scope, err := woc.buildLocalScopeFromTask(dagCtx, task)
			if err != nil {
				woc.markNodeError(node.Name, err)
				return node, err
			}
			scope.addParamToScope(fmt.Sprintf("tasks.%s.status", task.Name), string(taskNode.Phase))
			_, err = woc.executeTmplLifeCycleHook(ctx, scope, task.Hooks, taskNode, dagCtx.boundaryID, dagCtx.tmplCtx, "tasks."+taskName)
			if err != nil {
				woc.markNodeError(node.Name, err)
				return node, err
			}
			if taskNode.Fulfilled() {
				if taskNode.Completed() {
					// Run the node's onExit node, if any. Since this is a target task, we don't need to consider the status
					// of the onExit node before continuing. That will be done in assessDAGPhase
					_, _, err := woc.runOnExitNode(ctx, task.GetExitHook(woc.execWf.Spec.Arguments), taskNode, dagCtx.boundaryID, dagCtx.tmplCtx, "tasks."+taskName, scope)
					if err != nil {
						return node, err
					}
				}
			}
		}
	}

	// check if we are still running any tasks in this dag and return early if we do
	dagPhase, err := dagCtx.assessDAGPhase(targetTasks, woc.wf.Status.Nodes, woc.GetShutdownStrategy().Enabled())
	if err != nil {
		return nil, err
	}
	switch dagPhase {
	case wfv1.NodeRunning:
		return node, nil
	case wfv1.NodeError, wfv1.NodeFailed:
		err = woc.updateOutboundNodesForTargetTasks(dagCtx, targetTasks, nodeName)
		if err != nil {
			return nil, err
		}
		_ = woc.markNodePhase(nodeName, dagPhase)
		return node, nil
	}

	// set outputs from tasks in order for DAG templates to support outputs
	scope := createScope(tmpl)
	for _, task := range tmpl.DAG.Tasks {
		taskNode := dagCtx.getTaskNode(task.Name)
		if taskNode == nil {
			// Can happen when dag.target was specified
			continue
		}

		prefix := fmt.Sprintf("tasks.%s", task.Name)
		if taskNode.Type == wfv1.NodeTypeTaskGroup {
			// An expanded task aggregates the outputs of all of its children
			childNodes := make([]wfv1.NodeStatus, len(taskNode.Children))
			for i, childID := range taskNode.Children {
				childNode, err := woc.wf.Status.Nodes.Get(childID)
				if err != nil {
					woc.log.Errorf("was unable to obtain node for %s", childID)
					return nil, fmt.Errorf("Critical error, unable to find %s", childID)
				}
				childNodes[i] = *childNode
			}
			err := woc.processAggregateNodeOutputs(scope, prefix, childNodes)
			if err != nil {
				woc.log.Errorf("unable to processAggregateNodeOutputs")
				return nil, errors.InternalWrapError(err)
			}
		}
		woc.buildLocalScope(scope, prefix, taskNode)
		woc.addOutputsToGlobalScope(taskNode.Outputs)
	}
	outputs, err := getTemplateOutputsFromScope(tmpl, scope)
	if err != nil {
		woc.log.Errorf("unable to get outputs")
		return node, err
	}
	if outputs != nil {
		node.Outputs = outputs
		woc.wf.Status.Nodes.Set(node.ID, *node)
	}
	if node.MemoizationStatus != nil {
		c := woc.controller.cacheFactory.GetCache(controllercache.ConfigMapCache, node.MemoizationStatus.CacheName)
		err := c.Save(ctx, node.MemoizationStatus.Key, node.ID, node.Outputs)
		if err != nil {
			woc.log.WithFields(log.Fields{"nodeID": node.ID}).WithError(err).Error("Failed to save node outputs to cache")
			node.Phase = wfv1.NodeError
		}
	}

	err = woc.updateOutboundNodesForTargetTasks(dagCtx, targetTasks, nodeName)
	if err != nil {
		return nil, err
	}

	return woc.markNodePhase(nodeName, wfv1.NodeSucceeded), nil
}
// updateOutboundNodesForTargetTasks records, on the node called nodeName, the outbound
// nodes of every target task that already has a node. Target tasks without a node are
// logged and skipped. It returns an error when the node called nodeName cannot be found.
func (woc *wfOperationCtx) updateOutboundNodesForTargetTasks(dagCtx *dagContext, targetTasks []string, nodeName string) error {
	// Gather the outbound node IDs contributed by each target task
	outbound := []string{}
	for _, target := range targetTasks {
		if targetNode := dagCtx.getTaskNode(target); targetNode != nil {
			outbound = append(outbound, woc.getOutboundNodes(targetNode.ID)...)
		} else {
			woc.log.Println(target)
		}
	}

	dagNode, err := woc.wf.GetNodeByName(nodeName)
	if err != nil {
		woc.log.Warnf("was unable to obtain node by name for %s", nodeName)
		return err
	}

	// Persist the updated node back into the workflow status
	dagNode.OutboundNodes = outbound
	woc.wf.Status.Nodes.Set(dagNode.ID, *dagNode)
	woc.log.Infof("Outbound nodes of %s set to %s", dagNode.ID, outbound)
	return nil
}
// executeDAGTask traverses and executes the upward chain of dependencies of a task.
//
// Each task is handled at most once per executeDAG invocation (tracked in dagCtx.visited).
// The function works in stages:
//  1. For a task whose node is running or fulfilled, run its lifecycle hooks and stop if they
//     have not completed.
//  2. For a fulfilled task, compute metrics, release its synchronization lock, run its exit
//     handler, and return.
//  3. Otherwise recurse into the task's dependencies and evaluate its "depends" logic; the task
//     is left alone while dependencies are pending, or marked Omitted when the logic is not met.
//  4. Resolve references to dependency outputs, expand withItems/withParam/withSequence, and
//     execute the template of every expanded task.
//  5. For an expanded task, mark the TaskGroup node once all of its expanded tasks are fulfilled.
//
// Failures are recorded on the affected node rather than returned.
func (woc *wfOperationCtx) executeDAGTask(ctx context.Context, dagCtx *dagContext, taskName string) {
	if _, ok := dagCtx.visited[taskName]; ok {
		return
	}
	dagCtx.visited[taskName] = true

	node := dagCtx.getTaskNode(taskName)
	task := dagCtx.GetTask(taskName)
	log := woc.log.WithField("taskName", taskName)
	if node != nil && (node.Fulfilled() || node.Phase == wfv1.NodeRunning) {
		scope, err := woc.buildLocalScopeFromTask(dagCtx, task)
		if err != nil {
			log.Error("Failed to build local scope from task")
			woc.markNodeError(node.Name, err)
			return
		}
		scope.addParamToScope(fmt.Sprintf("tasks.%s.status", task.Name), string(node.Phase))
		hookCompleted, err := woc.executeTmplLifeCycleHook(ctx, scope, dagCtx.GetTask(taskName).Hooks, node, dagCtx.boundaryID, dagCtx.tmplCtx, "tasks."+taskName)
		if err != nil {
			woc.markNodeError(node.Name, err)
		}
		// Check that all hooks have completed
		if !hookCompleted {
			return
		}
	}

	if node != nil && node.Fulfilled() {
		// Collect the completed task metrics
		_, tmpl, _, _ := dagCtx.tmplCtx.ResolveTemplate(task)
		if tmpl != nil && tmpl.Metrics != nil {
			if prevNodeStatus, ok := woc.preExecutionNodePhases[node.ID]; ok && !prevNodeStatus.Fulfilled() {
				localScope, realTimeScope := woc.prepareMetricScope(node)
				woc.computeMetrics(tmpl.Metrics.Prometheus, localScope, realTimeScope, false)
			}
		}

		processedTmpl, err := common.ProcessArgs(tmpl, &task.Arguments, woc.globalParams, map[string]string{}, true, woc.wf.Namespace, woc.controller.configMapInformer.GetIndexer())
		if err != nil {
			woc.markNodeError(node.Name, err)
		}

		// Release acquired lock completed task. processedTmpl is not usable when ProcessArgs
		// failed, so it must be checked before its Synchronization field is read.
		if tmpl != nil && processedTmpl != nil {
			woc.controller.syncManager.Release(woc.wf, node.ID, processedTmpl.Synchronization)
		}

		scope, err := woc.buildLocalScopeFromTask(dagCtx, task)
		if err != nil {
			woc.markNodeError(node.Name, err)
			log.Error("Failed to build local scope from task")
			return
		}
		scope.addParamToScope(fmt.Sprintf("tasks.%s.status", task.Name), string(node.Phase))

		if node.Completed() {
			// Run the node's onExit node, if any.
			hasOnExitNode, onExitNode, err := woc.runOnExitNode(ctx, task.GetExitHook(woc.execWf.Spec.Arguments), node, dagCtx.boundaryID, dagCtx.tmplCtx, "tasks."+taskName, scope)
			if hasOnExitNode && (onExitNode == nil || !onExitNode.Fulfilled() || err != nil) {
				// The onExit node is either not complete or has errored out, return.
				return
			}
		}
		return
	}

	// The template scope of this dag.
	dagTemplateScope := dagCtx.tmplCtx.GetTemplateScope()

	// Check if our dependencies completed. If not, recurse our parents executing them if necessary
	nodeName := dagCtx.taskNodeName(taskName)
	taskDependencies := dagCtx.GetTaskDependencies(taskName)

	// error condition taken care of via a nil check
	taskGroupNode, _ := woc.wf.GetNodeByName(nodeName)
	if taskGroupNode != nil && taskGroupNode.Type != wfv1.NodeTypeTaskGroup {
		taskGroupNode = nil
	}
	// connectDependencies is a helper to connect our dependencies to current task as children
	connectDependencies := func(taskNodeName string) {
		if len(taskDependencies) == 0 || taskGroupNode != nil {
			// if we had no dependencies, then we are a root task, and we should connect the
			// boundary node as our parent
			if taskGroupNode == nil {
				woc.addChildNode(dagCtx.boundaryName, taskNodeName)
			} else {
				woc.addChildNode(taskGroupNode.Name, taskNodeName)
			}
		} else {
			// Otherwise, add all outbound nodes of our dependencies as parents to this node
			for _, depName := range taskDependencies {
				depNode := dagCtx.getTaskNode(depName)
				outboundNodeIDs := woc.getOutboundNodes(depNode.ID)
				for _, outNodeID := range outboundNodeIDs {
					nodeName, err := woc.wf.Status.Nodes.GetName(outNodeID)
					if err != nil {
						woc.log.Errorf("was unable to obtain node for %s", outNodeID)
						return
					}
					woc.addChildNode(nodeName, taskNodeName)
				}
			}
		}
	}

	if dagCtx.GetTaskDependsLogic(taskName) != "" {
		// Recurse into all of this node's dependencies
		for _, dep := range taskDependencies {
			woc.executeDAGTask(ctx, dagCtx, dep)
		}
		execute, proceed, err := dagCtx.evaluateDependsLogic(taskName)
		if err != nil {
			woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, dagTemplateScope, task, dagCtx.boundaryID, wfv1.NodeError, &wfv1.NodeFlag{}, err.Error())
			connectDependencies(nodeName)
			return
		}
		if !proceed {
			// This node's dependencies are not completed yet, return
			return
		}
		if !execute {
			// Given the results of this node's dependencies, this node should not be executed. Mark it omitted
			woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, dagTemplateScope, task, dagCtx.boundaryID, wfv1.NodeOmitted, &wfv1.NodeFlag{}, "omitted: depends condition not met")
			connectDependencies(nodeName)
			return
		}
	}

	// All our dependencies were satisfied and successful. It's our turn to run
	// First resolve/substitute params/artifacts from our dependencies
	newTask, err := woc.resolveDependencyReferences(dagCtx, task)
	if err != nil {
		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, dagTemplateScope, task, dagCtx.boundaryID, wfv1.NodeError, &wfv1.NodeFlag{}, err.Error())
		connectDependencies(nodeName)
		return
	}

	// Next, expand the DAG's withItems/withParams/withSequence (if any). If there was none, then
	// expandedTasks will be a single element list of the same task
	expandedTasks, err := expandTask(*newTask)
	if err != nil {
		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, dagTemplateScope, task, dagCtx.boundaryID, wfv1.NodeError, &wfv1.NodeFlag{}, err.Error())
		connectDependencies(nodeName)
		return
	}

	// If DAG task has withParam or withSequence then we need to create virtual node of type TaskGroup.
	// For example, if we had task A with withItems of ['foo', 'bar'] which expanded to ['A(0:foo)', 'A(1:bar)'], we still
	// need to create a node for A.
	if task.ShouldExpand() {
		// DAG task with empty withParams list should be skipped
		if len(expandedTasks) == 0 {
			skipReason := "Skipped, empty params"
			woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, dagTemplateScope, task, dagCtx.boundaryID, wfv1.NodeSkipped, &wfv1.NodeFlag{}, skipReason)
			connectDependencies(nodeName)
		} else if taskGroupNode == nil {
			connectDependencies(nodeName)
			taskGroupNode = woc.initializeNode(nodeName, wfv1.NodeTypeTaskGroup, dagTemplateScope, task, dagCtx.boundaryID, wfv1.NodeRunning, &wfv1.NodeFlag{}, "")
		}
	}

	for _, t := range expandedTasks {
		taskNodeName := dagCtx.taskNodeName(t.Name)
		node = dagCtx.getTaskNode(t.Name)
		if node == nil {
			woc.log.Infof("All of node %s dependencies %v completed", taskNodeName, taskDependencies)
			// Add the child relationship from our dependency's outbound nodes to this node.
			connectDependencies(taskNodeName)

			// Check the task's when clause to decide if it should execute
			proceed, err := shouldExecute(t.When)
			if err != nil {
				woc.initializeNode(taskNodeName, wfv1.NodeTypeSkipped, dagTemplateScope, task, dagCtx.boundaryID, wfv1.NodeError, &wfv1.NodeFlag{}, err.Error())
				continue
			}
			if !proceed {
				skipReason := fmt.Sprintf("when '%s' evaluated false", t.When)
				woc.initializeNode(taskNodeName, wfv1.NodeTypeSkipped, dagTemplateScope, task, dagCtx.boundaryID, wfv1.NodeSkipped, &wfv1.NodeFlag{}, skipReason)
				continue
			}
		}

		// Finally execute the template
		node, err = woc.executeTemplate(ctx, taskNodeName, &t, dagCtx.tmplCtx, t.Arguments, &executeTemplateOpts{boundaryID: dagCtx.boundaryID, onExitTemplate: dagCtx.onExitTemplate})
		if err != nil {
			// NOTE(review): Go switch cases do not fall through, so ErrParallelismReached and
			// ErrMaxDepthExceeded do nothing here and execution continues after the switch.
			// Confirm this is intended for ErrMaxDepthExceeded.
			switch err {
			case ErrDeadlineExceeded:
				return
			case ErrParallelismReached:
			case ErrMaxDepthExceeded:
			case ErrTimeout:
				_ = woc.markNodePhase(taskNodeName, wfv1.NodeFailed, err.Error())
				return
			default:
				_ = woc.markNodeError(taskNodeName, fmt.Errorf("task '%s' errored: %v", taskNodeName, err))
				return
			}
		}

		// Some scenario, Node will be nil e.g: when parallelism reached.
		if node == nil {
			return
		}
		if node.Completed() {
			scope, err := woc.buildLocalScopeFromTask(dagCtx, task)
			if err != nil {
				// scope is nil on failure, so it must not be used below
				woc.markNodeError(node.Name, err)
				log.Error("Failed to build local scope from task")
				return
			}
			scope.addParamToScope(fmt.Sprintf("tasks.%s.status", task.Name), string(node.Phase))
			// if the node type is NodeTypeRetry, and its last child is completed, it will be completed after woc.executeTemplate;
			hasOnExitNode, onExitNode, err := woc.runOnExitNode(ctx, task.GetExitHook(woc.execWf.Spec.Arguments), node, dagCtx.boundaryID, dagCtx.tmplCtx, "tasks."+taskName, scope)
			if hasOnExitNode && (onExitNode == nil || !onExitNode.Fulfilled() || err != nil) {
				// The onExit node is either not complete or has errored out, return.
				return
			}
		}
	}

	if taskGroupNode != nil {
		groupPhase := wfv1.NodeSucceeded
		for _, t := range expandedTasks {
			// The group is only marked once every expanded task is fulfilled; a Failed or
			// Errored expanded task determines the group phase.
			node := dagCtx.getTaskNode(t.Name)
			if node == nil || !node.Fulfilled() {
				return
			}
			if node.FailedOrError() {
				groupPhase = node.Phase
			}
		}
		woc.markNodePhase(taskGroupNode.Name, groupPhase)
	}
}
// buildLocalScopeFromTask assembles the variable scope visible to task: the workflow
// outputs under "workflow", plus the outputs of every ancestor task under "tasks.<name>".
// For an ancestor that is a TaskGroup, the outputs of its expanded nodes are aggregated.
// It returns an error when an ancestor has no node, or when template resolution or
// aggregation fails.
func (woc *wfOperationCtx) buildLocalScopeFromTask(dagCtx *dagContext, task *wfv1.DAGTask) (*wfScope, error) {
	localScope := createScope(dagCtx.tmpl)
	woc.addOutputsToLocalScope("workflow", woc.wf.Status.Outputs, localScope)

	for _, ancestorName := range common.GetTaskAncestry(dagCtx, task.Name) {
		ancestorNode := dagCtx.getTaskNode(ancestorName)
		if ancestorNode == nil {
			return nil, errors.InternalErrorf("Ancestor task node %s not found", ancestorName)
		}
		prefix := "tasks." + ancestorName

		// Plain ancestors contribute their own outputs directly
		if ancestorNode.Type != wfv1.NodeTypeTaskGroup {
			woc.buildLocalScope(localScope, prefix, ancestorNode)
			continue
		}

		// Expanded nodes live in the same boundary and are named "<group name>(..."
		expandedPrefix := ancestorNode.Name + "("
		var expandedNodes []wfv1.NodeStatus
		for _, candidate := range woc.wf.Status.Nodes {
			if candidate.BoundaryID != dagCtx.boundaryID {
				continue
			}
			if strings.HasPrefix(candidate.Name, expandedPrefix) {
				expandedNodes = append(expandedNodes, candidate)
			}
		}

		_, _, templateStored, err := dagCtx.tmplCtx.ResolveTemplate(ancestorNode)
		if err != nil {
			return nil, errors.InternalWrapError(err)
		}
		if templateStored {
			// A new template was stored during resolution, persist it
			woc.updated = true
		}

		if err = woc.processAggregateNodeOutputs(localScope, prefix, expandedNodes); err != nil {
			return nil, errors.InternalWrapError(err)
		}
	}
	return localScope, nil
}
// resolveDependencyReferences returns a copy of task in which references to the outputs
// of its dependencies have been substituted, and artifact arguments with a "from" have
// been resolved against the task's local scope.
// NOTE: by now, input parameters should have been substituted throughout the template
func (woc *wfOperationCtx) resolveDependencyReferences(dagCtx *dagContext, task *wfv1.DAGTask) (*wfv1.DAGTask, error) {
	localScope, err := woc.buildLocalScopeFromTask(dagCtx, task)
	if err != nil {
		return nil, err
	}

	// Substitute parameters in woc.volumes first
	if err = woc.substituteParamsInVolumes(localScope.getParameters()); err != nil {
		return nil, err
	}

	// Substitute the task's parameters via a JSON round trip
	rawTask, err := json.Marshal(task)
	if err != nil {
		return nil, errors.InternalWrapError(err)
	}
	replaced, err := template.Replace(string(rawTask), woc.globalParams.Merge(localScope.getParameters()), true)
	if err != nil {
		return nil, err
	}
	var resolved wfv1.DAGTask
	if err = json.Unmarshal([]byte(replaced), &resolved); err != nil {
		return nil, errors.InternalWrapError(err)
	}

	// If we are not executing, don't attempt to resolve any artifact references. We only check if we are executing after
	// the initial parameter resolution, since it's likely that the "when" clause will contain parameter references.
	willRun, err := shouldExecute(resolved.When)
	if err != nil {
		// The error might be caused by a task-expansion parameter (e.g. {{item}}) in the "when"
		// clause. Expansion happens later, so such parameters are still unresolved here; for a
		// task that expands we carry on as normal.
		if !resolved.ShouldExpand() {
			return nil, err
		}
		willRun = true
	}
	if !willRun {
		// We can simply return here; the fact that this task won't execute will be reconciled later on in execution
		return &resolved, nil
	}

	// Resolve every artifact that refers to another artifact through "from"
	for idx := range resolved.Arguments.Artifacts {
		art := resolved.Arguments.Artifacts[idx]
		if art.From == "" {
			continue
		}
		found, err := localScope.resolveArtifact(&art)
		if err != nil {
			if strings.Contains(err.Error(), "Unable to resolve") && art.Optional {
				woc.log.Warnf("Optional artifact '%s' was not found; it won't be available as an input", art.Name)
				continue
			}
			return nil, err
		}
		found.Name = art.Name
		resolved.Arguments.Artifacts[idx] = *found
	}
	return &resolved, nil
}
// findLeafTaskNames returns, in sorted order, the names of all tasks that no other task
// depends on. This list is the default set of targets when the DAG does not name any.
func (d *dagContext) findLeafTaskNames(tasks []wfv1.DAGTask) []string {
	// First pass: remember every name that appears as somebody's dependency
	dependedUpon := make(map[string]struct{})
	for i := range tasks {
		for _, dep := range d.GetTaskDependencies(tasks[i].Name) {
			dependedUpon[dep] = struct{}{}
		}
	}

	// Second pass: every task that nobody depends on is a leaf; names are reported once
	leaves := []string{}
	reported := make(map[string]struct{})
	for i := range tasks {
		name := tasks[i].Name
		if _, isDependency := dependedUpon[name]; isDependency {
			continue
		}
		if _, already := reported[name]; already {
			continue
		}
		reported[name] = struct{}{}
		leaves = append(leaves, name)
	}

	sort.Strings(leaves) // execute tasks in a predictable order
	return leaves
}
// expandTask turns a single DAG task carrying withItems, withParam or withSequence into one
// task per item. A task with none of those is returned unchanged as a one-element list.
//
// Expansion would ideally be lazy, but the "when" clause may depend on expansion before
// shouldExecute can evaluate it. So expansion is attempted first; if obtaining the items
// fails, the failure is tolerated only when shouldExecute reports that the task will not run.
func expandTask(task wfv1.DAGTask) ([]wfv1.DAGTask, error) {
	var items []wfv1.Item
	switch {
	case len(task.WithItems) > 0:
		items = task.WithItems
	case task.WithParam != "":
		if parseErr := json.Unmarshal([]byte(task.WithParam), &items); parseErr != nil {
			mustExec, mustExecErr := shouldExecute(task.When)
			if mustExecErr != nil || mustExec {
				return nil, errors.Errorf(errors.CodeBadRequest, "withParam value could not be parsed as a JSON list: %s: %v", strings.TrimSpace(task.WithParam), parseErr)
			}
		}
	case task.WithSequence != nil:
		sequence, seqErr := expandSequence(task.WithSequence)
		items = sequence
		if seqErr != nil {
			mustExec, mustExecErr := shouldExecute(task.When)
			if mustExecErr != nil || mustExec {
				return nil, seqErr
			}
		}
	default:
		return []wfv1.DAGTask{task}, nil
	}

	rawTask, err := json.Marshal(task)
	if err != nil {
		return nil, errors.InternalWrapError(err)
	}

	// these fields can be very large (>100m) and marshalling 10k x 100m = 6GB of memory used and
	// very poor performance, so we just nil them out
	// NOTE(review): the task has already been marshalled above, so the marshalled template still
	// contains these fields; confirm whether they were meant to be cleared before marshalling.
	task.WithItems, task.WithParam, task.WithSequence = nil, "", nil

	itemTemplate, err := template.NewTemplate(string(rawTask))
	if err != nil {
		return nil, fmt.Errorf("unable to parse argo variable: %w", err)
	}

	expanded := []wfv1.DAGTask{}
	for index, item := range items {
		var instance wfv1.DAGTask
		instanceName, err := processItem(itemTemplate, task.Name, index, item, &instance, task.When)
		if err != nil {
			return nil, err
		}
		instance.Name = instanceName
		instance.Template = task.Template
		expanded = append(expanded, instance)
	}
	return expanded, nil
}
// TaskResults describes the outcome of a single dependency task. evaluateDependsLogic builds
// one value per dependency and exposes it, keyed by task name, to the "depends" expression.
type TaskResults struct {
// Succeeded is true when the task node's phase is Succeeded
Succeeded bool `json:"Succeeded"`
// Failed is true when the task node's phase is Failed
Failed bool `json:"Failed"`
// Errored is true when the task node's phase is Error
Errored bool `json:"Errored"`
// Skipped is true when the task node's phase is Skipped
Skipped bool `json:"Skipped"`
// Omitted is true when the task node's phase is Omitted
Omitted bool `json:"Omitted"`
// Daemoned is true when the task node is daemoned and its phase is not Pending
Daemoned bool `json:"Daemoned"`
// AnySucceeded is only computed for TaskGroup nodes: true when at least one child succeeded
AnySucceeded bool `json:"AnySucceeded"`
// AllFailed is only computed for TaskGroup nodes: true when the group has children and every child failed
AllFailed bool `json:"AllFailed"`
}
// evaluateDependsLogic decides whether the task called taskName may run.
// It returns (execute, proceed, err): proceed is true once every dependency (and its onExit
// node, if one exists) is fulfilled; execute is the result of evaluating the task's resolved
// "depends" expression against the results of those dependencies. A task that already has a
// node is reported as (true, true).
func (d *dagContext) evaluateDependsLogic(taskName string) (bool, bool, error) {
	if d.getTaskNode(taskName) != nil {
		return true, true, nil
	}

	results := make(map[string]TaskResults)
	for _, depName := range d.GetTaskDependencies(taskName) {
		// A dependency that has no node yet, or is still unfulfilled, blocks us
		depNode := d.getTaskNode(depName)
		if depNode == nil || !depNode.Fulfilled() {
			return false, false, nil
		}

		// If the dependency has an onExit node, wait until that node is fulfilled as well
		exitNode, exitErr := d.wf.GetNodeByName(common.GenerateOnExitNodeName(depNode.Name))
		if exitNode != nil {
			if exitErr != nil {
				return false, false, exitErr
			}
			if !exitNode.Fulfilled() {
				return false, false, nil
			}
		}

		// Dashes are replaced by underscores in the names used inside the expression
		key := strings.ReplaceAll(depName, "-", "_")
		if _, seen := results[key]; seen {
			continue
		}

		anySucceeded, allFailed := false, false
		if depNode.Type == wfv1.NodeTypeTaskGroup {
			allFailed = len(depNode.Children) > 0
			for _, childID := range depNode.Children {
				phase, err := d.wf.Status.Nodes.GetPhase(childID)
				if err != nil {
					log.Warnf("was unable to obtain node for %s", childID)
					allFailed = false // we don't know if all failed
					continue
				}
				if *phase == wfv1.NodeSucceeded {
					anySucceeded = true
				}
				if *phase != wfv1.NodeFailed {
					allFailed = false
				}
			}
		}

		results[key] = TaskResults{
			Succeeded:    depNode.Phase == wfv1.NodeSucceeded,
			Failed:       depNode.Phase == wfv1.NodeFailed,
			Errored:      depNode.Phase == wfv1.NodeError,
			Skipped:      depNode.Phase == wfv1.NodeSkipped,
			Omitted:      depNode.Phase == wfv1.NodeOmitted,
			Daemoned:     depNode.IsDaemoned() && depNode.Phase != wfv1.NodePending,
			AnySucceeded: anySucceeded,
			AllFailed:    allFailed,
		}
	}

	expression := strings.ReplaceAll(d.GetTaskDependsLogic(taskName), "-", "_")
	execute, err := argoexpr.EvalBool(expression, results)
	if err != nil {
		return false, false, fmt.Errorf("unable to evaluate expression '%s': %s", expression, err)
	}
	return execute, true, nil
}