-
Notifications
You must be signed in to change notification settings - Fork 1.6k
/
pipeline_spec_builder.py
2097 lines (1807 loc) · 93.7 KB
/
pipeline_spec_builder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright 2021-2022 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Functions for creating PipelineSpec proto objects."""
import copy
import json
import typing
from typing import (Any, DefaultDict, Dict, List, Mapping, Optional, Tuple,
Union)
import warnings
from google.protobuf import json_format
from google.protobuf import struct_pb2
import kfp
from kfp import dsl
from kfp.compiler import compiler_utils
from kfp.dsl import component_factory
from kfp.dsl import for_loop
from kfp.dsl import pipeline_channel
from kfp.dsl import pipeline_config
from kfp.dsl import pipeline_context
from kfp.dsl import pipeline_task
from kfp.dsl import placeholders
from kfp.dsl import structures
from kfp.dsl import tasks_group
from kfp.dsl import utils
from kfp.dsl.types import artifact_types
from kfp.dsl.types import type_utils
from kfp.pipeline_spec import pipeline_spec_pb2
import yaml
# Lookup table from each TasksGroupType member to the DSL class registered
# for that kind of group.
# It must be defined here to avoid circular imports.
group_type_to_dsl_class = {
    tasks_group.TasksGroupType.PIPELINE: pipeline_context.Pipeline,
    tasks_group.TasksGroupType.CONDITION: tasks_group.Condition,
    tasks_group.TasksGroupType.FOR_LOOP: tasks_group.ParallelFor,
    tasks_group.TasksGroupType.EXIT_HANDLER: tasks_group.ExitHandler,
}
def to_protobuf_value(value: type_utils.PARAMETER_TYPES) -> struct_pb2.Value:
    """Converts a Python parameter value into a struct_pb2.Value message.

    Dicts and lists are converted recursively, so nested containers are
    accepted as long as every leaf is itself a supported type.

    Args:
        value: The str, int, float, bool, dict, or list to convert.

    Returns:
        The equivalent google.protobuf.struct_pb2.Value message.

    Raises:
        ValueError: If the value (or any nested value) is not one of the
            supported parameter types.
    """
    # bool has to be tested before the numeric types: bool is a subclass of
    # int, so isinstance(True, int) is True and would otherwise match first.
    if isinstance(value, bool):
        return struct_pb2.Value(bool_value=value)
    if isinstance(value, str):
        return struct_pb2.Value(string_value=value)
    if isinstance(value, (int, float)):
        return struct_pb2.Value(number_value=value)
    if isinstance(value, dict):
        converted_fields = {
            key: to_protobuf_value(item) for key, item in value.items()
        }
        return struct_pb2.Value(
            struct_value=struct_pb2.Struct(fields=converted_fields))
    if isinstance(value, list):
        converted_items = [to_protobuf_value(item) for item in value]
        return struct_pb2.Value(
            list_value=struct_pb2.ListValue(values=converted_items))

    raise ValueError('Value must be one of the following types: '
                     'str, int, float, bool, dict, and list. Got: '
                     f'"{value}" of type "{type(value)}".')
def _connect_parameter_channel_input(
    pipeline_task_spec: pipeline_spec_pb2.PipelineTaskSpec,
    input_key: str,
    channel: Any,
    parent_component_inputs: pipeline_spec_pb2.ComponentInputsSpec,
    tasks_in_current_dag: List[str],
) -> None:
    """Points the parameter input `input_key` at the source of `channel`.

    Args:
        pipeline_task_spec: The task spec to update in place.
        input_key: The key under `inputs.parameters` to populate.
        channel: The channel supplying the value. Its `task_name`, `name` and
            `full_name` attributes are read.
        parent_component_inputs: The parent component's input specs.
        tasks_in_current_dag: Names of the tasks in the same DAG.
    """
    if channel.task_name:
        # Value is produced by an upstream task.
        if channel.task_name in tasks_in_current_dag:
            # Producer is in the same DAG: reference its output directly.
            pipeline_task_spec.inputs.parameters[
                input_key].task_output_parameter.producer_task = (
                    utils.sanitize_task_name(channel.task_name))
            pipeline_task_spec.inputs.parameters[
                input_key].task_output_parameter.output_parameter_key = (
                    channel.name)
            return
        # Producer is not in the same DAG: the value must already be exposed
        # as an input of the parent component.
        component_input_parameter = (
            compiler_utils.additional_input_name_for_pipeline_channel(channel))
        assert component_input_parameter in parent_component_inputs.parameters, \
            f'component_input_parameter: {component_input_parameter} not found. All inputs: {parent_component_inputs}'
    else:
        # Value is from pipeline input. Prefer the channel's own full name and
        # fall back to the compiler-injected name when it is not declared.
        component_input_parameter = channel.full_name
        if component_input_parameter not in parent_component_inputs.parameters:
            component_input_parameter = (
                compiler_utils.additional_input_name_for_pipeline_channel(
                    channel))

    pipeline_task_spec.inputs.parameters[
        input_key].component_input_parameter = component_input_parameter


def _connect_artifact_channel_input(
    pipeline_task_spec: pipeline_spec_pb2.PipelineTaskSpec,
    input_key: str,
    channel: Any,
    parent_component_inputs: pipeline_spec_pb2.ComponentInputsSpec,
    tasks_in_current_dag: List[str],
) -> None:
    """Points the artifact input `input_key` at the source of `channel`.

    Args:
        pipeline_task_spec: The task spec to update in place.
        input_key: The key under `inputs.artifacts` to populate.
        channel: The channel supplying the artifact. Its `task_name`, `name`
            and `full_name` attributes are read.
        parent_component_inputs: The parent component's input specs.
        tasks_in_current_dag: Names of the tasks in the same DAG.
    """
    if channel.task_name:
        # Value is produced by an upstream task.
        if channel.task_name in tasks_in_current_dag:
            # Producer is in the same DAG: reference its output directly.
            pipeline_task_spec.inputs.artifacts[
                input_key].task_output_artifact.producer_task = (
                    utils.sanitize_task_name(channel.task_name))
            pipeline_task_spec.inputs.artifacts[
                input_key].task_output_artifact.output_artifact_key = (
                    channel.name)
            return
        # Producer is not in the same DAG: the artifact must already be
        # exposed as an input of the parent component.
        component_input_artifact = (
            compiler_utils.additional_input_name_for_pipeline_channel(channel))
        assert component_input_artifact in parent_component_inputs.artifacts, \
            f'component_input_artifact: {component_input_artifact} not found. All inputs: {parent_component_inputs}'
    else:
        # No producer task: prefer the channel's own full name and fall back
        # to the compiler-injected name when it is not declared.
        component_input_artifact = channel.full_name
        if component_input_artifact not in parent_component_inputs.artifacts:
            component_input_artifact = (
                compiler_utils.additional_input_name_for_pipeline_channel(
                    channel))

    pipeline_task_spec.inputs.artifacts[
        input_key].component_input_artifact = component_input_artifact


def build_task_spec_for_task(
    task: pipeline_task.PipelineTask,
    parent_component_inputs: pipeline_spec_pb2.ComponentInputsSpec,
    tasks_in_current_dag: List[str],
) -> pipeline_spec_pb2.PipelineTaskSpec:
    """Builds PipelineTaskSpec for a pipeline task.

    A task input may reference an output outside its immediate DAG.
    For instance::

        random_num = random_num_op(...)
        with dsl.Condition(random_num.output > 5):
            print_op('%s > 5' % random_num.output)

    In this example, `dsl.Condition` forms a subDAG with one task from
    `print_op` inside the subDAG. The task of `print_op` references output
    from `random_num` task, which is outside the sub-DAG. When compiling to
    IR, such cross DAG reference is disallowed. So we need to "punch a hole"
    in the sub-DAG to make the input available in the subDAG component inputs
    if it's not already there. Next, we can call this method to fix the tasks
    inside the subDAG to make them reference the component inputs instead of
    directly referencing the original producer task.

    Note that resource settings of the task's container spec that contain
    pipeline channels are written into `task.inputs` as a side effect.

    Args:
        task: The task to build a PipelineTaskSpec for.
        parent_component_inputs: The task's parent component's input specs.
        tasks_in_current_dag: The list of tasks names for tasks in the same
            dag.

    Returns:
        A PipelineTaskSpec object representing the task.

    Raises:
        RuntimeError: If a compiler-injected input name collides with an
            existing input name of the task.
        ValueError: If an input value has an unsupported type.
    """
    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()
    pipeline_task_spec.task_info.name = (
        task._task_spec.display_name or task.name)
    # Use task.name for component_ref.name because we may customize component
    # spec for individual tasks to work around the lack of optional inputs
    # support in IR.
    pipeline_task_spec.component_ref.name = (
        utils.sanitize_component_name(task.name))
    pipeline_task_spec.caching_options.enable_cache = (
        task._task_spec.enable_caching)

    if task._task_spec.retry_policy is not None:
        pipeline_task_spec.retry_policy.CopyFrom(
            task._task_spec.retry_policy.to_proto())

    # Inject resource fields into inputs so that any pipeline channels they
    # contain are wired up by the loop below like ordinary inputs.
    if task.container_spec and task.container_spec.resources:
        for key, val in task.container_spec.resources.__dict__.items():
            if val and pipeline_channel.extract_pipeline_channels_from_any(val):
                task.inputs[key] = val

    for input_name, input_value in task.inputs.items():
        # Since LoopParameterArgument and LoopArtifactArgument and
        # LoopArgumentVariable are narrower types than
        # PipelineParameterChannel, start with them.
        if isinstance(input_value, for_loop.LoopParameterArgument):
            component_input_parameter = (
                compiler_utils.additional_input_name_for_pipeline_channel(
                    input_value))
            assert component_input_parameter in parent_component_inputs.parameters, \
                f'component_input_parameter: {component_input_parameter} not found. All inputs: {parent_component_inputs}'
            pipeline_task_spec.inputs.parameters[
                input_name].component_input_parameter = (
                    component_input_parameter)

        elif isinstance(input_value, for_loop.LoopArtifactArgument):
            component_input_artifact = (
                compiler_utils.additional_input_name_for_pipeline_channel(
                    input_value))
            assert component_input_artifact in parent_component_inputs.artifacts, \
                f'component_input_artifact: {component_input_artifact} not found. All inputs: {parent_component_inputs}'
            pipeline_task_spec.inputs.artifacts[
                input_name].component_input_artifact = (
                    component_input_artifact)

        elif isinstance(input_value, for_loop.LoopArgumentVariable):
            component_input_parameter = (
                compiler_utils.additional_input_name_for_pipeline_channel(
                    input_value.loop_argument))
            assert component_input_parameter in parent_component_inputs.parameters, \
                f'component_input_parameter: {component_input_parameter} not found. All inputs: {parent_component_inputs}'
            pipeline_task_spec.inputs.parameters[
                input_name].component_input_parameter = (
                    component_input_parameter)
            # Select the sub-variable out of the JSON-encoded loop item.
            pipeline_task_spec.inputs.parameters[
                input_name].parameter_expression_selector = (
                    f'parseJson(string_value)["{input_value.subvar_name}"]')

        elif isinstance(input_value,
                        pipeline_channel.PipelineArtifactChannel) or (
                            isinstance(input_value, dsl.Collected) and
                            input_value.is_artifact_channel):
            _connect_artifact_channel_input(
                pipeline_task_spec=pipeline_task_spec,
                input_key=input_name,
                channel=input_value,
                parent_component_inputs=parent_component_inputs,
                tasks_in_current_dag=tasks_in_current_dag,
            )

        elif isinstance(input_value,
                        pipeline_channel.PipelineParameterChannel) or (
                            isinstance(input_value, dsl.Collected) and
                            not input_value.is_artifact_channel):
            _connect_parameter_channel_input(
                pipeline_task_spec=pipeline_task_spec,
                input_key=input_name,
                channel=input_value,
                parent_component_inputs=parent_component_inputs,
                tasks_in_current_dag=tasks_in_current_dag,
            )

        elif isinstance(input_value, (str, int, float, bool, dict, list)):
            pipeline_channels = (
                pipeline_channel.extract_pipeline_channels_from_any(input_value)
            )
            for channel in pipeline_channels:
                # NOTE: case like this
                #   p3 = print_and_return_str(s='Project = {}'.format(project))
                # triggers this code.
                # The value contains PipelineChannel placeholders which need
                # to be replaced, and the input needs to be added to the task
                # spec.

                # Form the name for the compiler injected input, and make sure
                # it doesn't collide with any existing input names.
                additional_input_name = (
                    compiler_utils.additional_input_name_for_pipeline_channel(
                        channel))

                # We don't expect collision to happen because we prefix the
                # name of additional input with 'pipelinechannel--'. But just
                # in case a collision did happen, throw a RuntimeError so that
                # we don't get a surprise at runtime.
                if additional_input_name in task.inputs:
                    raise RuntimeError(
                        f'Name collision between existing input name {additional_input_name} and compiler injected input name {additional_input_name}'
                    )

                additional_input_placeholder = placeholders.InputValuePlaceholder(
                    additional_input_name)._to_string()

                if isinstance(input_value, str):
                    input_value = input_value.replace(
                        channel.pattern, additional_input_placeholder)
                else:
                    input_value = compiler_utils.recursive_replace_placeholders(
                        input_value, channel.pattern,
                        additional_input_placeholder)

                _connect_parameter_channel_input(
                    pipeline_task_spec=pipeline_task_spec,
                    input_key=additional_input_name,
                    channel=channel,
                    parent_component_inputs=parent_component_inputs,
                    tasks_in_current_dag=tasks_in_current_dag,
                )

            # The (possibly placeholder-substituted) value itself is passed
            # as a constant.
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant.CopyFrom(
                    to_protobuf_value(input_value))

        else:
            raise ValueError(
                'Input argument supports only the following types: '
                'str, int, float, bool, dict, and list. '
                f'Got {input_value} of type {type(input_value)}.')

    return pipeline_task_spec
def build_component_spec_for_exit_task(
    task: pipeline_task.PipelineTask,) -> pipeline_spec_pb2.ComponentSpec:
    """Builds the ComponentSpec of a task that is used as an exit task.

    This delegates to build_component_spec_for_task with the exit-task flag
    set and the compiled-component flag cleared.

    Args:
        task: The exit task to build a ComponentSpec for.

    Returns:
        The ComponentSpec object built for the exit task.
    """
    exit_task_component_spec = build_component_spec_for_task(
        task=task,
        is_compiled_component=False,
        is_exit_task=True,
    )
    return exit_task_component_spec
def build_component_spec_for_task(
    task: pipeline_task.PipelineTask,
    is_compiled_component: bool,
    is_exit_task: bool = False,
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds the ComponentSpec proto for a pipeline task.

    Args:
        task: The task to build a ComponentSpec for.
        is_compiled_component: When True, the PipelineTaskFinalStatus usage
            check is skipped.
        is_exit_task: Whether the task is used as exit task in Exit Handler.
            When True, the PipelineTaskFinalStatus usage check is skipped.

    Returns:
        A ComponentSpec object for the task, with its executor label derived
        from the task name.

    Raises:
        ValueError: If an input of type PipelineTaskFinalStatus is declared
            on a task for which the check applies and the task's
            `_ignore_upstream_failure_tag` is not set.
    """
    declared_inputs = task.component_spec.inputs or {}

    final_status_check_applies = not (is_exit_task or is_compiled_component)
    if final_status_check_applies:
        for input_name, input_spec in declared_inputs.items():
            if not type_utils.is_task_final_status_type(input_spec.type):
                continue
            if task._ignore_upstream_failure_tag:
                continue
            raise ValueError(
                f'PipelineTaskFinalStatus can only be used in an exit task. Parameter {input_name} of a non exit task has type PipelineTaskFinalStatus.'
            )

    component_spec = _build_component_spec_from_component_spec_structure(
        task.component_spec)
    component_spec.executor_label = utils.sanitize_executor_label(task.name)
    return component_spec
def _build_component_spec_from_component_spec_structure(
    component_spec_struct: structures.ComponentSpec
) -> pipeline_spec_pb2.ComponentSpec:
    """Translates a ComponentSpec structure into a ComponentSpec proto.

    Only the input and output definitions are populated here.

    Args:
        component_spec_struct: The structure whose inputs and outputs are
            read. Either may be empty or None.

    Returns:
        A new ComponentSpec proto carrying the input/output definitions.
    """
    component_spec = pipeline_spec_pb2.ComponentSpec()

    declared_inputs = component_spec_struct.inputs or {}
    for input_name, input_spec in declared_inputs.items():
        # PipelineTaskFinalStatus is handled before the generic parameter
        # check: it has a dedicated parameter type and is always optional.
        if type_utils.is_task_final_status_type(input_spec.type):
            parameter_def = component_spec.input_definitions.parameters[
                input_name]
            parameter_def.parameter_type = (
                pipeline_spec_pb2.ParameterType.TASK_FINAL_STATUS)
            parameter_def.is_optional = True
            if input_spec.description:
                parameter_def.description = input_spec.description
            continue

        if type_utils.is_parameter_type(input_spec.type):
            resolved_type = type_utils.get_parameter_type(input_spec.type)
            parameter_def = component_spec.input_definitions.parameters[
                input_name]
            parameter_def.parameter_type = resolved_type
            if input_spec.optional:
                parameter_def.is_optional = True
                _fill_in_component_input_default_value(
                    component_spec=component_spec,
                    input_name=input_name,
                    default_value=input_spec.default,
                )
            if input_spec.description:
                parameter_def.description = input_spec.description
            continue

        # Anything that is not a parameter type is treated as an artifact.
        artifact_type_proto = type_utils.bundled_artifact_to_artifact_proto(
            input_spec.type)
        artifact_def = component_spec.input_definitions.artifacts[input_name]
        artifact_def.artifact_type.CopyFrom(artifact_type_proto)
        artifact_def.is_artifact_list = input_spec.is_artifact_list
        if input_spec.optional:
            artifact_def.is_optional = True
        if input_spec.description:
            artifact_def.description = input_spec.description

    declared_outputs = component_spec_struct.outputs or {}
    for output_name, output_spec in declared_outputs.items():
        if type_utils.is_parameter_type(output_spec.type):
            resolved_type = type_utils.get_parameter_type(output_spec.type)
            parameter_def = component_spec.output_definitions.parameters[
                output_name]
            parameter_def.parameter_type = resolved_type
            if output_spec.description:
                parameter_def.description = output_spec.description
            continue

        artifact_type_proto = type_utils.bundled_artifact_to_artifact_proto(
            output_spec.type)
        artifact_def = component_spec.output_definitions.artifacts[output_name]
        artifact_def.artifact_type.CopyFrom(artifact_type_proto)
        artifact_def.is_artifact_list = output_spec.is_artifact_list
        if output_spec.description:
            artifact_def.description = output_spec.description

    return component_spec
def connect_single_dag_output(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    output_name: str,
    output_channel: pipeline_channel.PipelineChannel,
) -> None:
    """Wires one DAG output to the single subtask output that produces it.

    This is the non-OneOf case: the DAG output is backed by exactly one
    channel. A channel that is neither an artifact channel nor a parameter
    channel is left unconnected.

    Args:
        component_spec: The component spec whose DAG outputs are modified in
            place.
        output_name: The name of the DAG output.
        output_channel: The pipeline channel selected for the DAG output.

    Raises:
        ValueError: If `output_name` is not declared in the matching section
            of the component's output definitions.
    """
    undefined_output_message = f'Pipeline or component output not defined: {output_name}. You may be missing a type annotation.'

    if isinstance(output_channel, pipeline_channel.PipelineArtifactChannel):
        if output_name not in component_spec.output_definitions.artifacts:
            raise ValueError(undefined_output_message)
        selector = pipeline_spec_pb2.DagOutputsSpec.ArtifactSelectorSpec(
            producer_subtask=output_channel.task_name,
            output_artifact_key=output_channel.name,
        )
        dag_artifact = component_spec.dag.outputs.artifacts[output_name]
        dag_artifact.artifact_selectors.append(selector)
        return

    if isinstance(output_channel, pipeline_channel.PipelineParameterChannel):
        if output_name not in component_spec.output_definitions.parameters:
            raise ValueError(undefined_output_message)
        dag_parameter = component_spec.dag.outputs.parameters[output_name]
        dag_parameter.value_from_parameter.producer_subtask = (
            output_channel.task_name)
        dag_parameter.value_from_parameter.output_parameter_key = (
            output_channel.name)
def connect_oneof_dag_output(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    output_name: str,
    oneof_output: pipeline_channel.OneOfMixin,
) -> None:
    """Wires a DAG output to every candidate channel of a OneOf output.

    One selector is appended per channel held by the OneOf object returned
    by the DAG's internal condition-branches group.

    Args:
        component_spec: The component spec whose DAG outputs are modified in
            place.
        output_name: The name of the DAG output.
        oneof_output: The OneOfMixin object returned by the pipeline (OneOf
            in user code).

    Raises:
        ValueError: If `output_name` is not declared in the matching section
            of the component's output definitions.
    """
    undefined_output_message = f'Pipeline or component output not defined: {output_name}. You may be missing a type annotation.'

    if isinstance(oneof_output, pipeline_channel.OneOfArtifact):
        if output_name not in component_spec.output_definitions.artifacts:
            raise ValueError(undefined_output_message)
        for branch_channel in oneof_output.channels:
            artifact_selector = (
                pipeline_spec_pb2.DagOutputsSpec.ArtifactSelectorSpec(
                    producer_subtask=branch_channel.task_name,
                    output_artifact_key=branch_channel.name,
                ))
            component_spec.dag.outputs.artifacts[
                output_name].artifact_selectors.append(artifact_selector)

    if isinstance(oneof_output, pipeline_channel.OneOfParameter):
        if output_name not in component_spec.output_definitions.parameters:
            raise ValueError(undefined_output_message)
        for branch_channel in oneof_output.channels:
            parameter_selector = (
                pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec(
                    producer_subtask=branch_channel.task_name,
                    output_parameter_key=branch_channel.name,
                ))
            component_spec.dag.outputs.parameters[
                output_name].value_from_oneof.parameter_selectors.append(
                    parameter_selector)
def _build_dag_outputs(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    dag_outputs: Dict[str, pipeline_channel.PipelineChannel],
) -> None:
    """Connects the DAG's outputs to a TaskGroup's ComponentSpec and validates
    that every declared output has been connected.

    Args:
        component_spec: The ComponentSpec to update in place.
        dag_outputs: Dictionary of output key to output channel.

    Raises:
        ValueError: If a value in `dag_outputs` is not a PipelineChannel, if
            an output is not declared in the component's output definitions,
            or if a declared output is left without a source.
    """
    for output_name, output_channel in dag_outputs.items():
        if not isinstance(output_channel, pipeline_channel.PipelineChannel):
            # Report the offending object's type (not its value), matching
            # the message produced by build_oneof_dag_outputs.
            raise ValueError(
                f"Got unknown pipeline output '{output_name}' of type {type(output_channel)}."
            )
        connect_single_dag_output(component_spec, output_name, output_channel)

    validate_dag_outputs(component_spec)
def validate_dag_outputs(
        component_spec: pipeline_spec_pb2.ComponentSpec) -> None:
    """Checks that every declared output of a DAG component has a source.

    Each artifact and parameter name in the component's output_definitions
    must also appear in the corresponding section of its dag.outputs.
    Artifacts are checked before parameters.

    Args:
        component_spec: The DAG ComponentSpec to validate.

    Raises:
        ValueError: For the first declared output found without a matching
            DAG output.
    """
    declared_and_connected = (
        (component_spec.output_definitions.artifacts,
         component_spec.dag.outputs.artifacts),
        (component_spec.output_definitions.parameters,
         component_spec.dag.outputs.parameters),
    )
    for declared_outputs, connected_outputs in declared_and_connected:
        for output_name in declared_outputs:
            if output_name not in connected_outputs:
                raise ValueError(f'Missing pipeline output: {output_name}.')
def build_oneof_dag_outputs(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    oneof_outputs: Dict[str, pipeline_channel.OneOfMixin],
) -> None:
    """Connects the DAG's OneOf outputs to a TaskGroup's ComponentSpec and
    then validates that every declared output has been connected.

    Args:
        component_spec: The ComponentSpec to update in place.
        oneof_outputs: Dictionary of output key to OneOf output channel.

    Raises:
        ValueError: If any channel of a OneOf output is not a
            PipelineChannel, or if connecting or validating the outputs
            fails.
    """
    for output_name, oneof_output in oneof_outputs.items():
        # Reject the whole OneOf output before connecting any of it.
        unknown_channels = [
            candidate for candidate in oneof_output.channels
            if not isinstance(candidate, pipeline_channel.PipelineChannel)
        ]
        if unknown_channels:
            raise ValueError(
                f"Got unknown pipeline output '{output_name}' of type {type(unknown_channels[0])}."
            )

        connect_oneof_dag_output(
            component_spec,
            output_name,
            oneof_output,
        )

    validate_dag_outputs(component_spec)
def build_importer_spec_for_task(
    task: pipeline_task.PipelineTask
) -> pipeline_spec_pb2.PipelineDeploymentConfig.ImporterSpec:
    """Builds the ImporterSpec proto for an importer task.

    Args:
        task: The task whose `importer_spec` is converted.

    Returns:
        An ImporterSpec object for the task. Its artifact URI is set to the
        runtime parameter 'uri' when the task's URI is a pipeline channel, or
        to a constant when it is a plain string.
    """
    source_spec = task.importer_spec

    importer_spec = pipeline_spec_pb2.PipelineDeploymentConfig.ImporterSpec(
        type_schema=type_utils.bundled_artifact_to_artifact_proto(
            source_spec.schema_title),
        reimport=source_spec.reimport,
    )

    if source_spec.metadata:
        metadata_struct = struct_pb2.Struct()
        metadata_struct.update(source_spec.metadata)
        importer_spec.metadata.CopyFrom(metadata_struct)

    artifact_uri = source_spec.artifact_uri
    if isinstance(artifact_uri, pipeline_channel.PipelineChannel):
        importer_spec.artifact_uri.runtime_parameter = 'uri'
    elif isinstance(artifact_uri, str):
        importer_spec.artifact_uri.constant.string_value = artifact_uri

    return importer_spec
def build_container_spec_for_task(
    task: pipeline_task.PipelineTask
) -> pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec:
    """Builds the PipelineContainerSpec proto for a container task.

    Image, command, args and environment variables are copied from the
    task's container spec. Resource settings, when present, are copied after
    replacing any pipeline channel they contain with the placeholder of the
    corresponding compiler-injected input.

    Args:
        task: The task whose `container_spec` is converted.

    Returns:
        A PipelineContainerSpec object for the task.
    """

    def convert_to_placeholder(input_value: str) -> str:
        """Replaces a pipeline channel in the value with the placeholder of
        its compiler injected input; other values are returned unchanged."""
        found_channels = (
            pipeline_channel.extract_pipeline_channels_from_any(input_value))
        if not found_channels:
            return input_value

        assert len(found_channels) == 1
        channel = found_channels[0]
        injected_input_name = (
            compiler_utils.additional_input_name_for_pipeline_channel(channel))
        injected_placeholder = placeholders.InputValuePlaceholder(
            injected_input_name)._to_string()
        return input_value.replace(channel.pattern, injected_placeholder)

    container_spec_cls = (
        pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec)

    env_vars = [
        container_spec_cls.EnvVar(name=name, value=value)
        for name, value in (task.container_spec.env or {}).items()
    ]
    container_spec = container_spec_cls(
        image=task.container_spec.image,
        command=task.container_spec.command,
        args=task.container_spec.args,
        env=env_vars,
    )

    resources = task.container_spec.resources
    if resources is None:
        return container_spec

    # (attribute on the task's resources, field on the proto's resources)
    scalar_resource_fields = (
        ('cpu_request', 'resource_cpu_request'),
        ('cpu_limit', 'resource_cpu_limit'),
        ('memory_request', 'resource_memory_request'),
        ('memory_limit', 'resource_memory_limit'),
    )
    for source_attr, proto_field in scalar_resource_fields:
        requested = getattr(resources, source_attr)
        if requested is not None:
            setattr(container_spec.resources, proto_field,
                    convert_to_placeholder(requested))

    if resources.accelerator_count is not None:
        # Both the resource_type/resource_count and the type/count field
        # pairs are populated from the same accelerator settings.
        # NOTE(review): int(...) raises ValueError for a non-numeric string,
        # e.g. if accelerator_count ever holds a channel placeholder --
        # confirm that cannot happen here.
        accelerator_config = (
            container_spec_cls.ResourceSpec.AcceleratorConfig(
                resource_type=convert_to_placeholder(
                    resources.accelerator_type),
                resource_count=convert_to_placeholder(
                    resources.accelerator_count),
                type=convert_to_placeholder(resources.accelerator_type),
                count=convert_to_placeholder(
                    int(resources.accelerator_count)),
            ))
        container_spec.resources.accelerator.CopyFrom(accelerator_config)

    return container_spec
def _fill_in_component_input_default_value(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    input_name: str,
    default_value: Optional[type_utils.PARAMETER_TYPES],
) -> None:
    """Writes the default value of a component input parameter.

    The field of `default_value` that gets set is chosen from the parameter
    type already recorded for the input. Nothing is written when the default
    is None or the parameter type is not one of the handled types.

    Args:
        component_spec: The ComponentSpec to update in place.
        input_name: The name of the input parameter.
        default_value: The default value of the input parameter.
    """
    if default_value is None:
        return

    parameter_def = component_spec.input_definitions.parameters[input_name]
    parameter_type = parameter_def.parameter_type
    type_enum = pipeline_spec_pb2.ParameterType

    if parameter_type in (type_enum.NUMBER_INTEGER, type_enum.NUMBER_DOUBLE):
        # Integers and doubles share the same numeric field.
        parameter_def.default_value.number_value = default_value
    elif parameter_type == type_enum.STRING:
        parameter_def.default_value.string_value = default_value
    elif parameter_type == type_enum.BOOLEAN:
        parameter_def.default_value.bool_value = default_value
    elif parameter_type in (type_enum.STRUCT, type_enum.LIST):
        # Containers are converted recursively into a protobuf Value.
        parameter_def.default_value.CopyFrom(to_protobuf_value(default_value))
def build_component_spec_for_group(
    input_pipeline_channels: List[pipeline_channel.PipelineChannel],
    output_pipeline_channels: Dict[str, pipeline_channel.PipelineChannel],
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds the ComponentSpec proto for a TasksGroup.

    Args:
        input_pipeline_channels: The channels consumed by the group. Each one
            becomes an input definition named after its compiler injected
            input name.
        output_pipeline_channels: Dictionary of output name to the channel
            surfaced by the group. Each one becomes an output definition.

    Returns:
        A ComponentSpec object holding the group's input and output
        definitions.

    Raises:
        TypeError: If an input channel is not one of the supported channel
            types.
    """
    artifact_input_types = (
        pipeline_channel.PipelineArtifactChannel,
        for_loop.LoopArtifactArgument,
    )
    parameter_input_types = (
        pipeline_channel.PipelineParameterChannel,
        for_loop.LoopParameterArgument,
        for_loop.LoopArgumentVariable,
        dsl.Collected,
    )

    component_spec = pipeline_spec_pb2.ComponentSpec()

    for channel in input_pipeline_channels:
        input_name = compiler_utils.additional_input_name_for_pipeline_channel(
            channel)

        if isinstance(channel, artifact_input_types):
            artifact_type_proto = (
                type_utils.bundled_artifact_to_artifact_proto(
                    channel.channel_type))
            artifact_def = component_spec.input_definitions.artifacts[
                input_name]
            artifact_def.artifact_type.CopyFrom(artifact_type_proto)
            artifact_def.is_artifact_list = channel.is_artifact_list
        elif isinstance(channel, parameter_input_types):
            resolved_type = type_utils.get_parameter_type(channel.channel_type)
            component_spec.input_definitions.parameters[
                input_name].parameter_type = resolved_type
        else:
            raise TypeError(
                f'Expected PipelineParameterChannel, PipelineArtifactChannel, LoopParameterArgument, LoopArtifactArgument, LoopArgumentVariable, or Collected, got {type(channel)}.'
            )

    for output_name, output in output_pipeline_channels.items():
        if isinstance(output, pipeline_channel.PipelineArtifactChannel):
            artifact_type_proto = (
                type_utils.bundled_artifact_to_artifact_proto(
                    output.channel_type))
            artifact_def = component_spec.output_definitions.artifacts[
                output_name]
            artifact_def.artifact_type.CopyFrom(artifact_type_proto)
            artifact_def.is_artifact_list = output.is_artifact_list
        else:
            # Every non-artifact output is declared as a parameter.
            resolved_type = type_utils.get_parameter_type(output.channel_type)
            component_spec.output_definitions.parameters[
                output_name].parameter_type = resolved_type

    return component_spec
def _pop_input_from_task_spec(
    task_spec: pipeline_spec_pb2.PipelineTaskSpec,
    input_name: str,
) -> None:
    """Removes an input from task spec inputs.

    The name is popped from both the artifact inputs and the parameter
    inputs. If the inputs then compare equal to a default TaskInputsSpec,
    the 'inputs' field is cleared from the task spec.

    Args:
        task_spec: The pipeline task spec to update in place.
        input_name: The name of the input, which could be an artifact or
            parameter.
    """
    # Artifacts first, then parameters, as in the original sequence.
    for inputs_kind in ('artifacts', 'parameters'):
        getattr(task_spec.inputs, inputs_kind).pop(input_name)

    inputs_are_empty = task_spec.inputs == pipeline_spec_pb2.TaskInputsSpec()
    if inputs_are_empty:
        task_spec.ClearField('inputs')
def _update_task_spec_for_loop_group(
    group: tasks_group.ParallelFor,
    pipeline_task_spec: pipeline_spec_pb2.PipelineTaskSpec,
) -> None:
    """Fills in the iterator fields of a loop group's PipelineTaskSpec.

    When the loop items come from a pipeline channel, either the parameter
    iterator or the artifact iterator is pointed at the items input,
    depending on the type of the group's loop argument. Otherwise the raw
    items are serialized to JSON and stored on the parameter iterator. In
    every case the per-item input is removed from the task's inputs, and a
    positive parallelism limit is copied onto the iterator policy.

    Args:
        group: The loop group to update task spec for.
        pipeline_task_spec: The pipeline task spec to update in place.

    Raises:
        TypeError: If the loop items come from a pipeline channel and the
            loop argument is neither a LoopParameterArgument nor a
            LoopArtifactArgument.
    """
    if not group.items_is_pipeline_channel:
        # The items are raw values: embed them as a JSON string.
        item_input_name = compiler_utils.additional_input_name_for_pipeline_channel(
            group.loop_argument)
        static_items = group.loop_argument.items_or_pipeline_channel

        iterator = pipeline_task_spec.parameter_iterator
        iterator.items.raw = json.dumps(static_items, sort_keys=True)
        iterator.item_input = item_input_name

        _pop_input_from_task_spec(
            task_spec=pipeline_task_spec, input_name=iterator.item_input)
    else:
        loop_items_channel = group.loop_argument.items_or_pipeline_channel
        input_parameter_name = compiler_utils.additional_input_name_for_pipeline_channel(
            loop_items_channel)
        loop_argument_item_name = compiler_utils.additional_input_name_for_pipeline_channel(
            group.loop_argument.full_name)

        # The per-item input name must be the items input name plus the
        # loop item suffix.
        expected_item_name = f'{input_parameter_name}-{for_loop.LOOP_ITEM_NAME_BASE}'
        assert expected_item_name == loop_argument_item_name

        # Select the iterator that matches the loop argument's kind and
        # point it at the input that carries the items.
        if isinstance(group.loop_argument, for_loop.LoopParameterArgument):
            iterator = pipeline_task_spec.parameter_iterator
            iterator.items.input_parameter = input_parameter_name
        elif isinstance(group.loop_argument, for_loop.LoopArtifactArgument):
            iterator = pipeline_task_spec.artifact_iterator
            iterator.items.input_artifact = compiler_utils.additional_input_name_for_pipeline_channel(
                loop_items_channel)
        else:
            raise TypeError(
                f'Expected LoopParameterArgument or LoopArtifactArgument, got {type(group.loop_argument)}.'
            )

        iterator.item_input = loop_argument_item_name
        _pop_input_from_task_spec(
            task_spec=pipeline_task_spec, input_name=iterator.item_input)

        # If the loop items itself is a loop arguments variable, handle the
        # subvar name.
        if isinstance(loop_items_channel, for_loop.LoopArgumentVariable):
            items_input_spec = pipeline_task_spec.inputs.parameters[
                input_parameter_name]
            items_input_spec.parameter_expression_selector = (
                f'parseJson(string_value)["{loop_items_channel.subvar_name}"]'
            )
            items_input_spec.component_input_parameter = (
                compiler_utils.additional_input_name_for_pipeline_channel(
                    loop_items_channel.loop_argument))

    # Only a positive limit is written to the iterator policy.
    parallelism_limit = group.parallelism_limit
    if parallelism_limit > 0:
        pipeline_task_spec.iterator_policy.parallelism_limit = parallelism_limit
def _binary_operations_to_cel_conjunctive(
        operations: List[pipeline_channel.ConditionOperation]) -> str:
    """Converts a list of ConditionOperation to a CEL string with placeholders.

    Each ConditionOperation is converted on its own and the results are
    joined with the conjunctive operator (&&).

    Args:
        operations: The binary operations to convert and join.

    Returns:
        The binary operations as a single CEL string.
    """
    return ' && '.join(
        _single_binary_operation_to_cel_condition(operation=condition)
        for condition in operations)
def _single_binary_operation_to_cel_condition(
operation: pipeline_channel.ConditionOperation) -> str:
"""Converts a ConditionOperation to a CEL string with placeholders.
Args:
operation: The binary operation to convert to a string.
Returns:
The binary operation as a CEL string.
"""
left_operand = operation.left_operand
right_operand = operation.right_operand
# cannot make comparisons involving particular types
for value_or_reference in [left_operand, right_operand]:
if isinstance(value_or_reference, pipeline_channel.PipelineChannel):
parameter_type = type_utils.get_parameter_type(
value_or_reference.channel_type)
if parameter_type in [
pipeline_spec_pb2.ParameterType.STRUCT,
pipeline_spec_pb2.ParameterType.LIST,
pipeline_spec_pb2.ParameterType
.PARAMETER_TYPE_ENUM_UNSPECIFIED,
]:
input_name = compiler_utils.additional_input_name_for_pipeline_channel(
value_or_reference)
raise ValueError(
f'Conditional requires primitive parameter values for comparison. Found input "{input_name}" of type {value_or_reference.channel_type} in pipeline definition instead.'
)
# ensure the types compared are the same or compatible
parameter_types = set()
for value_or_reference in [left_operand, right_operand]:
if isinstance(value_or_reference, pipeline_channel.PipelineChannel):
parameter_type = type_utils.get_parameter_type(
value_or_reference.channel_type)
else:
parameter_type = type_utils.get_parameter_type(
type(value_or_reference).__name__)
parameter_types.add(parameter_type)
if len(parameter_types) == 2:
# Two different types being compared. The only possible types are
# String, Boolean, Double and Integer. We'll promote the other type
# using the following precedence:
# String > Boolean > Double > Integer
if pipeline_spec_pb2.ParameterType.STRING in parameter_types:
canonical_parameter_type = pipeline_spec_pb2.ParameterType.STRING
elif pipeline_spec_pb2.ParameterType.BOOLEAN in parameter_types:
canonical_parameter_type = pipeline_spec_pb2.ParameterType.BOOLEAN
else:
# Must be a double and int, promote to double.
assert pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE in parameter_types, \
f'Types: {parameter_types} [{left_operand} {right_operand}]'
assert pipeline_spec_pb2.ParameterType.NUMBER_INTEGER in parameter_types, \
f'Types: {parameter_types} [{left_operand} {right_operand}]'
canonical_parameter_type = pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE
elif len(parameter_types) == 1: # Both operands are the same type.
canonical_parameter_type = parameter_types.pop()
else:
# Probably shouldn't happen.
raise ValueError(
f'Unable to determine operand types for "{left_operand}" and "{right_operand}"'
)
operand_values = []
for value_or_reference in [left_operand, right_operand]:
if isinstance(value_or_reference, pipeline_channel.PipelineChannel):
input_name = compiler_utils.additional_input_name_for_pipeline_channel(
value_or_reference)
operand_value = f"inputs.parameter_values['{input_name}']"
parameter_type = type_utils.get_parameter_type(
value_or_reference.channel_type)
if parameter_type == pipeline_spec_pb2.ParameterType.NUMBER_INTEGER:
operand_value = f'int({operand_value})'
elif isinstance(value_or_reference, str):
operand_value = f"'{value_or_reference}'"
parameter_type = pipeline_spec_pb2.ParameterType.STRING
elif isinstance(value_or_reference, bool):
# Booleans need to be compared as 'true' or 'false' in CEL.
operand_value = str(value_or_reference).lower()
parameter_type = pipeline_spec_pb2.ParameterType.BOOLEAN
elif isinstance(value_or_reference, int):
operand_value = str(value_or_reference)
parameter_type = pipeline_spec_pb2.ParameterType.NUMBER_INTEGER
else:
assert isinstance(value_or_reference, float), value_or_reference
operand_value = str(value_or_reference)
parameter_type = pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE
if parameter_type != canonical_parameter_type:
# Type-cast to so CEL does not complain.
if canonical_parameter_type == pipeline_spec_pb2.ParameterType.STRING:
assert parameter_type in [
pipeline_spec_pb2.ParameterType.BOOLEAN,
pipeline_spec_pb2.ParameterType.NUMBER_INTEGER,
pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE,
]
operand_value = f"'{operand_value}'"
elif canonical_parameter_type == pipeline_spec_pb2.ParameterType.BOOLEAN:
assert parameter_type in [
pipeline_spec_pb2.ParameterType.NUMBER_INTEGER,
pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE,
]
operand_value = 'true' if int(operand_value) == 0 else 'false'
else:
assert canonical_parameter_type == pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE
assert parameter_type == pipeline_spec_pb2.ParameterType.NUMBER_INTEGER
operand_value = f'double({operand_value})'
operand_values.append(operand_value)
left_operand_value, right_operand_value = tuple(operand_values)
condition_string = (