Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(sdk): supporting bring your own container for arbitrary input and outputs #8066

Merged
merged 25 commits into from
Aug 8, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
2df8d19
support container_component decorator for function with no inputs
zichuan-scott-xu Jun 29, 2022
101b615
resolve review comments
zichuan-scott-xu Jul 1, 2022
56ed6d5
add sample tests for milestone 1
zichuan-scott-xu Jul 6, 2022
40c9b81
modify compiler test data
zichuan-scott-xu Jul 7, 2022
e9a8bf1
resolve reviews
zichuan-scott-xu Jul 7, 2022
56ac809
resolve reviews
zichuan-scott-xu Jul 7, 2022
702f3ef
WIP
zichuan-scott-xu Jul 11, 2022
b6eb5e2
implementation of function of no inputs
zichuan-scott-xu Jul 13, 2022
4f7745a
fixed sample test
zichuan-scott-xu Jul 14, 2022
8275381
re-fix sample test
zichuan-scott-xu Jul 14, 2022
7b92223
fix rebase merge conflict
zichuan-scott-xu Jul 15, 2022
dc064b5
resolve formatting
zichuan-scott-xu Jul 15, 2022
eb31781
resolve isort error for test data
zichuan-scott-xu Jul 15, 2022
8db6d89
resolve comments
zichuan-scott-xu Jul 18, 2022
b146ae0
fix nit
zichuan-scott-xu Jul 18, 2022
bb9484e
resolve nit
zichuan-scott-xu Jul 18, 2022
920a30f
add implementation for placeholders i/o, sample and compiler tests
zichuan-scott-xu Jul 22, 2022
85cb271
Merge branch 'kubeflow:master' into cuj3-milestone2
zichuan-scott-xu Jul 22, 2022
4199c72
resolve comments and merge logic for constructing container component
zichuan-scott-xu Jul 27, 2022
48ec85b
Merge branch 'cuj3-milestone2' of https://github.com/zichuan-scott-xu…
zichuan-scott-xu Jul 27, 2022
97a1e1a
resolve comments
zichuan-scott-xu Jul 28, 2022
2dc74c4
resolve comments
zichuan-scott-xu Aug 1, 2022
e8f393c
fix assertion messages
zichuan-scott-xu Aug 1, 2022
bb282dc
add error handling for accessing artifact by itself
zichuan-scott-xu Aug 4, 2022
fb26f91
add test for raising error for accessing artifact by itself
zichuan-scott-xu Aug 5, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions sdk/python/kfp/compiler/_read_write_test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
'pipeline_with_task_final_status_yaml',
'component_with_pip_index_urls',
'container_component_with_no_inputs',
'two_step_pipeline_containerized',
],
'test_data_dir': 'sdk/python/kfp/compiler/test_data/pipelines',
'config': {
Expand All @@ -62,6 +63,8 @@
'output_metrics',
'preprocess',
'container_no_input',
'container_io',
'container_with_artifact_output',
],
'test_data_dir': 'sdk/python/kfp/compiler/test_data/components',
'config': {
Expand Down
68 changes: 68 additions & 0 deletions sdk/python/kfp/compiler/compiler_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,74 @@ def hello_world_container() -> dsl.ContainerSpec:
['exec-hello-world-container']['container']['command'],
['echo', 'hello world'])

def test_compile_container_with_simple_io(self):
    """Compile a container component with a string input and an output path.

    The compiled spec must declare both the ``text`` input and the
    ``output_path`` output as ``STRING`` parameters.
    """

    @dsl.container_component
    def container_simple_io(text: str, output_path: dsl.OutputPath(str)):
        # The input is echoed by the command; the output path is forwarded
        # to the container as a CLI argument.
        echo_command = ['echo', text]
        output_args = ['--output_path', output_path]
        return dsl.ContainerSpec(
            image='python:3.7', command=echo_command, args=output_args)

    with tempfile.TemporaryDirectory() as workdir:
        spec_path = os.path.join(workdir, 'component.yaml')
        compiler.Compiler().compile(
            pipeline_func=container_simple_io,
            package_path=spec_path,
            pipeline_name='container-simple-io')
        with open(spec_path, 'r') as spec_file:
            compiled_spec = yaml.safe_load(spec_file)

        # Both assertions inspect the same compiled component entry.
        component = compiled_spec['components']['comp-container-simple-io']
        input_params = component['inputDefinitions']['parameters']
        self.assertEqual(input_params['text']['parameterType'], 'STRING')
        output_params = component['outputDefinitions']['parameters']
        self.assertEqual(output_params['output_path']['parameterType'],
                         'STRING')

def test_compile_container_with_artifact_output(self):
    """Compile a container component that declares an output artifact.

    The compiled spec must type ``num_epochs`` as ``NUMBER_INTEGER`` and
    list ``model`` among the component's output artifacts.
    """

    @dsl.container_component
    def container_with_artifacts_output(
            num_epochs: int,  # also as an input
            model: dsl.Output[dsl.Model],
            model_config_path: dsl.OutputPath(str),
    ):
        # Flag/value pairs handed to the container entrypoint.
        cli_args = [
            '--epochs',
            num_epochs,
            '--model_path',
            model.uri,
            '--model_config_path',
            model_config_path,
        ]
        return dsl.ContainerSpec(
            image='gcr.io/my-image', command=['sh', 'run.sh'], args=cli_args)

    with tempfile.TemporaryDirectory() as workdir:
        spec_path = os.path.join(workdir, 'component.yaml')
        compiler.Compiler().compile(
            pipeline_func=container_with_artifacts_output,
            package_path=spec_path,
            pipeline_name='container-with-artifacts-output')
        with open(spec_path, 'r') as spec_file:
            compiled_spec = yaml.safe_load(spec_file)

        # Both assertions inspect the same compiled component entry.
        component = compiled_spec['components'][
            'comp-container-with-artifacts-output']
        input_params = component['inputDefinitions']['parameters']
        self.assertEqual(input_params['num_epochs']['parameterType'],
                         'NUMBER_INTEGER')
        self.assertIn('model', component['outputDefinitions']['artifacts'])


class TestCompileBadInput(unittest.TestCase):

Expand Down
31 changes: 31 additions & 0 deletions sdk/python/kfp/compiler/test_data/components/container_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright 2022 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from kfp.dsl import container_component
from kfp.dsl import ContainerSpec
from kfp.dsl import OutputPath


@container_component
def container_io(text: str, output_path: OutputPath(str)):
    # Container component with one string input and one output path: the
    # input is echoed by the command and the output path is passed as an
    # argument.
    echo_command = ['echo', text]
    output_args = ['--output_path', output_path]
    return ContainerSpec(
        image='python:3.7', command=echo_command, args=output_args)


if __name__ == '__main__':
    # Compile the component to a path derived from this file's path with
    # '.py' replaced by '.yaml'.
    from kfp import compiler

    yaml_path = __file__.replace('.py', '.yaml')
    compiler.Compiler().compile(
        pipeline_func=container_io, package_path=yaml_path)
44 changes: 44 additions & 0 deletions sdk/python/kfp/compiler/test_data/components/container_io.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
components:
comp-container-io:
executorLabel: exec-container-io
inputDefinitions:
parameters:
text:
parameterType: STRING
outputDefinitions:
parameters:
output_path:
parameterType: STRING
deploymentSpec:
executors:
exec-container-io:
container:
args:
- --output_path
- '{{$.outputs.parameters[''output_path''].output_file}}'
command:
- echo
- '{{$.inputs.parameters[''text'']}}'
image: python:3.7
pipelineInfo:
name: container-io
root:
dag:
tasks:
container-io:
cachingOptions:
enableCache: true
componentRef:
name: comp-container-io
inputs:
parameters:
text:
componentInputParameter: text
taskInfo:
name: container-io
inputDefinitions:
parameters:
text:
parameterType: STRING
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-beta.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright 2022 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from kfp.dsl import container_component
from kfp.dsl import ContainerSpec
from kfp.dsl import Model
from kfp.dsl import Output
from kfp.dsl import OutputPath


@container_component
def container_with_artifacts_output(
        num_epochs: int,  # also as an input
        model: Output[Model],
        model_config_path: OutputPath(str),
):
    # Container component combining an integer input, an output artifact and
    # an output path; all three are forwarded to the container as arguments.
    #
    # NOTE(review): the artifact is referenced through ``model.uri``. Review
    # discussion on this change asked what happens when ``model`` is passed
    # by itself or an arbitrary attribute is accessed; the stated design is
    # to raise an error in both cases -- confirm against the SDK.
    cli_args = [
        '--epochs',
        num_epochs,
        '--model_path',
        model.uri,
        '--model_config_path',
        model_config_path,
    ]
    return ContainerSpec(
        image='gcr.io/my-image', command=['sh', 'run.sh'], args=cli_args)


if __name__ == '__main__':
    # Compile the component to a path derived from this file's path with
    # '.py' replaced by '.yaml'.
    from kfp import compiler

    yaml_path = __file__.replace('.py', '.yaml')
    compiler.Compiler().compile(
        pipeline_func=container_with_artifacts_output,
        package_path=yaml_path)
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
components:
comp-container-with-artifacts-output:
executorLabel: exec-container-with-artifacts-output
inputDefinitions:
parameters:
num_epochs:
parameterType: NUMBER_INTEGER
outputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: system.Model
schemaVersion: 0.0.1
parameters:
model_config_path:
parameterType: STRING
deploymentSpec:
executors:
exec-container-with-artifacts-output:
container:
args:
- --epochs
- '{{$.inputs.parameters[''num_epochs'']}}'
- --model_path
- '{{$.outputs.artifacts[''model''].uri}}'
- --model_config_path
- '{{$.outputs.parameters[''model_config_path''].output_file}}'
command:
- sh
- run.sh
image: gcr.io/my-image
pipelineInfo:
name: container-with-artifacts-output
root:
dag:
tasks:
container-with-artifacts-output:
cachingOptions:
enableCache: true
componentRef:
name: comp-container-with-artifacts-output
inputs:
parameters:
num_epochs:
componentInputParameter: num_epochs
taskInfo:
name: container-with-artifacts-output
inputDefinitions:
parameters:
num_epochs:
parameterType: NUMBER_INTEGER
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-beta.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright 2022 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from kfp import compiler
from kfp import dsl


@dsl.container_component
def component1(text: str, output_gcs: dsl.Output[dsl.Dataset]):
    # Producer step: takes a string input and declares a Dataset output
    # artifact whose URI is the last element of the command.
    #
    # NOTE(review): 'sh -c | set -e -x' is passed as a single command
    # element rather than as separate 'sh', '-c' and script entries; this
    # looks unintended for an actual run -- confirm. The strings are kept
    # unchanged because the compiled golden YAML pins them.
    producer_command = [
        'sh -c | set -e -x',
        'echo',
        text,
        '| gsutil cp -',
        output_gcs.uri,
    ]
    return dsl.ContainerSpec(
        image='google/cloud-sdk:slim', command=producer_command)


@dsl.container_component
def component2(input_gcs: dsl.Input[dsl.Dataset]):
    # Consumer step: receives a Dataset input artifact and passes its URI
    # to the container as the only argument.
    #
    # NOTE(review): the script element 'set -e -x gsutil cat' has no
    # separator between the two commands -- presumably a newline was
    # intended; confirm before using outside compiler tests.
    consumer_command = ['sh', '-c', '|', 'set -e -x gsutil cat']
    consumer_args = [input_gcs.uri]
    return dsl.ContainerSpec(
        image='google/cloud-sdk:slim',
        command=consumer_command,
        args=consumer_args)


@dsl.pipeline(name='containerized-two-step-pipeline')
def my_pipeline(text: str = 'Hello world!'):
    # Two-step pipeline: the producer's 'output_gcs' artifact is wired into
    # the consumer's 'input_gcs' input.
    producer = component1(text=text).set_display_name('Producer')
    producer_dataset = producer.outputs['output_gcs']
    component2(input_gcs=producer_dataset).set_display_name('Consumer')


if __name__ == '__main__':
    # Compile the pipeline with an overridden 'text' value to a path derived
    # from this file's path with '.py' replaced by '.yaml'.
    yaml_path = __file__.replace('.py', '.yaml')
    parameter_overrides = {'text': 'Hello KFP!'}
    compiler.Compiler().compile(
        pipeline_func=my_pipeline,
        pipeline_parameters=parameter_overrides,
        package_path=yaml_path)
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
components:
comp-component1:
executorLabel: exec-component1
inputDefinitions:
parameters:
text:
parameterType: STRING
outputDefinitions:
artifacts:
output_gcs:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
comp-component2:
executorLabel: exec-component2
inputDefinitions:
artifacts:
input_gcs:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
deploymentSpec:
executors:
exec-component1:
container:
command:
- sh -c | set -e -x
- echo
- '{{$.inputs.parameters[''text'']}}'
- '| gsutil cp -'
- '{{$.outputs.artifacts[''output_gcs''].uri}}'
image: google/cloud-sdk:slim
exec-component2:
container:
args:
- '{{$.inputs.artifacts[''input_gcs''].uri}}'
command:
- sh
- -c
- '|'
- set -e -x gsutil cat
image: google/cloud-sdk:slim
pipelineInfo:
name: containerized-two-step-pipeline
root:
dag:
tasks:
component1:
cachingOptions:
enableCache: true
componentRef:
name: comp-component1
inputs:
parameters:
text:
componentInputParameter: text
taskInfo:
name: Producer
component2:
cachingOptions:
enableCache: true
componentRef:
name: comp-component2
dependentTasks:
- component1
inputs:
artifacts:
input_gcs:
taskOutputArtifact:
outputArtifactKey: output_gcs
producerTask: component1
taskInfo:
name: Consumer
inputDefinitions:
parameters:
text:
defaultValue: Hello KFP!
parameterType: STRING
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-beta.1
Loading