Skip to content

Commit

Permalink
fix(sdk): fix deserializing v1 component yaml defaults (#8639)
Browse files Browse the repository at this point in the history
* fix deserializing v1 defaults

* update test cases

* update type annotation

* add unit tests
  • Loading branch information
connor-mccarthy authored Dec 29, 2022
1 parent 01e097c commit 84241d6
Show file tree
Hide file tree
Showing 6 changed files with 402 additions and 8 deletions.
1 change: 1 addition & 0 deletions sdk/RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
## Bug fixes and other changes
* Fully support optional parameter inputs by witing `isOptional` field to IR [\#8612](https://github.com/kubeflow/pipelines/pull/8612)
* Add support for optional artifact inputs (toward feature parity with KFP SDK v1) [\#8623](https://github.com/kubeflow/pipelines/pull/8623)
* Fix bug deserializing v1 component YAML with boolean defaults, struct defaults, and array defaults [\#8639](https://github.com/kubeflow/pipelines/pull/8639)

## Documentation updates
# 2.0.0-beta.9
Expand Down
8 changes: 2 additions & 6 deletions sdk/python/kfp/compiler/pipeline_spec_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,15 +539,11 @@ def _fill_in_component_input_default_value(
parameter_type = component_spec.input_definitions.parameters[
input_name].parameter_type
if pipeline_spec_pb2.ParameterType.NUMBER_INTEGER == parameter_type:
# cast to int to support v1 component YAML where NUMBER_INTEGER defaults are included as strings
# for example, input Limit: https://raw.githubusercontent.com/kubeflow/pipelines/60a2612541ec08c6a85c237d2ec7525b12543a43/components/datasets/Chicago_Taxi_Trips/component.yaml
component_spec.input_definitions.parameters[
input_name].default_value.number_value = int(default_value)
# cast to int to support v1 component YAML where NUMBER_DOUBLE defaults are included as strings
# for example, input learning_rate: https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml
input_name].default_value.number_value = default_value
elif pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE == parameter_type:
component_spec.input_definitions.parameters[
input_name].default_value.number_value = float(default_value)
input_name].default_value.number_value = default_value
elif pipeline_spec_pb2.ParameterType.STRING == parameter_type:
component_spec.input_definitions.parameters[
input_name].default_value.string_value = default_value
Expand Down
3 changes: 2 additions & 1 deletion sdk/python/kfp/components/structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,8 @@ def from_v1_component_spec(
type_ = spec.get('type')
optional = spec.get('optional', False) or 'default' in spec
default = spec.get('default')
default = type_utils.deserialize_v1_component_yaml_default(
type_=type_, default=default)

if isinstance(type_, str) and type_ == 'PipelineTaskFinalStatus':
inputs[utils.sanitize_input_name(spec['name'])] = InputSpec(
Expand All @@ -655,7 +657,6 @@ def from_v1_component_spec(

elif isinstance(type_, str) and type_.lower(
) in type_utils._PARAMETER_TYPES_MAPPING:
default = spec.get('default')
type_enum = type_utils._PARAMETER_TYPES_MAPPING[type_.lower()]
ir_parameter_type_name = pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name(
type_enum)
Expand Down
182 changes: 182 additions & 0 deletions sdk/python/kfp/components/structures_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -921,5 +921,187 @@ def test_to_proto(self):
self.assertEqual(retry_policy_proto.backoff_max_duration.seconds, 3600)


class TestDeserializeV1ComponentYamlDefaults(unittest.TestCase):

def test_True(self):
comp_text = textwrap.dedent("""\
inputs:
- { name: val, type: Boolean, default: "True" }
implementation:
container:
image: alpine
command:
- sh
- -c
- |
echo $0
- { inputValue: val }
""")
comp = components.load_component_from_text(comp_text)
self.assertEqual(comp.component_spec.inputs['val'].default, True)
self.assertEqual(
comp.pipeline_spec.root.input_definitions.parameters['val']
.default_value.bool_value, True)

def test_true(self):
comp_text = textwrap.dedent("""\
inputs:
- { name: val, type: Boolean, default: "true" }
implementation:
container:
image: alpine
command:
- sh
- -c
- |
echo $0
- { inputValue: val }
""")
comp = components.load_component_from_text(comp_text)
self.assertEqual(comp.component_spec.inputs['val'].default, True)
self.assertEqual(
comp.pipeline_spec.root.input_definitions.parameters['val']
.default_value.bool_value, True)

def test_false(self):
comp_text = textwrap.dedent("""\
inputs:
- { name: val, type: Boolean, default: "false" }
implementation:
container:
image: alpine
command:
- sh
- -c
- |
echo $0
- { inputValue: val }
""")
comp = components.load_component_from_text(comp_text)
self.assertEqual(comp.component_spec.inputs['val'].default, False)
self.assertEqual(
comp.pipeline_spec.root.input_definitions.parameters['val']
.default_value.bool_value, False)

def test_False(self):
comp_text = textwrap.dedent("""\
inputs:
- { name: val, type: Boolean, default: "False" }
implementation:
container:
image: alpine
command:
- sh
- -c
- |
echo $0
- { inputValue: val }
""")
comp = components.load_component_from_text(comp_text)
self.assertEqual(comp.component_spec.inputs['val'].default, False)
self.assertEqual(
comp.pipeline_spec.root.input_definitions.parameters['val']
.default_value.bool_value, False)

def test_int(self):
comp_text = textwrap.dedent("""\
inputs:
- { name: val, type: Integer, default: "1" }
implementation:
container:
image: alpine
command:
- sh
- -c
- |
echo $0
- { inputValue: val }
""")
comp = components.load_component_from_text(comp_text)
self.assertEqual(comp.component_spec.inputs['val'].default, 1)
self.assertEqual(
comp.pipeline_spec.root.input_definitions.parameters['val']
.default_value.number_value, 1.0)

def test_float(self):
comp_text = textwrap.dedent("""\
inputs:
- { name: val, type: Float, default: "1.0" }
implementation:
container:
image: alpine
command:
- sh
- -c
- |
echo $0
- { inputValue: val }
""")
comp = components.load_component_from_text(comp_text)
self.assertEqual(comp.component_spec.inputs['val'].default, 1.0)
self.assertEqual(
comp.pipeline_spec.root.input_definitions.parameters['val']
.default_value.number_value, 1.0)

def test_struct(self):
comp_text = textwrap.dedent("""\
inputs:
- { name: val, type: JsonObject, default: '{"a": 1.0}' }
implementation:
container:
image: alpine
command:
- sh
- -c
- |
echo $0
- { inputValue: val }
""")
comp = components.load_component_from_text(comp_text)
self.assertEqual(comp.component_spec.inputs['val'].default, {'a': 1.0})
self.assertEqual(
dict(comp.pipeline_spec.root.input_definitions.parameters['val']
.default_value.struct_value), {'a': 1.0})

def test_array(self):
comp_text = textwrap.dedent("""\
inputs:
- { name: val, type: JsonObject, default: '["a", 1.0]' }
implementation:
container:
image: alpine
command:
- sh
- -c
- |
echo $0
- { inputValue: val }
""")
comp = components.load_component_from_text(comp_text)
self.assertEqual(comp.component_spec.inputs['val'].default, ['a', 1.0])
self.assertEqual(
list(comp.pipeline_spec.root.input_definitions.parameters['val']
.default_value.list_value), ['a', 1.0])

def test_artifact_with_dict_type_passes_through(self):
comp_text = textwrap.dedent("""\
inputs:
- { name: val, type: {Key: Val}}
implementation:
container:
image: alpine
command:
- sh
- -c
- |
echo $0
- { inputPath: val }
""")
comp = components.load_component_from_text(comp_text)
self.assertFalse(comp.component_spec.inputs['val'].optional)
self.assertFalse(comp.pipeline_spec.root.input_definitions
.artifacts['val'].is_optional)


if __name__ == '__main__':
unittest.main()
50 changes: 49 additions & 1 deletion sdk/python/kfp/components/types/type_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
# limitations under the License.
"""Utilities for component I/O type mapping."""

from distutils import util
import inspect
from typing import Any, Optional, Type, Union
import json
from typing import Any, Callable, Dict, Optional, Type, Union
import warnings

import kfp
Expand Down Expand Up @@ -60,6 +62,52 @@
'jsonarray': pipeline_spec_pb2.ParameterType.LIST,
}


def bool_cast_fn(default: Union[str, bool]) -> bool:
if isinstance(default, str):
default = util.strtobool(default) == 1
return default


def try_loading_json(default: str) -> Union[dict, list, str]:
try:
return json.loads(default)
except:
return default


_V1_DEFAULT_DESERIALIZER_MAPPING: Dict[str, Callable] = {
'integer': int,
'int': int,
'double': float,
'float': float,
'string': str,
'str': str,
'text': str,
'bool': bool_cast_fn,
'boolean': bool_cast_fn,
'dict': try_loading_json,
'list': try_loading_json,
'jsonobject': try_loading_json,
'jsonarray': try_loading_json,
}


def deserialize_v1_component_yaml_default(type_: str, default: Any) -> Any:
"""Deserializes v1 default values to correct in-memory types.
Typecasts for primitive types. Tries to load JSON for arrays and
structs.
"""
if default is None:
return default
if isinstance(type_, str):
cast_fn = _V1_DEFAULT_DESERIALIZER_MAPPING.get(type_.lower(),
lambda x: x)
return cast_fn(default)
return default


# Mapping primitive types to their IR message field names.
# This is used in constructing condition strings.
_PARAMETER_TYPES_VALUE_REFERENCE_MAPPING = {
Expand Down
Loading

0 comments on commit 84241d6

Please sign in to comment.