Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(sdk): enable dependency-free runtime install of kfp #9886

Merged
merged 14 commits into from
Aug 24, 2023
Merged
12 changes: 1 addition & 11 deletions sdk/RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,8 @@
# Current Version (in development)

## Features

## Breaking changes

## Deprecations

## Bug fixes and other changes

## Documentation updates
# 2.1.2

## Features
* Create "dependency-free" runtime package (only `typing_extensions` required) for Lightweight Python Components to reduce runtime dependency resolution errors [\#9710](https://github.com/kubeflow/pipelines/pull/9710), [\#9738](https://github.com/kubeflow/pipelines/pull/9738)
* Create "dependency-free" runtime package (only `typing_extensions` required) for Lightweight Python Components to reduce runtime dependency resolution errors [\#9710](https://github.com/kubeflow/pipelines/pull/9710), [\#9886](https://github.com/kubeflow/pipelines/pull/9886)

## Breaking changes

Expand Down
12 changes: 9 additions & 3 deletions sdk/python/kfp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@

TYPE_CHECK = True

from kfp import components
from kfp import dsl
from kfp.client import Client
import os

# compile-time only dependencies
if os.environ.get('_KFP_RUNTIME', 'false') != 'true':
# make `from kfp import components` and `from kfp import dsl` valid;
# related to namespace packaging issue
from kfp import components # noqa: keep unused import
from kfp import dsl # noqa: keep unused import
from kfp.client import Client # noqa: keep unused import
2 changes: 0 additions & 2 deletions sdk/python/kfp/cli/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@

_DOCKERFILE = 'Dockerfile'

# TODO: merge kfp_package_path into runtime-requirements.txt, once we have
# kfp_runtime package that is dependency-free.
_DOCKERFILE_TEMPLATE = '''
FROM {base_image}

Expand Down
86 changes: 46 additions & 40 deletions sdk/python/kfp/dsl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# runtime dependencies
__all__ = [
'component',
'container_component',
'pipeline',
'importer',
'ContainerSpec',
'Condition',
'ExitHandler',
'ParallelFor',
'Collected',
'Input',
'Output',
'InputPath',
'OutputPath',
'IfPresentPlaceholder',
'ConcatPlaceholder',
'PipelineTaskFinalStatus',
'PIPELINE_JOB_NAME_PLACEHOLDER',
'PIPELINE_JOB_RESOURCE_NAME_PLACEHOLDER',
'PIPELINE_JOB_ID_PLACEHOLDER',
'PIPELINE_TASK_NAME_PLACEHOLDER',
'PIPELINE_TASK_ID_PLACEHOLDER',
'PIPELINE_ROOT_PLACEHOLDER',
'PIPELINE_JOB_CREATE_TIME_UTC_PLACEHOLDER',
'PIPELINE_JOB_SCHEDULE_TIME_UTC_PLACEHOLDER',
'Artifact',
'ClassificationMetrics',
'Dataset',
Expand All @@ -47,29 +29,18 @@
'Metrics',
'Model',
'SlicedClassificationMetrics',
'PipelineTask',
'PIPELINE_JOB_NAME_PLACEHOLDER',
'PIPELINE_JOB_RESOURCE_NAME_PLACEHOLDER',
'PIPELINE_JOB_ID_PLACEHOLDER',
'PIPELINE_TASK_NAME_PLACEHOLDER',
'PIPELINE_TASK_ID_PLACEHOLDER',
'PIPELINE_ROOT_PLACEHOLDER',
'PIPELINE_JOB_CREATE_TIME_UTC_PLACEHOLDER',
'PIPELINE_JOB_SCHEDULE_TIME_UTC_PLACEHOLDER',
]
import os

try:
from typing import Annotated
except ImportError:
from typing_extensions import Annotated

from typing import TypeVar

from kfp.dsl.component_decorator import component
from kfp.dsl.container_component_decorator import container_component
from kfp.dsl.for_loop import Collected
from kfp.dsl.importer_node import importer
from kfp.dsl.pipeline_context import pipeline
from kfp.dsl.pipeline_task import PipelineTask
from kfp.dsl.placeholders import ConcatPlaceholder
from kfp.dsl.placeholders import IfPresentPlaceholder
from kfp.dsl.structures import ContainerSpec
from kfp.dsl.task_final_status import PipelineTaskFinalStatus
from kfp.dsl.tasks_group import Condition
from kfp.dsl.tasks_group import ExitHandler
from kfp.dsl.tasks_group import ParallelFor
from kfp.dsl.types.artifact_types import Artifact
from kfp.dsl.types.artifact_types import ClassificationMetrics
from kfp.dsl.types.artifact_types import Dataset
Expand All @@ -83,8 +54,14 @@
from kfp.dsl.types.type_annotations import OutputAnnotation
from kfp.dsl.types.type_annotations import OutputPath

# hack: constants and custom type generics have to be defined here to be captured by autodoc and autodocsumm used in ./docs/conf.py
try:
from typing import Annotated
except ImportError:
from typing_extensions import Annotated

from typing import TypeVar

# hack: constants and custom type generics have to be defined here to be captured by autodoc and autodocsumm used in ./docs/conf.py
PIPELINE_JOB_NAME_PLACEHOLDER = '{{$.pipeline_job_name}}'
"""A placeholder used to obtain a pipeline job name within a task at pipeline runtime.

Expand Down Expand Up @@ -247,3 +224,32 @@ def my_pipeline():
producer_task = artifact_producer()
artifact_consumer(model=producer_task.output)
"""

# compile-time only dependencies
if os.environ.get('_KFP_RUNTIME', 'false') != 'true':
from kfp.dsl.component_decorator import component
from kfp.dsl.container_component_decorator import container_component
from kfp.dsl.for_loop import Collected
from kfp.dsl.importer_node import importer
from kfp.dsl.pipeline_context import pipeline
from kfp.dsl.pipeline_task import PipelineTask
from kfp.dsl.placeholders import ConcatPlaceholder
from kfp.dsl.placeholders import IfPresentPlaceholder
from kfp.dsl.structures import ContainerSpec
from kfp.dsl.tasks_group import Condition
from kfp.dsl.tasks_group import ExitHandler
from kfp.dsl.tasks_group import ParallelFor
__all__.extend([
'component',
'container_component',
'pipeline',
'importer',
'ContainerSpec',
'Condition',
'ExitHandler',
'ParallelFor',
'Collected',
'IfPresentPlaceholder',
'ConcatPlaceholder',
'PipelineTask',
])
64 changes: 42 additions & 22 deletions sdk/python/kfp/dsl/component_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import warnings

import docstring_parser
import kfp
from kfp.dsl import container_component_artifact_channel
from kfp.dsl import container_component_class
from kfp.dsl import graph_component
Expand Down Expand Up @@ -109,24 +110,43 @@ def make_index_url_options(pip_index_urls: Optional[List[str]]) -> str:


def _get_packages_to_install_command(
    kfp_package_path: Optional[str] = None,
    pip_index_urls: Optional[List[str]] = None,
    packages_to_install: Optional[List[str]] = None,
    install_kfp_package: bool = True,
    target_image: Optional[str] = None,
) -> List[str]:
    """Builds the shell command that pip-installs a component's packages.

    KFP itself is added to the install list only when all of the following
    hold: ``install_kfp_package`` is true, ``target_image`` is None, and the
    user has not already listed a requirement for the ``kfp`` distribution.

    Args:
        kfp_package_path: Requirement to install for KFP instead of the
            default injected arguments. Only used when KFP is injected.
        pip_index_urls: Index URLs forwarded to ``make_index_url_options``.
        packages_to_install: User-specified requirements. The list is read
            but never modified.
        install_kfp_package: Set to False to never inject KFP.
        target_image: When set, KFP is not injected.

    Returns:
        ``['sh', '-c', <install script>]``, or an empty list when there is
        nothing to install.
    """
    # Work on a copy: the injected KFP requirement must not leak into the
    # list object owned by the caller.
    requirements = list(packages_to_install or [])

    user_lists_kfp = any(
        _is_kfp_requirement(requirement) for requirement in requirements)
    inject_kfp_install = (
        install_kfp_package and target_image is None and not user_lists_kfp)

    if inject_kfp_install:
        if kfp_package_path:
            requirements.append(kfp_package_path)
        else:
            # NOTE(review): the default arguments include `--no-deps`, which
            # pip documents as an option of the whole `pip install` command;
            # presumably it then also applies to the user's packages in the
            # same command line. Confirm this is intended.
            requirements.extend(_get_injected_kfp_imports())

    if not requirements:
        return []

    concat_package_list = ' '.join(
        repr(str(requirement)) for requirement in requirements)
    index_url_options = make_index_url_options(pip_index_urls)
    install_python_packages_script = _install_python_packages_script_template.format(
        index_url_options=index_url_options,
        concat_package_list=concat_package_list)
    return ['sh', '-c', install_python_packages_script]


def _is_kfp_requirement(requirement: str) -> bool:
    """Returns True if a requirement string names the `kfp` distribution.

    A bare prefix check would also match sibling distributions such as
    `kfp-server-api` or `kfp-pipeline-spec`, so the character following
    `kfp` must not be one that can continue a distribution name.
    """
    if not requirement.startswith('kfp'):
        return False
    remainder = requirement[len('kfp'):]
    if not remainder:
        return True
    next_char = remainder[0]
    return not (next_char.isalnum() or next_char in '-_.')


def _get_default_kfp_package_path() -> str:
import kfp
return f'kfp=={kfp.__version__}'
def _get_injected_kfp_imports() -> List[str]:
    """Returns the default pip arguments used to install KFP for a component.

    The list holds the `kfp` requirement pinned to the version of the
    imported `kfp` module, the `--no-deps` pip option, and a
    `typing-extensions` requirement whose environment marker limits it to
    Python versions below 3.9.
    """
    pinned_kfp = f'kfp=={kfp.__version__}'
    typing_extensions_requirement = (
        'typing-extensions>=3.7.4,<5; python_version<"3.9"')
    return [pinned_kfp, '--no-deps', typing_extensions_requirement]


def _get_function_source_definition(func: Callable) -> str:
Expand Down Expand Up @@ -420,8 +440,9 @@ def _get_command_and_args_for_lightweight_component(
'-ec',
textwrap.dedent('''\
program_path=$(mktemp -d)

printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.dsl.executor_main \
_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main \
--component_module_path \
"$program_path/ephemeral_component.py" \
"$@"
Expand Down Expand Up @@ -471,15 +492,14 @@ def create_component_from_func(
The decorator is defined under component_decorator.py. See the
decorator for the canonical documentation for this function.
"""
packages_to_install = packages_to_install or []

if install_kfp_package and target_image is None:
if kfp_package_path is None:
kfp_package_path = _get_default_kfp_package_path()
packages_to_install.append(kfp_package_path)

packages_to_install_command = _get_packages_to_install_command(
package_list=packages_to_install, pip_index_urls=pip_index_urls)
install_kfp_package=install_kfp_package,
target_image=target_image,
kfp_package_path=kfp_package_path,
packages_to_install=packages_to_install,
pip_index_urls=pip_index_urls,
)

command = []
args = []
Expand Down
103 changes: 92 additions & 11 deletions sdk/python/kfp/dsl/component_factory_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,31 +28,112 @@

class TestGetPackagesToInstallCommand(unittest.TestCase):

def test_with_no_packages_to_install(self):
def test_with_no_user_packages_to_install(self):
packages_to_install = []

command = component_factory._get_packages_to_install_command(
packages_to_install)
packages_to_install=packages_to_install)

self.assertEqual(command, [
'sh', '-c',
'\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n'
])

def test_with_no_user_packages_to_install_and_install_kfp_false(self):
packages_to_install = []

command = component_factory._get_packages_to_install_command(
packages_to_install=packages_to_install,
install_kfp_package=False,
)
self.assertEqual(command, [])

def test_with_no_user_packages_to_install_and_kfp_package_path(self):
packages_to_install = []

command = component_factory._get_packages_to_install_command(
packages_to_install=packages_to_install,
kfp_package_path='git+https://github.com/kubeflow/pipelines.git@master#subdirectory=sdk/python'
)

self.assertEqual(command, [
'sh', '-c',
'\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'git+https://github.com/kubeflow/pipelines.git@master#subdirectory=sdk/python\' && "$0" "$@"\n'
])

def test_with_no_user_packages_to_install_and_kfp_package_path_and_install_kfp_false(
self):
packages_to_install = []

command = component_factory._get_packages_to_install_command(
packages_to_install=packages_to_install,
kfp_package_path='git+https://github.com/kubeflow/pipelines.git@master#subdirectory=sdk/python',
install_kfp_package=False,
)
self.assertEqual(command, [])

def test_with_packages_to_install_and_no_pip_index_url(self):
def test_with_user_packages_to_install_and_kfp_package_path_and_install_kfp_false(
self):
packages_to_install = ['sklearn']

command = component_factory._get_packages_to_install_command(
packages_to_install=packages_to_install,
kfp_package_path='git+https://github.com/kubeflow/pipelines.git@master#subdirectory=sdk/python',
install_kfp_package=False,
)

self.assertEqual(command, [
'sh', '-c',
'\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'sklearn\' && "$0" "$@"\n'
])

def test_with_no_user_packages_to_install_and_kfp_package_path_and_target_image(
self):
packages_to_install = []

command = component_factory._get_packages_to_install_command(
packages_to_install=packages_to_install,
target_image='gcr.io/my-kfp-image',
kfp_package_path='./sdk/python')

self.assertEqual(command, [])

def test_with_no_user_packages_to_install_and_kfp_package_path_and_target_image_and_install_kfp_false(
self):
packages_to_install = []

command = component_factory._get_packages_to_install_command(
packages_to_install=packages_to_install,
target_image='gcr.io/my-kfp-image',
kfp_package_path='./sdk/python',
install_kfp_package=False)

self.assertEqual(command, [])

def test_with_user_packages_to_install_and_no_pip_index_url(self):
packages_to_install = ['package1', 'package2']

command = component_factory._get_packages_to_install_command(
packages_to_install)
concat_command = ' '.join(command)
for package in packages_to_install:
self.assertTrue(package in concat_command)
packages_to_install=packages_to_install)

self.assertEqual(command, [
'sh', '-c',
'\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'package1\' \'package2\' \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n'
])

def test_with_packages_to_install_with_pip_index_url(self):
packages_to_install = ['package1', 'package2']
pip_index_urls = ['https://myurl.org/simple']

command = component_factory._get_packages_to_install_command(
packages_to_install, pip_index_urls)
concat_command = ' '.join(command)
for package in packages_to_install + pip_index_urls:
self.assertTrue(package in concat_command)
packages_to_install=packages_to_install,
pip_index_urls=pip_index_urls,
)

self.assertEqual(command, [
'sh', '-c',
'\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location --index-url https://myurl.org/simple --trusted-host https://myurl.org/simple \'package1\' \'package2\' \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n'
])


class TestInvalidParameterName(unittest.TestCase):
Expand Down
Loading